In [1]:
import pandas as pd

In [2]:
files = {
    'diet_example': 'https://regepi.bwh.harvard.edu/health/Oxalate/files/Low%20Oxalate%20Diet%20Example.xls',
    'oxalate_content' : 'https://regepi.bwh.harvard.edu/health/Oxalate/files/Oxalate%20Content%20of%20Foods.xls',
    'table_of_foods' : 'https://regepi.bwh.harvard.edu/health/Oxalate/files/Oxalate%20Table%20of%20Foods.xls'
}

In [96]:
raw_data = pd.read_excel(files['oxalate_content'])

In [97]:
df = raw_data.copy()

In [98]:
df = df[['Food Group', 'Food Item', 'Serving size','OxalateCategory','Oxalate Value']]

In [99]:
#get rid of rows with null Food Item's
df = df[df['Food Item'].notnull()]

In [100]:
#forward fill Food Groups
df['Food Group'] = df['Food Group'].fillna(method='ffill')

In [101]:
#removes the "mg" suffix if applicable
def remove_mg(value):
    if value.endswith('mg'):
        return value[:-2]


In [102]:
#Apply removal of "mg" suffix
df['Oxalate Value'] = df['Oxalate Value'].apply(remove_mg)

#convert to numeric data type
df['Oxalate Value'] = df['Oxalate Value'].apply(pd.to_numeric)

In [103]:
#List of all Food Groups
food_groups = list(df['Food Group'].unique())
food_groups

['Whole Fruits',
 'Canned Fruits',
 'Dried Fruits',
 'Vegetables',
 'Potatoes',
 'Cream Products',
 'Ice Creams',
 'Yogurt Products',
 'Cheese Products',
 'Eggs',
 'Dairy Spreads',
 'Milk ',
 'Breads',
 'Pastas, Rice & Grains',
 'Meat & Meat Alternatives',
 'Fish',
 'Nuts and Seeds',
 'Cakes, Candies, Cookies & Pudding Snacks',
 'Crackers, Chips & Miscellaneous',
 'Beverages',
 'Dairy Beverages',
 'Alcoholic Beverages',
 'Spreads, Sauces & Toppings',
 'Ingredients',
 'Fast Food Items or Meals',
 'Soups',
 'Breakfast Items',
 "Kellogg's",
 'Post Cereals',
 'General Mills',
 'Quaker',
 'Other Cereal Brands']

In [104]:
food_items = list(df['Food Item'])

In [105]:
#Harvard Oxalate Value Categories

# Little or no oxalate = 0 - 1 mg
#
# Low Oxalate = 2 - 4 mg
#
# Moderate Oxalate = 5 - 9 mg
#
# High Oxalate = 10 - 12 mg
#
# Very High Oxalate = 12 + mg

In [106]:
def food_groups_by_mean_oxalate_value(min_mean, max_mean):
    for group in food_groups:
        df_group = df[df['Food Group'] == group]
        mean = df_group['Oxalate Value'].mean()
        if min_mean <= mean and mean <= max_mean:
            print(group)

In [107]:
#Food Groups with Very High Oxalates
food_groups_by_mean_oxalate_value(12, 999)

Vegetables
Potatoes
Pastas, Rice & Grains
Nuts and Seeds
Fast Food Items or Meals
Soups
Kellogg's
Post Cereals
Other Cereal Brands


In [108]:
#Food Groups with High Oxalates
food_groups_by_mean_oxalate_value(10, 12)

Dried Fruits
Ingredients
Breakfast Items
General Mills


In [109]:
#Food Groups with Moderate Oxalates
food_groups_by_mean_oxalate_value(5, 9)

Whole Fruits
Canned Fruits
Breads
Cakes, Candies, Cookies & Pudding Snacks
Beverages
Spreads, Sauces & Toppings
Quaker


In [110]:
#Food Groups with Low Oxalates
food_groups_by_mean_oxalate_value(2, 4)

Milk 
Meat & Meat Alternatives
Dairy Beverages


In [111]:
#Food Groups with Little or no Oxalate
food_groups_by_mean_oxalate_value(0, 1)

Cream Products
Ice Creams
Cheese Products
Eggs
Dairy Spreads
Fish


In [82]:
#First Quartile
q1 = df['Oxalate Value'].quantile(.25)

#Third Quartile
q3 = df['Oxalate Value'].quantile(.75)

#Interquartile Range
iqr = q3 - q1

upper_threshold = q3 + 1.5 * iqr
extreme_upper_threshold = q3 + 3 * iqr
print(upper_threshold, extreme_upper_threshold)

31.0 49.0


In [88]:
#outliers (but not extreme outliers)
mild_outliers = df[df['Oxalate Value'] >= upper_threshold]
mild_outliers = mild_outliers[mild_outliers['Oxalate Value'] < extreme_upper_threshold]
mild_outliers

Unnamed: 0,Food Group,Food Item,Serving size,OxalateCategory,Oxalate Value
7,Whole Fruits,Raspberries,1 cup,Very High,48
58,Vegetables,Bamboo Shoots,1 cup,Very High,35
68,Vegetables,Rutabaga,1/2 cup mashed,Very High,31
73,Vegetables,Yams,"1/2 cup, cubed",Very High,40
211,"Pastas, Rice & Grains",Miso,1 cup,Very High,40
286,Nuts and Seeds,Candies with Nuts (ex Snickers),2 oz,Very High,38
290,Nuts and Seeds,Mixed Nuts (with Peanuts),1 oz,Very High,39
293,Nuts and Seeds,Walnuts,1 cup or 7 nuts,Very High,31
300,"Cakes, Candies, Cookies & Pudding Snacks",Brownies,1 oz or 1/2 brownie,Very High,31
303,"Cakes, Candies, Cookies & Pudding Snacks",Candies with Nuts (ex Snickers),2 oz,Very High,38


In [85]:
#Extreme Outliers 
extreme_outliers = df[df['Oxalate Value'] >= extreme_upper_threshold]
extreme_outliers

Unnamed: 0,Food Group,Food Item,Serving size,OxalateCategory,Oxalate Value
59,Vegetables,Beets,1/2 cup,Very High,76
61,Vegetables,Navy Beans,1/2 cup,Very High,76
62,Vegetables,Okra,1/2 cup,Very High,57
67,Vegetables,Rhubarb,1/2 cup,Very High,541
69,Vegetables,"Spinach, cooked",1/2 cup,Very High,755
70,Vegetables,"Spinach, raw",1 cup,Very High,656
119,Potatoes,French Fries (homemade or fast food),4 oz or 1/2 cup,Very High,51
120,Potatoes,Baked Potato with Skin,1 medium,Very High,97
203,"Pastas, Rice & Grains",Brown Rice Flour,1 cup,Very High,65
204,"Pastas, Rice & Grains",Buckwheat Groats,1 cup cooked,Very High,133


In [95]:
very_high = df[df['OxalateCategory'] == "Very High"]

for group in food_groups:
    very_high_food_group = very_high[very_high['Food Group'] == group]
    print(very_high_food_group.count())
    #print(group)
    #print(very_high_food_group['Food Item'])
    print()

Food Group         6
Food Item          6
Serving size       6
OxalateCategory    6
Oxalate Value      6
dtype: int64

Food Group         1
Food Item          1
Serving size       1
OxalateCategory    1
Oxalate Value      1
dtype: int64

Food Group         2
Food Item          2
Serving size       2
OxalateCategory    2
Oxalate Value      2
dtype: int64

Food Group         16
Food Item          16
Serving size       16
OxalateCategory    16
Oxalate Value      16
dtype: int64

Food Group         6
Food Item          6
Serving size       6
OxalateCategory    6
Oxalate Value      6
dtype: int64

Food Group         0
Food Item          0
Serving size       0
OxalateCategory    0
Oxalate Value      0
dtype: int64

Food Group         0
Food Item          0
Serving size       0
OxalateCategory    0
Oxalate Value      0
dtype: int64

Food Group         0
Food Item          0
Serving size       0
OxalateCategory    0
Oxalate Value      0
dtype: int64

Food Group         0
Food Item          0
S