In [2]:
import numpy as np
import scipy as sp
import random
from collections import Counter
import copy
import sigfig

# Problem 1

With Object Oriented Programming in mind, create the following simulation environment in python:

A 100x100 grid world with 10 randomly placed pieces of food. Note that food cannot share the same coordinate.
An ant class that randomly moves up, down, left, or right at each time step. If an ant tries to step beyond the bounds of the box, it does nothing. Note that food is consumed if an ant shares the same coordinate as the food. Ants can share the same coordinate. Ants' starting locations should be random.
Run 1000 simulations with 100 ants, each for 100 time steps. Report the mean and standard deviation of the total food eaten. Feel free to use any Python packages as needed.

In [17]:
#clamp a coordinate so that a move can never leave the allowed range
def in_lim(x,x_min,x_max):
    """Return x limited to the interval [x_min, x_max].

    Args:
        x: the value to limit.
        x_min: lower bound; returned when x is below it.
        x_max: upper bound; returned when x is above it.

    Returns:
        x_min if x < x_min, otherwise x_max if x > x_max, otherwise x itself.
    """
    #the lower bound is tested first, exactly one branch is taken
    return x_min if x<x_min else (x_max if x>x_max else x)


class Ant:
    """A single ant that performs a random walk on a grid.

    Attributes:
        position: (x, y) tuple holding the ant's current coordinates.
    """

    def __init__(self,grid):
        """Place the ant on a uniformly random cell of `grid`.

        Args:
            grid: object exposing `x_range` and `y_range`; valid coordinates
                run from 0 to range-1 on each axis.
        """
        #x is drawn before y
        start_x=random.randint(0,grid.x_range-1)
        start_y=random.randint(0,grid.y_range-1)
        self.position=(start_x,start_y)

    def move(self,grid):
        """Take one random unit step left, right, up or down.

        Args:
            grid: object exposing `x_range` and `y_range`, used as the bounds.
        """
        step_x,step_y=random.choice(((-1,0),(1,0),(0,1),(0,-1)))
        current_x,current_y=self.position
        #in_lim pins an out-of-bounds coordinate back to the edge, so a step
        #off the grid leaves the ant where it was
        new_x=in_lim(current_x+step_x,0,grid.x_range-1)
        new_y=in_lim(current_y+step_y,0,grid.y_range-1)
        self.position=(new_x,new_y)
        

        
class Grid:
    """Rectangular grid world that tracks which cells still hold food.

    Attributes:
        x_range: number of cells along x (valid x values are 0..x_range-1).
        y_range: number of cells along y (valid y values are 0..y_range-1).
        locations: list of every (x, y) coordinate, with x as the outer loop.
        has_food: set of (x, y) coordinates that currently hold food.
    """

    def __init__(self,x_range,y_range):
        """Create an empty grid of x_range by y_range cells."""
        self.x_range=x_range
        self.y_range=y_range
        self.locations=[(x,y) for x in range(self.x_range) for y in range(self.y_range)]
        #must be a real set: the literal {} is an empty dict, and `dict -= set`
        #in ants_eat would raise TypeError if no food had been dropped yet
        self.has_food=set()

    def drop_food(self,pieces_of_food):
        """Replace the current food with `pieces_of_food` pieces on distinct cells.

        Args:
            pieces_of_food: how many cells receive food.

        Raises:
            ValueError: from random.sample when pieces_of_food is negative or
                larger than the number of cells.
        """
        #sampling without replacement guarantees no two pieces share a cell
        self.has_food=set(random.sample(self.locations, k=pieces_of_food))

    def ants_eat(self,colony):
        """Remove food from every cell currently occupied by an ant.

        Args:
            colony: iterable of objects exposing a `position` (x, y) tuple.
        """
        self.has_food-={ant.position for ant in colony}

        
        
class Colony:
    """A group of ants that all live on the same grid.

    Attributes:
        grid: the grid shared by every ant in the colony.
        colony_size: number of ants created for the colony.
        members: list holding the colony's Ant objects.
    """

    def __init__(self,grid,colony_size):
        """Create `colony_size` ants, each constructed with `grid`."""
        self.grid=grid
        self.colony_size=colony_size
        #fill the colony with ants
        self.members=[Ant(self.grid) for _ in range(self.colony_size)]

    def move(self):
        """Ask every ant in the colony to take one step on the shared grid."""
        #plain loop: the calls are made for their side effect only, so there
        #is no reason to build (and discard) a list of their return values
        for ant in self.members:
            ant.move(grid=self.grid)

        
        
class Simulation:
    """One run of the ant/food world: a grid with food plus a colony of ants.

    Attributes:
        starting_food: number of food pieces dropped at the start.
        food_eaten: pieces consumed so far; updated at the end of run().
        grid: the Grid holding the remaining food.
        colony: the Colony of ants walking on the grid.
    """

    def __init__(self,x_range,y_range,pieces_of_food,colony_size):
        """Build the grid, drop the food and create the colony.

        Args:
            x_range: grid width in cells.
            y_range: grid height in cells.
            pieces_of_food: number of food pieces to place.
            colony_size: number of ants to create.
        """
        #at the initial state, no food has been eaten
        self.starting_food=pieces_of_food
        self.food_eaten=0
        #create a grid and drop food on it
        self.grid=Grid(x_range=x_range,y_range=y_range)
        self.grid.drop_food(pieces_of_food=pieces_of_food)
        #create an ant colony
        self.colony=Colony(self.grid,colony_size=colony_size)

    def run(self,timesteps):
        """Advance the simulation by `timesteps` steps and record the food eaten.

        Ants eat on their starting cells and again after every move, so the
        cell reached by the final move is checked too. (Eating before moving
        inside the loop would skip that last cell.)

        Args:
            timesteps: number of moves every ant makes.
        """
        #ants eat the food on the spots of the grid they start on
        self.grid.ants_eat(colony=self.colony.members)
        for _ in range(timesteps):
            #ants move randomly, then eat whatever is on the cell they reached
            self.colony.move()
            self.grid.ants_eat(colony=self.colony.members)
        self.food_eaten=self.starting_food-len(self.grid.has_food)
        

In [18]:
%%time
simulation_stats=[]
simulated_food_eaten=[]
for rep in range(5):
    for i in range(1000):
        simulation=Simulation(x_range=100,y_range=100,pieces_of_food=10,colony_size=100)
        simulation.run(timesteps=100)
        simulated_food_eaten+=[simulation.food_eaten]
    
    simulation_stats+=[{'mean': round(np.array(simulated_food_eaten).mean(),2),\
        'std': round(np.array(simulated_food_eaten).std(),2)}]

simulation_stats

CPU times: user 2min 25s, sys: 1.59 s, total: 2min 27s
Wall time: 2min 34s


[{'mean': 3.8, 'std': 1.5},
 {'mean': 3.8, 'std': 1.54},
 {'mean': 3.81, 'std': 1.53},
 {'mean': 3.8, 'std': 1.52},
 {'mean': 3.81, 'std': 1.54}]

# Problem 2

Using Python and the pandas package, write a Jupyter notebook to process the following cereal data and, based on the US Government's Dietary Guidelines Executive Summary, recommend 5 cereals that meet those guidelines.

 

Plot at least 3 data visualizations to help communicate your recommendations. Feel free to use common Python packages such as numpy as needed.

 

Briefly explain your thought process for processing the data and determining the recommended cereals in the comments.

Cereal Data: https://www.kaggle.com/crawford/80-cereals
Dietary Guidelines: https://www.dietaryguidelines.gov/resources/2020-2025-dietary-guidelines-online-materials/top-10-things-you-need-know-about-dietary

In [5]:
#import bokeh function
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.layouts import row, column
from bokeh.io import export_png
from bokeh.models import DatetimeTickFormatter, TickFormatter, ColumnDataSource, Legend, Range1d, FactorRange
from bokeh.transform import dodge
from bokeh.core.properties import Dict, Int, String

#configure bokeh so that show() renders figures inline in the notebook output
output_notebook()


The guidelines say: "Limiting added sugars* to less than 10% of calories per day for ages 2 and older and to avoid added sugars for infants and toddlers;
Limiting saturated fat to less than 10% of calories per day starting at age 2;
Limiting sodium intake to less than 2,300mg per day (or even less if younger than 14);"

In [6]:
import pandas as pd

#STEP 1. Import the data
#keep only the cereal name and the nutrient columns used for the recommendation
cereal_columns=['name','protein','fat','sodium','sugars','potass','vitamins']
cereal_raw=pd.read_csv('cereal.csv')[cereal_columns]

#negative numeric entries are treated as missing values: blank them with a
#vectorized per-column mask (instead of a Python lambda run on every cell via
#the deprecated DataFrame.applymap), then drop every cereal with a missing value
cereal_masked=cereal_raw.copy()
for col in cereal_raw.select_dtypes(include='number').columns:
    cereal_masked[col]=cereal_raw[col].where(cereal_raw[col]>=0)
cereal_details=cereal_masked.dropna()

In [7]:
#STEP 2. assign recommendation to the five with the lowest sugar and sodium
N_RECOMMENDED=5

#normalize the nutrients so that they can be on the same plot:
#every nutrient is divided by its standard deviation rounded to 1 significant figure
stds=cereal_details.describe().loc['std',:].apply(lambda x: sigfig.round(x, sigfigs=1)).to_dict()
normalized_cereal_details=cereal_details.copy()
#columns[1:] skips the first column ('name'), which is not numeric
for col in cereal_details.columns[1:]:
    normalized_cereal_details[col]=cereal_details[col]/stds[col]

#lowest sugar first, ties broken by sodium and then fat
sorted_cereals=normalized_cereal_details\
    .sort_values(by=['sugars','sodium','fat']).reset_index(drop=True)
#flag exactly the first N_RECOMMENDED rows; note that .loc[:5] is label-based
#and includes the end label, so it would flag six rows (0 through 5)
sorted_cereals['recommended']=sorted_cereals.index<N_RECOMMENDED
sorted_cereals['norm_sodium']=sorted_cereals['sodium'].apply(round)

In [8]:
#STEP 3. visualize composition of cereals
df=sorted_cereals.sort_values(by=['sugars','sodium'], ascending=False)
#wrap the names of the recommended cereals in ** so they stand out on the axis
df['recommended_name']=[
    '**'+cereal_name+'**' if is_recommended else cereal_name
    for cereal_name, is_recommended in zip(df['name'], df['recommended'])
]

#nutrients to limit are negated so their bars extend to the left of zero,
#nutrients to include keep their sign and extend to the right
negated=('sodium','sugars','fat')
data={'recommended_name': df['recommended_name']}
data.update({
    column: (-1*df[column] if column in negated else df[column])
    for column in ('protein','sodium','sugars','vitamins','potass','fat')
})

source=ColumnDataSource(data=data)
#bar thickness and the spacing between the bars of one cereal
width=0.1
delta=0.12

nutrient_colors={
    'potass':'gold',
    'vitamins':'salmon',
    'protein':'crimson',
    'fat':'lightgreen',
    'sodium':'limegreen',
    'sugars':'deepskyblue',
}
#slot of each nutrient's bar within a cereal's row (multiplied by delta below)
nutrient_position={
    'fat':0,
    'sodium':-1,
    'sugars':-2,
    'protein':1,
    'vitamins':2,
    'potass':3,
}
nutrient_labels={
    'protein':'protein (g)',
    'potass':'potassium (70mg)',
    'vitamins':'vitamins (20%)',
    'fat':'fat (g)',
    'sugars':'sugars (x4g)',
    'sodium':'sodium (80mg)',
}

p=figure(width=800, height=2000, x_range=(-4.1,4.5),
         y_range=df['recommended_name'], toolbar_location=None)

#one horizontal bar per nutrient, shifted within the cereal's row
for nutrient, slot in nutrient_position.items():
    p.hbar(y=dodge('recommended_name', slot*delta, range=p.y_range),
           right=nutrient, height=width,
           color=nutrient_colors[nutrient],
           legend_label=nutrient_labels[nutrient],
           source=source)

p.xaxis.ticker=list(range(-7,7))
#blank out the tick labels from -6 to 6; the grid lines stay
p.xaxis.major_label_overrides={tick:'' for tick in range(-6,7)}
p.legend.location='top_left'
p.xaxis.axis_label='bad substances to limit --- good nutrients to include'
p.title='nutrient composition of each cereal'
show(p)

**Recommended Cereals:** Shredded Wheat spoon size, Shredded Wheat'n'Bran, Shredded Wheat, Puffed Wheat, and Puffed Rice. These cereals have no sugar, sodium, or fat (the things the guidelines recommend limiting), but do have some protein and potassium, which are good nutrients.

The above plot shows the nutrient composition of each cereal, sorted by sugar and sodium levels. Grid lines indicate multiples of the amount given in the legend (70mg of potassium, 140mg of potassium, etc.).

In [9]:
def rank_values(df,col):
    """Round one column and number the rows that share each rounded value.

    Works on a copy, so the caller's frame (and its un-rounded values) is
    left untouched.

    Args:
        df: DataFrame containing the column `col`.
        col: name of the numeric column to round and rank.

    Returns:
        A copy of `df` in which `col` is rounded to 0 decimals and a new
        column `col + '_ranked'` counts down, per rounded value, from the
        number of rows sharing that value to 1 in row order.
    """
    ranked_df=df.copy()
    ranked_df[col]=ranked_df[col].round(0)
    #how many rows with each value have not been numbered yet
    remaining=Counter(ranked_df[col])
    ranked=[]
    for value in ranked_df[col]:
        ranked.append(remaining[value])
        remaining[value]-=1
    ranked_df[col+'_ranked']=ranked
    return ranked_df

In [41]:
#fallback highlight color for recommended cereals, and the gray for all others
colors={True: 'hotpink', False: 'lightgray'}
#x-axis label for each nutrient
labels={
    'fat':'fat (g)',
    'sodium':'sodium (mg)',
    'sugars':'sugars (g)',
    'protein':'protein (g)',
    'potass':'potassium (mg)',
}

def plot_dist_nutrients(sorted_cereals,nutrient):
    """Draw a dot plot of how many cereals share each level of one nutrient.

    Args:
        sorted_cereals: DataFrame with a 'recommended' column and a column
            named `nutrient`; it is passed to rank_values.
        nutrient: name of the nutrient column to plot.

    Returns:
        (figure, frame): the bokeh figure and the frame returned by rank_values.
    """
    plot=figure(height=300, width=200, x_range=(-1,7), y_range=(0,37), toolbar_location=None)
    ranked=rank_values(sorted_cereals,nutrient)

    #recommended cereals get the nutrient's own color (hotpink only if the
    #nutrient has no entry in nutrient_colors), every other cereal is gray
    highlight=nutrient_colors.get(nutrient,colors.get(True))
    muted=colors.get(False)
    point_colors=ranked['recommended'].apply(
        lambda is_recommended: highlight if is_recommended else muted)

    plot.circle(x=ranked[nutrient], y=ranked[nutrient+'_ranked'],
                color=point_colors, alpha=.7, size=8)

    plot.xaxis.axis_label=labels.get(nutrient)
    plot.xaxis.ticker=list(range(7))
    plot.yaxis.ticker=list(range(5,50,5))
    plot.yaxis.axis_label='number of cereals'
    plot.title='# cereals x '+nutrient
    return plot, ranked

In [42]:
#one distribution plot (and its ranked frame) per nutrient, keyed by nutrient name
ps, dfs = {}, {}

#tick labels in real units for the nutrients whose axis values were rescaled;
#nutrients without an entry keep their default tick labels
label_overrides={
    'sodium':{0:'0', 1:'', 2:'160', 3:'', 4:'320', 5:'', 6:'480'},
    'sugars':{0:'0', 1:'', 2:'8', 3:'', 4:'16', 5:'', 6:'24'},
    'potass':{0:'0', 1:'', 2:'140', 3:'', 4:'280', 5:'', 6:'420'},
}
for nutrient in nutrient_position:
    #vitamins is the only nutrient that is not plotted here
    if nutrient=='vitamins':
        continue
    ps[nutrient], dfs[nutrient] = plot_dist_nutrients(sorted_cereals, nutrient)
    ps[nutrient].xaxis.major_label_overrides=label_overrides.get(nutrient,{})

The recommended cereals don't have any fat.  In each plot the recommended cereals are highlighted in color (all other cereals are light gray) within the distribution of that nutrient across cereals.

In [43]:
#display all per-nutrient distribution plots side by side in a single row
show(row(list(ps.values())))

The recommended cereals don't have any sugar. In contrast, the other cereals have 4-16g of sugar/serving.

The recommended cereals have very little sodium, much less than the other cereals.

The recommended cereals vary in protein, and can be observed over the entire distribution of protein composition.