In [27]:
import numpy as np
import scipy as sp
import random
from collections import Counter
import copy
import sigfig

# Problem 1

With Object Oriented Programming in mind, create the following simulation environment in python:

A 100x100 grid world with 10 randomly placed pieces of food. Note that food cannot share the same coordinate.
An ant class that randomly moves up, down, left, or right at each time step. If an ant tries to step beyond the bounds of the box, it does nothing. Note that food is consumed if an ant shares the same coordinate as the food. Ants can share the same coordinate. Ants starting locations should be random.
Run 1000 simulations with 100 ants, each for 100-time steps. Report the mean and standard deviation of the total food eaten. Feel free to use any Python packages as needed.

In [2]:
#prevent moves
def lim(x,x_min,x_max):
    return min(max(x,x_min),x_max)

class Ant:
    
    def __init__(self,grid):
        self.position=(random.randint(0,(grid.x_range-1)),random.randint(0,(grid.y_range-1)))
        
    def move(self,grid):
        movement=np.array(random.choice([[-1,0],[1,0],[0,1],[0,-1]]))
        position=np.array(self.position)+movement
        self.position=(lim(position[0],0,(grid.x_range-1)), lim(position[1],0,(grid.y_range-1)))
        

class Grid:
    
    def __init__(self,x_range,y_range):
        self.x_range=x_range
        self.y_range=y_range
        self.locations=[(x,y) for x in list(range(0,self.x_range)) for y in list(range(0,self.y_range))]
        self.has_food={location:False for location in self.locations}
        self.food_count=0
        
    def update_food_count(self):
        self.food_count=np.array(list(self.has_food.values())).sum()
        
    def drop_food(self,pieces_of_food):
        locations=random.sample(self.locations, k=pieces_of_food)
        for location in locations:
            self.has_food[location]=True
        self.update_food_count()
            
    def ants_eat(self,colony):
        for ant in colony:
            self.has_food[ant.position]=False
        self.update_food_count()
        
        
class Colony:
    
    def __init__(self,grid,size):
        self.grid=grid
        self.size=size
        self.members=[Ant(self.grid) for i in range(0,self.size)]
        
    def move(self):
        [ant.move(grid=self.grid) for ant in self.members]

        
        
class Simulation:
    
    def __init__(self,x_range,y_range,pieces_of_food,colony_size):
        self.starting_food=pieces_of_food
        self.food_eaten=0
        self.grid=Grid(x_range=x_range,y_range=y_range)
        self.grid.drop_food(pieces_of_food=pieces_of_food)
        self.colony=Colony(self.grid,size=colony_size)
        self.food_count_record=[pieces_of_food]
        self.colony_record=[x.position for x in self.colony.members]
        
    def update_food_eaten(self):
        self.food_eaten=self.starting_food-self.grid.food_count
        
    def run(self,timesteps):
        for i in range(timesteps):
            self.food_count_record+=[self.grid.food_count]
            self.grid.ants_eat(colony=self.colony.members)
            self.colony.move()
        self.update_food_eaten()
        

In [3]:
simulated_food_eaten=[]
simulation_stats=[]

for i in range(1000):
    simulation=Simulation(x_range=100,y_range=100,pieces_of_food=10,colony_size=100)
    simulation.run(timesteps=100)
    simulated_food_eaten+=[simulation.food_eaten]

simulation_stats+=[{'mean': round(np.array(simulated_food_eaten).mean(),2),\
    'std': round(np.array(simulated_food_eaten).std(),2)}]

In [4]:
simulation_stats

[{'mean': 3.85, 'std': 1.53}]

# Problem 2

Using Python and the pandas package write a Jupyter notebook to process the following cereal data, and, based on the US Governments Dietary Guidelines’ Executive Summary, recommend 5 cereals that meet those guidelines.

 

Plot at least 3 data visualizations to help communicate your recommendations. Feel free to use common Python packages such as numpy as needed.

 

Briefly explain your thought process for processing the data and determining the recommended cereals in the comments.

Cereal Data: https://www.kaggle.com/crawford/80-cereals
Dietary Guidelines: https://www.dietaryguidelines.gov/resources/2020-2025-dietary-guidelines-online-materials/top-10-things-you-need-know-about-dietary

In [28]:
#import bokeh function
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.layouts import row, column
from bokeh.io import export_png
from bokeh.models import DatetimeTickFormatter, TickFormatter, ColumnDataSource, Legend, Range1d, FactorRange
from bokeh.transform import dodge
from bokeh.core.properties import Dict, Int, String

output_notebook()


The guidelines say: "Limiting added sugars* to less than 10% of calories per day for ages 2 and older and to avoid added sugars for infants and toddlers;
Limiting saturated fat to less than 10% of calories per day starting at age 2;
Limiting sodium intake to less than 2,300mg per day (or even less if younger than 14);"

In [29]:
import pandas as pd

#STEP 1. Import the data
cereal_details=pd.read_csv('cereal.csv')[['name','protein','fat',\
    'sodium','sugars','potass','vitamins']]\
    .applymap(lambda x: np.nan if (type(x)!=str and x<0) else x).dropna()

In [30]:
#STEP 2. assign recommendation to the five with the lowest sugar and sodium

#normalize the nutrients so that they can be on the same plot
stds=cereal_details.describe().loc['std',:].apply(lambda x: sigfig.round(x, sigfigs=1)).to_dict()
normalized_cereal_details=copy.deepcopy(cereal_details)
for col in cereal_details.columns[1:]:
    normalized_cereal_details[col]=cereal_details[col]/stds.get(col)
    
sorted_cereals=normalized_cereal_details\
    .sort_values(by=['sugars','sodium','fat']).reset_index(drop=True)
sorted_cereals['recommended']=False
sorted_cereals.loc[:5,'recommended']=True
sorted_cereals['norm_sodium']=sorted_cereals['sodium'].apply(lambda x: round(x))

In [31]:
#STEP 3. visualize composition of cereals
df=sorted_cereals.sort_values(by=['sugars','sodium'], ascending=False)
df['recommended_name']=df.apply(lambda x: '**'+x['name']+'**' if x['recommended']\
    else x['name'], axis=1)

data={'recommended_name': df['recommended_name'],
     'protein': df['protein'],
     'sodium': -1*df['sodium'],
     'sugars': -1*df['sugars'],
     'vitamins': df['vitamins'],
     'potass': df['potass'],
     'fat': -1*df['fat']}

source=ColumnDataSource(data=data)
width=0.05
delta=0.1

p=figure(y_range=df['recommended_name'], height=2000, toolbar_location=None,\
        x_range=(-8,8))
p.hbar(y=dodge('recommended_name',3*delta, range=p.y_range), height=width, right='potass', \
       color='gold', legend_label='potassium (70mg)', source=source)
p.hbar(y=dodge('recommended_name',2*delta, range=p.y_range), height=width, right='vitamins', \
       color='lightsalmon', legend_label='vitamins (20%)', source=source)
p.hbar(y=dodge('recommended_name',delta, range=p.y_range), height=width, right='protein', \
       color='tomato', legend_label='protein (g)', source=source)
p.hbar(y=dodge('recommended_name',0, range=p.y_range), height=width, right='fat', \
       color='lightgreen', legend_label='fat (g)', source=source)
p.hbar(y=dodge('recommended_name',-delta, range=p.y_range), height=width, right='sodium', \
       color='limegreen', legend_label='sodium (80mg)', source=source)
p.hbar(y=dodge('recommended_name',-2*delta, range=p.y_range), height=width, right='sugars', \
       color='deepskyblue', legend_label='sugars (4g)', source=source)

p.xaxis.ticker=list(range(-6,7))
p.xaxis.major_label_overrides={-6:'',-5:'',-4:'',-3:'',-2:'',-1:'',0:'',\
                              1:'',2:'',3:'',4:'',5:'',6:''}
p.legend.location='top_left'
p.xaxis.axis_label='bad substances to limit --- good nutrients to include'
p.title='Nutrient composition of each cereal'
show(p)

**Recommended Cereals**  Shredded Wheat spoon size, Shredded Wheat'n'Bran, Shredded Wheat, Puffed Wheat, and Puffed Rice.  These cereals have no sugar, sodium, or fat, (bad things the guidelines recommend against), but do have some protein and potassium, which are good nutrients. 

The above plot shows the nutrient composition of each cereal, sorted by sugar and sodium levels.  Grid lines indicate multiples of the indicated amount (70mg of potassium, 140mg of potassium, etc..).

In [32]:
def rank_values(df,col):
    counts=dict(Counter(df[col]))
    ranked=[]
    for x in df[col]:
        ranked+=[counts.get(x)]
        counts[x]+=-1
    df['ranked']=ranked
    return df

In [33]:
labels={'protein':'protein (g)', 'potass':'potassium (70mg)', 'fat':'fat (g)', 'sugars':'sugars (x4g)',\
       'vitamins':'vitamins', 'sodium':'sodium (80mg)'}

In [34]:
colors={True: 'hotpink', False: 'lightgray'}
p={}
dfs={}

def plot_dist_nutrients(nutrient):
    p=figure(height=250, width=200, x_range=(-1,7), toolbar_location=None)
    df=rank_values(sorted_cereals,nutrient)
    p.circle(x=df[nutrient],y=df['ranked'],\
             color=df['recommended'].apply(lambda x: colors.get(x)), alpha=.5, size=10)
    p.xaxis.axis_label=labels.get(nutrient)
    
    p.xaxis.ticker=[0,1,2,3,4,5,6]
    p.yaxis.ticker=[5, 10, 15, 20, 25, 30, 35, 40]
    p.yaxis.axis_label='number of cereals'
    p.title='# cereals x '+nutrient
    return p

In [35]:
show(plot_dist_nutrients('fat'))

The recommended cereals don't have any fat.  All of the recommended cereals are shown (in pink) within the distribution of cereal's fat.

In [36]:
p=plot_dist_nutrients('sugars')
show(p)

The recommended cereals don't have any sugar. In contrast, the other cereals have 4-16g of sugar/serving.

In [37]:
p=plot_dist_nutrients('sodium')
show(p)

The recommended cereals have very little sodium.  Much less sodium than the other cereals.

In [38]:
p=plot_dist_nutrients('protein')
show(p)

The recommended cereals vary in protein, and can be observed over the entire distribution of protein composition.