##### imports

In [1]:
import numpy as np
import pandas as pd

### Lecture 1 - What Is A DataFrame

In [2]:
# Three parallel Python lists, one entry per person; they are reused below
# as the columns of the example DataFrames.
names = ['Pesho', 'Gosho', 'Ivan', 'Maria', 'Poly']
ages = [20, 30, 40, 50, 60]
married = [False, True, True, True, False]

In [3]:
# A pandas Series: one labelled column of values, here built from the names list.
ser = pd.Series(names, name='names')
ser

0    Pesho
1    Gosho
2     Ivan
3    Maria
4     Poly
Name: names, dtype: object

In [4]:
# A pandas DataFrame: each dict key becomes a column name and each list that column's values.
df = pd.DataFrame({'names': names, 'ages': ages, 'married': married})
df

Unnamed: 0,names,ages,married
0,Pesho,20,False
1,Gosho,30,True
2,Ivan,40,True
3,Maria,50,True
4,Poly,60,False


In [5]:
# To pick out the single cell holding 'Ivan', give iloc both positions:
# row 2 and column 0.
df.iloc[2, 0]

'Ivan'

In [6]:
# To select a whole row, give iloc only the row position; the row comes back as a Series.
df.iloc[2]

names      Ivan
ages         40
married    True
Name: 2, dtype: object

In [7]:
# Get a single column as a Series.
df.names  # or df['names']

0    Pesho
1    Gosho
2     Ivan
3    Maria
4     Poly
Name: names, dtype: object

### Lecture 2 - Create a DataFrame

In [8]:
# A DataFrame can be built from a dict mapping column name -> list of values
# (all lists must have the same length).
pd.DataFrame(dict(names=names, ages=ages))

Unnamed: 0,names,ages
0,Pesho,20
1,Gosho,30
2,Ivan,40
3,Maria,50
4,Poly,60


### BONUS - Four More Ways To Build DataFrames

In [9]:
# dict of tuples: tuples are accepted as column values the same way lists are
tuple_names, tuple_ages, tuple_married = map(tuple, (names, ages, married))

pd.DataFrame({
    'name': tuple_names,
    'ages': tuple_ages,
    'married': tuple_married,
})

Unnamed: 0,name,ages,married
0,Pesho,20,False
1,Gosho,30,True
2,Ivan,40,True
3,Maria,50,True
4,Poly,60,False


In [10]:
# dict of dicts: the outer key is the column name, the inner keys become the row labels
pd.DataFrame({'names': {0: 'Pesho', 1: 'Gosho', 2: 'Ivan', 3: 'Maria', 4: 'Poly'}})

Unnamed: 0,names
0,Pesho
1,Gosho
2,Ivan
3,Maria
4,Poly


In [11]:
# enumerate pairs every list item with its position; these (position, value)
# pairs are what the following cells turn into dicts.
list(enumerate(names))

[(0, 'Pesho'), (1, 'Gosho'), (2, 'Ivan'), (3, 'Maria'), (4, 'Poly')]

In [12]:
# position -> name mapping, built straight from the (position, value) pairs
dict_names = dict(enumerate(names))
dict_names

{0: 'Pesho', 1: 'Gosho', 2: 'Ivan', 3: 'Maria', 4: 'Poly'}

In [13]:
# position -> age mapping, built straight from the (position, value) pairs
dict_ages = dict(enumerate(ages))
dict_ages

{0: 20, 1: 30, 2: 40, 3: 50, 4: 60}

In [14]:
# position -> married flag mapping, built straight from the (position, value) pairs
dict_married = dict(enumerate(married))
dict_married

{0: False, 1: True, 2: True, 3: True, 4: False}

In [15]:
# dict of dicts again: the keys of the inner dicts (0..4) become the row labels
pd.DataFrame({'name': dict_names, 'ages': dict_ages, 'married': dict_married})

Unnamed: 0,name,ages,married
0,Pesho,20,False
1,Gosho,30,True
2,Ivan,40,True
3,Maria,50,True
4,Poly,60,False


In [16]:
# dict of Series: every column is supplied as its own named Series,
# and each Series' name is reused as the column name
ser_names = pd.Series(names, name='names')
ser_ages = pd.Series(ages, name='ages')
ser_married = pd.Series(married, name='married')

pd.DataFrame({s.name: s for s in (ser_names, ser_ages, ser_married)})

Unnamed: 0,names,ages,married
0,Pesho,20,False
1,Gosho,30,True
2,Ivan,40,True
3,Maria,50,True
4,Poly,60,False


In [17]:
# list of dicts: every dict is one row, its keys are the column names
pd.DataFrame([{'names': 'Pesho', 'ages': 20, 'married': False}])

Unnamed: 0,names,ages,married
0,Pesho,20,False


In [18]:
# zip walks the three lists in parallel and yields one (name, age, married) tuple per person
list(zip(names, ages, married))

[('Pesho', 20, False),
 ('Gosho', 30, True),
 ('Ivan', 40, True),
 ('Maria', 50, True),
 ('Poly', 60, False)]

In [19]:
# Row-wise construction: one dict per person, keyed by column name.
# The loop variables use their own singular names so they do not shadow the
# outer `ages` and `married` lists that are being zipped.
rowwise = [
    {'names': name, 'ages': age, 'married': is_married}
    for name, age, is_married in zip(names, ages, married)
]
rowwise

[{'names': 'Pesho', 'ages': 20, 'married': False},
 {'names': 'Gosho', 'ages': 30, 'married': True},
 {'names': 'Ivan', 'ages': 40, 'married': True},
 {'names': 'Maria', 'ages': 50, 'married': True},
 {'names': 'Poly', 'ages': 60, 'married': False}]

In [20]:
# Build the frame from the list of row dicts created above.
pd.DataFrame(rowwise)

Unnamed: 0,names,ages,married
0,Pesho,20,False
1,Gosho,30,True
2,Ivan,40,True
3,Maria,50,True
4,Poly,60,False


### Lecture 3 - The info() Method

In [21]:
# Full summary: index range, per-column non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   names    5 non-null      object
 1   ages     5 non-null      int64 
 2   married  5 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 213.0+ bytes


In [22]:
# verbose=False prints the short summary without the per-column table.
df.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Columns: 3 entries, names to married
dtypes: bool(1), int64(1), object(1)
memory usage: 213.0+ bytes


In [23]:
# df has 3 columns, more than max_cols=2, so info() prints the short summary here as well.
df.info(max_cols=2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Columns: 3 entries, names to married
dtypes: bool(1), int64(1), object(1)
memory usage: 213.0+ bytes


In [24]:
# memory_usage=False leaves the "memory usage" line out of the summary.
df.info(memory_usage=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   names    5 non-null      object
 1   ages     5 non-null      int64 
 2   married  5 non-null      bool  
dtypes: bool(1), int64(1), object(1)

In [25]:
# memory_usage='deep' reports a larger figure with no trailing '+' (compare the
# "213.0+ bytes" of the default call) -- presumably because the contents of the
# object column are measured too; confirm against the pandas docs.
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   names    5 non-null      object
 1   ages     5 non-null      int64 
 2   married  5 non-null      bool  
dtypes: bool(1), int64(1), object(1)
memory usage: 481.0 bytes


### Lecture 4 - Reading In Nutrition Data

In [26]:
# Load the nutrition data. With index_col=False no file column is used as the
# row index, so the index column saved in the file shows up as an ordinary
# column called 'Unnamed: 0'.
nutrition = pd.read_csv('data/nutrition.csv', index_col=False)
nutrition.head()

Unnamed: 0.1,Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [27]:
# Short summary only: the frame is too wide for the per-column table to be useful.
nutrition.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Columns: 77 entries, Unnamed: 0 to water
dtypes: int64(3), object(74)
memory usage: 5.2+ MB


### Lecture 5 - Some Cleanup: Removing The Duplicated Index

In [28]:
# First way: drop the 'Unnamed: 0' column.
# The result is only displayed, not assigned, so `nutrition` itself is unchanged.
nutrition.drop('Unnamed: 0', axis=1)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [29]:
# Second way: turn the 'Unnamed: 0' column into the row index.
# Again the result is only displayed; `nutrition` is not modified.
nutrition.set_index('Unnamed: 0')

Unnamed: 0_level_0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


In [30]:
# Third (and best) way: re-read the file and tell read_csv to use the first
# column as the row index, so the duplicate column never appears.
nutrition = pd.read_csv('data/nutrition.csv', index_col=[0])
nutrition

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,3.50 g,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,22.74 g,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,3.04 g,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g


### Lecture 6 - The sample() Method

In [31]:
# With no arguments, sample() returns one randomly chosen row.
nutrition.sample()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
5106,"Mushrooms, raw, exposed to ultraviolet light, ...",100 g,22,0.4g,0.1g,0,9.00 mg,21.2 mg,28.00 mcg,0.00 mcg,...,0.35 g,0.060 g,0.020 g,0.117 g,0.00 mg,0,0.85 g,0,0,92.82 g


In [32]:
# random_state seeds the random draw so the selection can be reproduced.
nutrition.sample(random_state=12)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
3713,"Thuringer, pork, beef, summer sausage, cervelat",100 g,362,30g,12g,74mg,1300.00 mg,78.9 mg,2.00 mcg,0.00 mcg,...,30.43 g,11.510 g,12.970 g,1.200 g,74.00 mg,0.0 g,3.63 g,0.00 mg,0.00 mg,45.18 g


In [33]:
# n sets how many rows to return
nutrition.sample(n=2)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
1627,"Lasagna, prepared, frozen, cheese",100 g,130,5.3g,2.1g,13mg,284.00 mg,10.3 mg,32.00 mcg,26.00 mcg,...,5.33 g,2.110 g,1.573 g,1.022 g,13.00 mg,0.0 g,1.51 g,0.00 mg,0.00 mg,72.78 g
3274,"Cereals ready-to-eat, KASHI Berry Blossom",100 g,322,3g,0.4g,0,419.00 mg,0,11.00 mcg,0.00 mcg,...,3.00 g,0.400 g,0.890 g,1.390 g,0.00 mg,0,3.90 g,0,0,2.50 g


In [34]:
# frac sets the share of rows to return as a fraction, not a percent: 0.01 means 1% of the rows
nutrition.sample(frac=0.01)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
1931,"Babyfood, toddler, dices, peas",100 g,64,0.8g,0.1g,0,48.00 mg,14.7 mg,35.00 mcg,0.00 mcg,...,0.80 g,0.140 g,0.070 g,0.370 g,0.00 mg,0.0 g,0.35 g,0.00 mg,0.00 mg,84.60 g
5180,"Cherries, solids and liquids, water pack, cann...",100 g,46,0.1g,,0,1.00 mg,4.1 mg,4.00 mcg,0.00 mcg,...,0.13 g,0.028 g,0.034 g,0.038 g,0.00 mg,0.0 g,0.29 g,0.00 mg,0.00 mg,87.05 g
66,"Spices, garlic powder",100 g,331,0.7g,0.2g,0,60.00 mg,67.5 mg,47.00 mcg,0.00 mcg,...,0.73 g,0.249 g,0.115 g,0.178 g,0.00 mg,0.0 g,3.54 g,0.00 mg,0.00 mg,6.45 g
6771,"Seeds, with salt added, roasted, pumpkin and s...",100 g,574,49g,8.5g,0,256.00 mg,63.0 mg,57.00 mcg,0.00 mcg,...,49.05 g,8.544 g,15.734 g,19.856 g,0.00 mg,0.0 g,4.37 g,0.00 mg,0.00 mg,2.03 g
3576,"Pear nectar, without added ascorbic acid, canned",100 g,60,0g,,0,4.00 mg,2.0 mg,1.00 mcg,0.00 mcg,...,0.01 g,0.001 g,0.003 g,0.003 g,0.00 mg,0.0 g,0.10 g,0.00 mg,0.00 mg,84.01 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5692,"Margarine-like, soybean oil and butter, margar...",100 g,727,80g,14g,12mg,719.00 mg,6.5 mg,2.00 mcg,0.00 mcg,...,80.32 g,14.198 g,30.292 g,24.170 g,12.00 mg,0.0 g,1.53 g,0.00 mg,0.00 mg,17.07 g
88,"Mushrooms, raw, enoki",100 g,37,0.3g,,0,3.00 mg,47.7 mg,48.00 mcg,0.00 mcg,...,0.29 g,0.020 g,0.000 g,0.090 g,0.00 mg,0.0 g,0.91 g,0.00 mg,0.00 mg,88.34 g
244,"Fish, raw, bluefish",100 g,124,4.2g,0.9g,59mg,60.00 mg,0,2.00 mcg,0.00 mcg,...,4.24 g,0.915 g,1.793 g,1.060 g,59.00 mg,0.0 g,1.04 g,0,0,70.86 g
6394,"Artichokes, without salt, drained, boiled, coo...",100 g,53,0.3g,0.1g,0,60.00 mg,34.4 mg,89.00 mcg,0.00 mcg,...,0.34 g,0.079 g,0.011 g,0.145 g,0.00 mg,0.0 g,0.74 g,0.00 mg,0.00 mg,84.08 g


### BONUS - Sampling With Replacement Or Weights

In [35]:
# Sampling with replacement (replace=True): a row can be drawn more than once.
# Without replace=True, sampling is without replacement.
nutrition.sample(n=3, replace=True)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
715,"Candies, ALMOND JOY Candy Bar",100 g,479,27g,18g,4mg,142.00 mg,0,0,0,...,26.93 g,17.590 g,5.260 g,1.180 g,4.00 mg,0,1.23 g,0,0,8.20 g
3037,"Borage, with salt, drained, boiled, cooked",100 g,25,0.8g,0.2g,0,324.00 mg,0,10.00 mcg,0.00 mcg,...,0.81 g,0.197 g,0.245 g,0.127 g,0.00 mg,0,1.67 g,0,0,91.88 g
4310,"Cereals ready-to-eat, QUAKER WHOLE HEARTS oat ...",100 g,376,5.6g,0.8g,0,554.00 mg,0,1427.00 mcg,0,...,5.61 g,0.839 g,2.689 g,1.465 g,0.00 mg,0,3.92 g,0,0,2.75 g


In [36]:
# Weighted sampling: per-row weights keyed by row label.
# Rows 7, 17 and 29 get weight 10, row 5 gets weight 1 and row 6 gets weight 2.
weights = pd.Series({7: 10, 17: 10, 29: 10, 5: 1, 6: 2})
weights

7     10
17    10
29    10
5      1
6      2
dtype: int64

In [37]:
# The weights Series is matched to rows by index label; rows with a larger weight
# are more likely to be drawn. NOTE(review): per the pandas docs, rows missing
# from `weights` get weight 0 and so are never drawn -- confirm.
nutrition.sample(n=3, weights=weights)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
29,"Nuts, dried, pine nuts",100 g,673,68g,4.9g,0,2.00 mg,55.8 mg,34.00 mcg,0.00 mcg,...,68.37 g,4.899 g,18.764 g,34.071 g,0.00 mg,0.0 g,2.59 g,0.00 mg,0.00 mg,2.28 g
17,"Peppers, raw, jalapeno",100 g,29,0.4g,0.1g,0,3.00 mg,7.5 mg,27.00 mcg,0.00 mcg,...,0.37 g,0.092 g,0.029 g,0.112 g,0.00 mg,0.0 g,0.53 g,0.00 mg,0.00 mg,91.69 g
6,"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g


### BONUS - How Are Random Numbers Generated?

In [38]:
# The seed determines which row is picked: seed 19 selects a different row than seed 12 did above.
nutrition.sample(random_state=19)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
4176,"Babyfood, apple and sweet potato, fruit and ve...",100 g,64,0.2g,,0,3.00 mg,4.4 mg,2.00 mcg,0.00 mcg,...,0.22 g,0.041 g,0.008 g,0.079 g,0.00 mg,0.0 g,0.30 g,0.00 mg,0.00 mg,84.00 g


In [39]:
# Another seed, another row.
nutrition.sample(random_state=13)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
4453,"KFC, Skin and Breading, EXTRA CRISPY, Fried Ch...",100 g,464,37g,7.5g,59mg,828.00 mg,0,0,0,...,36.61 g,7.541 g,11.816 g,14.238 g,59.00 mg,0,2.92 g,0,0,26.98 g


In [40]:
# And one more seed for comparison.
nutrition.sample(random_state=46)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
2693,"Candies, soft fruit and nut squares",100 g,390,9.5g,0.9g,0,131.00 mg,5.5 mg,12.00 mcg,0.00 mcg,...,9.52 g,0.898 g,1.297 g,6.840 g,0.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,13.88 g


### Lecture 7 - DataFrame Axes

In [41]:
# .axes is a two-item list: the row index first, then the column index.
nutrition.axes

[Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
             ...
             8779, 8780, 8781, 8782, 8783, 8784, 8785, 8786, 8787, 8788],
            dtype='int64', length=8789),
 Index(['name', 'serving_size', 'calories', 'total_fat', 'saturated_fat',
        'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
        'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
        'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
        'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
        'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
        'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
        'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine',
        'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
        'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
        'methionine', 'phenylalanine', 'proline', 'ser

In [42]:
# Row axis: entry 0 of .axes
nutrition.axes[0]

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            8779, 8780, 8781, 8782, 8783, 8784, 8785, 8786, 8787, 8788],
           dtype='int64', length=8789)

In [43]:
# Row label at position 5, read from the first entry of .axes.
# nutrition.index exposes the same row axis (see the next cell).
nutrition.axes[0][5]

5

In [44]:
# Same kind of lookup through .index, here for the row label at position 3.
nutrition.index[3]

3

In [45]:
# Column axis: entry 1 of .axes
nutrition.axes[1]

Index(['name', 'serving_size', 'calories', 'total_fat', 'saturated_fat',
       'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
       'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
       'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
       'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
       'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
       'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
       'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine',
       'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
       'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
       'methionine', 'phenylalanine', 'proline', 'serine', 'threonine',
       'tryptophan', 'tyrosine', 'valine', 'carbohydrate', 'fiber', 'sugars',
       'fructose', 'galactose', 'glucose', 'lactose', 'maltose', 'sucrose',
       'fat', 'saturated_fatty_acids', 'mon

In [46]:
# Column label at position 2, read from the second entry of .axes.
# nutrition.columns gives the same result (see the next cell).
nutrition.axes[1][2]

'calories'

In [47]:
# Same lookup through .columns: the column label at position 2.
nutrition.columns[2]

'calories'

### Lecture 8 - Changing the Index

In [48]:
# Disabled example: assigning a new Index object to nutrition.index replaces the row labels.
# nutrition.index = pd.RangeIndex(start=0, stop=8789, step=1)
# type(nutrition.index)

### Lecture 9 - Extracting From DataFrames By Label

In [49]:
# Use the food name as the row index so rows can be looked up by label.
# The guard plus reassignment (instead of inplace=True) keeps this cell safe to
# re-run: calling set_index('name') a second time would raise KeyError because
# 'name' is no longer a column once it has become the index.
if nutrition.index.name != 'name':
    nutrition = nutrition.set_index('name')
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [50]:
# .loc with a single row label returns that row as a Series.
nutrition.loc['Teff, uncooked']

serving_size      100 g
calories            367
total_fat          2.4g
saturated_fat      0.4g
cholesterol           0
                  ...  
alcohol               0
ash              2.37 g
caffeine              0
theobromine           0
water            8.82 g
Name: Teff, uncooked, Length: 75, dtype: object

In [51]:
# Single cell by label: pass the row and column labels to one .loc call rather
# than chaining .loc[row][col], which first builds the whole row Series and
# then indexes into it.
nutrition.loc['Teff, uncooked', 'calories']

367

In [52]:
# Label slices on both axes; unlike positional slices, both end labels are included.
nutrition.loc['Eggplant, raw': 'Sherbet, orange' ,'calories': 'cholesterol']

Unnamed: 0_level_0,calories,total_fat,saturated_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Eggplant, raw",25,0.2g,,0
"Teff, uncooked",367,2.4g,0.4g,0
"Sherbet, orange",144,2g,1.2g,1mg


In [53]:
# Lists of labels on both axes: the result is a DataFrame even for a single row.
nutrition.loc[
    ['Teff, uncooked'],
    ['protein', 'vitamin_b6']
]

Unnamed: 0_level_0,protein,vitamin_b6
name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Teff, uncooked",13.30 g,0.482 mg


### Lecture 10 - DataFrame Extraction by Position

In [54]:
# Row at position 3, returned as a Series.
nutrition.iloc[3]

serving_size      100 g
calories            367
total_fat          2.4g
saturated_fat      0.4g
cholesterol           0
                  ...  
alcohol               0
ash              2.37 g
caffeine              0
theobromine           0
water            8.82 g
Name: Teff, uncooked, Length: 75, dtype: object

In [55]:
# Same row, with the "all columns" slice written out explicitly.
nutrition.iloc[3, :]

serving_size      100 g
calories            367
total_fat          2.4g
saturated_fat      0.4g
cholesterol           0
                  ...  
alcohol               0
ash              2.37 g
caffeine              0
theobromine           0
water            8.82 g
Name: Teff, uncooked, Length: 75, dtype: object

In [56]:
# A list of row positions returns a DataFrame with just those rows (all columns).
nutrition.iloc[[4, 6, 9], :]

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [57]:
# The column selector can be left out; the result is the same as with `, :`.
nutrition.iloc[[4, 6, 9]]

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g
"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,1.513 mg,...,0.74 g,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g
Vegetarian fillets,100 g,290,18g,2.8g,0,490.00 mg,82.0 mg,102.00 mcg,0.00 mcg,12.000 mg,...,18.00 g,2.849 g,4.376 g,9.332 g,0.00 mg,0.0 g,5.00 g,0.00 mg,0.00 mg,45.00 g


In [58]:
# Rows 4, 6 and 9 of the column at position 2 (total_fat); a single column position gives a Series.
nutrition.iloc[[4, 6, 9], 2]

name
Sherbet, orange         2g
Taro leaves, raw      0.7g
Vegetarian fillets     18g
Name: total_fat, dtype: object

In [59]:
# Same rows, columns at positions 2, 3 and 4: the end of a positional slice is excluded.
nutrition.iloc[[4, 6, 9], 2:5]

Unnamed: 0_level_0,total_fat,saturated_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Sherbet, orange",2g,1.2g,1mg
"Taro leaves, raw",0.7g,0.2g,0
Vegetarian fillets,18g,2.8g,0


In [60]:
# Positional slices on both axes: rows 4..8 and columns 2..4 (end positions excluded).
nutrition.iloc[4:9, 2:5]

Unnamed: 0_level_0,total_fat,saturated_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Sherbet, orange",2g,1.2g,1mg
"Cauliflower, raw",0.3g,0.1g,0
"Taro leaves, raw",0.7g,0.2g,0
"Lamb, raw, ground",23g,10g,73mg
"Cheese, camembert",24g,15g,72mg


In [61]:
# Boolean masks with iloc: the first list selects rows, the second selects columns
# (True keeps the entry, so this keeps every even position on both axes).
# Each mask needs exactly one entry per row / column, so both lengths are taken
# from the frame's shape instead of hard-coding the column count.
nutrition.iloc[
    [i % 2 == 0 for i in range(nutrition.shape[0])],
    [i % 2 == 0 for i in range(nutrition.shape[1])],
]

Unnamed: 0_level_0,serving_size,total_fat,cholesterol,choline,folic_acid,pantothenic_acid,thiamin,vitamin_a_rae,carotene_beta,lutein_zeaxanthin,...,carbohydrate,sugars,galactose,lactose,sucrose,saturated_fatty_acids,polyunsaturated_fatty_acids,alcohol,caffeine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,0.1g,0,0.4 mg,0.00 mcg,0.000 mg,0.000 mg,0.00 mcg,0.00 mcg,0.00 mcg,...,91.27 g,0.00 g,0,0,0,0.009 g,0.025 g,0.0 g,0.00 mg,8.32 g
"Eggplant, raw",100 g,0.2g,0,6.9 mg,0.00 mcg,0.281 mg,0.039 mg,1.00 mcg,14.00 mcg,36.00 mcg,...,5.88 g,3.53 g,0,0,0.26 g,0.034 g,0.076 g,0.0 g,0.00 mg,92.30 g
"Sherbet, orange",100 g,2g,1mg,7.7 mg,0.00 mcg,0.224 mg,0.027 mg,12.00 mcg,1.00 mcg,7.00 mcg,...,30.40 g,24.32 g,0,0,0,1.160 g,0.080 g,0.0 g,0.00 mg,66.10 g
"Taro leaves, raw",100 g,0.7g,0,12.8 mg,0.00 mcg,0.084 mg,0.209 mg,241.00 mcg,2895.00 mcg,1932.00 mcg,...,6.70 g,3.01 g,0,0,0,0.151 g,0.307 g,0.0 g,0.00 mg,85.66 g
"Cheese, camembert",100 g,24g,72mg,15.4 mg,0.00 mcg,1.364 mg,0.028 mg,241.00 mcg,12.00 mcg,0.00 mcg,...,0.46 g,0.46 g,0,0,0,15.259 g,0.724 g,0.0 g,0.00 mg,51.80 g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, raw, select, trimmed to 1/8"" fat, separable lean only, lip-on, boneless, rib eye steak/roast",100 g,6.4g,70mg,49.4 mg,0.00 mcg,0.530 mg,0.100 mg,2.00 mcg,0.00 mcg,0.00 mcg,...,0.00 g,0.00 g,0,0,0,2.313 g,0.396 g,0.0 g,0.00 mg,70.89 g
"Oil, uses similar to 95 degree hard butter, confection fat, palm kernel (hydrogenated), industrial",100 g,100g,0,0.2 mg,0.00 mcg,0.000 mg,0.000 mg,0.00 mcg,0.00 mcg,0.00 mcg,...,0.00 g,0.00 g,0,0,0,93.701 g,0.000 g,0.0 g,0.00 mg,0.05 g
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, top round roast, round",100 g,3.5g,62mg,64.5 mg,0.00 mcg,0.356 mg,0.063 mg,3.00 mcg,0.00 mcg,0.00 mcg,...,0.00 g,0.00 g,0,0,0,1.353 g,0.244 g,0.0 g,0.00 mg,72.51 g
"Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand",100 g,23g,78mg,0,0.00 mcg,0.520 mg,0.130 mg,0.00 mcg,0,0,...,0.00 g,0,0,0,0,11.570 g,0.980 g,0,0,59.80 g


In [62]:
# A single cell: row position 9, column position 1 (calories)
nutrition.iloc[9, 1]

290

### Lecture 11 - Single Value Access With .at And .iat

In [63]:
# .at and .iat are the better choice for single-value access because they are
# faster than .loc / .iloc (see the %timeit cells below)
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [64]:
# single value by row label and column label with .loc
nutrition.loc['Eggplant, raw', 'pantothenic_acid']

'0.281 mg'

In [65]:
# single value by integer position with .iloc (row 2, column 10)
nutrition.iloc[2, 10]

'0.281 mg'

In [66]:
# by label with .at (works only for a single value, not for slices or lists)
nutrition.at['Eggplant, raw', 'pantothenic_acid']

'0.281 mg'

In [67]:
# by integer position with .iat (works only for a single value, not for slices or lists)
nutrition.iat[2, 10]

'0.281 mg'

In [68]:
# time label-based single-value access with .loc
%timeit nutrition.loc['Eggplant, raw', 'pantothenic_acid']

5.12 µs ± 34.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [69]:
# time the same label-based lookup with .at
%timeit nutrition.at['Eggplant, raw', 'pantothenic_acid']

2.53 µs ± 24.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [70]:
# time position-based single-value access with .iloc
%timeit nutrition.iloc[2, 10]

19.9 µs ± 646 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [71]:
# time the same position-based lookup with .iat
%timeit nutrition.iat[2, 10]

15.2 µs ± 493 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


### BONUS - The get_loc() Method

In [72]:
# get the row label stored at integer position 2 of the index
index_label = nutrition.index[2]
index_label

'Eggplant, raw'

In [73]:
# column label to look up together with index_label
col_label = 'vitamin_k'
col_label

'vitamin_k'

In [74]:
# label-based single-value access using the row label and the column label
nutrition.at[index_label, col_label]

'3.5 mcg'

In [75]:
# get the integer position of a column from its label
col_loc = nutrition.columns.get_loc('vitamin_k')

In [76]:
# integer position of the row (the same position index_label was read from)
index_loc = 2

In [77]:
# position-based single-value access using the two integer locations
nutrition.iat[index_loc, col_loc]

'3.5 mcg'

### Lecture 12 - Skill Challenge

In [78]:
# 1. Randomly select 10 food items and assign the resulting df to a new variable nutr_mini
# random_state is fixed so that re-running the cell draws the same 10 rows
nutr_mini = nutrition.sample(n=10, random_state=1)
nutr_mini

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,7.680 mg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
"Beef, grilled, cooked, choice, trimmed to 1/8"" fat, separable lean and fat, lip-on, bone-in, rib eye steak",100 g,313,25g,11g,83mg,60.00 mg,48.6 mg,6.00 mcg,0.00 mcg,4.752 mg,...,24.70 g,10.566 g,11.620 g,1.103 g,83.00 mg,0.0 g,0.90 g,0.00 mg,0.00 mg,52.36 g
"Beverages, all with morro seeds, variety of brands, unprepared, dry mix, Horchata",100 g,413,7.5g,2.1g,0,3.00 mg,11.6 mg,12.00 mcg,0.00 mcg,2.200 mg,...,7.46 g,2.086 g,3.174 g,2.069 g,0.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,4.89 g
"Pork, unheated, separable lean and fat, bone-in, rump, ham, cured",100 g,176,9.4g,3g,62mg,722.00 mg,84.0 mg,1.00 mcg,0.00 mcg,6.417 mg,...,9.38 g,3.008 g,3.966 g,1.356 g,62.00 mg,0.0 g,2.65 g,0.00 mg,0.00 mg,65.19 g
"Veal, braised, cooked, separable lean only, blade, shoulder",100 g,198,6.5g,1.8g,158mg,101.00 mg,126.8 mg,15.00 mcg,0.00 mcg,5.680 mg,...,6.48 g,1.810 g,2.310 g,0.580 g,158.00 mg,0.0 g,1.24 g,0.00 mg,0.00 mg,59.24 g
"Tomatoes, canned, crushed",100 g,32,0.3g,,0,186.00 mg,12.9 mg,13.00 mcg,0.00 mcg,1.222 mg,...,0.28 g,0.040 g,0.043 g,0.113 g,0.00 mg,0.0 g,1.35 g,0.00 mg,0.00 mg,89.44 g
"Fish oil, fully hydrogenated, menhaden",100 g,902,100g,96g,500mg,0.00 mg,0,0.00 mcg,0.00 mcg,0.000 mg,...,100.00 g,95.600 g,0.000 g,0.000 g,500.00 mg,0,0.00 g,0,0,0.00 g
"Cereals ready-to-eat, POST SELECTS Maple Pecan Crunch",100 g,413,8.7g,1g,0,239.00 mg,25.0 mg,192.00 mcg,173.00 mcg,9.600 mg,...,8.70 g,1.000 g,4.400 g,2.600 g,0.00 mg,0.0 g,2.40 g,0.00 mg,0.00 mg,3.00 g
"Chicken, roasted, cooked, meat only, back, broilers or fryers",100 g,239,13g,3.6g,90mg,96.00 mg,0,7.00 mcg,0,7.069 mg,...,13.16 g,3.600 g,4.830 g,3.050 g,90.00 mg,0.0 g,1.08 g,0.00 mg,0.00 mg,58.75 g
"Frostings, dry mix, creamy, chocolate",100 g,389,5.2g,,0,76.00 mg,0,3.00 mcg,0.00 mcg,0.142 mg,...,5.20 g,0,0,0,0.00 mg,0,0.60 g,6.00 mg,188.00 mg,0.60 g


In [79]:
# 2. From nutr_mini extract the total_fat and cholesterol cols for all rows
# ':' selects every row; the list selects the two columns by label
nutr_mini.loc[:, ['total_fat', 'cholesterol']]

Unnamed: 0_level_0,total_fat,cholesterol
name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",8.9g,109mg
"Beef, grilled, cooked, choice, trimmed to 1/8"" fat, separable lean and fat, lip-on, bone-in, rib eye steak",25g,83mg
"Beverages, all with morro seeds, variety of brands, unprepared, dry mix, Horchata",7.5g,0
"Pork, unheated, separable lean and fat, bone-in, rump, ham, cured",9.4g,62mg
"Veal, braised, cooked, separable lean only, blade, shoulder",6.5g,158mg
"Tomatoes, canned, crushed",0.3g,0
"Fish oil, fully hydrogenated, menhaden",100g,500mg
"Cereals ready-to-eat, POST SELECTS Maple Pecan Crunch",8.7g,0
"Chicken, roasted, cooked, meat only, back, broilers or fryers",13g,90mg
"Frostings, dry mix, creamy, chocolate",5.2g,0


In [80]:
# 3. Extract all cols from vitamin_b12 to the end, for the first, second and third rows
# get_loc converts the column label to an integer position so that iloc can slice from it
# NOTE: this rebinds col_loc, which an earlier cell set to the vitamin_k position
col_loc = nutr_mini.columns.get_loc('vitamin_b12')
nutr_mini.iloc[:3, col_loc:]

Unnamed: 0_level_0,vitamin_b12,vitamin_b6,vitamin_c,vitamin_d,vitamin_e,tocopherol_alpha,vitamin_k,calcium,copper,irom,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",2.95 mcg,0.140 mg,0.0 mg,0,0.19 mg,0.19 mg,0,13.00 mg,0.114 mg,2.35 mg,...,8.86 g,3.860 g,3.480 g,0.520 g,109.00 mg,0,1.60 g,0,0,59.95 g
"Beef, grilled, cooked, choice, trimmed to 1/8"" fat, separable lean and fat, lip-on, bone-in, rib eye steak",2.00 mcg,0.450 mg,0.0 mg,7.00 IU,0.10 mg,0.10 mg,1.6 mcg,22.00 mg,0.073 mg,2.29 mg,...,24.70 g,10.566 g,11.620 g,1.103 g,83.00 mg,0.0 g,0.90 g,0.00 mg,0.00 mg,52.36 g
"Beverages, all with morro seeds, variety of brands, unprepared, dry mix, Horchata",0.00 mcg,0.179 mg,0.3 mg,0.00 IU,0.37 mg,0.37 mg,1.1 mcg,60.00 mg,0.470 mg,5.80 mg,...,7.46 g,2.086 g,3.174 g,2.069 g,0.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,4.89 g


In [81]:
# 4. Get the calories for the third food in nutr_mini using an attribute-based approach that is faster than .loc or .iloc
# .iat needs integer positions, so the column label is translated with get_loc first
# NOTE: col_loc is rebound again here (now the position of 'calories')
col_loc = nutr_mini.columns.get_loc('calories')
nutr_mini.iat[2, col_loc]

413

### Lecture 13 - More Cleanup: Going Numeric

In [82]:
# most of the columns have object dtype; they must be converted to numeric values in several steps
nutrition.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8789 entries, Cornstarch to Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round
Data columns (total 75 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   serving_size                 8789 non-null   object
 1   calories                     8789 non-null   int64 
 2   total_fat                    8789 non-null   object
 3   saturated_fat                7199 non-null   object
 4   cholesterol                  8789 non-null   object
 5   sodium                       8789 non-null   object
 6   choline                      8789 non-null   object
 7   folate                       8789 non-null   object
 8   folic_acid                   8789 non-null   object
 9   niacin                       8789 non-null   object
 10  pantothenic_acid             8789 non-null   object
 11  riboflavin                   8789 non-null   obje

### Lecture 14 - The astype() Method

In [83]:
# demo frame mixing three kinds of values: ints, floats and numbers stored as strings
df = pd.DataFrame(dict(
    age=[12, 13, 14, 15],
    weight=[41.5, 51.4, 60.7, 76.9],
    height=['1.65', '1.69', '1.72', '1.84'],
))
df

Unnamed: 0,age,weight,height
0,12,41.5,1.65
1,13,51.4,1.69
2,14,60.7,1.72
3,15,76.9,1.84


In [84]:
# cast every column to float; the heights stored as strings are parsed into numbers as well
df = df.astype(float)
df

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
1,13.0,51.4,1.69
2,14.0,60.7,1.72
3,15.0,76.9,1.84


In [85]:
# a dict casts only the listed columns (age -> int16); the result is displayed, not assigned back to df
df.astype({'age': np.int16})

Unnamed: 0,age,weight,height
0,12,41.5,1.65
1,13,51.4,1.69
2,14,60.7,1.72
3,15,76.9,1.84


### Lecture 15 - DataFrame replace() + A Glimpse At Regex

In [86]:
# small working frame for the replace() demos: first 6 rows, first column only
dfm = nutrition.iloc[:6, :1]
dfm.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, Cornstarch to Cauliflower, raw
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   serving_size  6 non-null      object
dtypes: object(1)
memory usage: 96.0+ bytes


In [87]:
# replace cells equal to the string '100 g' with the integer 100; the result is displayed, dfm is not reassigned
dfm.replace(to_replace='100 g', value=100)

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100
"Nuts, pecans",100
"Eggplant, raw",100
"Teff, uncooked",100
"Sherbet, orange",100
"Cauliflower, raw",100


In [88]:
# use a regex to strip the "<whitespace>g" suffix, then cast the remaining digits to int64
# (raw string: \s must reach the regex engine; in a plain literal it is an invalid string escape)
dfm.replace(r'\sg', '', regex=True).astype(np.int64)

Unnamed: 0_level_0,serving_size
name,Unnamed: 1_level_1
Cornstarch,100
"Nuts, pecans",100
"Eggplant, raw",100
"Teff, uncooked",100
"Sherbet, orange",100
"Cauliflower, raw",100


### Lecture 16 - Part 1: Collecting the Units

In [89]:
# look at the raw values again: most cells are strings with a unit suffix attached to the number
nutrition.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [90]:
# cast every cell to str and delete all non-letter characters, leaving only the unit text;
# missing values turn into the string 'nan' through astype(str)
units = nutrition.astype(str).replace('[^a-zA-Z]', '', regex=True)
units.head()

Unnamed: 0_level_0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,g,,g,,,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g
"Nuts, pecans",g,,g,g,,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g
"Eggplant, raw",g,,g,,,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g
"Teff, uncooked",g,,g,g,,mg,mg,,,mg,...,g,g,g,g,,,g,,,g
"Sherbet, orange",g,,g,g,mg,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g


In [91]:
# count how often each unit string occurs in one column
units['saturated_fat'].value_counts()

g      7199
nan    1590
Name: saturated_fat, dtype: int64

In [92]:
# most frequent unit string in that column
units['saturated_fat'].mode()

0    g
dtype: object

In [93]:
# most frequent unit string for every column at once
units.mode()

Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,g,,g,g,mg,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g


In [94]:
# keep only the per-column modes; this overwrites the per-cell units frame
units = units.mode()

### Lecture 17 - The rename() Method

In [95]:
# the small age / weight / height demo frame, used here to demonstrate rename()
df

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
1,13.0,51.4,1.69
2,14.0,60.7,1.72
3,15.0,76.9,1.84


In [96]:
# rename row labels with an {old: new} dict; labels missing from the dict are left as they are
df.rename(index={0: 'Pesho', 1: 'Ivan'})

Unnamed: 0,age,weight,height
Pesho,12.0,41.5,1.65
Ivan,13.0,51.4,1.69
2,14.0,60.7,1.72
3,15.0,76.9,1.84


In [97]:
# rename column labels with the columns= keyword
df.rename(columns={'weight': 'Weight (kg)'})

Unnamed: 0,age,Weight (kg),height
0,12.0,41.5,1.65
1,13.0,51.4,1.69
2,14.0,60.7,1.72
3,15.0,76.9,1.84


In [98]:
# alternative spelling: a generic mapper plus axis=1 to target the columns
df.rename(mapper={'height': 'Height (m)'}, axis=1)

Unnamed: 0,age,weight,Height (m)
0,12.0,41.5,1.65
1,13.0,51.4,1.69
2,14.0,60.7,1.72
3,15.0,76.9,1.84


### Lecture 18 - DataFrame dropna()

In [99]:
# state of df before any missing values are introduced
df

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
1,13.0,51.4,1.69
2,14.0,60.7,1.72
3,15.0,76.9,1.84


In [100]:
# set a single cell (row label 2, column 'weight') to NaN; this mutates df in place
df.loc[2, 'weight'] = np.nan
df

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
1,13.0,51.4,1.69
2,14.0,,1.72
3,15.0,76.9,1.84


In [101]:
# set the entire row with label 1 to NaN; this mutates df in place
df.loc[1, :] = np.nan
df

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
1,,,
2,14.0,,1.72
3,15.0,76.9,1.84


In [102]:
# drop every row that contains at least one NaN (result is displayed, df is not modified)
df.dropna()

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
3,15.0,76.9,1.84


In [103]:
# the same call with the default arguments written out explicitly
df.dropna(how='any', axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
3,15.0,76.9,1.84


In [104]:
# drop only the rows in which every value is NaN
df.dropna(how='all', axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
2,14.0,,1.72
3,15.0,76.9,1.84


In [105]:
# thresh sets the minimum number of non-NaN values a row must have in order to be kept
df.dropna(thresh=2, axis=0)

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
2,14.0,,1.72
3,15.0,76.9,1.84


### BONUS - dropna() With Subset

In [106]:
# df still contains the NaN values, because none of the dropna() results were assigned back
df

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
1,,,
2,14.0,,1.72
3,15.0,76.9,1.84


In [107]:
# drop rows only when the NaN is in one of the columns listed in subset
df.dropna(how='any', axis=0, subset=['height'])

Unnamed: 0,age,weight,height
0,12.0,41.5,1.65
2,14.0,,1.72
3,15.0,76.9,1.84


In [108]:
# with axis=1 the subset holds row labels: drop columns that have NaN in row 0 or row 2
df.dropna(how='any', axis=1, subset=[0, 2])

Unnamed: 0,age,height
0,12.0,1.65
1,,
2,14.0,1.72
3,15.0,1.84


### Lecture 19 - Part 2: Merging Units With Column Names

In [109]:
# single-row frame holding the most frequent unit string of every column
units

Unnamed: 0,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,g,,g,g,mg,mg,mg,mcg,mcg,mg,...,g,g,g,g,mg,g,g,mg,mg,g


In [110]:
# list every column label, one per line
for column_label in units.columns:
    print(column_label)

serving_size
calories
total_fat
saturated_fat
cholesterol
sodium
choline
folate
folic_acid
niacin
pantothenic_acid
riboflavin
thiamin
vitamin_a
vitamin_a_rae
carotene_alpha
carotene_beta
cryptoxanthin_beta
lutein_zeaxanthin
lucopene
vitamin_b12
vitamin_b6
vitamin_c
vitamin_d
vitamin_e
tocopherol_alpha
vitamin_k
calcium
copper
irom
magnesium
manganese
phosphorous
potassium
selenium
zink
protein
alanine
arginine
aspartic_acid
cystine
glutamic_acid
glycine
histidine
hydroxyproline
isoleucine
leucine
lysine
methionine
phenylalanine
proline
serine
threonine
tryptophan
tyrosine
valine
carbohydrate
fiber
sugars
fructose
galactose
glucose
lactose
maltose
sucrose
fat
saturated_fatty_acids
monounsaturated_fatty_acids
polyunsaturated_fatty_acids
fatty_acids_total_trans
alcohol
ash
caffeine
theobromine
water


In [111]:
# print the unit stored in row 0 for every column, one per line
for unit in units.loc[0]:
    print(unit)

g

g
g
mg
mg
mg
mcg
mcg
mg
mg
mg
mg
IU
mcg
mcg
mcg
mcg
mcg

mcg
mg
mg
IU
mg
mg
mcg
mg
mg
mg
mg
mg
mg
mg
mcg
mg
g
g
g
g
g
g
g
g

g
g
g
g
g
g
g
g
g
g
g
g
g
g






g
g
g
g
mg
g
g
mg
mg
g


In [112]:
# turn empty unit strings into NaN, drop every column that contains NaN,
# then print the remaining units to check the result
units = units.replace(to_replace='', value=np.nan).dropna(axis=1)
for unit in units.loc[0]:
    print(unit)

g
g
g
mg
mg
mg
mcg
mcg
mg
mg
mg
mg
IU
mcg
mcg
mcg
mcg
mcg
mcg
mg
mg
IU
mg
mg
mcg
mg
mg
mg
mg
mg
mg
mg
mcg
mg
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
g
mg
g
g
mg
mg
g


In [113]:
# build the rename mapper with a dict comprehension: column label -> "label[unit]"
mapper = {label: f'{label}[{unit}]' for label, unit in units.loc[0].items()}
mapper

{'serving_size': 'serving_size[g]',
 'total_fat': 'total_fat[g]',
 'saturated_fat': 'saturated_fat[g]',
 'cholesterol': 'cholesterol[mg]',
 'sodium': 'sodium[mg]',
 'choline': 'choline[mg]',
 'folate': 'folate[mcg]',
 'folic_acid': 'folic_acid[mcg]',
 'niacin': 'niacin[mg]',
 'pantothenic_acid': 'pantothenic_acid[mg]',
 'riboflavin': 'riboflavin[mg]',
 'thiamin': 'thiamin[mg]',
 'vitamin_a': 'vitamin_a[IU]',
 'vitamin_a_rae': 'vitamin_a_rae[mcg]',
 'carotene_alpha': 'carotene_alpha[mcg]',
 'carotene_beta': 'carotene_beta[mcg]',
 'cryptoxanthin_beta': 'cryptoxanthin_beta[mcg]',
 'lutein_zeaxanthin': 'lutein_zeaxanthin[mcg]',
 'vitamin_b12': 'vitamin_b12[mcg]',
 'vitamin_b6': 'vitamin_b6[mg]',
 'vitamin_c': 'vitamin_c[mg]',
 'vitamin_d': 'vitamin_d[IU]',
 'vitamin_e': 'vitamin_e[mg]',
 'tocopherol_alpha': 'tocopherol_alpha[mg]',
 'vitamin_k': 'vitamin_k[mcg]',
 'calcium': 'calcium[mg]',
 'copper': 'copper[mg]',
 'irom': 'irom[mg]',
 'magnesium': 'magnesium[mg]',
 'manganese': 'manganese[

In [114]:
# apply the mapper to the column labels; inplace=True modifies nutrition itself,
# and columns that are not keys of the mapper keep their label
nutrition.rename(columns=mapper, inplace=True)
nutrition.head()

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,0.000 mg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,1.167 mg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,0.649 mg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,3.363 mg,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,0.063 mg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


### Lecture 20 - Part 3: Removing Units From Values

In [115]:
# remove every letter and whitespace character from the values, leaving only the numeric text
# (raw string: \s must reach the regex engine; in a plain literal it is an invalid string escape)
nutrition.replace(r'[a-zA-Z\s]', '', regex=True, inplace=True)
nutrition.head()

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100,381,0.1,,0,9.0,0.4,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100,691,72.0,6.2,0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100,25,0.2,,0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
"Teff, uncooked",100,367,2.4,0.4,0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100,144,2.0,1.2,1,46.0,7.7,4.0,0.0,0.063,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1


In [116]:
# check the dtypes: removing the unit text does not change the column dtype by itself
nutrition.dtypes

serving_size[g]     object
calories             int64
total_fat[g]        object
saturated_fat[g]    object
cholesterol[mg]     object
                     ...  
alcohol[g]          object
ash[g]              object
caffeine[mg]        object
theobromine[mg]     object
water[g]            object
Length: 75, dtype: object

In [117]:
# convert every column to float64 and rebind nutrition to the converted frame
nutrition = nutrition.astype(np.float64)
nutrition

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,0.000,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.180,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.30
"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,...,2.00,1.160,0.530,0.080,1.0,0.0,0.40,0.0,0.0,66.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, top round roast, round",100.0,125.0,3.5,1.4,62.0,54.0,64.5,4.0,0.0,6.422,...,3.50,1.353,1.554,0.244,62.0,0.0,1.11,0.0,0.0,72.51
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",100.0,206.0,8.9,3.9,109.0,50.0,0.0,0.0,0.0,7.680,...,8.86,3.860,3.480,0.520,109.0,0.0,1.60,0.0,0.0,59.95
"Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand",100.0,277.0,23.0,12.0,78.0,39.0,0.0,1.0,0.0,6.550,...,22.74,11.570,8.720,0.980,78.0,0.0,0.92,0.0,0.0,59.80
"Beef, raw, all grades, trimmed to 0"" fat, separable lean only, boneless, eye of round roast, round",100.0,121.0,3.0,1.1,60.0,53.0,64.2,4.0,0.0,6.720,...,3.04,1.086,1.266,0.233,60.0,0.0,1.10,0.0,0.0,73.43


In [118]:
# check the dtypes and non-null counts again after the conversion
nutrition.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8789 entries, Cornstarch to Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round
Data columns (total 75 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   serving_size[g]                 8789 non-null   float64
 1   calories                        8789 non-null   float64
 2   total_fat[g]                    8789 non-null   float64
 3   saturated_fat[g]                7199 non-null   float64
 4   cholesterol[mg]                 8789 non-null   float64
 5   sodium[mg]                      8789 non-null   float64
 6   choline[mg]                     8789 non-null   float64
 7   folate[mcg]                     8789 non-null   float64
 8   folic_acid[mcg]                 8789 non-null   float64
 9   niacin[mg]                      8789 non-null   float64
 10  pantothenic_acid[mg]            8789 non-null   float64
 1

### Lecture 21 - Filtering in 2D

In [119]:
# keep the rows whose index label contains the substring 'Octopus' (axis=0 targets the index)
nutrition.filter(like='Octopus', axis=0)

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.8,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0


In [120]:
# `like` is case sensitive: lowercase 'octopus' matches different rows than 'Octopus'
nutrition.filter(like='octopus', axis=0)

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [121]:
# the same selection expressed as a regular expression (still case sensitive)
nutrition.filter(regex='octopus', axis=0)

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [122]:
# a character class accepts both an upper-case and a lower-case first letter
nutrition.filter(regex='[Oo]ctopus', axis=0)

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.8,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [123]:
# the inline flag (?i) makes the whole pattern case insensitive
nutrition.filter(regex='(?i)octopus', axis=0)

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Octopus (Alaska Native),100.0,56.0,0.8,0.2,41.0,0.0,0.0,0.0,0.0,2.0,...,0.8,0.2,0.0,0.2,41.0,0.0,1.5,0.0,0.0,84.0
"Mollusks, raw, common, octopus",100.0,82.0,1.0,0.2,48.0,230.0,65.0,16.0,0.0,2.1,...,1.04,0.227,0.162,0.239,48.0,0.0,1.6,0.0,0.0,80.25
"Mollusks, moist heat, cooked, common, octopus",100.0,164.0,2.1,0.5,96.0,460.0,81.0,24.0,0.0,3.78,...,2.08,0.453,0.324,0.477,96.0,0.0,3.2,0.0,0.0,60.5


In [124]:
# filter along both dimensions by chaining two filter calls:
# first the rows (index labels matching the regex), then the columns (exact labels)
(
    nutrition
    .filter(regex='(?i)octopus', axis=0)
    .filter(items=['calories', 'total_fat[g]', 'sodium[mg]'], axis=1)
)

Unnamed: 0_level_0,calories,total_fat[g],sodium[mg]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Octopus (Alaska Native),56.0,0.8,0.0
"Mollusks, raw, common, octopus",82.0,1.0,230.0
"Mollusks, moist heat, cooked, common, octopus",164.0,2.1,460.0


In [125]:
# same result, but the column selection uses .loc instead of a second filter call
(
    nutrition
    .filter(regex='(?i)octopus', axis=0)
    .loc[:, ['calories', 'total_fat[g]', 'sodium[mg]']]
)

Unnamed: 0_level_0,calories,total_fat[g],sodium[mg]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Octopus (Alaska Native),56.0,0.8,0.0
"Mollusks, raw, common, octopus",82.0,1.0,230.0
"Mollusks, moist heat, cooked, common, octopus",164.0,2.1,460.0


### Lecture 22 - DataFrame Sorting

In [126]:
# sort the values of a single column (a Series); the default order is ascending
nutrition['vitamin_b12[mcg]'].sort_values()

name
Cornstarch                                                                           0.00
Apricots, stewed, sulfured, dehydrated (low-moisture)                                0.00
Cocoa, processed with alkali, unsweetened, dry powder                                0.00
Tomato products, with herbs and cheese, sauce, canned                                0.00
Mothbeans, without salt, boiled, cooked, mature seeds                                0.00
                                                                                    ...  
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Lamb, raw, liver, variety meats and by-products                                     90.05
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Name:

In [127]:
# Sort the whole DataFrame by one column, highest calories first.
nutrition.sort_values('calories', ascending=False)

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Fat, mutton tallow",100.0,902.0,100.0,47.0,102.0,0.0,79.8,0.0,0.0,0.00,...,100.0,47.300,40.600,7.800,102.0,0.0,0.00,0.0,0.0,0.00
"Fish oil, salmon",100.0,902.0,100.0,20.0,485.0,0.0,0.0,0.0,0.0,0.00,...,100.0,19.872,29.037,40.324,485.0,0.0,0.00,0.0,0.0,0.00
Lard,100.0,902.0,100.0,39.0,95.0,0.0,49.7,0.0,0.0,0.00,...,100.0,39.200,45.100,11.200,95.0,0.0,0.00,0.0,0.0,0.00
"Fat, beef tallow",100.0,902.0,100.0,50.0,109.0,0.0,79.8,0.0,0.0,0.00,...,100.0,49.800,41.800,4.000,109.0,0.0,0.00,0.0,0.0,0.00
"Fish oil, cod liver",100.0,902.0,100.0,23.0,570.0,0.0,0.0,0.0,0.0,0.00,...,100.0,22.608,46.711,22.541,570.0,0.0,0.00,0.0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beverages, decaffeinated, brewed, green, tea",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.000,0.000,0.000,0.0,0.0,0.00,0.0,0.0,99.93
"Beverages, caffeine free, cola, ZEVIA",100.0,0.0,0.0,,0.0,6.0,0.0,0.0,0.0,0.00,...,0.0,0.000,0.000,0.000,0.0,0.0,0.01,0.0,0.0,98.87
"Carbonated beverage, without caffeine, with sodium saccharin, other than cola or pepper, low calorie",100.0,0.0,0.0,,0.0,16.0,0.0,0.0,0.0,0.00,...,0.0,0.000,0.000,0.000,0.0,0.0,0.10,0.0,0.0,99.80
"Beverages, unsweetened, ready to drink, green, tea",100.0,0.0,0.0,,0.0,7.0,0.0,0.0,0.0,0.00,...,0.0,0.000,0.000,0.000,0.0,0.0,0.12,12.0,0.0,99.88


In [128]:
# Several sort keys: order by calories, then by total fat within equal calories.
# A single bool for `ascending` applies to every key (both descending here).
nutrition.sort_values(
    by=['calories', 'total_fat[g]'],
    ascending=False,
)

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Fish oil, cod liver",100.0,902.0,100.0,23.0,570.0,0.0,0.0,0.0,0.0,0.000,...,100.0,22.608,46.711,22.541,570.0,0.0,0.00,0.0,0.0,0.00
"Fish oil, menhaden",100.0,902.0,100.0,30.0,521.0,0.0,0.0,0.0,0.0,0.000,...,100.0,30.427,26.694,34.197,521.0,0.0,0.00,0.0,0.0,0.00
"Fat, mutton tallow",100.0,902.0,100.0,47.0,102.0,0.0,79.8,0.0,0.0,0.000,...,100.0,47.300,40.600,7.800,102.0,0.0,0.00,0.0,0.0,0.00
"Fish oil, sardine",100.0,902.0,100.0,30.0,710.0,0.0,0.0,0.0,0.0,0.000,...,100.0,29.892,33.841,31.867,710.0,0.0,0.00,0.0,0.0,0.00
"Fish oil, herring",100.0,902.0,100.0,21.0,766.0,0.0,0.0,0.0,0.0,0.000,...,100.0,21.290,56.564,15.604,766.0,0.0,0.00,0.0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beverages, contains caffeine, with aspartame, other than cola or pepper, low calorie, carbonated",100.0,0.0,0.0,,0.0,6.0,0.0,0.0,0.0,0.000,...,0.0,0.000,0.000,0.000,0.0,0.0,0.10,15.0,0.0,99.80
"Beverages,,Gerolsteiner naturally sparkling mineral water, GEROLSTEINER BRUNNEN GmbH & Co. KG",100.0,0.0,0.0,,0.0,13.0,0.0,0.0,0.0,0.000,...,0.0,0.000,0.000,0.000,0.0,0.0,0.05,0.0,0.0,99.95
"Beverages, fortified, Revive Fruit Punch, Glaceau Vitamin Water, The COCA-COLA company",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,3.384,...,0.0,0.000,0.000,0.000,0.0,0.0,0.15,0.0,0.0,99.17
"Beverages, mineral bottled water, naturally sparkling, GEROLSTEINER BRUNNEN GmbH & Co. KG (Gerolsteiner)",100.0,0.0,0.0,,0.0,13.0,0.0,0.0,0.0,0.000,...,0.0,0.000,0.000,0.000,0.0,0.0,0.05,0.0,0.0,99.95


In [129]:
# A direction per key: calories descending, total fat ascending.
# The `ascending` list lines up position-by-position with `by`.
nutrition.sort_values(
    by=['calories', 'total_fat[g]'],
    ascending=[False, True],
)

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Fish oil, cod liver",100.0,902.0,100.0,23.0,570.0,0.0,0.0,0.0,0.0,0.000,...,100.0,22.608,46.711,22.541,570.0,0.0,0.00,0.0,0.0,0.00
"Fish oil, menhaden",100.0,902.0,100.0,30.0,521.0,0.0,0.0,0.0,0.0,0.000,...,100.0,30.427,26.694,34.197,521.0,0.0,0.00,0.0,0.0,0.00
"Fat, mutton tallow",100.0,902.0,100.0,47.0,102.0,0.0,79.8,0.0,0.0,0.000,...,100.0,47.300,40.600,7.800,102.0,0.0,0.00,0.0,0.0,0.00
"Fish oil, sardine",100.0,902.0,100.0,30.0,710.0,0.0,0.0,0.0,0.0,0.000,...,100.0,29.892,33.841,31.867,710.0,0.0,0.00,0.0,0.0,0.00
"Fish oil, herring",100.0,902.0,100.0,21.0,766.0,0.0,0.0,0.0,0.0,0.000,...,100.0,21.290,56.564,15.604,766.0,0.0,0.00,0.0,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beverages, contains caffeine, with aspartame, other than cola or pepper, low calorie, carbonated",100.0,0.0,0.0,,0.0,6.0,0.0,0.0,0.0,0.000,...,0.0,0.000,0.000,0.000,0.0,0.0,0.10,15.0,0.0,99.80
"Beverages,,Gerolsteiner naturally sparkling mineral water, GEROLSTEINER BRUNNEN GmbH & Co. KG",100.0,0.0,0.0,,0.0,13.0,0.0,0.0,0.0,0.000,...,0.0,0.000,0.000,0.000,0.0,0.0,0.05,0.0,0.0,99.95
"Beverages, fortified, Revive Fruit Punch, Glaceau Vitamin Water, The COCA-COLA company",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,3.384,...,0.0,0.000,0.000,0.000,0.0,0.0,0.15,0.0,0.0,99.17
"Beverages, mineral bottled water, naturally sparkling, GEROLSTEINER BRUNNEN GmbH & Co. KG (Gerolsteiner)",100.0,0.0,0.0,,0.0,13.0,0.0,0.0,0.0,0.000,...,0.0,0.000,0.000,0.000,0.0,0.0,0.05,0.0,0.0,99.95


### Lecture 23 - Using Series between() With DataFrames

In [130]:
# between() is a Series method, so select a single (1-D) column first;
# it returns a boolean mask.
nutrition['calories'].between(left=20, right=60)

name
Cornstarch                                                                                            False
Nuts, pecans                                                                                          False
Eggplant, raw                                                                                          True
Teff, uncooked                                                                                        False
Sherbet, orange                                                                                       False
                                                                                                      ...  
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    False
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    False
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    False
Beef, raw, all grades, 

In [131]:
# Shape of the boolean mask (one-dimensional).
nutrition['calories'].between(left=20, right=60).shape

(8789,)

In [132]:
# Index the DataFrame with the mask: only the rows where it is True are kept.
nutrition.loc[nutrition['calories'].between(20, 60)]

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.30
"Cauliflower, raw",100.0,25.0,0.3,0.1,0.0,30.0,44.3,57.0,0.0,0.507,...,0.28,0.130,0.034,0.031,0.0,0.0,0.76,0.0,0.0,92.07
"Taro leaves, raw",100.0,42.0,0.7,0.2,0.0,3.0,12.8,126.0,0.0,1.513,...,0.74,0.151,0.060,0.307,0.0,0.0,1.92,0.0,0.0,85.66
"PACE, Picante Sauce",100.0,25.0,0.0,,0.0,781.0,0.0,0.0,0.0,0.000,...,0.00,0.000,0.000,0.000,0.0,0.0,3.85,0.0,0.0,89.90
"Mango nectar, canned",100.0,51.0,0.1,,0.0,5.0,1.5,7.0,0.0,0.080,...,0.06,0.014,0.022,0.011,0.0,0.0,0.08,0.0,0.0,86.63
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beverages, added calcium, high vitamin C, greater than 3% fruit juice, reduced sugar, Fruit flavored drink",100.0,29.0,0.4,,0.0,25.0,0.1,2.0,0.0,0.000,...,0.37,0.000,0.000,0.000,0.0,0.0,0.06,0.0,0.0,93.00
"Ruby Red grapefruit juice blend (grapefruit, grape, apple), with added vitamin C, bottled, OCEAN SPRAY",100.0,44.0,0.1,,0.0,8.0,0.0,0.0,0.0,0.000,...,0.10,0.008,0.008,0.014,0.0,0.0,0.27,0.0,0.0,88.60
"Beverages, prepared with water, frozen concentrate, with juice and pulp, breakfast type, Orange drink",100.0,45.0,0.0,,0.0,10.0,0.0,0.0,0.0,0.253,...,0.00,0.001,0.001,0.001,0.0,0.0,0.48,0.0,0.0,88.08
"Apple juice, diluted with 3 volume water without added ascorbic acid, unsweetened, frozen concentrate",100.0,47.0,0.1,,0.0,7.0,1.8,0.0,0.0,0.038,...,0.10,0.018,0.002,0.031,0.0,0.0,0.32,0.0,0.0,87.90


In [133]:
# Combine the mask with sample(): draw 4 random rows from the filtered frame.
# No random_state is passed, so the rows differ from run to run.
nutrition.loc[nutrition['calories'].between(20, 60)].sample(n=4)

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Cress, with salt, drained, boiled, cooked, garden",100.0,23.0,0.6,,0.0,244.0,13.8,37.0,0.0,0.8,...,0.6,0.02,0.205,0.196,0.0,0.0,1.2,0.0,0.0,92.5
"Beverages, assorted fruit flavors, sweetened, bottles, Water with added vitamins and minerals",100.0,22.0,0.0,,0.0,0.0,0.0,8.0,8.0,0.844,...,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,94.44
"Carrot juice, canned",100.0,40.0,0.2,,0.0,66.0,9.9,4.0,0.0,0.386,...,0.15,0.027,0.007,0.071,0.0,0.0,0.75,0.0,0.0,88.87
"Peas, without salt, drained, boiled, edible-podded",100.0,42.0,0.2,,0.0,4.0,17.4,29.0,0.0,0.539,...,0.23,0.044,0.023,0.1,0.0,0.0,0.54,0.0,0.0,88.91


### BONUS - Min, Max and Idx[MinMax], And Good Foods

In [134]:
# Row-wise minimum: the smallest value across the columns for each food.
nutrition.min(axis='columns')

name
Cornstarch                                                                                            0.0
Nuts, pecans                                                                                          0.0
Eggplant, raw                                                                                         0.0
Teff, uncooked                                                                                        0.0
Sherbet, orange                                                                                       0.0
                                                                                                     ... 
Beef, raw, all grades, trimmed to 0" fat, separable lean and fat, boneless, top round roast, round    0.0
Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand    0.0
Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand    0.0
Beef, raw, all grades, trimmed to 0" fat,

In [135]:
# Largest value found in a single column.
nutrition.loc[:, 'potassium[mg]'].max()

16500.0

In [136]:
# idxmax() gives the index label (the food name) that holds that maximum.
nutrition.loc[:, 'potassium[mg]'].idxmax()

'Leavening agents, cream of tartar'

In [137]:
# Ten foods with the most potassium: sort descending, keep the first 10.
(
    nutrition['potassium[mg]']
    .sort_values(ascending=False)
    .head(n=10)
)

name
Leavening agents, cream of tartar                         16500.0
Leavening agents, low-sodium, baking powder               10100.0
Parsley, freeze-dried                                      6300.0
Beverages, unsweetened, decaffeinated, instant, tea        6040.0
Beverages, powder, unsweetened, instant, tea               6040.0
Spices, dried, chervil                                     4740.0
Spices, dried, coriander leaf                              4466.0
Celery flakes, dried                                       4388.0
Beverages, powder, regular, instant, coffee                3535.0
Beverages, half the caffeine, regular, instant, coffee     3535.0
Name: potassium[mg], dtype: float64

In [138]:
# Potassium-to-sodium ratio per food, highest first (the target ratio is 16).
# Zeros in either column are swapped for 1 before dividing; this avoids
# division by zero, but the ratio for those foods is not a true ratio.
k_to_na = (
    nutrition['potassium[mg]'].replace(0, 1)
    .div(nutrition['sodium[mg]'].replace(0, 1))
    .sort_values(ascending=False)
)
k_to_na

name
Peanut flour, low fat                                         1358.000000
Nuts, raw, pistachio nuts                                     1025.000000
Beverages, reduced calorie, with whitener, instant, coffee     909.000000
Soybeans, raw, mature seeds                                    898.500000
Soy meal, raw, defatted                                        830.000000
                                                                 ...     
Seasoning mix, original, chili, dry                              0.000217
Salt, table                                                      0.000206
PACE, Dry Taco Seasoning Mix                                     0.000124
Seasoning mix, coriander & annatto, sazon, dry                   0.000059
Leavening agents, baking soda                                    0.000037
Length: 8789, dtype: float64

In [139]:
# Sample 10 foods whose ratio lies between 14 and 18.
# No random_state is passed, so the sample differs from run to run.
k_to_na.loc[k_to_na.between(14, 18)].sample(n=10)

name
Waterchestnuts, solids and liquids, canned, chinese                                                                  14.750000
Finger snacks, apple and cinnamon, GERBER GRADUATE PUFFS                                                             15.076923
Grapes, solids and liquids, water pack, thompson seedless, canned                                                    17.833333
Spices, dried, dill weed                                                                                             15.903846
Peppers, without salt, drained, boiled, chopped, frozen, red, sweet                                                  18.000000
Peppers, without salt, drained, boiled, chopped, frozen, green, sweet                                                18.000000
Nuts, canned (liquid expressed from grated meat and water), coconut milk                                             16.923077
Brussels sprouts, raw                                                                                     

### Lecture 24 - DataFrame nlargest() And nsmallest()

In [140]:
# DataFrame.nlargest returns whole rows (a DataFrame) ranked by the given column.
nutrition.nlargest(n=10, columns='potassium[mg]')

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Leavening agents, cream of tartar",100.0,258.0,0.0,,0.0,52.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,36.8,0.0,0.0,1.7
"Leavening agents, low-sodium, baking powder",100.0,97.0,0.4,0.1,0.0,90.0,0.0,0.0,0.0,0.0,...,0.4,0.073,0.006,0.121,0.0,0.0,46.4,0.0,0.0,6.2
"Parsley, freeze-dried",100.0,271.0,5.2,,0.0,391.0,0.0,194.0,0.0,10.4,...,5.2,0.0,0.0,0.0,0.0,0.0,19.12,0.0,0.0,2.0
"Beverages, powder, unsweetened, instant, tea",100.0,315.0,0.0,,0.0,72.0,118.3,103.0,0.0,10.8,...,0.0,0.0,0.0,0.0,0.0,0.0,16.04,5714.0,71.0,5.09
"Beverages, unsweetened, decaffeinated, instant, tea",100.0,315.0,0.0,,0.0,72.0,118.3,103.0,0.0,10.8,...,0.0,0.0,0.0,0.0,0.0,0.0,16.04,169.0,11.0,5.09
"Spices, dried, chervil",100.0,237.0,3.9,0.2,0.0,83.0,0.0,274.0,0.0,5.4,...,3.9,0.169,1.399,1.8,0.0,0.0,16.6,0.0,0.0,7.2
"Spices, dried, coriander leaf",100.0,279.0,4.8,0.1,0.0,211.0,97.1,274.0,0.0,10.707,...,4.78,0.115,2.232,0.328,0.0,0.0,14.08,0.0,0.0,7.3
"Celery flakes, dried",100.0,319.0,2.1,0.6,0.0,1435.0,122.3,107.0,0.0,4.64,...,2.1,0.555,0.405,1.035,0.0,0.0,13.9,0.0,0.0,9.0
"Beverages, powder, regular, instant, coffee",100.0,353.0,0.5,0.2,0.0,37.0,101.9,0.0,0.0,28.173,...,0.5,0.197,0.041,0.196,0.0,0.0,8.8,3142.0,0.0,3.1
"Beverages, half the caffeine, regular, instant, coffee",100.0,352.0,0.5,0.2,0.0,37.0,101.9,0.0,0.0,28.173,...,0.5,0.197,0.041,0.196,0.0,0.0,8.8,1571.0,0.0,3.1


In [141]:
# Series.nlargest returns a Series holding only the top values of that column.
nutrition.loc[:, 'potassium[mg]'].nlargest(n=10)

name
Leavening agents, cream of tartar                         16500.0
Leavening agents, low-sodium, baking powder               10100.0
Parsley, freeze-dried                                      6300.0
Beverages, powder, unsweetened, instant, tea               6040.0
Beverages, unsweetened, decaffeinated, instant, tea        6040.0
Spices, dried, chervil                                     4740.0
Spices, dried, coriander leaf                              4466.0
Celery flakes, dried                                       4388.0
Beverages, powder, regular, instant, coffee                3535.0
Beverages, half the caffeine, regular, instant, coffee     3535.0
Name: potassium[mg], dtype: float64

In [142]:
# More than one column can be given: rank by potassium first, then by calories.
nutrition.nsmallest(n=10, columns=['potassium[mg]', 'calories'])

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Leavening agents, baking soda",100.0,0.0,0.0,,0.0,27360.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,36.9,0.0,0.0,0.2
"Beverages, well, tap, water",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,99.9
"Water, generic, bottled",100.0,0.0,0.0,,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,99.98
"Beverages, cola, ZEVIA",100.0,0.0,0.0,,0.0,6.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,98.36
"Beverages, PERRIER, bottled, water",100.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,99.9
"Beverages, drinking, tap, water",100.0,0.0,0.0,,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,99.9
"Beverages, POLAND SPRING, bottled, water",100.0,0.0,0.0,,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0
"Beverages, caffeine free, cola, ZEVIA",100.0,0.0,0.0,,0.0,6.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,98.87
"Beverages, ready to drink, black, tea",100.0,0.0,0.0,,0.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.11,11.0,0.0,97.89
"Water, NAYA, non-carbonated, bottled",100.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,100.0


### Lecture 25 - Skill Challenge

In [143]:
# 1. Find the 10 foods that have the most vitamin b12. What do they have in common?
# Alternative spellings:
#   nutrition.nlargest(10, columns=['vitamin_b12[mcg]'])['vitamin_b12[mcg]']
#   nutrition.loc[:, 'vitamin_b12[mcg]'].nlargest(10)
nutrition['vitamin_b12[mcg]'].nlargest(n=10)
# In common: all are animal products, mostly liver and other organ meats.

name
Mollusks, moist heat, cooked, mixed species, clam                                   98.89
Beef, boiled, cooked, variety meats and by-products liver, imported, New Zealand    96.00
Lamb, raw, liver, variety meats and by-products                                     90.05
Lamb, pan-fried, cooked, liver, variety meats and by-products                       85.70
Veal, braised, cooked, liver, variety meats and by-products                         84.60
Beef, raw, liver, variety meats and by-products, imported, New Zealand              84.50
Beef, pan-fried, cooked, liver, variety meats and by-products                       83.13
Lamb, braised, cooked, kidneys, variety meats and by-products                       78.90
Lamb, braised, cooked, liver, variety meats and by-products                         76.50
Veal, pan-fried, cooked, liver, variety meats and by-products                       72.50
Name: vitamin_b12[mcg], dtype: float64

In [144]:
# 2. Isolate the foods in the dataset that contain or are based on eggplant.
# Which of them has the most sodium?
# One-step alternative:
#   nutrition.filter(regex='(?i)eggplant', axis=0).nlargest(1, columns=['sodium[mg]'])
eggplant_foods = nutrition.filter(axis=0, regex='(?i)eggplant')
eggplant_foods

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,0.649,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
"Eggplant, pickled",100.0,49.0,0.7,0.1,0.0,1674.0,11.9,20.0,0.0,0.66,...,0.7,0.14,0.063,0.294,0.0,0.0,1.73,0.0,0.0,86.9
"Eggplant, with salt, drained, boiled, cooked",100.0,33.0,0.2,,0.0,239.0,9.4,14.0,0.0,0.6,...,0.23,0.044,0.02,0.093,0.0,0.0,1.13,0.0,0.0,89.67
"Eggplant, without salt, drained, boiled, cooked",100.0,35.0,0.2,,0.0,1.0,9.4,14.0,0.0,0.6,...,0.23,0.044,0.02,0.093,0.0,0.0,0.54,0.0,0.0,89.67


In [145]:
# Eggplant food with the most sodium.
# Alternative that returns the whole row:
#   eggplant_foods.nlargest(1, columns=['sodium[mg]'])
eggplant_foods.loc[:, 'sodium[mg]'].nlargest(n=1)

name
Eggplant, pickled    1674.0
Name: sodium[mg], dtype: float64

In [146]:
# 3. Select a slice of the dataframe that contains 4 random rows and 2 random cols
# Sample along the rows first, then along the columns of that result.
nutrition.sample(n=4, axis='index').sample(n=2, axis='columns')

Unnamed: 0_level_0,tocopherol_alpha[mg],vitamin_b6[mg]
name,Unnamed: 1_level_1,Unnamed: 2_level_1
"Pie fillings, cherry, canned",0.0,0.037
"Infant formula, with ARA and DHA, powder, with iron, LACTOFREE LIPIL, ENFAMIL, MEAD JOHNSON",4.7,0.31
"Milk, with added vitamin A and vitamin D, 2% milkfat, fluid, reduced fat",0.03,0.038
"Lamb, raw, choice, trimmed to 1/4"" fat, separable lean only, rib, domestic",0.19,0.16


### Lecture 26 - Skill Challenge

In [147]:
# 1. Remove all the food items that contain at least one NaN.
# Do this in a way that modifies the dataframe, i.e. the changes stick.
# How many food items remain after exclusions?
# dropna() without inplace returns a new frame; binding it to skill_nutr makes
# the change stick under that name. `nutrition` itself is left untouched, so the
# earlier cells still reproduce their output if they are re-run after this one.
# (A plain `skill_nutr = nutrition` would only alias the same object, and an
# in-place dropna would then also remove the rows from `nutrition`.)
skill_nutr = nutrition.dropna(how='any', axis=0)
skill_nutr

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,...,71.97,6.180,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,...,2.00,1.160,0.530,0.080,1.0,0.0,0.40,0.0,0.0,66.10
"Cauliflower, raw",100.0,25.0,0.3,0.1,0.0,30.0,44.3,57.0,0.0,0.507,...,0.28,0.130,0.034,0.031,0.0,0.0,0.76,0.0,0.0,92.07
"Taro leaves, raw",100.0,42.0,0.7,0.2,0.0,3.0,12.8,126.0,0.0,1.513,...,0.74,0.151,0.060,0.307,0.0,0.0,1.92,0.0,0.0,85.66
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beef, raw, all grades, trimmed to 0"" fat, separable lean and fat, boneless, top round roast, round",100.0,125.0,3.5,1.4,62.0,54.0,64.5,4.0,0.0,6.422,...,3.50,1.353,1.554,0.244,62.0,0.0,1.11,0.0,0.0,72.51
"Lamb, cooked, separable lean only, composite of trimmed retail cuts, frozen, imported, New Zealand",100.0,206.0,8.9,3.9,109.0,50.0,0.0,0.0,0.0,7.680,...,8.86,3.860,3.480,0.520,109.0,0.0,1.60,0.0,0.0,59.95
"Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand",100.0,277.0,23.0,12.0,78.0,39.0,0.0,1.0,0.0,6.550,...,22.74,11.570,8.720,0.980,78.0,0.0,0.92,0.0,0.0,59.80
"Beef, raw, all grades, trimmed to 0"" fat, separable lean only, boneless, eye of round roast, round",100.0,121.0,3.0,1.1,60.0,53.0,64.2,4.0,0.0,6.720,...,3.04,1.086,1.266,0.233,60.0,0.0,1.10,0.0,0.0,73.43


In [148]:
# Number of foods left after the rows with NaN were dropped.
# Same as len(skill_nutr); skill_nutr.shape gives (rows, columns).
skill_nutr.shape[0]

7199

In [149]:
# 2. From the remaining records, isolate those that have between 20 and 40 mg of vitamin c per 100g serving.
# Of these foods, which one is the least caloric, i.e. has the minimum calories?
vit_c = skill_nutr.loc[skill_nutr['vitamin_c[mg]'].between(left=20, right=40)]
vit_c

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Broccoli, raw, chinese",100.0,30.0,0.8,0.1,0.0,7.0,26.5,104.0,0.0,0.459,...,0.76,0.116,0.053,0.347,0.0,0.0,0.83,0.0,0.0,92.55
"Broccoli raab, cooked",100.0,33.0,0.5,0.1,0.0,56.0,33.6,71.0,0.0,2.015,...,0.52,0.057,0.030,0.150,0.0,0.0,1.11,0.0,0.0,91.41
"Horseradish, prepared",100.0,48.0,0.7,0.1,0.0,420.0,6.5,57.0,0.0,0.386,...,0.69,0.090,0.130,0.339,0.0,0.0,1.76,0.0,0.0,85.08
"Spices, white, pepper",100.0,296.0,2.1,0.6,0.0,5.0,0.0,10.0,0.0,0.212,...,2.12,0.626,0.789,0.616,0.0,0.0,1.59,0.0,0.0,11.42
"Dandelion greens, raw",100.0,45.0,0.7,0.2,0.0,76.0,35.3,27.0,0.0,0.806,...,0.70,0.170,0.014,0.306,0.0,0.0,1.80,0.0,0.0,85.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Beverages, fortified, ready to drink, milk and soy based, chocolate drink",100.0,101.0,1.7,0.4,4.0,63.0,1.6,42.0,42.0,1.688,...,1.69,0.422,1.154,0.057,4.0,0.0,0.97,0.0,13.0,75.82
"Cereals ready-to-eat, Peanut Butter, Multi Grain CHEERIOS, GENERAL MILLS",100.0,390.0,6.2,0.9,0.0,447.0,0.0,714.0,0.0,17.890,...,6.19,0.890,2.700,2.290,0.0,0.0,3.09,0.0,0.0,2.00
"Infant formula, with ARA and DHA, ready-to-feed, ADVANCE, NATURAL CARE, SIMILAC, ABBOTT NUTRITION",100.0,78.0,4.2,2.5,2.0,34.0,6.0,29.0,29.0,3.902,...,4.24,2.544,0.402,0.842,2.0,0.0,0.75,0.0,0.0,85.07
"Cereals ready-to-eat, KELLOGG'S SMART START Strong Heart Antioxidants Cereal, KELLOGG",100.0,371.0,1.5,0.3,0.0,398.0,14.9,800.0,781.0,40.000,...,1.50,0.300,0.100,0.500,0.0,0.0,1.70,0.0,0.0,2.50


In [150]:
# Least caloric food among those selected by vitamin C level.
vit_c.loc[:, 'calories'].nsmallest(n=1)

name
Asparagus, with salt, drained, boiled, cooked, frozen    18.0
Name: calories, dtype: float64

In [151]:
# 3. How many food items in the dataframe have vitamin c levels of between 2 and 3 standard deviations
# (inclusive) above the mean
# Start with the mean vitamin C level.
mean_vit_c = skill_nutr.loc[:, 'vitamin_c[mg]'].mean()
mean_vit_c

5.553368523406029

In [152]:
# Lower bound: the mean plus two standard deviations.
mp2sd = mean_vit_c + 2 * skill_nutr['vitamin_c[mg]'].std()
mp2sd

97.76213896818041

In [153]:
# Upper bound: the mean plus three standard deviations.
mp3sd = mean_vit_c + 3 * skill_nutr['vitamin_c[mg]'].std()
mp3sd

143.8665241905676

In [154]:
# Foods whose vitamin C lies between mean + 2 sd and mean + 3 sd;
# between() includes both endpoints by default.
skill_nutr.loc[skill_nutr['vitamin_c[mg]'].between(mp2sd, mp3sd)]

Unnamed: 0_level_0,serving_size[g],calories,total_fat[g],saturated_fat[g],cholesterol[mg],sodium[mg],choline[mg],folate[mcg],folic_acid[mcg],niacin[mg],...,fat[g],saturated_fatty_acids[g],monounsaturated_fatty_acids[g],polyunsaturated_fatty_acids[g],fatty_acids_total_trans[mg],alcohol[g],ash[g],caffeine[mg],theobromine[mg],water[g]
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Peppers, raw, jalapeno",100.0,29.0,0.4,0.1,0.0,3.0,7.5,27.0,0.0,1.28,...,0.37,0.092,0.029,0.112,0.0,0.0,0.53,0.0,0.0,91.69
"Kale, raw, scotch",100.0,42.0,0.6,0.1,0.0,70.0,0.0,28.0,0.0,1.3,...,0.6,0.078,0.045,0.289,0.0,0.0,1.28,0.0,0.0,87.0
"Parsley, fresh",100.0,36.0,0.8,0.1,0.0,56.0,12.8,152.0,0.0,1.313,...,0.79,0.132,0.295,0.124,0.0,0.0,2.2,0.0,0.0,87.71
Tomato powder,100.0,302.0,0.4,0.1,0.0,134.0,0.0,120.0,0.0,9.133,...,0.44,0.062,0.066,0.179,0.0,0.0,8.91,0.0,0.0,3.06
"Kale, raw",100.0,49.0,0.9,0.1,0.0,38.0,0.8,141.0,0.0,1.0,...,0.93,0.091,0.052,0.338,0.0,0.0,2.01,0.0,0.0,84.04
"Snacks, rolls, fruit leather",100.0,371.0,3.0,0.7,0.0,317.0,13.2,2.0,0.0,0.1,...,3.0,0.672,1.48,0.552,0.0,0.0,0.9,0.0,0.0,10.2
"Spices, dried, parsley",100.0,292.0,5.5,1.4,0.0,452.0,97.1,180.0,0.0,9.943,...,5.48,1.378,0.761,3.124,0.0,0.0,11.36,0.0,0.0,5.89
"Tomatoes, drained, packed in oil, sun-dried",100.0,213.0,14.0,1.9,0.0,266.0,0.0,23.0,0.0,3.63,...,14.08,1.893,8.663,2.06,0.0,0.0,3.7,0.0,0.0,53.83
"Snacks, with vitamin C, pieces, fruit leather",100.0,373.0,3.5,1.0,0.0,317.0,0.0,14.0,0.0,0.1,...,3.5,0.99,1.724,0.093,0.0,0.0,1.0,0.0,0.0,10.2
"Cereals ready-to-eat, HEALTH VALLEY, OAT BRAN FLAKES",100.0,380.0,3.0,1.0,0.0,380.0,22.9,200.0,181.0,10.0,...,3.0,1.0,0.704,0.897,0.0,0.0,5.53,0.0,0.0,3.4
