In [1]:
import pandas as pd

## Loading Data

Let's take a look at some data and see how pandas can help us navigate.  

This dataset about cereal lives on [Kaggle](https://www.kaggle.com/crawford/80-cereals) and originally comes from [these project datasets](https://perso.telecom-paristech.fr/eagan/class/igr204/datasets).  

In [2]:
df = pd.read_csv('https://bit.ly/2JRmGC2')

In [4]:
df.shape

(77, 16)

In [14]:
df.columns

Index(['mfr', 'type', 'calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo',
       'sugars', 'potass', 'vitamins', 'shelf', 'weight', 'cups', 'rating'],
      dtype='object')

In [6]:
df.sample(3, random_state = 9)

Unnamed: 0,name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
17,Corn Pops,K,C,110,1,0,90,1.0,13.0,12,20,25,2,1.0,1.0,35.782791
20,Cream of Wheat (Quick),N,H,100,3,0,80,1.0,21.0,0,-1,0,2,1.0,1.0,64.533816
28,Fruitful Bran,K,C,120,3,0,240,5.0,14.0,12,190,25,3,1.33,0.67,41.015492


In [8]:
df.tail()

Unnamed: 0,name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
72,Triples,G,C,110,2,1,250,0.0,21.0,3,60,25,3,1.0,0.75,39.106174
73,Trix,G,C,110,1,1,140,0.0,13.0,12,25,25,2,1.0,1.0,27.753301
74,Wheat Chex,R,C,100,3,1,230,3.0,17.0,3,115,25,1,1.0,0.67,49.787445
75,Wheaties,G,C,100,3,1,200,3.0,17.0,3,110,25,1,1.0,1.0,51.592193
76,Wheaties Honey Gold,G,C,110,2,1,200,1.0,16.0,8,60,25,1,1.0,0.75,36.187559


In [7]:
df.head()

Unnamed: 0,name,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
0,100% Bran,N,C,70,4,1,130,10.0,5.0,6,280,25,3,1.0,0.33,68.402973
1,100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8,135,0,3,1.0,1.0,33.983679
2,All-Bran,K,C,70,4,1,260,9.0,7.0,5,320,25,3,1.0,0.33,59.425505
3,All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0,330,25,3,1.0,0.5,93.704912
4,Almond Delight,R,C,110,2,2,200,1.0,14.0,8,-1,25,3,1.0,0.75,34.384843


### Notice row 4 of Potass == -1 meaning it is NULL
#### Re-import specifying -1 as null and cerial names as Index

In [9]:
df = pd.read_csv('https://bit.ly/2JRmGC2', na_values = -1, index_col = 0)

#### Note, one less feature because Ceral Name is an index not a Feature

In [10]:
df.shape

(77, 15)

#### Note, Row 4 now contains "NaN" instead of -1

In [11]:
df.head()

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6.0,280.0,25,3,1.0,0.33,68.402973
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8.0,135.0,0,3,1.0,1.0,33.983679
All-Bran,K,C,70,4,1,260,9.0,7.0,5.0,320.0,25,3,1.0,0.33,59.425505
All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0.0,330.0,25,3,1.0,0.5,93.704912
Almond Delight,R,C,110,2,2,200,1.0,14.0,8.0,,25,3,1.0,0.75,34.384843


In [15]:
df.tail()

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Triples,G,C,110,2,1,250,0.0,21.0,3.0,60.0,25,3,1.0,0.75,39.106174
Trix,G,C,110,1,1,140,0.0,13.0,12.0,25.0,25,2,1.0,1.0,27.753301
Wheat Chex,R,C,100,3,1,230,3.0,17.0,3.0,115.0,25,1,1.0,0.67,49.787445
Wheaties,G,C,100,3,1,200,3.0,17.0,3.0,110.0,25,1,1.0,1.0,51.592193
Wheaties Honey Gold,G,C,110,2,1,200,1.0,16.0,8.0,60.0,25,1,1.0,0.75,36.187559


In [18]:
df.sample(3, random_state=9)

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Corn Pops,K,C,110,1,0,90,1.0,13.0,12.0,20.0,25,2,1.0,1.0,35.782791
Cream of Wheat (Quick),N,H,100,3,0,80,1.0,21.0,0.0,,0,2,1.0,1.0,64.533816
Fruitful Bran,K,C,120,3,0,240,5.0,14.0,12.0,190.0,25,3,1.33,0.67,41.015492


In [17]:
df.columns

Index(['mfr', 'type', 'calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo',
       'sugars', 'potass', 'vitamins', 'shelf', 'weight', 'cups', 'rating'],
      dtype='object')

In [16]:
df.dtypes

mfr          object
type         object
calories      int64
protein       int64
fat           int64
sodium        int64
fiber       float64
carbo       float64
sugars      float64
potass      float64
vitamins      int64
shelf         int64
weight      float64
cups        float64
rating      float64
dtype: object

In [19]:
df.fat.head

name
100% Bran                    1
100% Natural Bran            5
All-Bran                     1
All-Bran with Extra Fiber    0
Almond Delight               2
                            ..
Triples                      1
Trix                         1
Wheat Chex                   1
Wheaties                     1
Wheaties Honey Gold          1
Name: fat, Length: 77, dtype: int64

In [20]:
df['fat']

name
100% Bran                    1
100% Natural Bran            5
All-Bran                     1
All-Bran with Extra Fiber    0
Almond Delight               2
                            ..
Triples                      1
Trix                         1
Wheat Chex                   1
Wheaties                     1
Wheaties Honey Gold          1
Name: fat, Length: 77, dtype: int64

#### NaN can now be noticed in features 7,8,9

In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 77 entries, 100% Bran to Wheaties Honey Gold
Data columns (total 15 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   mfr       77 non-null     object 
 1   type      77 non-null     object 
 2   calories  77 non-null     int64  
 3   protein   77 non-null     int64  
 4   fat       77 non-null     int64  
 5   sodium    77 non-null     int64  
 6   fiber     77 non-null     float64
 7   carbo     76 non-null     float64
 8   sugars    76 non-null     float64
 9   potass    75 non-null     float64
 10  vitamins  77 non-null     int64  
 11  shelf     77 non-null     int64  
 12  weight    77 non-null     float64
 13  cups      77 non-null     float64
 14  rating    77 non-null     float64
dtypes: float64(7), int64(6), object(2)
memory usage: 11.7+ KB


### Summary Statistics

In [22]:
df.describe()

Unnamed: 0,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
count,77.0,77.0,77.0,77.0,77.0,76.0,76.0,75.0,77.0,77.0,77.0,77.0,77.0
mean,106.883117,2.545455,1.012987,159.675325,2.151948,14.802632,7.026316,98.666667,28.246753,2.207792,1.02961,0.821039,42.665705
std,19.484119,1.09479,1.006473,83.832295,2.383364,3.907326,4.378656,70.410636,22.342523,0.832524,0.150477,0.232716,14.047289
min,50.0,1.0,0.0,0.0,0.0,5.0,0.0,15.0,0.0,1.0,0.5,0.25,18.042851
25%,100.0,2.0,0.0,130.0,1.0,12.0,3.0,42.5,25.0,1.0,1.0,0.67,33.174094
50%,110.0,3.0,1.0,180.0,2.0,14.5,7.0,90.0,25.0,2.0,1.0,0.75,40.400208
75%,110.0,3.0,2.0,210.0,3.0,17.0,11.0,120.0,25.0,3.0,1.0,1.0,50.828392
max,160.0,6.0,5.0,320.0,14.0,23.0,15.0,330.0,100.0,3.0,1.5,1.5,93.704912


In [24]:
df['fat'].mean()

1.0129870129870129

In [25]:
df['fat'].median()

1.0

In [26]:
df['fat'].std()

1.0064725594803927

In [27]:
df['fat'].max()

5

In [28]:
df['fat'].min()

0

In [29]:
df['fat'].count()

77

### Unique Values of a Feature

In [30]:
df['rating'].unique()

array([68.402973, 33.983679, 59.425505, 93.704912, 34.384843, 29.509541,
       33.174094, 37.038562, 49.120253, 53.313813, 18.042851, 50.764999,
       19.823573, 40.400208, 22.736446, 41.445019, 45.863324, 35.782791,
       22.396513, 40.448772, 64.533816, 46.895644, 36.176196, 44.330856,
       32.207582, 31.435973, 58.345141, 40.917047, 41.015492, 28.025765,
       35.252444, 23.804043, 52.076897, 53.371007, 45.811716, 21.871292,
       31.072217, 28.742414, 36.523683, 36.471512, 39.241114, 45.328074,
       26.734515, 54.850917, 37.136863, 34.139765, 30.313351, 40.105965,
       29.924285, 40.69232 , 59.642837, 30.450843, 37.840594, 41.50354 ,
       60.756112, 63.005645, 49.511874, 50.828392, 39.259197, 39.7034  ,
       55.333142, 41.998933, 40.560159, 68.235885, 74.472949, 72.801787,
       31.230054, 53.131324, 59.363993, 38.839746, 28.592785, 46.658844,
       39.106174, 27.753301, 49.787445, 51.592193, 36.187559])

## Count the number of times a value is in the Feature
### Sorted by Descending

In [31]:
df['rating'].value_counts()

68.402973    1
40.692320    1
49.511874    1
63.005645    1
60.756112    1
            ..
31.435973    1
32.207582    1
44.330856    1
36.176196    1
36.187559    1
Name: rating, Length: 77, dtype: int64

## Double Brackets to list Features

In [33]:
df[['fat', 'sugars']].head()

Unnamed: 0_level_0,fat,sugars
name,Unnamed: 1_level_1,Unnamed: 2_level_1
100% Bran,1,6.0
100% Natural Bran,5,8.0
All-Bran,1,5.0
All-Bran with Extra Fiber,0,0.0
Almond Delight,2,8.0


## Loc to use Index Name or Index Numbers [0:22] which is inclusive

In [36]:
df.loc['Wheat Chex'].head()

mfr           R
type          C
calories    100
protein       3
fat           1
Name: Wheat Chex, dtype: object

In [38]:
df.loc[['Wheat Chex', 'Almond Delight']].head()

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Wheat Chex,R,C,100,3,1,230,3.0,17.0,3.0,115.0,25,1,1.0,0.67,49.787445
Almond Delight,R,C,110,2,2,200,1.0,14.0,8.0,,25,3,1.0,0.75,34.384843


## Loc use Index Name & Feature Name

In [40]:
df.loc['Wheat Chex', 'sugars']

3.0

#### one index name & multiple features non-consecutive

In [41]:
df.loc['Wheat Chex', ['sugars', 'rating']]

sugars          3.0
rating    49.787445
Name: Wheat Chex, dtype: object

### multiple index names in slices & multiple Feature slices

In [44]:
df.loc['Wheat Chex' : 'Wheaties Honey Gold' , 'sugars':'rating']

Unnamed: 0_level_0,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Wheat Chex,3.0,115.0,25,1,1.0,0.67,49.787445
Wheaties,3.0,110.0,25,1,1.0,1.0,51.592193
Wheaties Honey Gold,8.0,60.0,25,1,1.0,0.75,36.187559


### all of the index names & specific multiple Feature names

In [45]:
df.loc[: , 'sugars':'rating']

Unnamed: 0_level_0,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
100% Bran,6.0,280.0,25,3,1.0,0.33,68.402973
100% Natural Bran,8.0,135.0,0,3,1.0,1.00,33.983679
All-Bran,5.0,320.0,25,3,1.0,0.33,59.425505
All-Bran with Extra Fiber,0.0,330.0,25,3,1.0,0.50,93.704912
Almond Delight,8.0,,25,3,1.0,0.75,34.384843
...,...,...,...,...,...,...,...
Triples,3.0,60.0,25,3,1.0,0.75,39.106174
Trix,12.0,25.0,25,2,1.0,1.00,27.753301
Wheat Chex,3.0,115.0,25,1,1.0,0.67,49.787445
Wheaties,3.0,110.0,25,1,1.0,1.00,51.592193


## iloc to call by Integer Positions is NON-INCLUSIVE

In [47]:
df.iloc[:3]

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6.0,280.0,25,3,1.0,0.33,68.402973
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8.0,135.0,0,3,1.0,1.0,33.983679
All-Bran,K,C,70,4,1,260,9.0,7.0,5.0,320.0,25,3,1.0,0.33,59.425505


#### iloc multiple integer positions and multiple features

In [49]:
df.iloc[:5, 2:8]

Unnamed: 0_level_0,calories,protein,fat,sodium,fiber,carbo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100% Bran,70,4,1,130,10.0,5.0
100% Natural Bran,120,3,5,15,2.0,8.0
All-Bran,70,4,1,260,9.0,7.0
All-Bran with Extra Fiber,50,4,0,140,14.0,8.0
Almond Delight,110,2,2,200,1.0,14.0


## Chaining loc & iloc together

In [50]:
df.iloc[:5]

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6.0,280.0,25,3,1.0,0.33,68.402973
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8.0,135.0,0,3,1.0,1.0,33.983679
All-Bran,K,C,70,4,1,260,9.0,7.0,5.0,320.0,25,3,1.0,0.33,59.425505
All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0.0,330.0,25,3,1.0,0.5,93.704912
Almond Delight,R,C,110,2,2,200,1.0,14.0,8.0,,25,3,1.0,0.75,34.384843


In [52]:
df.iloc[:5].loc['All-Bran' : 'Almond Delight']

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
All-Bran,K,C,70,4,1,260,9.0,7.0,5.0,320.0,25,3,1.0,0.33,59.425505
All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0.0,330.0,25,3,1.0,0.5,93.704912
Almond Delight,R,C,110,2,2,200,1.0,14.0,8.0,,25,3,1.0,0.75,34.384843


In [53]:
df.iloc[:5].loc['All-Bran' : 'Almond Delight'].sodium

name
All-Bran                     260
All-Bran with Extra Fiber    140
Almond Delight               200
Name: sodium, dtype: int64

In [54]:
df.iloc[:5].loc['All-Bran' : 'Almond Delight'].sodium.mean()

200.0

## Mathematical Operations

In [55]:
df['sugars_double']  = df['sugars'] *2
df.head()

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,sugars_double
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6.0,280.0,25,3,1.0,0.33,68.402973,12.0
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8.0,135.0,0,3,1.0,1.0,33.983679,16.0
All-Bran,K,C,70,4,1,260,9.0,7.0,5.0,320.0,25,3,1.0,0.33,59.425505,10.0
All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0.0,330.0,25,3,1.0,0.5,93.704912,0.0
Almond Delight,R,C,110,2,2,200,1.0,14.0,8.0,,25,3,1.0,0.75,34.384843,16.0


In [56]:
df['sugars_double']/df['sugars'] + 5

name
100% Bran                    7.0
100% Natural Bran            7.0
All-Bran                     7.0
All-Bran with Extra Fiber    NaN
Almond Delight               7.0
                            ... 
Triples                      7.0
Trix                         7.0
Wheat Chex                   7.0
Wheaties                     7.0
Wheaties Honey Gold          7.0
Length: 77, dtype: float64

In [57]:
df['sugars_double'] >= 11

name
100% Bran                     True
100% Natural Bran             True
All-Bran                     False
All-Bran with Extra Fiber    False
Almond Delight                True
                             ...  
Triples                      False
Trix                          True
Wheat Chex                   False
Wheaties                     False
Wheaties Honey Gold           True
Name: sugars_double, Length: 77, dtype: bool

In [58]:
(df['sugars_double'] >= 11) & (df['sugars_double'] == 5)

name
100% Bran                    False
100% Natural Bran            False
All-Bran                     False
All-Bran with Extra Fiber    False
Almond Delight               False
                             ...  
Triples                      False
Trix                         False
Wheat Chex                   False
Wheaties                     False
Wheaties Honey Gold          False
Name: sugars_double, Length: 77, dtype: bool

In [59]:
df['high_fat'] = (df['sugars_double'] >= 11) & (df['fat'] > 3)
df.head()

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,sugars_double,high_fat
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6.0,280.0,25,3,1.0,0.33,68.402973,12.0,False
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8.0,135.0,0,3,1.0,1.0,33.983679,16.0,True
All-Bran,K,C,70,4,1,260,9.0,7.0,5.0,320.0,25,3,1.0,0.33,59.425505,10.0,False
All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0.0,330.0,25,3,1.0,0.5,93.704912,0.0,False
Almond Delight,R,C,110,2,2,200,1.0,14.0,8.0,,25,3,1.0,0.75,34.384843,16.0,False


## Sorting occurs by VALUES or INDEX

## use inplace=True to save the original DF sorted

## Sort By VALUES

In [60]:
df.sort_values('sugars', ascending=False)

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,sugars_double,high_fat
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Smacks,K,C,110,2,1,70,1.0,9.0,15.0,40.0,25,2,1.00,0.75,31.230054,30.0,False
Golden Crisp,P,C,100,2,0,45,0.0,11.0,15.0,40.0,25,1,1.00,0.88,35.252444,30.0,False
Total Raisin Bran,G,C,140,3,1,190,4.0,15.0,14.0,230.0,100,3,1.50,1.00,28.592785,28.0,False
Post Nat. Raisin Bran,P,C,120,3,1,200,6.0,11.0,14.0,260.0,25,3,1.33,0.67,37.840594,28.0,False
Apple Jacks,K,C,110,2,0,125,1.0,11.0,14.0,30.0,25,2,1.00,1.00,33.174094,28.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Puffed Wheat,Q,C,50,2,0,0,1.0,10.0,0.0,50.0,0,3,0.50,1.00,63.005645,0.0,False
Puffed Rice,Q,C,50,1,0,0,0.0,13.0,0.0,15.0,0,3,0.50,1.00,60.756112,0.0,False
Cream of Wheat (Quick),N,H,100,3,0,80,1.0,21.0,0.0,,0,2,1.00,1.00,64.533816,0.0,False
Shredded Wheat 'n'Bran,N,C,90,3,0,0,4.0,19.0,0.0,140.0,0,1,1.00,0.67,74.472949,0.0,False


In [61]:
df.sort_values(['sugars', 'fat'], ascending=False)

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,sugars_double,high_fat
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Smacks,K,C,110,2,1,70,1.0,9.0,15.0,40.0,25,2,1.00,0.75,31.230054,30.0,False
Golden Crisp,P,C,100,2,0,45,0.0,11.0,15.0,40.0,25,1,1.00,0.88,35.252444,30.0,False
Post Nat. Raisin Bran,P,C,120,3,1,200,6.0,11.0,14.0,260.0,25,3,1.33,0.67,37.840594,28.0,False
Total Raisin Bran,G,C,140,3,1,190,4.0,15.0,14.0,230.0,100,3,1.50,1.00,28.592785,28.0,False
Apple Jacks,K,C,110,2,0,125,1.0,11.0,14.0,30.0,25,2,1.00,1.00,33.174094,28.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Puffed Wheat,Q,C,50,2,0,0,1.0,10.0,0.0,50.0,0,3,0.50,1.00,63.005645,0.0,False
Shredded Wheat,N,C,80,2,0,0,3.0,16.0,0.0,95.0,0,1,0.83,1.00,68.235885,0.0,False
Shredded Wheat 'n'Bran,N,C,90,3,0,0,4.0,19.0,0.0,140.0,0,1,1.00,0.67,74.472949,0.0,False
Shredded Wheat spoon size,N,C,90,3,0,0,3.0,20.0,0.0,120.0,0,1,1.00,0.67,72.801787,0.0,False


In [62]:
df.sort_values(['sugars', 'fat'], ascending= [False, True])

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,sugars_double,high_fat
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Golden Crisp,P,C,100,2,0,45,0.0,11.0,15.0,40.0,25,1,1.00,0.88,35.252444,30.0,False
Smacks,K,C,110,2,1,70,1.0,9.0,15.0,40.0,25,2,1.00,0.75,31.230054,30.0,False
Apple Jacks,K,C,110,2,0,125,1.0,11.0,14.0,30.0,25,2,1.00,1.00,33.174094,28.0,False
Post Nat. Raisin Bran,P,C,120,3,1,200,6.0,11.0,14.0,260.0,25,3,1.33,0.67,37.840594,28.0,False
Total Raisin Bran,G,C,140,3,1,190,4.0,15.0,14.0,230.0,100,3,1.50,1.00,28.592785,28.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Puffed Wheat,Q,C,50,2,0,0,1.0,10.0,0.0,50.0,0,3,0.50,1.00,63.005645,0.0,False
Shredded Wheat,N,C,80,2,0,0,3.0,16.0,0.0,95.0,0,1,0.83,1.00,68.235885,0.0,False
Shredded Wheat 'n'Bran,N,C,90,3,0,0,4.0,19.0,0.0,140.0,0,1,1.00,0.67,74.472949,0.0,False
Shredded Wheat spoon size,N,C,90,3,0,0,3.0,20.0,0.0,120.0,0,1,1.00,0.67,72.801787,0.0,False


## SORT BY INDEX

In [63]:
df.head()

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,sugars_double,high_fat
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6.0,280.0,25,3,1.0,0.33,68.402973,12.0,False
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8.0,135.0,0,3,1.0,1.0,33.983679,16.0,True
All-Bran,K,C,70,4,1,260,9.0,7.0,5.0,320.0,25,3,1.0,0.33,59.425505,10.0,False
All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0.0,330.0,25,3,1.0,0.5,93.704912,0.0,False
Almond Delight,R,C,110,2,2,200,1.0,14.0,8.0,,25,3,1.0,0.75,34.384843,16.0,False


In [64]:
df.sort_index(ascending=False)

Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,sugars_double,high_fat
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Wheaties Honey Gold,G,C,110,2,1,200,1.0,16.0,8.0,60.0,25,1,1.0,0.75,36.187559,16.0,False
Wheaties,G,C,100,3,1,200,3.0,17.0,3.0,110.0,25,1,1.0,1.00,51.592193,6.0,False
Wheat Chex,R,C,100,3,1,230,3.0,17.0,3.0,115.0,25,1,1.0,0.67,49.787445,6.0,False
Trix,G,C,110,1,1,140,0.0,13.0,12.0,25.0,25,2,1.0,1.00,27.753301,24.0,False
Triples,G,C,110,2,1,250,0.0,21.0,3.0,60.0,25,3,1.0,0.75,39.106174,6.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Almond Delight,R,C,110,2,2,200,1.0,14.0,8.0,,25,3,1.0,0.75,34.384843,16.0,False
All-Bran with Extra Fiber,K,C,50,4,0,140,14.0,8.0,0.0,330.0,25,3,1.0,0.50,93.704912,0.0,False
All-Bran,K,C,70,4,1,260,9.0,7.0,5.0,320.0,25,3,1.0,0.33,59.425505,10.0,False
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8.0,135.0,0,3,1.0,1.00,33.983679,16.0,True


## Filtering Data using data mask
### Data mask provides Boolean values from conditional statements


## Bitwise Operators

## & --> and 
## | --> or
## ~ --> Complement

In [67]:
df.shape

(77, 17)

In [66]:
over_sugar = df[df.sugars_double > 10]
print(over_sugar.shape)
over_sugar.head()

(46, 17)


Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,sugars_double,high_fat
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6.0,280.0,25,3,1.0,0.33,68.402973,12.0,False
100% Natural Bran,Q,C,120,3,5,15,2.0,8.0,8.0,135.0,0,3,1.0,1.0,33.983679,16.0,True
Almond Delight,R,C,110,2,2,200,1.0,14.0,8.0,,25,3,1.0,0.75,34.384843,16.0,False
Apple Cinnamon Cheerios,G,C,110,2,2,180,1.5,10.5,10.0,70.0,25,1,1.0,0.75,29.509541,20.0,False
Apple Jacks,K,C,110,2,0,125,1.0,11.0,14.0,30.0,25,2,1.0,1.0,33.174094,28.0,False


In [73]:
over_sugar2 = df[(df.sugars_double > 10) | (df.fat > 4)].sort_values('potass', ascending = False)
print(over_sugar2.shape)
over_sugar2.head()

(46, 17)


Unnamed: 0_level_0,mfr,type,calories,protein,fat,sodium,fiber,carbo,sugars,potass,vitamins,shelf,weight,cups,rating,sugars_double,high_fat
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
100% Bran,N,C,70,4,1,130,10.0,5.0,6.0,280.0,25,3,1.0,0.33,68.402973,12.0,False
Post Nat. Raisin Bran,P,C,120,3,1,200,6.0,11.0,14.0,260.0,25,3,1.33,0.67,37.840594,28.0,False
Raisin Bran,K,C,120,3,1,210,5.0,14.0,12.0,240.0,25,2,1.33,0.75,39.259197,24.0,False
Total Raisin Bran,G,C,140,3,1,190,4.0,15.0,14.0,230.0,100,3,1.5,1.0,28.592785,28.0,False
Fruit & Fibre Dates; Walnuts; and Oats,P,C,120,3,2,160,5.0,12.0,10.0,200.0,25,3,1.25,0.67,40.917047,20.0,False
