# Data Manipulation and Summary Statistics with pandas

![](https://upload.wikimedia.org/wikipedia/commons/thumb/f/fd/RMS_Titanic_3.jpg/1920px-RMS_Titanic_3.jpg)

![](./img/Pandas_Cheat_Sheet.jpg)

In [1]:
import pandas as pd

In [2]:
#Load Data
PATH = "./data/titanic.csv"

df = pd.read_csv(PATH)

#Show the the count of rows and columns
df.shape

(891, 15)

#### Top 5 Rows
[Reference](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.head.html#pandas-dataframe-head)

In [3]:
df.head(5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


#### Bottom 5 Rows
[Reference](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.tail.html#pandas-dataframe-tail)

In [4]:
df.tail()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.45,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True


#### Columns

In [5]:
# Select a single columns
# Note: you can also use the dot operator "but it is not the default"
# df.
df["survived"]

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: survived, Length: 891, dtype: int64

In [6]:
# List of columns to select
col_ls = ["sex", "fare", "survived"]

# Subset the columns
df[col_ls]

Unnamed: 0,sex,fare,survived
0,male,7.2500,0
1,female,71.2833,1
2,female,7.9250,1
3,female,53.1000,1
4,male,8.0500,0
...,...,...,...
886,male,13.0000,0
887,female,30.0000,1
888,female,23.4500,0
889,male,30.0000,1


In [7]:
# Create a new column and set values to one
df["new_column"] = 1
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,new_column
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False,1
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,1
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,1
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True,1


In [8]:
# Create a new column from other columns
df["new_column2"] = df["class"] + " " + df["embark_town"]
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,new_column,new_column2
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False,1,Third Southampton
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1,First Cherbourg
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,1,Third Southampton
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,1,First Southampton
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True,1,Third Southampton


In [9]:
# Renaming the column names

df.rename(columns= {'new_column': 'something'})


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,something,new_column2
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False,1,Third Southampton
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1,First Cherbourg
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True,1,Third Southampton
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,1,First Southampton
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True,1,Third Southampton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True,1,Second Southampton
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,1,First Southampton
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False,1,Third Southampton
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True,1,First Cherbourg


In [10]:
# Renaming more than one column names
# add them in a dictionary 
# key --> old title
# value --> new title

df.rename(columns= {'new_column': 'something', 'new_column2': 'something2' })

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,something,something2
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False,1,Third Southampton
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1,First Cherbourg
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True,1,Third Southampton
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,1,First Southampton
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True,1,Third Southampton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True,1,Second Southampton
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,1,First Southampton
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False,1,Third Southampton
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True,1,First Cherbourg


In [11]:
# Note: Another way to renaming more than one column 

edit_names = {'new_column': 'something', 'new_column2': 'something2'}

df.rename(columns= edit_names)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,something,something2
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False,1,Third Southampton
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1,First Cherbourg
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True,1,Third Southampton
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,1,First Southampton
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True,1,Third Southampton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True,1,Second Southampton
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,1,First Southampton
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False,1,Third Southampton
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True,1,First Cherbourg


#### Slicing

- [loc](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.loc.html)
    - Label based slicing ---> use the name of the column for selection
- [iloc](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html)
    - Index based slicing ---> use the index of the column for selection

In [12]:
# Grab first 5 rows
df[0:5]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,new_column,new_column2
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False,1,Third Southampton
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1,First Cherbourg
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,1,Third Southampton
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,1,First Southampton
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True,1,Third Southampton


In [13]:
# Look at bottom five rows
df[-5:]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,new_column,new_column2
886,0,2,male,27.0,0,0,13.0,S,Second,man,True,,Southampton,no,True,1,Second Southampton
887,1,1,female,19.0,0,0,30.0,S,First,woman,False,B,Southampton,yes,True,1,First Southampton
888,0,3,female,,1,2,23.45,S,Third,woman,False,,Southampton,no,False,1,Third Southampton
889,1,1,male,26.0,0,0,30.0,C,First,man,True,C,Cherbourg,yes,True,1,First Cherbourg
890,0,3,male,32.0,0,0,7.75,Q,Third,man,True,,Queenstown,no,True,1,Third Queenstown


In [14]:
# Use index based slicing to grab first 5 rows and first 5 columns
df.iloc[0:5,0:5]

Unnamed: 0,survived,pclass,sex,age,sibsp
0,0,3,male,22.0,1
1,1,1,female,38.0,1
2,1,3,female,26.0,0
3,1,1,female,35.0,1
4,0,3,male,35.0,0


In [15]:
# Use index based slicing to grab last 5 rows and last 5 columns
df.iloc[-5:,-5:]

Unnamed: 0,embark_town,alive,alone,new_column,new_column2
886,Southampton,no,True,1,Second Southampton
887,Southampton,yes,True,1,First Southampton
888,Southampton,no,False,1,Third Southampton
889,Cherbourg,yes,True,1,First Cherbourg
890,Queenstown,no,True,1,Third Queenstown


In [16]:
# Use label based slicing to grab first 5 rows and first 5 columns "from survived until sibsp"
df.loc[0:5,"survived":"sibsp"]

Unnamed: 0,survived,pclass,sex,age,sibsp
0,0,3,male,22.0,1
1,1,1,female,38.0,1
2,1,3,female,26.0,0
3,1,1,female,35.0,1
4,0,3,male,35.0,0
5,0,3,male,,0


In [17]:
# Use label based slicing to grab "a range of columns" the last 5 rows and last 5 columns
df.loc[-5:,"adult_male":"alone"]

Unnamed: 0,adult_male,deck,embark_town,alive,alone
0,True,,Southampton,no,False
1,False,C,Cherbourg,yes,False
2,False,,Southampton,yes,True
3,False,C,Southampton,yes,False
4,True,,Southampton,no,True
...,...,...,...,...,...
886,True,,Southampton,no,True
887,False,B,Southampton,yes,True
888,False,,Southampton,no,False
889,True,C,Cherbourg,yes,True


In [18]:
# Use label based slicing to grab all the rows and specific columns 
# Note: the columns are arranged based on your listing NOT based on the order in the dataset

df.loc[:,["adult_male","alone", "survived"]]

Unnamed: 0,adult_male,alone,survived
0,True,False,0
1,False,False,1
2,False,True,1
3,False,False,1
4,True,True,0
...,...,...,...
886,True,True,0
887,False,True,1
888,False,False,0
889,True,True,1


## Sampling from the dataset 

## you can sample data by:

* specific number of rows
* specific fraction of rows 

In [19]:
df.shape

(891, 17)

In [20]:
# Note: Used to get a specific number of data points

df.sample(n = 100)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,new_column,new_column2
137,0,1,male,37.0,1,0,53.1000,S,First,man,True,C,Southampton,no,False,1,First Southampton
278,0,3,male,7.0,4,1,29.1250,Q,Third,child,False,,Queenstown,no,False,1,Third Queenstown
830,1,3,female,15.0,1,0,14.4542,C,Third,child,False,,Cherbourg,yes,False,1,Third Cherbourg
695,0,2,male,52.0,0,0,13.5000,S,Second,man,True,,Southampton,no,True,1,Second Southampton
815,0,1,male,,0,0,0.0000,S,First,man,True,B,Southampton,no,True,1,First Southampton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,0,1,female,50.0,0,0,28.7125,C,First,woman,False,C,Cherbourg,no,True,1,First Cherbourg
261,1,3,male,3.0,4,2,31.3875,S,Third,child,False,,Southampton,yes,False,1,Third Southampton
872,0,1,male,33.0,0,0,5.0000,S,First,man,True,B,Southampton,no,True,1,First Southampton
390,1,1,male,36.0,1,2,120.0000,S,First,man,True,B,Southampton,yes,False,1,First Southampton


In [21]:
# Note: Used to get a specific portion/percentage of data points

df.sample(frac = 0.3)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,new_column,new_column2
435,1,1,female,14.0,1,2,120.0000,S,First,child,False,B,Southampton,yes,False,1,First Southampton
577,1,1,female,39.0,1,0,55.9000,S,First,woman,False,E,Southampton,yes,False,1,First Southampton
147,0,3,female,9.0,2,2,34.3750,S,Third,child,False,,Southampton,no,False,1,Third Southampton
676,0,3,male,24.5,0,0,8.0500,S,Third,man,True,,Southampton,no,True,1,Third Southampton
860,0,3,male,41.0,2,0,14.1083,S,Third,man,True,,Southampton,no,False,1,Third Southampton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
823,1,3,female,27.0,0,1,12.4750,S,Third,woman,False,E,Southampton,yes,False,1,Third Southampton
455,1,3,male,29.0,0,0,7.8958,C,Third,man,True,,Cherbourg,yes,True,1,Third Cherbourg
297,0,1,female,2.0,1,2,151.5500,S,First,child,False,C,Southampton,no,False,1,First Southampton
380,1,1,female,42.0,0,0,227.5250,C,First,woman,False,,Cherbourg,yes,True,1,First Cherbourg


#### Filtering DataFrames

To Filter DataFrames based on codintional statement we will pass the condition into the DataFrame using the following syntax.

```python
my_condition = condition
df[my_condition]
```

In [22]:
# Filter DataFrame for rows of Males only (1 condition)

condition = df["sex"] == "male"

df[condition]


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,new_column,new_column2
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False,1,Third Southampton
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True,1,Third Southampton
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True,1,Third Queenstown
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True,1,First Southampton
7,0,3,male,2.0,3,1,21.0750,S,Third,child,False,,Southampton,no,False,1,Third Southampton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
883,0,2,male,28.0,0,0,10.5000,S,Second,man,True,,Southampton,no,True,1,Second Southampton
884,0,3,male,25.0,0,0,7.0500,S,Third,man,True,,Southampton,no,True,1,Third Southampton
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True,1,Second Southampton
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True,1,First Cherbourg


In [23]:
# Filter DataFrame for rows of Males and Survived only (2 conditions)
condition2 = (df["sex"] == "male") & (df["survived"] == 1)
df[condition2]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,new_column,new_column2
17,1,2,male,,0,0,13.0000,S,Second,man,True,,Southampton,yes,True,1,Second Southampton
21,1,2,male,34.0,0,0,13.0000,S,Second,man,True,D,Southampton,yes,True,1,Second Southampton
23,1,1,male,28.0,0,0,35.5000,S,First,man,True,A,Southampton,yes,True,1,First Southampton
36,1,3,male,,0,0,7.2292,C,Third,man,True,,Cherbourg,yes,True,1,Third Cherbourg
55,1,1,male,,0,0,35.5000,S,First,man,True,C,Southampton,yes,True,1,First Southampton
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
838,1,3,male,32.0,0,0,56.4958,S,Third,man,True,,Southampton,yes,True,1,Third Southampton
839,1,1,male,,0,0,29.7000,C,First,man,True,C,Cherbourg,yes,True,1,First Cherbourg
857,1,1,male,51.0,0,0,26.5500,S,First,man,True,E,Southampton,yes,True,1,First Southampton
869,1,3,male,4.0,1,1,11.1333,S,Third,child,False,,Southampton,yes,False,1,Third Southampton


In [24]:
# Filter DataFrame for rows of Males and Survived and Fare > 15 only (3 conditions)
# Only look at columns: age, class
condition3 = (df["sex"] == "male") & (df["survived"] == 1) & (df["fare"] > 15)

# apply the conditions --> return all the rows but only 2 columns (age and class)
df[condition3].loc[:, ["age", "class"]]
#df[condition3]

Unnamed: 0,age,class
23,28.00,First
55,,First
65,,Third
74,32.00,Third
78,0.83,Second
...,...,...
831,0.83,Second
838,32.00,Third
839,,First
857,51.00,First


In [41]:
# The total number of males who survived 
condition3 = (df["sex"] == "male") & (df["survived"] == 1) 

# Important Note: Why did the sum work in the column and did not work for other columns
df[condition3].loc[:, ["survived"]].sum()

survived    109
dtype: int64

In [42]:
# Note: this will give me the sum of all the coulmns (numerical and categorical) that satisfy condition3 

df[condition3].sum()

  df[condition3].sum()


survived                                                     109
pclass                                                       220
sex            malemalemalemalemalemalemalemalemalemalemalema...
age                                                      2536.67
sibsp                                                         42
parch                                                         39
fare                                                   4449.5418
embarked       SSSCSCSSSCSCSSSSSSSCCSSSSSSSSSSSQSSSSCSSSSSSSS...
class          SecondSecondFirstThirdFirstThirdThirdSecondThi...
who            manmanmanmanmanmanmanchildmanmanmanchildmanman...
adult_male                                                    88
embark_town    SouthamptonSouthamptonSouthamptonCherbourgSout...
alive          yesyesyesyesyesyesyesyesyesyesyesyesyesyesyesy...
alone                                                         64
new_column                                                   109
new_column2    Second Sou

In [27]:
# The total number of males who did not survive
condition3 = (df["sex"] == "male") & (df["survived"] == 0) 

df[condition3].loc[:, ["survived"]].count()

survived    468
dtype: int64

#### Numerical Summary Statistics
[Reference](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.describe.html)

In [28]:

df.describe()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare,new_column
count,891.0,891.0,714.0,891.0,891.0,891.0,891.0
mean,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208,1.0
std,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429,0.0
min,0.0,1.0,0.42,0.0,0.0,0.0,1.0
25%,0.0,2.0,20.125,0.0,0.0,7.9104,1.0
50%,0.0,3.0,28.0,0.0,0.0,14.4542,1.0
75%,1.0,3.0,38.0,1.0,0.0,31.0,1.0
max,1.0,3.0,80.0,8.0,6.0,512.3292,1.0


#### Categorical Summary Statistics
[Reference](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.describe.html)

In [29]:
df.describe(exclude="number")

Unnamed: 0,sex,embarked,class,who,adult_male,deck,embark_town,alive,alone,new_column2
count,891,889,891,891,891,203,889,891,891,889
unique,2,3,3,3,2,7,3,2,2,9
top,male,S,Third,man,True,C,Southampton,no,True,Third Southampton
freq,577,644,491,537,537,59,644,549,537,353


#### Info about the DataFrame

This method prints information about a DataFrame including the index dtype and columns, non-null values and memory usage. 

[Reference](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.info.html)

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 17 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   survived     891 non-null    int64  
 1   pclass       891 non-null    int64  
 2   sex          891 non-null    object 
 3   age          714 non-null    float64
 4   sibsp        891 non-null    int64  
 5   parch        891 non-null    int64  
 6   fare         891 non-null    float64
 7   embarked     889 non-null    object 
 8   class        891 non-null    object 
 9   who          891 non-null    object 
 10  adult_male   891 non-null    bool   
 11  deck         203 non-null    object 
 12  embark_town  889 non-null    object 
 13  alive        891 non-null    object 
 14  alone        891 non-null    bool   
 15  new_column   891 non-null    int64  
 16  new_column2  889 non-null    object 
dtypes: bool(2), float64(2), int64(5), object(8)
memory usage: 106.3+ KB


In [31]:
# How many missing values in each column

df.isnull().sum()


# Example on sub-sectioning for more accurate missing values replacement
# replace missing value with mean 
# two condition survived, sex

# 1st subset: survived = 0  sex = female, ---> mean
# 2nd subset: survived = 1  sex = female, ---> mean
# 3rd subset: survived = 0  sex = male, ---> mean
# 4th subset: survived = 1  sex = male, ---> mean



survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0
embarked         2
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
new_column       0
new_column2      2
dtype: int64

#### Aggregation Methods

##### Refrences
- [Mean](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mean.html)
- [Median](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mean.html)
- [Mode](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mean.html)
- [Value Counts](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.value_counts.html)
- [Standard Deviation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.mean.html)
- [Sum](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sum.html)
- [Count](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.count.html)

In [32]:
# Mean
df["fare"].mean()

32.2042079685746

In [33]:
# Median
df["fare"].median()

14.4542

In [34]:
# Mode 
df["fare"].mode()

0    8.05
dtype: float64

In [35]:
# Value Counts (Value counts shows the counts of each values in your column)
df["class"].value_counts()

Third     491
First     216
Second    184
Name: class, dtype: int64

In [36]:
df["sex"].value_counts()

male      577
female    314
Name: sex, dtype: int64

In [37]:
df["survived"].value_counts()

0    549
1    342
Name: survived, dtype: int64

In [38]:
# Stanadard Deviation
df["fare"].std()

49.693428597180905

In [39]:
# Sum (Add all fares together)
df["fare"].sum()

28693.9493

In [40]:
# Count (Count all instances of a fare)
df["fare"].count()

891