### Data Wrangling

In [61]:
import pandas as pd
import numpy as np

In [62]:
url = 'https://tinyurl.com/titanic-csv'

In [63]:
df = pd.read_csv(url, na_values=[np.nan,'None', -999])

In [64]:
df.head()

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
4,"Allison, Master Hudson Trevor",1st,0.92,male,1,0


In [65]:
df1 = pd.DataFrame()

In [66]:
df1['Name']=['Jacky Jackson', 'Steven Stephenson']

In [67]:
df1['Age']=[36, 25]

In [68]:
df1['Driver']=[True, False]

In [69]:
df1

Unnamed: 0,Name,Age,Driver
0,Jacky Jackson,36,True
1,Steven Stephenson,25,False


In [70]:
new_person = pd.Series(['Molly', 40, False], index=['Name', 'Age', 'Driver'])

In [71]:
df1.append(new_person, ignore_index=True)

Unnamed: 0,Name,Age,Driver
0,Jacky Jackson,36,True
1,Steven Stephenson,25,False
2,Molly,40,False


In [72]:
df.shape

(1313, 6)

In [73]:
df.describe()

Unnamed: 0,Age,Survived,SexCode
count,756.0,1313.0,1313.0
mean,30.397989,0.342727,0.351866
std,14.259049,0.474802,0.477734
min,0.17,0.0,0.0
25%,21.0,0.0,0.0
50%,28.0,0.0,0.0
75%,39.0,1.0,1.0
max,71.0,1.0,1.0


In [74]:
df.iloc[0]

Name        Allen, Miss Elisabeth Walton
PClass                               1st
Age                                   29
Sex                               female
Survived                               1
SexCode                                1
Name: 0, dtype: object

In [75]:
df.iloc[1:4]

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1


In [76]:
df['Name'].iloc[:4]

0                     Allen, Miss Elisabeth Walton
1                      Allison, Miss Helen Loraine
2              Allison, Mr Hudson Joshua Creighton
3    Allison, Mrs Hudson JC (Bessie Waldo Daniels)
Name: Name, dtype: object

In [77]:
df[df['Sex'] == 'female'].iloc[:4]

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
6,"Andrews, Miss Kornelia Theodosia",1st,63.0,female,1,1


In [78]:
df = df.set_index(df['Name'])

In [79]:
df.loc['Allen, Miss Elisabeth Walton']

Name        Allen, Miss Elisabeth Walton
PClass                               1st
Age                                   29
Sex                               female
Survived                               1
SexCode                                1
Name: Allen, Miss Elisabeth Walton, dtype: object

In [80]:
df[df['Sex']=='male'].head()

Unnamed: 0_level_0,Name,PClass,Age,Sex,Survived,SexCode
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Allison, Mr Hudson Joshua Creighton","Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
"Allison, Master Hudson Trevor","Allison, Master Hudson Trevor",1st,0.92,male,1,0
"Anderson, Mr Harry","Anderson, Mr Harry",1st,47.0,male,1,0
"Andrews, Mr Thomas, jr","Andrews, Mr Thomas, jr",1st,39.0,male,0,0
"Artagaveytia, Mr Ramon","Artagaveytia, Mr Ramon",1st,71.0,male,0,0


In [81]:
df[(df['Sex']=='female')&(df['Age']>50)].head()

Unnamed: 0_level_0,Name,PClass,Age,Sex,Survived,SexCode
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Andrews, Miss Kornelia Theodosia","Andrews, Miss Kornelia Theodosia",1st,63.0,female,1,1
"Appleton, Mrs Edward Dale (Charlotte Lamson)","Appleton, Mrs Edward Dale (Charlotte Lamson)",1st,58.0,female,1,1
"Bonnell, Miss Elizabeth","Bonnell, Miss Elizabeth",1st,58.0,female,1,1
"Brown, Mrs John Murray (Caroline Lane Lamson)","Brown, Mrs John Murray (Caroline Lane Lamson)",1st,59.0,female,1,1
"Bucknell, Mrs William Robert (Emma Eliza Ward)","Bucknell, Mrs William Robert (Emma Eliza Ward)",1st,60.0,female,1,1


### Replacing Values

In [82]:
df['Sex'].replace('female', 'woman').head()

Name
Allen, Miss Elisabeth Walton                     woman
Allison, Miss Helen Loraine                      woman
Allison, Mr Hudson Joshua Creighton               male
Allison, Mrs Hudson JC (Bessie Waldo Daniels)    woman
Allison, Master Hudson Trevor                     male
Name: Sex, dtype: object

#### Replace multiple values

In [83]:
df['Sex'].replace(['male', 'female'], ['man', 'woman']).head()

Name
Allen, Miss Elisabeth Walton                     woman
Allison, Miss Helen Loraine                      woman
Allison, Mr Hudson Joshua Creighton                man
Allison, Mrs Hudson JC (Bessie Waldo Daniels)    woman
Allison, Master Hudson Trevor                      man
Name: Sex, dtype: object

In [84]:
df.replace(r"1st", "First", regex=True).head()

Unnamed: 0_level_0,Name,PClass,Age,Sex,Survived,SexCode
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Allen, Miss Elisabeth Walton","Allen, Miss Elisabeth Walton",First,29.0,female,1,1
"Allison, Miss Helen Loraine","Allison, Miss Helen Loraine",First,2.0,female,0,1
"Allison, Mr Hudson Joshua Creighton","Allison, Mr Hudson Joshua Creighton",First,30.0,male,0,0
"Allison, Mrs Hudson JC (Bessie Waldo Daniels)","Allison, Mrs Hudson JC (Bessie Waldo Daniels)",First,25.0,female,0,1
"Allison, Master Hudson Trevor","Allison, Master Hudson Trevor",First,0.92,male,1,0


#### Renaming Columns

In [85]:
df.rename(columns={'PClass': 'Passenger Class', 'Sex': 'Gender'}).head()

Unnamed: 0_level_0,Name,Passenger Class,Age,Gender,Survived,SexCode
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Allen, Miss Elisabeth Walton","Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
"Allison, Miss Helen Loraine","Allison, Miss Helen Loraine",1st,2.0,female,0,1
"Allison, Mr Hudson Joshua Creighton","Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
"Allison, Mrs Hudson JC (Bessie Waldo Daniels)","Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
"Allison, Master Hudson Trevor","Allison, Master Hudson Trevor",1st,0.92,male,1,0


#### Finding Min, Max, Sum, Average and Count

In [86]:
print('Maximun: ',df['Age'].max())
print('Minimum: ', df['Age'].min())
print('Sum: ', df['Age'].sum())
print('Average: ', df['Age'].mean())
print('Count: ', df['Age'].count())
print('Standard Deviation: ', df['Age'].std())
print('Variance: ', df['Age'].var())
print('Kurtosis: ', df['Age'].kurt())
print('Skewness: ', df['Age'].skew())
print('Standard error of mean: ', df['Age'].sem())
print('Mode: ', df['Age'].mode())
print('Median: ', df['Age'].median())

Maximun:  71.0
Minimum:  0.17
Sum:  22980.88
Average:  30.397989417989415
Count:  756
Standard Deviation:  14.259048710359023
Variance:  203.32047012439133
Kurtosis:  -0.036536168924722556
Skewness:  0.36851087371648295
Standard error of mean:  0.5185965877244657
Mode:  0    22.0
dtype: float64
Median:  28.0


#### Finding Unique Values

In [87]:
df['Sex'].unique()

array(['female', 'male'], dtype=object)

In [88]:
df['Sex'].value_counts()

male      851
female    462
Name: Sex, dtype: int64

In [89]:
df['PClass'].value_counts()

3rd    711
1st    322
2nd    279
*        1
Name: PClass, dtype: int64

#### Handling Missing Values

In [90]:
df[df['Age'].isnull()].head()

Unnamed: 0_level_0,Name,PClass,Age,Sex,Survived,SexCode
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Aubert, Mrs Leontine Pauline","Aubert, Mrs Leontine Pauline",1st,,female,1,1
"Barkworth, Mr Algernon H","Barkworth, Mr Algernon H",1st,,male,1,0
"Baumann, Mr John D","Baumann, Mr John D",1st,,male,0,0
"Borebank, Mr John James","Borebank, Mr John James",1st,,male,0,0
"Bradley, Mr George","Bradley, Mr George",1st,,male,1,0


###### Change certain strings to missing values (NaN)

In [91]:
df['Sex']=df['Sex'].replace('male', np.nan)

#### Deleting a Column

In [92]:
df.drop('Age', axis=1).head()

Unnamed: 0_level_0,Name,PClass,Sex,Survived,SexCode
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Allen, Miss Elisabeth Walton","Allen, Miss Elisabeth Walton",1st,female,1,1
"Allison, Miss Helen Loraine","Allison, Miss Helen Loraine",1st,female,0,1
"Allison, Mr Hudson Joshua Creighton","Allison, Mr Hudson Joshua Creighton",1st,,0,0
"Allison, Mrs Hudson JC (Bessie Waldo Daniels)","Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,female,0,1
"Allison, Master Hudson Trevor","Allison, Master Hudson Trevor",1st,,1,0


In [95]:
df.drop(['Age', 'SexCode'], axis=1).head()

Unnamed: 0_level_0,Name,PClass,Sex,Survived
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Allen, Miss Elisabeth Walton","Allen, Miss Elisabeth Walton",1st,female,1
"Allison, Miss Helen Loraine","Allison, Miss Helen Loraine",1st,female,0
"Allison, Mr Hudson Joshua Creighton","Allison, Mr Hudson Joshua Creighton",1st,,0
"Allison, Mrs Hudson JC (Bessie Waldo Daniels)","Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,female,0
"Allison, Master Hudson Trevor","Allison, Master Hudson Trevor",1st,,1


In [96]:
#Deleting a column by its index

In [109]:
df.drop(df.columns[1], axis=1).head()

Unnamed: 0_level_0,Name,Age,Sex,Survived,SexCode
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Allen, Miss Elisabeth Walton","Allen, Miss Elisabeth Walton",29.0,female,1,1
"Allison, Miss Helen Loraine","Allison, Miss Helen Loraine",2.0,female,0,1
"Allison, Mr Hudson Joshua Creighton","Allison, Mr Hudson Joshua Creighton",30.0,,0,0
"Allison, Mrs Hudson JC (Bessie Waldo Daniels)","Allison, Mrs Hudson JC (Bessie Waldo Daniels)",25.0,female,0,1
"Allison, Master Hudson Trevor","Allison, Master Hudson Trevor",0.92,,1,0


#### Deleting a Row

In [111]:
df[df['Sex'] != 'male'].head()

Unnamed: 0_level_0,Name,PClass,Age,Sex,Survived,SexCode
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Allen, Miss Elisabeth Walton","Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
"Allison, Miss Helen Loraine","Allison, Miss Helen Loraine",1st,2.0,female,0,1
"Allison, Mr Hudson Joshua Creighton","Allison, Mr Hudson Joshua Creighton",1st,30.0,,0,0
"Allison, Mrs Hudson JC (Bessie Waldo Daniels)","Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,1
"Allison, Master Hudson Trevor","Allison, Master Hudson Trevor",1st,0.92,,1,0
