In [1]:
import numpy as np
import pandas as pd

# Creating Dataframes

In [2]:
# Create a Series from a dictionary: the keys become the index labels and
# the values become the data. `name` labels the Series ("Name: number" below).
pd.Series({'a': 1, 'b': 3, 'c': 7, 'd': 9}, name='number')

a    1
b    3
c    7
d    9
Name: number, dtype: int64

In [3]:
# Create a DataFrame from a dictionary of dictionaries.
# Each outer key is a column name; each inner dictionary maps
# row labels ('a'..'d') to that column's values.
pd.DataFrame({'number':{'a': 1, 'b': 3, 'c': 7, 'd': 9},
              'double':{'a': 2, 'b': 6, 'c': 14,'d': 18},
              'square':{'a': 1, 'b': 9, 'c': 49,'d': 81}})

Unnamed: 0,number,double,square
a,1,2,1
b,3,6,9
c,7,14,49
d,9,18,81


In [4]:
# Load a DataFrame from a CSV file (path is relative to the working directory).
# `df` is the frame used by all of the cells below.
df = pd.read_csv('titanic.csv')
df # Display the frame; long frames are shown truncated (first and last rows)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [5]:
# More options at https://pandas.pydata.org/docs/reference/io.html

# Selecting Data

In [6]:
df['survived'] # Returns the column titled 'survived' as a Series
# We can usually use dot notation as well (e.g. df.survived)
# This doesn't work when the column name is a Python keyword or
# method name (e.g. df.class is a SyntaxError; use df['class'] instead)

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: survived, Length: 891, dtype: int64

In [7]:
df[1:3] # Returns the rows at positions 1 and 2 (slice is positional, end excluded)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True


In [8]:
# Boolean-mask selection: the comparison builds a True/False Series and
# indexing with it keeps only the rows where it is True.
df[df.survived==1] # Returns rows where survived==1
# df[df.who.isin(['woman','child'])] # Returns rows where who is in [woman, child]
# df[df.embark_town.notnull()] # Returns rows where embark_town is not null

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1,3,female,15.0,0,0,7.2250,C,Third,child,False,,Cherbourg,yes,True
879,1,1,female,56.0,0,1,83.1583,C,First,woman,False,C,Cherbourg,yes,False
880,1,2,female,25.0,0,1,26.0000,S,Second,woman,False,,Southampton,yes,False
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True


In [9]:
df.iloc[0:2, 1:4] # Selects rows and columns by integer position (end excluded): rows 0-1, columns 1-3
# df.iloc[[0,1],[1,2,3]] # Does the same thing with explicit position lists

Unnamed: 0,pclass,sex,age
0,3,male,22.0
1,1,female,38.0


In [10]:
df.loc[0:1, 'pclass':'age'] # Selects rows and columns by label (end included): rows 0-1, columns 'pclass' through 'age'
# df.loc[[0,1],['pclass','sex','age']] # Does the same thing with explicit label lists

Unnamed: 0,pclass,sex,age
0,3,male,22.0
1,1,female,38.0


# Data Exploration

In [11]:
df.head() # Returns first 5 rows (the default when n is not given)
# df.head(n=10) # Returns first 10 rows
# df.tail(n=3) # Returns last 3 rows

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [12]:
df.shape # Returns a (number of rows, number of columns) tuple

(891, 15)

In [13]:
df.info() # Prints the index range, per-column non-null counts and dtypes, and memory usage
# df.index.dtype # Returns datatype of index

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   survived     891 non-null    int64  
 1   pclass       891 non-null    int64  
 2   sex          891 non-null    object 
 3   age          714 non-null    float64
 4   sibsp        891 non-null    int64  
 5   parch        891 non-null    int64  
 6   fare         891 non-null    float64
 7   embarked     889 non-null    object 
 8   class        891 non-null    object 
 9   who          891 non-null    object 
 10  adult_male   891 non-null    bool   
 11  deck         203 non-null    object 
 12  embark_town  889 non-null    object 
 13  alive        891 non-null    object 
 14  alone        891 non-null    bool   
dtypes: bool(2), float64(2), int64(4), object(7)
memory usage: 92.4+ KB


In [14]:
df.describe() # Returns summary statistics (count, mean, std, min, quartiles, max) for numerical columns

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
count,891.0,891.0,714.0,891.0,891.0,891.0
mean,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,0.0,1.0,0.42,0.0,0.0,0.0
25%,0.0,2.0,20.125,0.0,0.0,7.9104
50%,0.0,3.0,28.0,0.0,0.0,14.4542
75%,1.0,3.0,38.0,1.0,0.0,31.0
max,1.0,3.0,80.0,8.0,6.0,512.3292


In [15]:
# Returns the pairwise correlation between the numerical columns.
# select_dtypes(include='number') keeps only the numeric columns, so the
# text and boolean columns (e.g. adult_male, alone) are not part of the result.
df.select_dtypes(include='number').corr()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
survived,1.0,-0.338481,-0.077221,-0.035322,0.081629,0.257307
pclass,-0.338481,1.0,-0.369226,0.083081,0.018443,-0.5495
age,-0.077221,-0.369226,1.0,-0.308247,-0.189119,0.096067
sibsp,-0.035322,0.083081,-0.308247,1.0,0.414838,0.159651
parch,0.081629,0.018443,-0.189119,0.414838,1.0,0.216225
fare,0.257307,-0.5495,0.096067,0.159651,0.216225,1.0


In [16]:
df.columns # Returns the column labels as an Index object

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')

In [17]:
df.who.unique() # Returns an array of the distinct values in the column
# df.who.value_counts() # Returns unique values with counts

array(['man', 'woman', 'child'], dtype=object)

# Data Manipulation

In [18]:
df.dropna() # Returns a new frame without the rows that contain any null value (df itself is not modified)
# df.dropna(axis=1) # Returns a new frame without the columns that contain any null value

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
10,1,3,female,4.0,1,1,16.7000,S,Third,child,False,G,Southampton,yes,False
11,1,1,female,58.0,0,0,26.5500,S,First,woman,False,C,Southampton,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
871,1,1,female,47.0,1,1,52.5542,S,First,woman,False,D,Southampton,yes,False
872,0,1,male,33.0,0,0,5.0000,S,First,man,True,B,Southampton,no,True
879,1,1,female,56.0,0,1,83.1583,C,First,woman,False,C,Cherbourg,yes,False
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True


In [19]:
df.fillna(0) # Returns a new frame with every NaN replaced by the given value (here 0)
# df.fillna(df.mean(numeric_only=True)) # Fills NaNs in numeric columns using the column mean
#                                       # (numeric_only=True is needed because df also has text columns)
# df.ffill() # Fills NaNs using previous row value
# df.bfill() # Fills NaNs using next row value

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,0,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,0,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,0,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,0,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,0.0,1,2,23.4500,S,Third,woman,False,0,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [20]:
df['class'].replace('Third','3rd') # Replaces values equal to 'Third' with '3rd' (returns a new Series)
# df.embark_town.str.match('Cher.*') # Returns a boolean Series: True where the string matches the regex from its start

0         3rd
1       First
2         3rd
3       First
4         3rd
        ...  
886    Second
887     First
888       3rd
889     First
890       3rd
Name: class, Length: 891, dtype: object

In [21]:
df.survived + df.pclass # Adds two columns element-wise (row by row)
# Can use + - * / // % ** | & ~ > >= < <= == !=

0      3
1      2
2      4
3      2
4      3
      ..
886    2
887    2
888    3
889    2
890    3
Length: 891, dtype: int64

In [22]:
df.survived.astype(float) # Returns the column converted to another type (here int64 -> float64)
# df.survived.astype(str) # Converts every value to a string
# df.survived.astype(np.dtype('float32')) # A NumPy dtype object also works (must be a valid dtype name)

0      0.0
1      1.0
2      1.0
3      1.0
4      0.0
      ... 
886    0.0
887    1.0
888    0.0
889    1.0
890    0.0
Name: survived, Length: 891, dtype: float64

In [23]:
df.sex.str.upper() # Applies a string method to every value of the column via the .str accessor
# More string functions at https://pandas.pydata.org/pandas-docs/stable/user_guide/text.html

0        MALE
1      FEMALE
2      FEMALE
3      FEMALE
4        MALE
        ...  
886      MALE
887    FEMALE
888    FEMALE
889      MALE
890      MALE
Name: sex, Length: 891, dtype: object

In [24]:
# Applies a custom function to the df columns (default axis is 'index', so the
# lambda receives one whole column as a Series per call).
# .iloc[0] picks the first element by position. A plain x[0] on a Series is a
# label lookup: it only works while the index still contains the label 0
# (it fails after filtering or sorting the rows away from the default index).
df.apply(lambda x: x.iloc[0])
# df.apply(lambda x: x['survived']+x['pclass'], axis='columns') # Applies custom function to df rows
# df.sex.map(lambda x: x[0]) # Applies custom function to each value of a series (here: first character of the string)

survived                 0
pclass                   3
sex                   male
age                   22.0
sibsp                    1
parch                    0
fare                  7.25
embarked                 S
class                Third
who                    man
adult_male            True
deck                   NaN
embark_town    Southampton
alive                   no
alone                False
dtype: object

In [25]:
# df1.join(df2,on=col1,how='inner') # SQL-style join: matches the values of df1's column col1
#                                   # against the index of df2 and combines the columns of both.
#                                   # how can be one of 'left', 'right', 'outer', 'inner'
#                                   # (to match on a column of both frames use df1.merge(df2, on=col1))
#                                   # TODO

In [26]:
# pd.concat([df1, df2],axis=1) # Adds the columns of df2 to the end of df1 (rows are aligned on the index, so the indexes should be identical) #TODO

In [27]:
# df1.append(df2) # Removed in pandas 2.0 (deprecated before that); use pd.concat as below #TODO
# pd.concat([df1,df2]) # Adds the rows of df2 to the end of df1 (columns should be identical)

In [28]:
# df.eval()  #TODO
# df.query() #TODO

In [29]:
#TODO Datetime https://strftime.org

In [30]:
df.sort_values(by='fare') # Returns the rows sorted by one column (ascending by default)
# df.sort_values(by=['fare', 'embark_town']) # Sort dataframe by multiple columns
# df.sort_index() # Sort dataframe by index value

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
271,1,3,male,25.0,0,0,0.0000,S,Third,man,True,,Southampton,yes,True
597,0,3,male,49.0,0,0,0.0000,S,Third,man,True,,Southampton,no,True
302,0,3,male,19.0,0,0,0.0000,S,Third,man,True,,Southampton,no,True
633,0,1,male,,0,0,0.0000,S,First,man,True,,Southampton,no,True
277,0,2,male,,0,0,0.0000,S,Second,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
438,0,1,male,64.0,1,4,263.0000,S,First,man,True,C,Southampton,no,False
341,1,1,female,24.0,3,2,263.0000,S,First,woman,False,C,Southampton,yes,False
737,1,1,male,35.0,0,0,512.3292,C,First,man,True,B,Cherbourg,yes,True
258,1,1,female,35.0,0,0,512.3292,C,First,woman,False,,Cherbourg,yes,True


# MultiIndex, Groupby, Pivot

In [31]:
df.set_index('survived') # Returns a new frame that uses the 'survived' column as its index (df itself is not modified)

Unnamed: 0_level_0,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [32]:
df.set_index(['survived','pclass']) # Returns a new frame with a two-level MultiIndex built from [survived, pclass]

Unnamed: 0_level_0,Unnamed: 1_level_0,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
survived,pclass,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
0,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [33]:
df.reset_index() # Returns a new frame with a fresh numeric index; the previous index is kept as a column named 'index'

Unnamed: 0,index,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [34]:
df # Still the original frame: set_index / reset_index above returned new frames and did not modify df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [35]:
df.groupby('class') # Returns a DataFrameGroupBy object; a result table only appears once an aggregation (e.g. .mean()) is applied

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x113090f10>

In [36]:
df.select_dtypes(include=np.number).columns # Labels of the numeric columns (text and boolean columns are left out)

Index(['survived', 'pclass', 'age', 'sibsp', 'parch', 'fare'], dtype='object')

In [37]:
# Restrict the aggregation to numeric columns; a mean is not defined for the text columns.
# numeric_cols is reused by the next cell.
numeric_cols = df.select_dtypes(include=np.number).columns
df.groupby('class')[numeric_cols].mean() # Gets mean of numeric columns based on class
# Other aggregations work the same way: .size(), .describe(), .first(), etc

Unnamed: 0_level_0,survived,pclass,age,sibsp,parch,fare
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
First,0.62963,1.0,38.233441,0.416667,0.356481,84.154687
Second,0.472826,2.0,29.87763,0.402174,0.380435,20.662183
Third,0.242363,3.0,25.14062,0.615071,0.393075,13.67555


In [38]:
# Aggregates multiple functions at once; the result has two column levels
# (original column, then the function name).
df.groupby('class')[numeric_cols].agg(['mean', 'std'])

Unnamed: 0_level_0,survived,survived,pclass,pclass,age,age,sibsp,sibsp,parch,parch,fare,fare
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
First,0.62963,0.484026,1.0,0.0,38.233441,14.802856,0.416667,0.611898,0.356481,0.693997,84.154687,78.380373
Second,0.472826,0.500623,2.0,0.0,29.87763,14.001077,0.402174,0.601633,0.380435,0.690963,20.662183,13.417399
Third,0.242363,0.428949,3.0,0.0,25.14062,12.495398,0.615071,1.374883,0.393075,0.888861,13.67555,11.778142


In [39]:
# Aggregate the class groups using a custom function:
# here, how many people in each class are older than 30 (missing ages compare as False and are not counted).
df.groupby('class').age.agg(lambda x: (x>30).sum())

class
First     125
Second     77
Third     103
Name: age, dtype: int64

In [40]:
# More info and methods at https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html
# Also look at https://jakevdp.github.io/PythonDataScienceHandbook/03.08-aggregation-and-grouping.html

In [41]:
df.pivot_table('survived', index='class', columns='sex') # Gets mean of survived (mean is the default aggregation) for each class/sex combination

sex,female,male
class,Unnamed: 1_level_1,Unnamed: 2_level_1
First,0.968085,0.368852
Second,0.921053,0.157407
Third,0.5,0.135447


In [42]:
# More info and methods at https://jakevdp.github.io/PythonDataScienceHandbook/03.09-pivot-tables.html

In [43]:
# Passing lists for index/columns gives hierarchical (multi-level) row and column labels.
# Combinations with no passengers show up as NaN.
df_pivot = df.pivot_table('survived', index=['class','embark_town'], columns=['sex','who'])
# df_pivot = df.pivot_table('survived', index=['class','embark_town'], columns=['sex','who'], aggfunc='sum') # Sums survived (number of survivors) instead of averaging
df_pivot

Unnamed: 0_level_0,sex,female,female,male,male
Unnamed: 0_level_1,who,child,woman,child,man
class,embark_town,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
First,Cherbourg,,0.976744,,0.404762
First,Queenstown,,1.0,,0.0
First,Southampton,0.666667,0.977778,1.0,0.328947
Second,Cherbourg,1.0,1.0,1.0,0.111111
Second,Queenstown,,1.0,,0.0
Second,Southampton,1.0,0.898305,1.0,0.078652
Third,Cherbourg,0.818182,0.5,0.5,0.205128
Third,Queenstown,1.0,0.71875,0.0,0.085714
Third,Southampton,0.333333,0.385714,0.35,0.110204


In [44]:
df_pivot.stack() # Moves the innermost column level (here 'who') into the row labels

Unnamed: 0_level_0,Unnamed: 1_level_0,sex,female,male
class,embark_town,who,Unnamed: 3_level_1,Unnamed: 4_level_1
First,Cherbourg,man,,0.404762
First,Cherbourg,woman,0.976744,
First,Queenstown,man,,0.0
First,Queenstown,woman,1.0,
First,Southampton,child,0.666667,1.0
First,Southampton,man,,0.328947
First,Southampton,woman,0.977778,
Second,Cherbourg,child,1.0,1.0
Second,Cherbourg,man,,0.111111
Second,Cherbourg,woman,1.0,


In [45]:
df_pivot.unstack() # Moves the innermost row level (here 'embark_town') into the column labels

sex,female,female,female,female,female,female,male,male,male,male,male,male
who,child,child,child,woman,woman,woman,child,child,child,man,man,man
embark_town,Cherbourg,Queenstown,Southampton,Cherbourg,Queenstown,Southampton,Cherbourg,Queenstown,Southampton,Cherbourg,Queenstown,Southampton
class,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
First,,,0.666667,0.976744,1.0,0.977778,,,1.0,0.404762,0.0,0.328947
Second,1.0,,1.0,1.0,1.0,0.898305,1.0,,1.0,0.111111,0.0,0.078652
Third,0.818182,1.0,0.333333,0.5,0.71875,0.385714,0.5,0.0,0.35,0.205128,0.085714,0.110204
