# Thank you to Titanic Encyclopedia for the dataset!

### Source: https://www.encyclopedia-titanica.org/

In [1]:
#Import dependencies
import os, glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tabulate import tabulate

In [2]:
#Pulling data from CSVs
titanic_manifest = "WebVisualizations/Resources/Titanic Passengers and Crew Complete List.csv"

In [3]:
#Read files into pandas and remove title row (2479x16 w/o removal) 2477 x 16
full = pd.read_csv(titanic_manifest, encoding="utf-8", header=1, skipfooter=1, engine ='python')
full.head()

Unnamed: 0,Name,Born,Died,Age,Gender,Class/Dept,Ticket,Fare,Cabin,Joined,Occupation,Survived?,Boat,Body,Nationality,URL
0,"ABBING, Mr Anthony",1870.0,1912.0,41,Male,3rd Class Passenger,5547,£7 11s,,Southampton,Blacksmith,LOST,,,American,https://www.encyclopedia-titanica.org/titanic-...
1,"ABBOTT, Mr Ernest Owen",1891.0,1912.0,21,Male,Victualling Crew,,,,Southampton,Lounge Pantry Steward,LOST,,,English,https://www.encyclopedia-titanica.org/titanic-...
2,"ABBOTT, Mr Eugene Joseph",1899.0,1912.0,13,Male,3rd Class Passenger,CA2673,£20 5s,,Southampton,Scholar,LOST,,,American,https://www.encyclopedia-titanica.org/titanic-...
3,"ABBOTT, Mr Rossmore Edward",1896.0,1912.0,16,Male,3rd Class Passenger,CA2673,£20 5s,,Southampton,Jeweller,LOST,,190.0,"English, American",https://www.encyclopedia-titanica.org/titanic-...
4,"ABBOTT, Mrs Rhoda Mary 'Rosa'",1873.0,1946.0,39,Female,3rd Class Passenger,CA2673,£20 5s,,Southampton,,SAVED,A,,English,https://www.encyclopedia-titanica.org/titanic-...


## Organize Data

In [4]:
#Drop extra data columns
columns = ['Born',
           'Died',
           'Ticket',
           'Fare',
           'Cabin',
           'Occupation',
           'Boat',
           'Body',
           'URL']

full_drop = full.drop(columns, axis=1)#, inplace=True)
full_drop.head()

Unnamed: 0,Name,Age,Gender,Class/Dept,Joined,Survived?,Nationality
0,"ABBING, Mr Anthony",41,Male,3rd Class Passenger,Southampton,LOST,American
1,"ABBOTT, Mr Ernest Owen",21,Male,Victualling Crew,Southampton,LOST,English
2,"ABBOTT, Mr Eugene Joseph",13,Male,3rd Class Passenger,Southampton,LOST,American
3,"ABBOTT, Mr Rossmore Edward",16,Male,3rd Class Passenger,Southampton,LOST,"English, American"
4,"ABBOTT, Mrs Rhoda Mary 'Rosa'",39,Female,3rd Class Passenger,Southampton,SAVED,English


In [5]:
#Add columns
full_drop['Type']= ""
full_drop['PersonID'] = full_drop.index + 1
full_drop.head()

Unnamed: 0,Name,Age,Gender,Class/Dept,Joined,Survived?,Nationality,Type,PersonID
0,"ABBING, Mr Anthony",41,Male,3rd Class Passenger,Southampton,LOST,American,,1
1,"ABBOTT, Mr Ernest Owen",21,Male,Victualling Crew,Southampton,LOST,English,,2
2,"ABBOTT, Mr Eugene Joseph",13,Male,3rd Class Passenger,Southampton,LOST,American,,3
3,"ABBOTT, Mr Rossmore Edward",16,Male,3rd Class Passenger,Southampton,LOST,"English, American",,4
4,"ABBOTT, Mrs Rhoda Mary 'Rosa'",39,Female,3rd Class Passenger,Southampton,SAVED,English,,5


In [6]:
#Rename columns
full_rename = full_drop.rename(columns={'Class/Dept': 'ClassDept','Survived?': 'Survived'})#, inplace=True)
full_rename.head()

Unnamed: 0,Name,Age,Gender,ClassDept,Joined,Survived,Nationality,Type,PersonID
0,"ABBING, Mr Anthony",41,Male,3rd Class Passenger,Southampton,LOST,American,,1
1,"ABBOTT, Mr Ernest Owen",21,Male,Victualling Crew,Southampton,LOST,English,,2
2,"ABBOTT, Mr Eugene Joseph",13,Male,3rd Class Passenger,Southampton,LOST,American,,3
3,"ABBOTT, Mr Rossmore Edward",16,Male,3rd Class Passenger,Southampton,LOST,"English, American",,4
4,"ABBOTT, Mrs Rhoda Mary 'Rosa'",39,Female,3rd Class Passenger,Southampton,SAVED,English,,5


In [7]:
#Format column order to LAUSD Template
full_order = full_rename[['PersonID',
                          'Name',
                          'Age',
                          'Gender',
                          'Nationality',
                          'Joined',
                          'ClassDept',
                          'Type',
                          'Survived']]
full_order.head()

Unnamed: 0,PersonID,Name,Age,Gender,Nationality,Joined,ClassDept,Type,Survived
0,1,"ABBING, Mr Anthony",41,Male,American,Southampton,3rd Class Passenger,,LOST
1,2,"ABBOTT, Mr Ernest Owen",21,Male,English,Southampton,Victualling Crew,,LOST
2,3,"ABBOTT, Mr Eugene Joseph",13,Male,American,Southampton,3rd Class Passenger,,LOST
3,4,"ABBOTT, Mr Rossmore Edward",16,Male,"English, American",Southampton,3rd Class Passenger,,LOST
4,5,"ABBOTT, Mrs Rhoda Mary 'Rosa'",39,Female,English,Southampton,3rd Class Passenger,,SAVED


### Edit & Plot Ages

In [8]:
#Find unique ages
full_order.Age.unique()

array(['41', '21', '13', '16', '39', '25', '30', '28', '19', '34', '45',
       '38', '27', '26', '20', '40', '24', '31', '37', '10m', '2', '5',
       '3', '23', '35', '22', '29', '11m', '36', '43', '17', '33', '42',
       '50', '4', '6', '9', '11', '62', '53', '18', '71', '57', '47',
       '32', '15', '44', '46', '52', '58', '56', '48', '55', '1', '12',
       '51', '59', '61', nan, '60', '7', '10', '14', '54', '49', '8',
       '64', '66', '70', '4m', '2m', '65', '63', '7m', '9m', '69', '67',
       '74', '5m'], dtype=object)

In [9]:
#
def edit_ages (months):
    if 'm' in months:
        return ('Not Onboard')

In [10]:
#Convert Age months to decimals by multipying month number by 12 and roundings up
replace_ages = {'2m' : 0.08,
                '4m' : 0.33,
                '5m' : 0.42,
                '7m' : 0.58,
                '9m' : 0.75,
                '10m' : 0.83,
                '11m' : 0.92,}                                                                                          

full_age = full_order.replace({"Age": replace_ages})
full_age

Unnamed: 0,PersonID,Name,Age,Gender,Nationality,Joined,ClassDept,Type,Survived
0,1,"ABBING, Mr Anthony",41,Male,American,Southampton,3rd Class Passenger,,LOST
1,2,"ABBOTT, Mr Ernest Owen",21,Male,English,Southampton,Victualling Crew,,LOST
2,3,"ABBOTT, Mr Eugene Joseph",13,Male,American,Southampton,3rd Class Passenger,,LOST
3,4,"ABBOTT, Mr Rossmore Edward",16,Male,"English, American",Southampton,3rd Class Passenger,,LOST
4,5,"ABBOTT, Mrs Rhoda Mary 'Rosa'",39,Female,English,Southampton,3rd Class Passenger,,SAVED
...,...,...,...,...,...,...,...,...,...
2472,2473,"ČULUMOVIć, Mr Jeso",17,Male,Croatian,Southampton,3rd Class Passenger,,LOST
2473,2474,"ḤANNā AL-HāJ, Mr Mansūr",35,Male,Syrian,Cherbourg,3rd Class Passenger,,LOST
2474,2475,"ḤANNā, Mr Mubārik Sulaymān Abī Āsī",27,Male,Syrian,Cherbourg,3rd Class Passenger,,SAVED
2475,2476,"ṬANNūS MU'AWWAD, Mr Ḥannā",34,Male,"Syrian,American",Cherbourg,3rd Class Passenger,,LOST


In [11]:
#Find unique ages
full_age.Age.unique()

array(['41', '21', '13', '16', '39', '25', '30', '28', '19', '34', '45',
       '38', '27', '26', '20', '40', '24', '31', '37', 0.83, '2', '5',
       '3', '23', '35', '22', '29', 0.92, '36', '43', '17', '33', '42',
       '50', '4', '6', '9', '11', '62', '53', '18', '71', '57', '47',
       '32', '15', '44', '46', '52', '58', '56', '48', '55', '1', '12',
       '51', '59', '61', nan, '60', '7', '10', '14', '54', '49', '8',
       '64', '66', '70', 0.33, 0.08, '65', '63', 0.58, 0.75, '69', '67',
       '74', 0.42], dtype=object)

In [12]:
#Remove rows with blank ages, which is 2450
remove_age = full_age.dropna(subset=['Age'])
remove_age

Unnamed: 0,PersonID,Name,Age,Gender,Nationality,Joined,ClassDept,Type,Survived
0,1,"ABBING, Mr Anthony",41,Male,American,Southampton,3rd Class Passenger,,LOST
1,2,"ABBOTT, Mr Ernest Owen",21,Male,English,Southampton,Victualling Crew,,LOST
2,3,"ABBOTT, Mr Eugene Joseph",13,Male,American,Southampton,3rd Class Passenger,,LOST
3,4,"ABBOTT, Mr Rossmore Edward",16,Male,"English, American",Southampton,3rd Class Passenger,,LOST
4,5,"ABBOTT, Mrs Rhoda Mary 'Rosa'",39,Female,English,Southampton,3rd Class Passenger,,SAVED
...,...,...,...,...,...,...,...,...,...
2472,2473,"ČULUMOVIć, Mr Jeso",17,Male,Croatian,Southampton,3rd Class Passenger,,LOST
2473,2474,"ḤANNā AL-HāJ, Mr Mansūr",35,Male,Syrian,Cherbourg,3rd Class Passenger,,LOST
2474,2475,"ḤANNā, Mr Mubārik Sulaymān Abī Āsī",27,Male,Syrian,Cherbourg,3rd Class Passenger,,SAVED
2475,2476,"ṬANNūS MU'AWWAD, Mr Ḥannā",34,Male,"Syrian,American",Cherbourg,3rd Class Passenger,,LOST


In [19]:
#Find person count per age
age_count = remove_age.groupby(["Age"]).count()["PersonID"]
age_count

Age
0.08    1
0.33    1
0.42    1
0.58    1
0.75    1
       ..
70      1
71      4
74      1
8       9
9       8
Name: PersonID, Length: 78, dtype: int64

In [20]:
#Find person count per age and survived status
age_survive = remove_age.groupby(["Age","Survived"]).size()
age_survive

Age   Survived
0.08  SAVED       1
0.33  LOST        1
0.42  SAVED       1
0.58  LOST        1
0.75  SAVED       1
                 ..
74    LOST        1
8     LOST        4
      SAVED       5
9     LOST        4
      SAVED       4
Length: 139, dtype: int64

In [None]:
#
time = study.groupby(["Drug Regimen","Timepoint"])
time_table = pd.DataFrame(time.size())

In [16]:
#
test_age = remove_age.groupby('Age')['Survived'].value_counts().unstack().fillna(0)
#test= full_age.groupby('Gender')['Survived'].value_counts()
test_age

Survived,LOST,SAVED
Age,Unnamed: 1_level_1,Unnamed: 2_level_1
0.08,0.0,1.0
0.33,1.0,0.0
0.42,0.0,1.0
0.58,1.0,0.0
0.75,0.0,1.0
...,...,...
70,1.0,0.0
71,4.0,0.0
74,1.0,0.0
8,4.0,5.0


In [17]:
#
ax = test_age[['Age','Survived']].plot(kind='bar', title ="test", figsize=(15, 10), legend=True, fontsize=12)
# ax.set_xlabel("Hour", fontsize=12)
# ax.set_ylabel("V", fontsize=12)
plt.show()

# 35]:
# #Create and format a scatter plot of weight and average tumor size of each mouse treated with Capomulin
# fig4, scatter = plt.subplots(figsize=(15,10))
# scatter = plt.scatter(averagedata["Weight (g)"],averagedata["Tumor Volume (mm3)"],color="orange",s=150,edgecolor="blue")
# plt.title("Capomulin Study Mice: Weight & Avg. Tumor Volume (mm3)",fontsize=25)
# plt.ylabel("Tumor Volume (mm3)",fontsize=16)
# plt.xlabel("Weight",fontsize=16)

# #Save Line Plot
# plt.tight_layout()
# plt.savefig("Images/scatterplot.png")
# plt.show()

KeyError: "None of [Index(['Age', 'Survived'], dtype='object', name='Survived')] are in the [columns]"

In [None]:
#Find unique genders
full_age.Gender.unique()

In [None]:
#Find unique genders
full_age.Survived.unique()

In [None]:
#Find unique count of persons, which is 2477 ('Name' has 10 duplciates, so we cannot use that column)
people = full_age["PersonID"].nunique()
people

In [None]:
test = full_age.groupby('Gender')['Survived'].value_counts().unstack().fillna(0)
#test= full_age.groupby('Gender')['Survived'].value_counts()
test

In [None]:
#test.plot.area(x='Gender', y='Survived')
colors = ['purple','navy']
explode = (0.1,0)
test.plot.pie(subplots=True,
                               figsize=(20,8),
                               colors = colors, explode = explode,
                               autopct="%.2f%%", textprops={'color':"w",'fontsize': 20})
plt.suptitle('Titanic Passengers & Crew: Survived by Gender',fontsize = 20)
#plt.xlabel('',fontsize = 50)
# plt.legend(title="Gender", loc='lower right',fontsize=7, bbox_to_anchor = (0.75, -01.0) )

plt.tight_layout()
# plt.savefig("Images/pandas_pie_plot.png")
# plt.show()

In [None]:
#Find and creat count table of persons based on gender
group_gender = full_age.groupby(["Gender",'Survived'])
group_gender

In [None]:
gender_table = pd.DataFrame(group_gender.size())
gender_table

In [None]:
full_gender = pd.DataFrame(gender_table.groupby(["Gender"]).count())
full_gender

In [None]:
group_gender = full_age.groupby(["PersonID","Gender"])
gender_table = pd.DataFrame(group_gender.size())
full_gender = pd.DataFrame(gender_table.groupby(["Gender"]).count())
full_gender.columns = ["Person Count"]
full_gender

In [None]:
# test =  full_age.groupby('Gender')['Survived'].count()
# test

test = pd.DataFrame({'Gender':'Male Female'.split(),
                     'Survived': 'LOST SAVED'.split()})

def sum_saved(grp):
    return np.sum(grp=='SAVED')
def sum_lost(grp):
    return np.sum(grp=='LOST')
def sum_nan(grp):
    return np.sum(grp=='nan')

result = test.agg({'Survived': [sum_saved, sum_lost,sum_nan]})
result.columns = [col[1] for col in result.columns]
print(result)

# summary = data.groupby('Gender').agg({'PersonID':['count'],
#                                       'Survied':['min','max','mean','median']})
# summary

# for status, group in Survived:

# full_age.groupby('Gender').apply(lambda x: pd.Series(dict(
#     sum_up=(x.B == 'up').sum(),
#     sum_down=(x.B == 'down').sum(),
#     over_200_up=((x.B == 'up') & (x.C > 200)).sum()

In [None]:
#
full_gender.plot.area(x='Gender', y='Person Count')

In [None]:
#Find unique Nationalities
full_age.Nationality.unique()

In [None]:
#2447
remove_nation = full_age.dropna(subset=['Nationality'])
remove_nation

In [None]:
full_age['Nationality']

In [None]:
#
#full_age.replace({'Nationality': {'[strings that contain ,]': 'Multi-Nationality'}})
#nations = remove_nation['Nationality'][remove_nation["Nationality"].str.contains(',')] = 'Multi-Nationality'
#nations = remove_nation.loc[remove_nation['Nationality'].str.contains(','), 'Nationality'] = 'Multi-Nationality'
full_age['Nationality'] = full_age['Nationality'].apply(lambda x: 'Multi-Nationality' if ',' in x else x)
#nations

In [None]:
#Find unique Joined ports (Trip order: Belfast-Ireland, Southampton-England, Cherbourg-France, Queenstown-Ireland)
full_age.Joined.unique()

In [None]:
#Find unique Class/Dept
full_age.ClassDept.unique()

In [None]:
#Define Person Type based on Class/Dept
def person_type (assigned):
    if 'Deserted' in assigned:
        return 'Not Onboard'
    
    elif 'Left' in assigned:
        return 'Not Onboard'
    
    elif 'Failed' in assigned:
        return 'Not Onboard'
    
    elif 'Officer' in assigned:
        return 'Officer'
    
    elif 'Victualling' in assigned:
        return 'Victualler'
    
    elif 'Engineering' in assigned:
        return 'Enginner'
    
    elif 'Restaurant Staff' in assigned:
        return 'Restaurant Staff'
    
    elif 'Deck Crew' in assigned:
        return 'Deck Crew'
    
    elif 'Musician' in assigned:
        return 'Musician'
    
    elif 'Servant' in assigned:
        return 'Servant'
    
    elif '1st Class' in assigned:
        return '1st Class'
    
    elif '2nd Class' in assigned:
        return '2nd Class'
    
    elif '3rd Class' in assigned:
        return '3rd Class'

In [None]:
#Apply Person Type based on Class/Dept
full_age['Type'] =full_age['ClassDept'].apply(person_type)
full_age

In [None]:
#Find unique Types
full_age.Type.unique()

In [None]:
# #Define Age and convert to -1
# def person_age (years):
#     if 'm' in years:
#         return '-1'

In [None]:
# #Apply Age conversion
# full_age =full_order['Age'].apply(person_age)
# full_age.head()
# TypeError: argument of type 'float' is not iterable