## Titanic Tragedy 
The RMS Titanic, a British luxury passenger liner, is infamous for sinking during its maiden voyage on April 15, 1912, after striking an iceberg in the North Atlantic. Launched in 1911 and completed in 1912, the Titanic was the largest ocean liner at the time, and it was a symbol of technological and luxurious advancements. The tragedy resulted in the deaths of over 1,500 passengers and crew members, making it one of the most tragic maritime disasters in history.

---

In [3]:
#load dataset
import numpy as np
import pandas as pd
titanic = pd.read_csv('dataset/titanic.csv')
titanic

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [4]:
# How to find out total no. of records and columns  ?

In [5]:
titanic.shape

(891, 12)

In [6]:
# How to find out all missing values count in all columns ?

In [7]:
titanic.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [8]:
# How to show all missing Age records ?

In [9]:
titanic[titanic.Age.isnull()]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
17,18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13.0000,,S
19,20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.2250,,C
26,27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.2250,,C
28,29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
859,860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C
863,864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.5500,,S
868,869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5000,,S
878,879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S


In [10]:
# How to show all missing Embarked records ?

In [11]:
titanic[titanic.Embarked.isnull()]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
61,62,1,1,"Icard, Miss. Amelie",female,38.0,0,0,113572,80.0,B28,
829,830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62.0,0,0,113572,80.0,B28,


In [12]:
# show total count survived or not survived passengers.

In [13]:
titanic[(titanic.Survived ==1) | (titanic.Survived == 0)].Survived.value_counts()

Survived
0    549
1    342
Name: count, dtype: int64

In [14]:
# how to find out total count survived male and female passengers?

In [15]:
titanic[titanic.Survived ==1].Sex.value_counts()

Sex
female    233
male      109
Name: count, dtype: int64

In [16]:
# how to find all passenger count which Fare = 0?

In [17]:
titanic[titanic.Fare == 0].shape

(15, 12)

In [18]:
# How many survived passenger count which Fare = 0?

In [19]:
titanic[(titanic.Survived == 1) & (titanic.Fare ==0)].shape

(1, 12)

In [20]:
# show maximum fare only 3 passenger list ?

In [21]:
titanic.sort_values(by = 'Fare' , ascending = False)[:3]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
258,259,1,1,"Ward, Miss. Anna",female,35.0,0,0,PC 17755,512.3292,,C
737,738,1,1,"Lesurer, Mr. Gustave J",male,35.0,0,0,PC 17755,512.3292,B101,C
679,680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36.0,0,1,PC 17755,512.3292,B51 B53 B55,C


In [22]:
# Only show 3 columns with maximum fare 3 pasenger list ? 

In [23]:
titanic.sort_values(by = 'Fare' , ascending = False)[:][["Name", "Survived",'Fare']][:3]
#or 
m = titanic.sort_values('Fare', ascending = False).index.tolist()[:3]
titanic.loc[m,['Name','Sex','Fare']]

Unnamed: 0,Name,Sex,Fare
258,"Ward, Miss. Anna",female,512.3292
737,"Lesurer, Mr. Gustave J",male,512.3292
679,"Cardeza, Mr. Thomas Drake Martinez",male,512.3292


In [24]:
# All missing Embarked data rows after dropping embarked column?

In [25]:
j = titanic[titanic.Embarked.isnull()]
j.dropna(axis = 'columns')    # axis =0 or 1 can also be used

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin
61,62,1,1,"Icard, Miss. Amelie",female,38.0,0,0,113572,80.0,B28
829,830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62.0,0,0,113572,80.0,B28


In [26]:
# All missing Age values replace wih NaN to -1 ?

In [27]:
k = titanic[titanic.Age.isnull()]
k.fillna(-1)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
5,6,0,3,"Moran, Mr. James",male,-1.0,0,0,330877,8.4583,-1,Q
17,18,1,2,"Williams, Mr. Charles Eugene",male,-1.0,0,0,244373,13.0000,-1,S
19,20,1,3,"Masselmani, Mrs. Fatima",female,-1.0,0,0,2649,7.2250,-1,C
26,27,0,3,"Emir, Mr. Farred Chehab",male,-1.0,0,0,2631,7.2250,-1,C
28,29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,-1.0,0,0,330959,7.8792,-1,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
859,860,0,3,"Razi, Mr. Raihed",male,-1.0,0,0,2629,7.2292,-1,C
863,864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,-1.0,8,2,CA. 2343,69.5500,-1,S
868,869,0,3,"van Melkebeke, Mr. Philemon",male,-1.0,0,0,345777,9.5000,-1,S
878,879,0,3,"Laleff, Mr. Kristo",male,-1.0,0,0,349217,7.8958,-1,S


In [28]:
# show all records which Fare between 0 to <100 ?

In [29]:
titanic[(titanic.Fare<100)]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [30]:
# consider you have only records which Fare is in between 0 to 100 , show all records having minimum and maximum Fare value(here multiple records can have minimum and maximum value)

In [31]:
import numpy as np
m = titanic[(titanic.Fare<100)]
m[(m.Fare==np.max(m.Fare))| (m.Fare==np.min(m.Fare))]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
179,180,0,3,"Leonard, Mr. Lionel",male,36.0,0,0,LINE,0.0,,S
263,264,0,1,"Harrison, Mr. William",male,40.0,0,0,112059,0.0,B94,S
271,272,1,3,"Tornquist, Mr. William Henry",male,25.0,0,0,LINE,0.0,,S
277,278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0.0,,S
302,303,0,3,"Johnson, Mr. William Cahoone Jr",male,19.0,0,0,LINE,0.0,,S
413,414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0.0,,S
466,467,0,2,"Campbell, Mr. William",male,,0,0,239853,0.0,,S
481,482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0.0,,S
520,521,1,1,"Perreault, Miss. Anne",female,30.0,0,0,12749,93.5,B73,S
597,598,0,3,"Johnson, Mr. Alfred",male,49.0,0,0,LINE,0.0,,S


In [32]:
# just above query using pandas 

In [33]:
#here m is having all records 0<Fare<100
m[(m.Fare.max()==m.Fare) | (m.Fare.min()==m.Fare) ]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
179,180,0,3,"Leonard, Mr. Lionel",male,36.0,0,0,LINE,0.0,,S
263,264,0,1,"Harrison, Mr. William",male,40.0,0,0,112059,0.0,B94,S
271,272,1,3,"Tornquist, Mr. William Henry",male,25.0,0,0,LINE,0.0,,S
277,278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0.0,,S
302,303,0,3,"Johnson, Mr. William Cahoone Jr",male,19.0,0,0,LINE,0.0,,S
413,414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0.0,,S
466,467,0,2,"Campbell, Mr. William",male,,0,0,239853,0.0,,S
481,482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0.0,,S
520,521,1,1,"Perreault, Miss. Anne",female,30.0,0,0,12749,93.5,B73,S
597,598,0,3,"Johnson, Mr. Alfred",male,49.0,0,0,LINE,0.0,,S


In [34]:
# same as above query

In [35]:
m[m.Fare.isin([m.Fare.max(),m.Fare.min()])]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
179,180,0,3,"Leonard, Mr. Lionel",male,36.0,0,0,LINE,0.0,,S
263,264,0,1,"Harrison, Mr. William",male,40.0,0,0,112059,0.0,B94,S
271,272,1,3,"Tornquist, Mr. William Henry",male,25.0,0,0,LINE,0.0,,S
277,278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0.0,,S
302,303,0,3,"Johnson, Mr. William Cahoone Jr",male,19.0,0,0,LINE,0.0,,S
413,414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0.0,,S
466,467,0,2,"Campbell, Mr. William",male,,0,0,239853,0.0,,S
481,482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0.0,,S
520,521,1,1,"Perreault, Miss. Anne",female,30.0,0,0,12749,93.5,B73,S
597,598,0,3,"Johnson, Mr. Alfred",male,49.0,0,0,LINE,0.0,,S
