# Load the Titanic dataset.

In [1]:
import pandas as pd
titanic=pd.read_csv("titanic.csv")
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


# Check missing values using .isnull().sum().


In [2]:
titanic.isnull().sum()

survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0
embarked         2
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
dtype: int64

# Fill missing Age with median, and Embarked with mode.

In [3]:
titanic['age']=titanic['age'].fillna(titanic['age'].mean())
print(titanic['age'].mean())
print("Missing values AFTER:", titanic['age'].isnull().sum())

29.69911764705882
Missing values AFTER: 0


In [4]:
titanic['embarked']=titanic['embarked'].fillna(titanic['embarked'].mode()[0])
print(titanic['embarked'].mode()[0])
print("Missing values AFTER:", titanic['age'].isnull().mode()[0])

S
Missing values AFTER: False


# Removing Duplicates

In [5]:
print("duplicates:",titanic.duplicated().sum())


duplicates: 107


In [6]:
titanic=titanic.drop_duplicates()

In [7]:
print(titanic.duplicated().sum())

0


In [8]:
titanic.shape

(784, 15)

# Selecting Data

## Select only the columns who, Age, and Fare.  

In [14]:
combined=titanic[['who','age','fare']].head(10)
combined.head(10)

Unnamed: 0,who,age,fare
0,man,22.0,7.25
1,woman,38.0,71.2833
2,woman,26.0,7.925
3,woman,35.0,53.1
4,man,35.0,8.05
5,man,29.699118,8.4583
6,man,54.0,51.8625
7,child,2.0,21.075
8,woman,27.0,11.1333
9,child,14.0,30.0708


# Filtering Data

## Find all male passengers younger than 18.



In [21]:
conditional_age= (titanic['age']<18 )& (titanic['sex']=='male')
print(conditional_age)

0      False
1      False
2      False
3      False
4      False
       ...  
885    False
887    False
888    False
889    False
890    False
Length: 784, dtype: bool


In [22]:
filtered_data = titanic[conditional_age]
print(filtered_data[['age', 'sex']])

       age   sex
7     2.00  male
16    2.00  male
50    7.00  male
59   11.00  male
63    4.00  male
78    0.83  male
86   16.00  male
125  12.00  male
138  16.00  male
163  17.00  male
164   1.00  male
165   9.00  male
171   4.00  male
182   9.00  male
183   1.00  male
193   3.00  male
220  16.00  male
261   3.00  male
266  16.00  male
278   7.00  male
282  16.00  male
305   0.92  male
333  16.00  male
340   2.00  male
348   3.00  male
352  15.00  male
386   1.00  male
407   3.00  male
433  17.00  male
445   4.00  male
480   9.00  male
489   9.00  male
532  17.00  male
549   8.00  male
550  17.00  male
574  16.00  male
683  14.00  male
686  14.00  male
721  17.00  male
731  11.00  male
746  16.00  male
751   6.00  male
755   0.67  male
764  16.00  male
787   8.00  male
788   1.00  male
791  16.00  male
802  11.00  male
803   0.42  male
819  10.00  male
824   2.00  male
827   1.00  male
831   0.83  male
841  16.00  male
850   4.00  male
869   4.00  male


## Find all female passengers with Fare greater than 50.

In [24]:
female=(titanic['sex']=='female')& (titanic['fare']>50)
conditions=titanic[female]
print(conditions[['sex','fare']])

        sex      fare
1    female   71.2833
3    female   53.1000
31   female  146.5208
52   female   76.7292
61   female   80.0000
..      ...       ...
835  female   83.1583
849  female   89.1042
856  female  164.8667
871  female   52.5542
879  female   83.1583

[84 rows x 2 columns]


# Combined Filtering

## From the Titanic dataset, filter passengers who are:

-Female and above 40 years old

 -OR Male with Fare > 100

In [26]:
combination= (titanic['sex']=='female') | ((titanic['sex']=='male') & (titanic['fare']>100))

In [29]:
combined_filter= titanic[combination]
print(combined_filter[['sex','fare']].head(30))

       sex      fare
1   female   71.2833
2   female    7.9250
3   female   53.1000
8   female   11.1333
9   female   30.0708
10  female   16.7000
11  female   26.5500
14  female    7.8542
15  female   16.0000
18  female   18.0000
19  female    7.2250
22  female    8.0292
24  female   21.0750
25  female   31.3875
27    male  263.0000
28  female    7.8792
31  female  146.5208
32  female    7.7500
38  female   18.0000
39  female   11.2417
40  female    9.4750
41  female   21.0000
43  female   41.5792
44  female    7.8792
49  female   17.8000
52  female   76.7292
53  female   26.0000
56  female   10.5000
58  female   27.7500
61  female   80.0000
