In [4]:
import pandas as pd
import numpy as np

# Read the passenger records from the CSV export.
titanic = pd.read_csv("titanic - titanic.csv")

# Impute gaps by assigning the filled column back (no inplace=True):
# Age takes its median, Embarked takes its most frequent value.
age_median = titanic['Age'].median()
titanic['Age'] = titanic['Age'].fillna(age_median)
embarked_mode = titanic['Embarked'].mode()[0]
titanic['Embarked'] = titanic['Embarked'].fillna(embarked_mode)

# Bucket ages into four labelled ranges. Intervals are closed on the right
# (an age of exactly 13 lands in the first bucket), and include_lowest
# lets the first bucket also accept an age of exactly 0.
bins = [0, 13, 18, 40, 100]
labels = ['Child (0-13)', 'Teen (14-18)', 'Adult (19-40)', 'Senior (41-100)']
titanic['AgeCategory'] = pd.cut(
    titanic['Age'],
    bins=bins,
    labels=labels,
    right=True,
    include_lowest=True,
)

# Select the Survived column per (AgeCategory, Sex, Embarked, Pclass) group.
# observed=True keeps only the category combinations present in the data.
group_keys = ['AgeCategory', 'Sex', 'Embarked', 'Pclass']
grouped = titanic.groupby(group_keys, observed=True)['Survived']

# Per-group mean of Survived, rounded to 3 decimals and flattened back into
# a regular table (presumably Survived is a 0/1 flag, making the mean a
# survival rate -- confirm against the dataset).
survival_probs = (
    grouped.mean()
    .round(3)
    .reset_index(name='Survival_Probability')
)

# Show the resulting table.
print(survival_probs)


        AgeCategory     Sex Embarked  Pclass  Survival_Probability
0      Child (0-13)  female        C       2                 1.000
1      Child (0-13)  female        C       3                 0.857
2      Child (0-13)  female        S       1                 0.000
3      Child (0-13)  female        S       2                 1.000
4      Child (0-13)  female        S       3                 0.353
5      Child (0-13)    male        C       2                 1.000
6      Child (0-13)    male        C       3                 0.667
7      Child (0-13)    male        Q       3                 0.000
8      Child (0-13)    male        S       1                 1.000
9      Child (0-13)    male        S       2                 1.000
10     Child (0-13)    male        S       3                 0.389
11     Teen (14-18)  female        C       1                 1.000
12     Teen (14-18)  female        C       2                 1.000
13     Teen (14-18)  female        C       3                 0