# Titanic Survival Prediction

In [117]:
import pandas as pd

# Read both Kaggle splits with the same default parser settings.
train_df, test_df = map(pd.read_csv, ('train.csv', 'test.csv'))

In [118]:
# Short working names; these are aliases of the loaded frames, not copies.
df, df_test = train_df, test_df

def show(df, n=5, precision=2):
    """Return a styled preview of the first ``n`` rows of ``df``.

    Parameters
    ----------
    df : object exposing ``head(n)`` and ``.style`` (e.g. a pandas DataFrame).
    n : number of leading rows to include in the preview.
    precision : value forwarded to the styler's ``format(precision=...)``.

    Returns
    -------
    The styler obtained from ``df.head(n).style`` after the table styles
    and the number format have been applied.
    """
    # CSS rules for header cells.
    header_css = [
        ('background-color', 'lightblue'),
        ('color', 'black'),
        ('font-weight', 'bold'),
        ('text-align', 'center'),
    ]
    # CSS rules for body cells.
    cell_css = [
        ('border', '1px solid black'),
        ('text-align', 'center'),
        ('font-size', '14px'),
        ('background-color', 'whitesmoke'),
        ('color', 'black'),
    ]
    table_styles = [
        {'selector': 'th', 'props': header_css},
        {'selector': 'td', 'props': cell_css},
    ]

    preview = df.head(n).style
    preview = preview.set_table_styles(table_styles)
    return preview.format(precision=precision)

# Preview the first five training rows; as the cell's last expression it is rendered as output.
show(df, n=5, precision=2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.28,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.92,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [119]:
# Series.count() tallies the non-null PassengerId entries of each split.
passenger_in_training, passenger_in_testing = (
    split['PassengerId'].count() for split in (df, df_test)
)
total_passenger = passenger_in_training + passenger_in_testing

# Share of each split relative to the combined passenger count, in percent.
training_share = passenger_in_training / total_passenger * 100
testing_share = passenger_in_testing / total_passenger * 100

print(f"Titanic Passanger in Data Training: {passenger_in_training} people ({training_share:.2f} %)")
print(f"Titanic Passanger in Data Testing: {passenger_in_testing} people ({testing_share:.2f} %)")
print(f"Titanic Passanger in Total: {total_passenger} people")

Titanic Passanger in Data Training: 891 people (68.07 %)
Titanic Passanger in Data Testing: 418 people (31.93 %)
Titanic Passanger in Total: 1309 people


With the passenger totals established, the next step is to compare the ratio of males to females in each data set.
This matters because most of the Titanic's survivors were women, owing to the rule that prioritized women and children.

In [120]:
def _report_sex_split(frame, heading):
    """Print the male/female head-count and percentage share for ``frame``.

    Parameters
    ----------
    frame : DataFrame with a 'Sex' column; rows are matched against the
        labels "male" and "female".
    heading : text printed before the two count lines.

    Returns
    -------
    tuple ``(male_count, female_count, total)`` where ``total`` is the sum
    of the two counts (rows with any other 'Sex' value are not included).
    """
    print(heading)

    # Summing the boolean mask counts matches without building a filtered copy.
    males = int((frame['Sex'] == "male").sum())
    females = int((frame['Sex'] == "female").sum())
    total = males + females

    # Guard the division so a split with no male/female rows reports 0.00 %
    # instead of raising ZeroDivisionError.
    male_share = males / total * 100 if total else 0.0
    female_share = females / total * 100 if total else 0.0

    print(f"Titanic Passanger Male: {males} people ({male_share:.2f} %)")
    print(f"Titanic Passanger Female: {females} people ({female_share:.2f} %)")
    return males, females, total


# The same report is produced for both splits. male_count / female_count are
# rebound by the second call, so after this cell they hold the test-set counts.
male_count, female_count, total_train = _report_sex_split(df, "### In Data Training ###\n")
male_count, female_count, total_test = _report_sex_split(df_test, "\n### In Data Testing ###\n")


### In Data Training ###

Titanic Passanger Male: 577 people (64.76 %)
Titanic Passanger Female: 314 people (35.24 %)

### In Data Testing ###

Titanic Passanger Male: 266 people (63.64 %)
Titanic Passanger Female: 152 people (36.36 %)


In [121]:
import pandas as pd

# Training rows split by the 'Sex' label: males first, then females.
male_train, female_train = (df[df['Sex'] == sex] for sex in ('male', 'female'))

# Row count, survivor count and formatted survival rate for one group of passengers
def calculate_metrics(df, is_train=True):
    """Summarise survival for the rows in ``df``.

    Parameters
    ----------
    df : DataFrame; its 'Survived' column is read only when ``is_train`` is true.
    is_train : when false, the survivor count is fixed at 0 and the
        'Survived' column is never accessed.

    Returns
    -------
    tuple ``(total, survived_count, survival_rate)`` where ``total`` is
    ``len(df)``, ``survived_count`` is the sum of 'Survived' (or 0), and
    ``survival_rate`` is a percentage string with two decimals
    ("0.00%" when ``df`` has no rows).
    """
    n_rows = len(df)

    if is_train:
        n_survived = df['Survived'].sum()
    else:
        n_survived = 0

    # Avoid dividing by zero for an empty group.
    if n_rows > 0:
        rate_text = f"{(n_survived / n_rows * 100):.2f}%"
    else:
        rate_text = "0.00%"

    return n_rows, n_survived, rate_text

# Metrics per sex, then for the whole training set
male_train_metrics = calculate_metrics(male_train)
female_train_metrics = calculate_metrics(female_train)
total_data = calculate_metrics(df)

# Transpose the three (total, survived, rate) tuples into table columns.
summary_rows = (male_train_metrics, female_train_metrics, total_data)
totals, survived_counts, survival_rates = (list(column) for column in zip(*summary_rows))

# Group sizes whose share of the full training set is reported; the last
# entry is male + female combined.
grand_total = total_data[0]
group_sizes = (
    male_train_metrics[0],
    female_train_metrics[0],
    male_train_metrics[0] + female_train_metrics[0],
)

# Column data for the summary table
data = {
    'Total': totals,
    'Survived Count': survived_counts,
    'Survival Rate': survival_rates,
    'Percentage': [f"{(size / grand_total) * 100:.2f}%" for size in group_sizes],
}

df_summary = pd.DataFrame(data, index=['Male', 'Female', 'Total'])

show(df_summary, precision=2)

Unnamed: 0,Total,Survived Count,Survival Rate,Percentage
Male,577,109,18.89%,64.76%
Female,314,233,74.20%,35.24%
Total,891,342,38.38%,100.00%
