In [3]:
import pandas as pd
import numpy as np
import seaborn as sns

# Load dataset
df = sns.load_dataset('titanic')

# Display first few rows
print("🔹 First 5 Rows of Dataset:")
print(df.head())

# ----------------------------
# 1. Basic Dataset Information
# ----------------------------
print("\n🔹 Dataset Info:")
print(df.info())

print("\n🔹 Summary Statistics:")
print(df.describe(include='all'))

# ----------------------------
# 2. Handle Missing Values
# ----------------------------
print("\n🔹 Missing Values:")
print(df.isnull().sum())

# Fill missing 'age' with median
df['age'].fillna(df['age'].median(), inplace=True)

# Fill missing 'embarked' with mode
df['embarked'].fillna(df['embarked'].mode()[0], inplace=True)

# Drop rows where 'deck' is missing (optional)
df.drop(columns=['deck'], inplace=True)

# ----------------------------
# 3. Analyze Relationships
# ----------------------------

# Survival rate by gender
survival_by_gender = df.groupby('sex')['survived'].mean()
print("\n🔹 Survival Rate by Gender:")
print(survival_by_gender)

# Survival rate by class
survival_by_class = df.groupby('pclass')['survived'].mean()
print("\n🔹 Survival Rate by Class:")
print(survival_by_class)

# Average fare by embarkation point
avg_fare_by_embark = df.groupby('embarked')['fare'].mean()
print("\n🔹 Average Fare by Embarkation Point:")
print(avg_fare_by_embark)

# ----------------------------
# 4. Correlation Analysis
# ----------------------------
correlation_matrix = df.corr(numeric_only=True)
print("\n🔹 Correlation Matrix:")
print(correlation_matrix)

# ----------------------------
# 5. Actionable Insights (Example)
# ----------------------------
print("\n✅ Actionable Insights:")
print("- Females had a higher survival rate than males.")
print("- Passengers in higher classes (1st class) had better survival chances.")
print("- Fare tends to be higher for passengers who embarked from Cherbourg.")


🔹 First 5 Rows of Dataset:
   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  

🔹 Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       ---------

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(df['age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['embarked'].fillna(df['embarked'].mode()[0], inplace=True)
