# 🧪 Exploratory Data Analysis: Titanic Dataset

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the training split into a DataFrame; the path is relative to the
# directory the notebook is run from.
df = pd.read_csv(filepath_or_buffer="train.csv")
# Final expression of the cell: preview the first five rows.
df.head(n=5)

In [None]:
# Initial exploration: dimensions, column dtypes, summary statistics and
# per-column missing-value counts.
print("Shape :", df.shape)
df.info()  # prints its report directly, no explicit print needed

# A notebook cell only renders the value of its *final* expression, so the
# summary statistics have to be printed explicitly or they are silently lost.
print(df.describe())

# Last expression of the cell: shown as the cell output (missing values per column).
df.isnull().sum()

In [None]:
# Handling Missing Values
# Age: fill gaps with the column median. Every imputed row receives the same
# value, so later Age plots will show extra mass at the median.
df['Age'] = df['Age'].fillna(df['Age'].median())
# Embarked: fill gaps with the most frequent value; mode() returns a Series,
# so take its first entry.
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
# Cabin: removed entirely. errors='ignore' keeps the cell safe to re-run once
# the column is already gone (a plain drop would raise KeyError).
df.drop(columns=['Cabin'], inplace=True, errors='ignore')

## 📊 Univariate Analysis

In [None]:
# Number of passengers per value of the Survived column
survival_ax = sns.countplot(data=df, x="Survived")
survival_ax.set_title("Survival Count")
plt.show()

In [None]:
# Number of passengers in each ticket class (Pclass)
pclass_ax = sns.countplot(data=df, x="Pclass")
pclass_ax.set_title("Passenger Class Distribution")
plt.show()

In [None]:
# Number of passengers per value of the Sex column
sex_ax = sns.countplot(data=df, x="Sex")
sex_ax.set_title("Gender Distribution")
plt.show()

In [None]:
# Age distribution: histogram with a kernel density estimate overlaid.
# Rows with a missing Age are excluded before plotting.
known_ages = df['Age'].dropna()
age_ax = sns.histplot(known_ages, kde=True)
age_ax.set_title("Age Distribution")
plt.show()

In [None]:
# Box plot of Fare: shows the spread of ticket prices and any outliers
fare_ax = sns.boxplot(data=df, x="Fare")
fare_ax.set_title("Fare Boxplot")
plt.show()

## 📈 Bivariate Analysis

In [None]:
# Passenger counts per Sex, split into one bar per Survived value
sex_survival_ax = sns.countplot(data=df, x="Sex", hue="Survived")
sex_survival_ax.set_title("Survival by Gender")
plt.show()

In [None]:
# Age distribution compared across the Survived groups (one box per group)
age_survival_ax = sns.boxplot(data=df, x="Survived", y="Age")
age_survival_ax.set_title("Age vs Survived")
plt.show()

In [None]:
# Passenger counts per ticket class, split into one bar per Survived value
class_survival_ax = sns.countplot(data=df, x="Pclass", hue="Survived")
class_survival_ax.set_title("Survival by Passenger Class")
plt.show()

In [None]:
# Scatter of Fare (y) against Age (x); point colour encodes Survived
scatter_ax = sns.scatterplot(data=df, x="Age", y="Fare", hue="Survived")
scatter_ax.set_title("Age vs Fare by Survival")
plt.show()

In [None]:
# Correlation heatmap: pairwise correlations between the numeric columns,
# with each coefficient annotated in its cell.
corr_matrix = df.corr(numeric_only=True)
_, heatmap_ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", ax=heatmap_ax)
heatmap_ax.set_title("Correlation Heatmap")
plt.show()

## 🔄 Multivariate Analysis

In [None]:
# Pairwise relationships between selected numeric columns, coloured by Survived
pairplot_columns = ["Age", "Fare", "Pclass", "Survived"]
sns.pairplot(data=df[pairplot_columns], hue="Survived")
# The pairplot figure is the current figure; y > 1 places the overall title
# above the top row of panels.
plt.gcf().suptitle("Pairplot of Numerical Features by Survival", y=1.02)
plt.show()

In [None]:
# Passenger counts per class and sex, drawn as one panel per Survived value
sns.catplot(
    data=df,
    kind="count",
    x="Pclass",
    hue="Sex",
    col="Survived",
)
# The catplot figure is the current figure; y > 1 places the overall title
# above the panel titles.
plt.gcf().suptitle("Survival by Class and Gender", y=1.05)
plt.show()