# 🛳 Titanic Data Analysis
Comprehensive exploratory data analysis (EDA) of the Titanic dataset using **pandas**, **matplotlib**, and **seaborn**.

---

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot theme.
# `set_theme` is the supported entry point; `sns.set` is only a legacy alias
# for it. The font is left at seaborn's default "sans-serif" family, which
# lists Arial first and falls back to other installed sans-serif fonts, so
# machines without Arial no longer emit "font family not found" warnings.
sns.set_theme(style="whitegrid", palette="muted", font="sans-serif", font_scale=1.2)

# Pandas display: never truncate columns, print at most 100 rows.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 100)


In [None]:

# Read the passenger table into `df`.
# The path is relative to the notebook's working directory, so titanic.csv
# must live in the data/ folder next to it.
df = pd.read_csv(filepath_or_buffer="data/titanic.csv")

# Preview the first five rows.
df.head(n=5)


In [None]:

# Report the (rows, columns) dimensions, then the per-column dtype and
# non-null counts.
shape = df.shape
print("Shape:", shape)
df.info()


In [None]:

# Summary statistics for every column; include="all" covers the
# non-numeric columns as well as the numeric ones.
summary = df.describe(include="all")
summary


In [None]:

# Fill missing values.
# Numeric columns get their median, the categorical port column gets its
# most frequent value. The replacement values are computed up front and
# applied with a single DataFrame-level `fillna`, whose result is assigned
# back to `df`. (Calling `df[col].fillna(..., inplace=True)` operates on a
# temporary column selection: it warns on pandas 2.x and leaves `df`
# unchanged once Copy-on-Write is enabled.)
fill_values = {
    "Age": df["Age"].median(),
    "Embarked": df["Embarked"].mode()[0],  # mode() returns a Series; take the top value
    "Fare": df["Fare"].median(),
}
df = df.fillna(value=fill_values)

# Verify: remaining null count per column (columns not listed above are untouched).
df.isnull().sum()


In [None]:

# Histogram of passenger ages (30 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df["Age"], bins=30, kde=True, color="steelblue", ax=ax)
ax.set_title("Age Distribution")
ax.set_xlabel("Age")
ax.set_ylabel("Count")
plt.show()


In [None]:

# Passenger counts per gender, split by survival outcome.
fig, ax = plt.subplots(figsize=(6, 5))
sns.countplot(data=df, x="Sex", hue="Survived", palette="Set2", ax=ax)
ax.set_title("Survival by Gender")
ax.set_xlabel("Gender")
ax.set_ylabel("Count")

# Relabel the hue legend. The handles seaborn registered on the axes are
# passed explicitly so each label is tied to a specific artist instead of
# to whatever order matplotlib happens to discover artists in.
# Assumes Survived is coded 0/1, giving hue order (0, 1) -> ("No", "Yes") - TODO confirm.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles=handles, labels=["No", "Yes"], title="Survived")
plt.show()


In [None]:

# Passenger counts per ticket class, split by survival outcome.
fig, ax = plt.subplots(figsize=(6, 5))
sns.countplot(data=df, x="Pclass", hue="Survived", palette="Set1", ax=ax)
ax.set_title("Survival by Passenger Class")
ax.set_xlabel("Class")
ax.set_ylabel("Count")

# Relabel the hue legend. The handles seaborn registered on the axes are
# passed explicitly so each label is tied to a specific artist instead of
# to whatever order matplotlib happens to discover artists in.
# Assumes Survived is coded 0/1, giving hue order (0, 1) -> ("No", "Yes") - TODO confirm.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles=handles, labels=["No", "Yes"], title="Survived")
plt.show()


In [None]:

# Pairwise correlations between the numeric columns, drawn as an
# annotated heatmap (values shown to two decimals).
fig, ax = plt.subplots(figsize=(10, 7))
corr = df.corr(numeric_only=True)
sns.heatmap(data=corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()


In [None]:

# Age distribution per gender; split=True draws the two Survived groups
# as the two halves of each violin.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(
    data=df,
    x="Sex",
    y="Age",
    hue="Survived",
    split=True,
    palette="muted",
    ax=ax,
)
ax.set_title("Survival by Age and Gender")
ax.set_xlabel("Gender")
ax.set_ylabel("Age")
plt.show()


In [None]:

# Passenger counts per port of embarkation, split by survival outcome.
fig, ax = plt.subplots(figsize=(6, 5))
sns.countplot(data=df, x="Embarked", hue="Survived", palette="pastel", ax=ax)
ax.set_title("Survival by Port of Embarkation")
ax.set_xlabel("Embarked")
ax.set_ylabel("Count")

# Relabel the hue legend. The handles seaborn registered on the axes are
# passed explicitly so each label is tied to a specific artist instead of
# to whatever order matplotlib happens to discover artists in.
# Assumes Survived is coded 0/1, giving hue order (0, 1) -> ("No", "Yes") - TODO confirm.
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles=handles, labels=["No", "Yes"], title="Survived")
plt.show()


In [None]:

# Histogram of ticket fares (40 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df["Fare"], bins=40, kde=True, color="purple", ax=ax)
ax.set_title("Fare Distribution")
ax.set_xlabel("Fare")
ax.set_ylabel("Count")
plt.show()


In [None]:

# Pairwise relationships between class, age and fare, coloured by the
# Survived column.
pair_columns = ["Survived", "Pclass", "Age", "Fare"]
sns.pairplot(data=df[pair_columns], hue="Survived", palette="husl")
plt.show()
