# Titanic Dataset - Exploratory Data Analysis (EDA)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
# The Colab upload widget is only needed when train.csv is not already in the
# working directory. Importing google.colab unconditionally makes this cell
# fail outside Colab and re-prompts for an upload on every re-run.
import os

DATA_PATH = 'train.csv'
uploaded = {}  # stays empty when the file was already on disk
if not os.path.exists(DATA_PATH):
    try:
        from google.colab import files
    except ImportError as err:
        raise FileNotFoundError(
            f"{DATA_PATH!r} was not found in the working directory and the "
            "Google Colab upload widget is not available in this environment"
        ) from err
    uploaded = files.upload()

df = pd.read_csv(DATA_PATH)
# Trailing expression: the notebook renders the first five rows.
df.head()


In [None]:
# Basic Info
# DataFrame.info() prints its summary (per the pandas docs: column names,
# dtypes and non-null counts) to stdout and returns None, so this cell shows
# only the printed text and no rendered value.
df.info()


In [None]:
# Descriptive statistics for every column; include='all' brings the
# non-numeric columns into the table alongside the numeric ones.
summary_stats = df.describe(include='all')
summary_stats


In [None]:
# Count the missing entries in each column (isna is the alias of isnull)
missing_per_column = df.isna().sum()
missing_per_column


In [None]:
# Univariate view: number of passengers per value of 'Survived'
ax = sns.countplot(data=df, x='Survived')
ax.set_title('Survival Count')
plt.show()


In [None]:
# Univariate view: number of passengers per value of 'Sex'
ax = sns.countplot(data=df, x='Sex')
ax.set_title('Gender Distribution')
plt.show()


In [None]:
# Univariate view: number of passengers per value of 'Pclass'
ax = sns.countplot(data=df, x='Pclass')
ax.set_title('Passenger Class Distribution')
plt.show()


In [None]:
# Histogram of the non-missing 'Age' values (30 bins) with a KDE overlay
fig, ax = plt.subplots(figsize=(8, 5))
known_ages = df['Age'].dropna()
sns.histplot(known_ages, kde=True, bins=30, ax=ax)
ax.set_title('Age Distribution')
plt.show()


In [None]:
# Bivariate view: 'Survived' counts, with bars split by 'Sex'
ax = sns.countplot(data=df, x='Survived', hue='Sex')
ax.set_title('Survival by Gender')
plt.show()


In [None]:
# Bivariate view: 'Survived' counts, with bars split by 'Pclass'
ax = sns.countplot(data=df, x='Survived', hue='Pclass')
ax.set_title('Survival by Passenger Class')
plt.show()


In [None]:
# Correlation Heatmap (numerical features only)
# A 'PassengerId' column, if present, is a row identifier rather than a
# feature; keeping it adds a meaningless row/column to the matrix, so it is
# dropped first. errors='ignore' keeps the cell working when the column is
# absent.
numeric_features = df.select_dtypes(include=[np.number]).drop(
    columns=['PassengerId'], errors='ignore'
)
plt.figure(figsize=(10,8))
# annot=True writes each pairwise correlation value into its cell.
sns.heatmap(numeric_features.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()


## ✅ Conclusion:
- Female passengers had a higher survival rate than male passengers.
- First-class passengers had a higher survival rate than passengers in the other classes.
- The age distribution is somewhat skewed, and survival rates differ between younger and older passengers.
- Pclass, Sex, and Fare are correlated with survival.

This completes the Titanic dataset EDA.