In [None]:
!pip install pandas numpy seaborn scikit-learn

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, KBinsDiscretizer

# Load the Titanic example dataset through seaborn into a DataFrame.
df = sns.load_dataset('titanic')

# Quick structural overview: column dtypes / non-null counts, the first rows,
# and summary statistics for every column (numeric and non-numeric alike).
print("Basic Dataset Information:\n")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
df.info()
print("\n First 5 Rows:\n", df.head())
print("\n Summary Statistics:\n", df.describe(include='all'))

# Report the number of missing values per column before any cleaning.
print("\n🔍 Columns with Missing Values:\n", df.isnull().sum())

# Work on a copy of the data without the 'deck' column.
# NOTE(review): presumably dropped because it is mostly empty -- confirm
# against the counts printed above.
df_cleaned = df.drop(columns=['deck'])

# Impute by assigning the filled Series back to its column. The previous
# `df_cleaned[col].fillna(..., inplace=True)` form is chained assignment: it
# raises a FutureWarning on pandas 2.x and leaves df_cleaned unmodified under
# Copy-on-Write, which would make the dropna() below discard those rows
# instead of keeping the imputed values.
df_cleaned['age'] = df_cleaned['age'].fillna(df['age'].median())
df_cleaned['embarked'] = df_cleaned['embarked'].fillna(df['embarked'].mode()[0])

# Remove any rows that still contain a missing value in the remaining columns.
df_cleaned = df_cleaned.dropna()
print("\n✅ After Handling Missing Values:\n", df_cleaned.isnull().sum())


# Min-max scaling of the continuous columns: learn the per-column range of
# 'age' and 'fare', then store the rescaled values in two new columns next to
# the originals.
print("\n📊 Handling Continuous Variables:")
scaler = MinMaxScaler()
scaler.fit(df_cleaned[['age', 'fare']])
df_cleaned[['age_norm', 'fare_norm']] = scaler.transform(df_cleaned[['age', 'fare']])
normalized_preview = df_cleaned[['age_norm', 'fare_norm']].head()
print("\nNormalized Age and Fare (0 to 1):\n", normalized_preview)

# Discretize 'age' into four bins using the 'uniform' strategy, storing each
# row's ordinal bin index in a new 'age_binned' column.
kbin = KBinsDiscretizer(strategy='uniform', encode='ordinal', n_bins=4)
kbin.fit(df_cleaned[['age']])
df_cleaned['age_binned'] = kbin.transform(df_cleaned[['age']])
age_bin_preview = df_cleaned[['age', 'age_binned']].head()
print("\nBinned Age Categories:\n", age_bin_preview)


# Show the first rows of the fully processed frame.
cleaned_preview = df_cleaned.head()
print("\n✅ Final Cleaned Data Sample:\n", cleaned_preview)