In [None]:
import pandas as pd       # for handling dataframes
import numpy as np        # for numerical operations
import matplotlib.pyplot as plt  # for basic plotting
import seaborn as sns     # for beautiful graphs

# Set plot style
sns.set(style="darkgrid")
# Read the CSV file into a DataFrame
df = pd.read_csv("train.csv")

# Display the first 5 rows
df.head()



In [None]:
# Shows column names, non-null counts, and data types
df.info()

In [None]:
# Shows mean, min, max, etc., for numerical columns
df.describe()


In [None]:
# Shows how many null (missing) values each column has
df.isnull().sum()


In [None]:
# Count of survived vs not survived
sns.countplot(data=df,x='Survived')
plt.title("Survival Count (0 = No, 1 = Yes)")
plt.show()

In [None]:
# Compare survival between male and female
sns.countplot(data=df, x='Sex', hue='Survived')
plt.title("Survival by Gender")
plt.show()

In [None]:
# Plot histogram of passenger ages
plt.figure(figsize=(8,5))
sns.histplot(df['Age'].dropna(), kde=True, bins=30)
plt.title("Age Distribution")
plt.show()

In [None]:
# Survival rates by class
sns.countplot(data=df, x='Pclass', hue='Survived')
plt.title("Survival by Passenger Class")
plt.show()

In [None]:
# Compute correlation between numeric columns
plt.figure(figsize=(10,6))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm')
plt.title("Feature Correlation")
plt.show()

In [21]:
# Fill missing 'Age' with median
df['Age'].fillna(df['Age'].median(), inplace=True)

# Fill missing 'Embarked' with the most common value
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
