### Step 1: Load Titanic Dataset

In [None]:
import pandas as pd

In [None]:
# Read the Titanic passenger records from the CSV file into a DataFrame.
titanic_csv_path = '../datasets/titanic.csv'
df = pd.read_csv(titanic_csv_path)

# Preview the first rows to confirm the file loaded.
df.head()

## Data Cleaning
### Step 2: Data Exploration

In [None]:
# Overview of the loaded frame: column labels, per-column summary, and shape.
print("\n*****************show columns******************\n")
print(df.columns)
print("\n*****************show data information (Column, Non-Null Count, Dtype)******************\n")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

print("\n*****************Rows and columns count******************\n")
# Last expression of the cell, so the (rows, columns) tuple is displayed.
df.shape

In [None]:
# Number of missing values in each column.
null_counts = df.isnull().sum()
print("\n*****************Check null values in all columns******************\n")
print(null_counts)

# Data type of each column.
column_types = df.dtypes
print("\n*****************Check data types of columns******************\n")
print(column_types)



### Step 3: Handle Missing Data

In [None]:
# Fill missing ages with the median age.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df['Age']: that form mutates an intermediate
# Series, which pandas flags as chained assignment and which does not update
# df when Copy-on-Write is enabled.
age_median = df['Age'].median()
df['Age'] = df['Age'].fillna(age_median)

# Show a preview rather than the whole frame.
df.head()

### Step 4: Handle Duplicates

In [None]:
# Report how many fully duplicated rows exist. The count is printed because
# it is not the last expression of the cell and would otherwise be discarded
# without ever being shown.
duplicate_count = df.duplicated().sum()
print(f"Duplicate rows found: {duplicate_count}")

# Remove the duplicated rows; reassigning (rather than inplace=True) keeps the
# cell safe to re-run and makes the data lineage explicit.
df = df.drop_duplicates()

### Step 5: Rename and Drop Columns

In [None]:
# Rename the 'Sex' column to 'Gender'; later cells refer to it by the new name.
column_renames = {'Sex': 'Gender'}
df = df.rename(columns=column_renames)

# Dropping a column is shown for reference only and is left disabled:
# df = df.drop(columns=['Cabin'])

df.head()

## Next Learning Part
### Step 6: Filtering rows in Pandas

In [None]:
# Passengers younger than 18.
# The comparison is '<' to match the stated intent; the previous '>' selected
# passengers older than 18 instead.
is_minor = df['Age'] < 18
df[is_minor].head()

In [None]:
# Passengers whose fare was greater than 100
paid_over_100 = df['Fare'] > 100
df[paid_over_100].head()

In [None]:
# Female passengers travelling in first class (both conditions must hold)
is_female = df['Gender'] == 'female'
is_first_class = df['Pclass'] == 1
df[is_female & is_first_class].head()

### Step 7: Sorting in Pandas

In [None]:
# Order rows by Fare from highest to lowest and preview the top rows
fare_high_to_low = df.sort_values('Fare', ascending=False)
fare_high_to_low.head()

In [None]:
# Order rows by Age from youngest to oldest (ascending is the default order)
age_young_to_old = df.sort_values('Age')
age_young_to_old.head()

In [None]:
# Two sort keys: Age ascending first, then Fare descending within equal ages
sort_keys = ['Age', 'Fare']
sort_ascending = [True, False]
df.sort_values(sort_keys, ascending=sort_ascending).head()

### Practice Exercises

In [None]:
# Exercise 1: the 10 passengers who paid the highest Fare.
by_fare_desc = df.sort_values('Fare', ascending=False)
by_fare_desc.head(n=10)

In [None]:
# Exercise 2: the 5 passengers with the lowest Age.
by_age_asc = df.sort_values('Age')
by_age_asc.head(5)

In [None]:
# Exercise 3: order by Pclass ascending, then by Fare descending within a class.
by_class_then_fare = df.sort_values(['Pclass', 'Fare'], ascending=[True, False])
by_class_then_fare.head(10)

### Step 8: GroupBy and Aggregation

In [None]:
# 1. Mean Fare for each passenger class
fare_by_class = df.groupby('Pclass')['Fare']
fare_by_class.mean()


In [None]:
# 2. Mean Age for each gender
age_by_gender = df.groupby('Gender')['Age']
age_by_gender.mean()

In [None]:
# 3. Sum of the Survived column within each passenger class
survived_by_class = df.groupby('Pclass')['Survived']
survived_by_class.sum()

 #### Multiple Aggregations by Pclass

In [None]:
# 4. Several statistics per passenger class in one pass:
#    mean Fare, median Age, and the sum of Survived.
df.groupby('Pclass').agg(
    Fare=('Fare', 'mean'),
    Age=('Age', 'median'),
    Survived=('Survived', 'sum'),
)

In [None]:
# 1. Number of rows in each (Gender, Embarked) combination
group_keys = ['Gender', 'Embarked']
df.groupby(group_keys).size()

In [None]:
# 2. Mean Age and mean Fare for each (Gender, Pclass) pair, returned as a flat
#    table with the grouping keys as ordinary columns.
grouped = df.groupby(['Gender', 'Pclass'])
grouped[['Age', 'Fare']].mean().reset_index()