# Importing Libraries

In [1]:
import pandas as pd
import numpy as np

# Generate Data

In [18]:
# Small, deliberately messy sample: names differ in case and surrounding
# whitespace, and ages are stored as strings, so the cleaning steps further
# down have something to fix.
data = dict(
    Name=['  Alice  ', 'BOB', 'Charlie', 'alice', 'Bob'],
    Age=['25', '30', '35', '25', '30'],
    Salary=[50000, 60000, 75000, 50000, 60000],
    City=['NYC', 'LA', 'SF', 'NYC', 'LA'],
)

# One column per key, one row per list position.
df = pd.DataFrame(data)

df.head()

Unnamed: 0,Name,Age,Salary,City
0,Alice,25,50000,NYC
1,BOB,30,60000,LA
2,Charlie,35,75000,SF
3,alice,25,50000,NYC
4,Bob,30,60000,LA


# Data Type Conversion

In [23]:
# Show the dtypes pandas inferred for the raw frame before any conversion.
display(df.dtypes)

# Cast into a new frame so the raw `df` is left untouched:
#   - 'Age' holds digit strings, so it becomes a 64-bit integer column;
#   - 'City' becomes a categorical column.
df_cleaning = df.astype({'Age': 'int64', 'City': 'category'})

# Show the dtypes again to confirm the casts took effect.
display(df_cleaning.dtypes)



Name      object
Age       object
Salary     int64
City      object
dtype: object

Name        object
Age          int64
Salary       int64
City      category
dtype: object

# String Operations

In [24]:
# Normalise names so that variants such as '  Alice  ' and 'alice' compare
# equal: lowercase first, then trim leading/trailing whitespace.
raw_names = df_cleaning['Name']
normalized_names = raw_names.str.lower().str.strip()
df_cleaning['Name'] = normalized_names

df_cleaning.head()

Unnamed: 0,Name,Age,Salary,City
0,alice,25,50000,NYC
1,bob,30,60000,LA
2,charlie,35,75000,SF
3,alice,25,50000,NYC
4,bob,30,60000,LA


# Remove Duplicates

In [27]:
# Count fully identical rows and report the number. A bare expression in the
# middle of a cell is evaluated but never shown, so the count is printed
# explicitly to let the reader see how many rows are about to be removed.
n_duplicates = df_cleaning.duplicated().sum()
print(f"Duplicate rows found: {n_duplicates}")

# Keep the first occurrence of each row and renumber the index from 0.
# Rebinding the result (instead of using inplace=True) keeps the step a
# single readable chain.
df_cleaning = df_cleaning.drop_duplicates().reset_index(drop=True)

df_cleaning.head()


Unnamed: 0,Name,Age,Salary,City
0,alice,25,50000,NYC
1,bob,30,60000,LA
2,charlie,35,75000,SF


# Rename Columns

In [28]:
# Standardise the headers: every column label is replaced by its lowercase
# form (e.g. 'Name' -> 'name').
df_cleaning.columns = [label.lower() for label in df_cleaning.columns]

df_cleaning.head()

Unnamed: 0,name,age,salary,city
0,alice,25,50000,NYC
1,bob,30,60000,LA
2,charlie,35,75000,SF


# Handle Outliers

In [30]:
# Filter salaries with the 1.5 * IQR rule: a row is kept only if its salary
# lies within [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], bounds included.
salary = df_cleaning['salary']

# First and third quartiles, and the spread between them.
Q1 = salary.quantile(0.25)
Q3 = salary.quantile(0.75)
IQR = Q3 - Q1

# Fences beyond which a salary is treated as an outlier.
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# `between` is inclusive on both ends by default.
within_fences = salary.between(lower_bound, upper_bound)
df_no_outliers = df_cleaning[within_fences]

df_no_outliers.head()


Unnamed: 0,name,age,salary,city
0,alice,25,50000,NYC
1,bob,30,60000,LA
2,charlie,35,75000,SF
