# Data Pre-processing Techniques
This notebook demonstrates various data pre-processing techniques using a demo dataset.

In [None]:
import pandas as pd

# Read the demo CSV; the 'Signup Date' column is parsed into datetimes at load time
df = pd.read_csv(
    filepath_or_buffer='demo_dataset.csv',
    parse_dates=['Signup Date'],
)
df.head(n=5)


## Outlier Detection

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Box plot of the Income column; points drawn beyond the whiskers are candidate outliers
income_ax = sns.boxplot(data=df, x='Income')
income_ax.set_title('Income Outlier Detection')
plt.show()


## Normalization vs Standardization

In [None]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Both scalers are fitted on the same single-column frame (scikit-learn expects 2-D input).
income_2d = df[['Income']]

# Normalization: MinMaxScaler with its default settings
scaler_norm = MinMaxScaler()
df['Income_Norm'] = scaler_norm.fit_transform(income_2d)

# Standardization: StandardScaler with its default settings
scaler_std = StandardScaler()
df['Income_Std'] = scaler_std.fit_transform(income_2d)

# Show the raw column next to both scaled versions
df[['Income', 'Income_Norm', 'Income_Std']]


## Encoding Categorical Variables

In [None]:
from sklearn.preprocessing import LabelEncoder

# Label encoding: replace each distinct Gender value with an integer code.
# The fitted encoder is kept in `le` so the mapping can be inspected later.
le = LabelEncoder()
gender_codes = le.fit_transform(df['Gender'])
df['Gender_Label'] = gender_codes

# One-hot encoding of Purchased into a separate frame; drop_first=True drops
# the first dummy level. `df` itself is not modified by this step.
df_encoded = pd.get_dummies(data=df, columns=['Purchased'], drop_first=True)
df_encoded.head(n=5)


## Feature Scaling, Polynomial Features, and Interaction Terms

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Example with Age and Income
# With degree=2, interaction_only=True and include_bias=False the transformer
# outputs the original columns plus their pairwise product:
# 'Age', 'Income', 'Age Income'.
base_cols = ['Age', 'Income']
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
poly_features = poly.fit_transform(df[base_cols])
poly_df = pd.DataFrame(
    poly_features,
    columns=poly.get_feature_names_out(base_cols),
    index=df.index,  # align rows on df's own index, not a fresh RangeIndex
)

# Join only the columns df does not already have. The transformer output
# repeats 'Age' and 'Income', and DataFrame.join raises
# "columns overlap but no suffix specified" on duplicate column names.
# Filtering also makes this cell safe to re-run: on a second run the
# interaction column already exists, so nothing new is joined.
new_cols = [col for col in poly_df.columns if col not in df.columns]
df = df.join(poly_df[new_cols])
df.head()


## Handling Time Series/Date Features

In [None]:
# Derive calendar features from Signup Date via the .dt accessor
# (requires the column to hold datetimes, as requested with parse_dates at load time).
signup_dt = df['Signup Date'].dt
df['Signup_Month'] = signup_dt.month      # month number, 1-12
df['Signup_Weekday'] = signup_dt.weekday  # Monday=0 ... Sunday=6

# Show the source column next to the derived features
df[['Signup Date', 'Signup_Month', 'Signup_Weekday']]
