# Feature Engineering
This notebook covers feature engineering tasks, including creating new features, scaling numerical features, and encoding categorical features.

In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [8]:
# Location of the preprocessed dataset.
# NOTE(review): this is an absolute, machine-specific path; update it before
# running the notebook on another machine.
DATA_PATH = 'C:\\Users\\saife\\OneDrive\\Desktop\\Graduation\\data\\processed\\data_after_preprocessing.csv'

# Read the CSV into a DataFrame and preview its first rows
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Complain,Satisfaction Score,Card Type,Point Earned,BalanceToSalary,ProductsPerTenure,Geography_Germany,Geography_Spain,Gender_Male
0,-0.326221,0.293517,-1.04176,-1.225848,1,1,1,0.021886,1,1,2,DIAMOND,464,-0.035804,-0.016781,False,False,False
1,-0.440036,0.198164,-1.387538,0.11735,1,0,1,0.216534,0,1,3,DIAMOND,456,-0.02893,0.967674,False,True,False
2,-1.536794,0.293517,1.032908,1.333053,3,1,0,0.240687,1,1,3,DIAMOND,377,-0.022868,-0.262894,False,False,False
3,0.501521,0.007457,-1.387538,-1.225848,2,0,0,-0.108918,0,0,5,GOLD,350,-0.035804,2.936584,False,False,False
4,2.063884,0.388871,-1.04176,0.785728,1,1,1,-0.365276,0,0,5,GOLD,425,-0.021154,-0.016781,False,True,False


## Create New Features
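Generate new features such as customer tenure. The sign-up and last-activity dates used below are fixed placeholder values, so `tenure_months` is identical for every row until real dates are available.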

In [10]:
def create_new_features(df, last_active_date='2023-01-01', signup_date='2020-01-01'):
    """Return a copy of ``df`` with placeholder date columns and a tenure feature.

    Three columns are added (or overwritten if they already exist):
    ``last_active_date``, ``signup_date`` and ``tenure_months``. Both date
    columns hold one constant value for every row, so ``tenure_months`` is
    constant as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is left untouched; all work happens on a copy.
    last_active_date : str or datetime-like, default '2023-01-01'
        Placeholder value assigned to every row of ``last_active_date``.
    signup_date : str or datetime-like, default '2020-01-01'
        Placeholder value assigned to every row of ``signup_date``.

    Returns
    -------
    pandas.DataFrame
        New frame containing the original columns plus the three added ones.
    """
    # Work on a copy so the caller's frame is not modified as a side effect
    features = df.copy()

    # Placeholder dates: the same scalar is broadcast to every row
    features['last_active_date'] = pd.to_datetime(last_active_date)
    features['signup_date'] = pd.to_datetime(signup_date)

    # Approximate tenure in months as whole 30-day periods between the two dates
    elapsed = features['last_active_date'] - features['signup_date']
    features['tenure_months'] = elapsed.dt.days // 30
    return features

# Example usage: rebind `df` to the frame returned by create_new_features,
# so the cells below that read `df` see the added columns
df = create_new_features(df)

## Scale Numerical Features
Apply feature scaling to numerical features using `StandardScaler`.

In [12]:
def scale_features(df, numerical_features):
    """Standardize the selected columns of ``df`` with ``StandardScaler``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to scale.
    numerical_features : iterable of str
        Names of the columns to standardize.

    Returns
    -------
    pandas.DataFrame
        Frame holding only the scaled columns, named as in
        ``numerical_features`` and carrying the same row index as ``df``.
    """
    # Materialize as a list so tuples or other iterables select columns
    # rather than being treated as a single key
    columns = list(numerical_features)

    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(df[columns])

    # Reuse the input's index: without it the result gets a fresh RangeIndex
    # and no longer aligns with `df` when the two are joined or concatenated
    return pd.DataFrame(scaled_values, columns=columns, index=df.index)

# Columns to standardize
numerical_features = [
    'CreditScore',
    'Age',
    'Tenure',
    'Balance',
    'EstimatedSalary',
    'BalanceToSalary',
    'ProductsPerTenure',
]

# Example usage: the result holds only the columns listed above
df_scaled = scale_features(df, numerical_features)

## Encode Categorical Features
Convert categorical features into numerical format using one-hot encoding.

In [13]:
def encode_categorical_features(df, categorical_features):
    """One-hot encode the given columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to encode.
    categorical_features : list of str
        Names of the columns to replace with indicator columns.

    Returns
    -------
    pandas.DataFrame
        New frame in which each listed column is replaced by its indicator
        columns; the first level of every encoded column is dropped
        (``drop_first=True``).
    """
    return pd.get_dummies(
        df,
        drop_first=True,
        columns=categorical_features,
    )

# Columns to one-hot encode
categorical_features = ['Card Type']

# Example usage: encode the listed columns of `df`; the result is stored
# separately in `df_encoded` and `df` itself is not reassigned here
df_encoded = encode_categorical_features(df, categorical_features)