In [1]:
# Feature Scaling & Encoding

# Objective: Learn to scale numerical features and encode categorical features for better model performance.
# Instructions:
# For each example, perform the following steps:
#     1. Load the Dataset: Load the dataset into your environment.
#     2. Feature Scaling: Apply scaling methods (StandardScaler or MinMaxScaler) to specified numerical columns.
#     3. Feature Encoding: Apply encoding methods (One-Hot Encoding or Label Encoding) to specified categorical columns.
#     4. Verify Changes: Check the data to ensure proper scaling and encoding. 
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, LabelEncoder

# Step 1: Load the dataset
# You can replace this with your own dataset
data = pd.DataFrame({
    'Age': [25, 32, 47, 51, 62],
    'Salary': [50000, 60000, 80000, 90000, 120000],
    'Gender': ['Male', 'Female', 'Female', 'Male', 'Female'],
    'Department': ['HR', 'Finance', 'IT', 'Finance', 'IT']
})

print("Original Data:\n", data)

# Step 2: Feature Scaling
# Scaling numerical columns using StandardScaler and MinMaxScaler
numerical_cols = ['Age', 'Salary']

# Using StandardScaler
scaler_standard = StandardScaler()
data_standard_scaled = pd.DataFrame(scaler_standard.fit_transform(data[numerical_cols]), columns=[f"{col}_std" for col in numerical_cols])

# Using MinMaxScaler
scaler_minmax = MinMaxScaler()
data_minmax_scaled = pd.DataFrame(scaler_minmax.fit_transform(data[numerical_cols]), columns=[f"{col}_minmax" for col in numerical_cols])

# Step 3: Feature Encoding
# Label Encoding for 'Gender'
le = LabelEncoder()
data['Gender_encoded'] = le.fit_transform(data['Gender'])

# One-Hot Encoding for 'Department'
ohe = OneHotEncoder(sparse=False)
department_encoded = ohe.fit_transform(data[['Department']])
department_encoded_df = pd.DataFrame(department_encoded, columns=ohe.get_feature_names_out(['Department']))

# Step 4: Combine all processed columns
final_data = pd.concat([data, data_standard_scaled, data_minmax_scaled, department_encoded_df], axis=1)

print("\nProcessed Data:\n", final_data)


# Task:
#   Dataset: car_features.csv (get it by your own it includes the columns of Mileage , Horsepower)
    # Columns to scale: Mileage , Horsepower
    # Column to encode: Fuel_Type
    # Steps:
    #     1. Load car_features.csv .
    #     2. Scale Mileage and Horsepower using StandardScaler.
    #     3. Encode Fuel_Type using Label Encoding.
    #     4. Confirm scaling and encoding by checking these columns.



    
    
    

Original Data:
    Age  Salary  Gender Department
0   25   50000    Male         HR
1   32   60000  Female    Finance
2   47   80000  Female         IT
3   51   90000    Male    Finance
4   62  120000  Female         IT


TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'