In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder


# Preprocessing pipeline for the candy dataset:
#   load -> feature scaling -> one-hot encoding -> export to CSV.
# The raw frame is kept untouched in `df_raw` so every step derives from the
# same input and the cell gives the same result when re-run.

# Relative path: the CSV is read from the notebook's working directory.
data_path = r"candy-data.csv"
df_raw = pd.read_csv(data_path)
print("First 5 rows of the dataset:")
print(df_raw.head())

# 1. **Feature Scaling**: rescaling the numerical columns

# Columns eligible for scaling (64-bit integer and float columns only).
numerical_cols = df_raw.select_dtypes(include=['float64', 'int64']).columns

# Standardization (mean=0, std=1).
# Kept in its own frame: min-max scaling is invariant to a positive affine
# transform, so chaining MinMaxScaler on top of standardized values would
# simply discard the standardization and equal min-max scaling of the raw data.
scaler_standard = StandardScaler()
df_standardized = df_raw.copy()
df_standardized[numerical_cols] = scaler_standard.fit_transform(df_raw[numerical_cols])

print("\nData after Standardization (mean=0, std=1):")
print(df_standardized.head())

# Normalization (scaled between 0 and 1), fitted on the raw values.
# This is the frame that continues through the pipeline and gets exported.
scaler_minmax = MinMaxScaler()
df = df_raw.copy()
df[numerical_cols] = scaler_minmax.fit_transform(df_raw[numerical_cols])

print("\nData after Normalization (scaled between 0 and 1):")
print(df.head())

# 2. **One-Hot Encoding**: Converting categorical columns into binary (0 and 1) columns

# Selecting categorical (object-dtype) columns for one-hot encoding
# NOTE(review): in the printed sample the only text column is `competitorname`,
# which looks like a per-row label; encoding it produces one column per
# distinct name -- confirm this is intended.
categorical_cols = df.select_dtypes(include=['object']).columns

# Apply one-hot encoding. drop_first=True drops the first level of each
# categorical column; dtype=int forces 0/1 output (newer pandas versions
# default to boolean True/False dummy columns).
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True, dtype=int)

print("\nData after One-Hot Encoding:")
print(df.head())

# Exporting the preprocessed dataset to a new file (row index is not written)
df.to_csv('preprocessed_dataset.csv', index=False)
print("\nPreprocessed dataset has been saved as 'preprocessed_dataset.csv'")

First 5 rows of the dataset:
  competitorname  chocolate  fruity  caramel  peanutyalmondy  nougat  \
0      100 Grand          1       0        1               0       0   
1   3 Musketeers          1       0        0               0       1   
2       One dime          0       0        0               0       0   
3    One quarter          0       0        0               0       0   
4      Air Heads          0       1        0               0       0   

   crispedricewafer  hard  bar  pluribus  sugarpercent  pricepercent  \
0                 1     0    1         0         0.732         0.860   
1                 0     0    1         0         0.604         0.511   
2                 0     0    0         0         0.011         0.116   
3                 0     0    0         0         0.011         0.511   
4                 0     0    0         0         0.906         0.511   

   winpercent  
0   66.971725  
1   67.602936  
2   32.261086  
3   46.116505  
4   52.341465  

Data aft