In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.impute import SimpleImputer

# ---------------------------------------------------------------------------
# Load the raw transactions and prepare the working frame `df`:
#   1. report missing values, 2. drop customer identifier columns,
#   3. impute Age, 4. integer-encode the categorical columns,
#   5. derive calendar features from Date, 6. min-max scale Total_Cost.
# ---------------------------------------------------------------------------
df = pd.read_csv("dataset.csv")

# Per-column count of missing values, taken before any cleaning.
print(df.isnull().sum())

# Remove the customer name / ID columns from the working frame.
df = df.drop(columns=['Customer_Name', 'Customer_ID'])

# Replace missing ages with the most frequent age.
# NOTE(review): the imputer is fitted on the full frame, i.e. before the
# train/test split performed further down — test rows influence the fill value.
imputer = SimpleImputer(strategy='most_frequent')
df['Age'] = imputer.fit_transform(df[['Age']])

categorical_columns = ['Gender', 'Income_Level', 'Visit_Frequency', 'Customer_Category',
                       'Loyalty_Member', 'Payment_Method', 'City', 'Region', 'Store_Type',
                       'Season', 'Day_of_Week']

# Fit a *separate* encoder for every categorical column and keep each one.
# Re-fitting a single shared LabelEncoder (as was done before) overwrites its
# `classes_` on every iteration, so only the last column's mapping survives
# and the integer codes of all other columns can no longer be decoded or
# re-applied to new data. The codes written into `df` are unchanged.
label_encoders = {}
for col in categorical_columns:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    label_encoders[col] = encoder

# Backward compatibility: `label_encoder` still refers to an encoder fitted on
# the last categorical column, exactly as it did after the original loop.
label_encoder = label_encoders[categorical_columns[-1]]

# Parse the date and the time-of-day strings (time is expected as HH:MM:SS).
# NOTE(review): the parsed Time_of_Purchase is not used by any cell visible
# here — confirm whether a feature (e.g. hour of day) was intended.
df['Date'] = pd.to_datetime(df['Date'])
df['Time_of_Purchase'] = pd.to_datetime(df['Time_of_Purchase'], format='%H:%M:%S').dt.time

# Calendar features derived from the purchase date.
df['Month'] = df['Date'].dt.month
df['Day_of_Year'] = df['Date'].dt.dayofyear

# Rescale Total_Cost to the [0, 1] range of the values seen in this frame.
# NOTE(review): Total_Cost is used as the regression target below, and the
# scaler is fitted on all rows before the train/test split. Error metrics are
# therefore in scaled units; use `scaler.inverse_transform` to report them in
# the original currency, or fit the scaler on the training rows only.
scaler = MinMaxScaler()
df['Total_Cost'] = scaler.fit_transform(df[['Total_Cost']])

print(df[['Total_Cost']].head())


Transaction_ID        0
Date                  0
Time_of_Purchase      0
Customer_ID           0
Customer_Name         0
Age                   0
Gender                0
Income_Level          0
Visit_Frequency       0
Customer_Category     0
Loyalty_Member        0
Products              0
Product_Categories    0
Total_Items           0
Total_Cost            0
Payment_Method        0
Discount_Applied      0
City                  0
Region                0
Store_Type            0
Street                0
Season                0
Day_of_Week           0
dtype: int64
   Total_Cost
0    0.295703
1    0.383524
2    0.525757
3    0.470729
4    0.295475


In [None]:
# Columns of the preprocessed frame that are fed to the model as predictors.
feature_columns = [
    'Age',
    'Gender',
    'Income_Level',
    'Visit_Frequency',
    'Customer_Category',
    'Loyalty_Member',
    'Total_Items',
    'Discount_Applied',
    'City',
    'Region',
    'Month',
    'Day_of_Year',
]

# Column the model is trained to predict.
target_column = 'Total_Cost'

X = df[feature_columns]
y = df[target_column]


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion (`fit` returns the
# estimator itself, so construction and fitting can be chained).
model = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict the held-out rows and score the predictions against the true values.
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)  # same units as the target, unlike the squared error

# One line per metric.
print(
    f'Mean Absolute Error: {mae}',
    f'Mean Squared Error: {mse}',
    f'Root Mean Squared Error: {rmse}',
    sep='\n',
)


Mean Absolute Error: 0.10197942040480469
Mean Squared Error: 0.016289736725784
Root Mean Squared Error: 0.12763125293510208
