In [1]:
# Numerical, tabular and plotting libraries used by the notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Switch seaborn to its "whitegrid" style for any plots drawn later
sns.set(style="whitegrid")

import warnings
# Install a blanket "ignore" filter: every warning category is silenced from here on.
# NOTE(review): this also hides any warnings raised by the model-fitting cells below —
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
# Workbook holding the dataset; the path is relative to the working directory
DATA_FILE = "Ecommerce_Cleaned_Data.xlsx"

df = pd.read_excel(DATA_FILE)
# Leave the frame as the last expression so the notebook renders it
df

Unnamed: 0,Category,Price,Discount,Final_Price,Payment_Method
0,5,36.53,15,31.05,3
1,2,232.79,20,186.23,3
2,5,317.02,25,237.76,1
3,6,173.19,25,129.89,4
4,0,244.80,20,195.84,3
...,...,...,...,...,...
3655,0,486.79,0,486.79,4
3656,6,212.87,15,180.94,0
3657,4,389.76,0,389.76,3
3658,3,447.66,30,313.36,4


**Data Modelling**

In [3]:
# Target is the Discount column; the features are every other column of df
y = df["Discount"]
X = df.drop(columns=["Discount"])

**Train-Test Split**

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the rows for testing; the fixed random_state keeps the
# partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=10,
)

**Modelling**

In [6]:
from sklearn.linear_model import ElasticNet

# Baseline model: ElasticNet with the library's default hyperparameters,
# fitted on the training split (fit() hands back the fitted estimator).
en_base = ElasticNet().fit(X_train, y_train)
# Display the fitted estimator as the cell output
en_base

**Predictions**

In [7]:
# Baseline predictions for the training and test partitions, in that order
train_predictions, test_predictions = (
    en_base.predict(features) for features in (X_train, X_test)
)

**Evaluation**

In [8]:

from sklearn.model_selection import cross_val_score

# Score of the baseline model on each side of the train/test split
train_r2 = en_base.score(X_train, y_train)
print("Train R2:", train_r2)

test_r2 = en_base.score(X_test, y_test)
print("Test R2:", test_r2)

# Mean score across 5 cross-validation folds.
# Note: the folds are drawn from the full X, y, not only the training split.
cv_mean = cross_val_score(en_base, X, y, cv=5).mean()
print("Cross Validation Score:", cv_mean)

Train R2: 0.7631288127717355
Test R2: 0.7640497182278724
Cross Validation Score: 0.763239216286342


**Hyperparameter Tuning**

In [9]:
from sklearn.model_selection import GridSearchCV

# Estimator whose hyperparameters are being searched
estimator = ElasticNet()

# Candidate values per hyperparameter; the search evaluates every combination
param_grid = {
    "alpha": [0.1, 0.2, 0.5, 0.7, 1, 10, 50, 100, 1000],
    "l1_ratio": [0.1, 0.5, 0.75, 0.9, 0.98, 1],
}

# 5-fold cross-validated search over the training split, ranked by
# negative mean squared error
model_hp = GridSearchCV(
    estimator=estimator,
    param_grid=param_grid,
    scoring="neg_mean_squared_error",
    cv=5,
)
model_hp.fit(X_train, y_train)

# Winning parameter combination, shown as the cell output
model_hp.best_params_


{'alpha': 0.5, 'l1_ratio': 0.5}

**Rebuilding the model with the best parameters**

In [10]:
from sklearn.model_selection import cross_val_score

# Modelling
# Build the tuned model directly from the grid-search result instead of
# hard-coding alpha / l1_ratio: the values then stay in sync with the search
# whenever the data, the parameter grid or the scoring changes.
en_best = ElasticNet(**model_hp.best_params_)
en_best.fit(X_train, y_train)

# Predictions for both partitions from the tuned model
train_predictions = en_best.predict(X_train)
test_predictions = en_best.predict(X_test)

# Evaluation: same three figures as reported for the baseline model.
# Note: the cross-validation folds are drawn from the full X, y, not only
# from the training split.
print("Train R2:", en_best.score(X_train, y_train))
print("Test R2:", en_best.score(X_test, y_test))
print("Cross Validation Score:", cross_val_score(en_best, X, y, cv=5).mean())

Train R2: 0.7631312797713259
Test R2: 0.7641143843834272
Cross Validation Score: 0.763089207771816


**Prediction on New Data**

In [11]:
# Four hand-written rows to score; there is no Discount column because that
# is the value being predicted.
input_data = pd.DataFrame(
    {
        "Category": [5, 1, 2, 3],
        "Price": [36.50, 365.90, 109.26, 890.56],
        "Final_Price": [35.20, 228.6, 89.50, 459.23],
        "Payment_Method": [0, 2, 1, 4],
    }
)
# Render the frame as the cell output
input_data

Unnamed: 0,Category,Price,Final_Price,Payment_Method
0,5,36.5,35.2,0
1,1,365.9,228.6,2
2,2,109.26,89.5,1
3,3,890.56,459.23,4


In [12]:
# Predicted Discount for each row of input_data, using the tuned model.
# NOTE(review): the model is linear, so its outputs are not bounded to the
# range of discounts present in the training data — sanity-check the values.
predicted_discounts = en_best.predict(input_data)
predicted_discounts

array([17.42200646, 38.87654212, 18.74617901, 95.87601367])