In [23]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
import pickle

In [2]:
df = pd.read_csv('Expanded_Dataset.csv')

In [3]:
df.head()

Unnamed: 0,Year,Quarter,Product Model,5G Capability,Units Sold,Revenue ($),Market Share (%),Regional 5G Coverage (%),5G Subscribers (millions),Avg 5G Speed (Mbps),Preference for 5G (%),Region
0,2019,Q1,Galaxy S10,No,26396,4212951.0,1.04,57.36,39.55,293.1,55.87,Asia-Pacific
1,2019,Q1,Galaxy Note10,No,25671,7240266.0,2.82,85.8,42.58,67.46,37.26,Latin America
2,2019,Q1,Galaxy S20,No,16573,25608330.0,-0.03,47.02,3.78,77.25,84.66,Middle East & Africa
3,2019,Q1,Galaxy Note20,No,7177,21984420.0,0.84,25.7,23.41,105.27,40.03,North America
4,2019,Q1,Galaxy S21,No,45633,16342440.0,2.36,89.13,44.43,206.17,76.88,Latin America


In [4]:
X = df.drop(columns=['Revenue ($)'])
y = df['Revenue ($)']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [5]:
transformer = ColumnTransformer(transformers=[
    ('tnf1', OrdinalEncoder(categories=[['Q1', 'Q2', 'Q3', 'Q4']]), [1]),
    ('tnf2', OneHotEncoder(drop='first', handle_unknown='ignore'), [2, 10, 3])
], remainder='passthrough')

In [6]:
X_train = transformer.fit_transform(X_train)
X_train = pd.DataFrame(X_train)

In [7]:
X_test = transformer.fit_transform(X_test)
X_test = pd.DataFrame(X_test)

In [8]:
model = LinearRegression()
model.fit(X_train, y_train)

In [9]:
y_pred = model.predict(X_test)

In [10]:
print("R2 score is :", r2_score(y_test, y_pred))

R2 score is : 0.06311762192526693


In [11]:
print("R2 Score = ", r2_score(y_test, y_pred))
print('MSE = ', np.sqrt(mean_squared_error(y_test, y_pred)))

R2 Score =  0.06311762192526693
MSE =  18681730.958529755


In [12]:
R = Ridge(alpha=0.01)

In [13]:
R.fit(X_train, y_train)

In [14]:
y_pred1 = R.predict(X_test)

In [15]:
print("R2 Score = ", r2_score(y_test, y_pred1))
print('MSE = ', np.sqrt(mean_squared_error(y_test, y_pred1)))

R2 Score =  0.06318767738969089
MSE =  18681032.481412616


In [16]:
knn = KNeighborsRegressor(n_neighbors=1)

In [17]:
knn.fit(X_train, y_train)

In [18]:
y_pred2 = knn.predict(X_test)

In [19]:
print("R2 Score = ", r2_score(y_test, y_pred2))
print('MSE = ', np.sqrt(mean_squared_error(y_test, y_pred2)))

R2 Score =  0.7217379780898268
MSE =  10181260.427499002


In [20]:
L = Lasso(alpha=100)
L.fit(X_train, y_train)
y_pred3 = L.predict(X_test)

  model = cd_fast.enet_coordinate_descent(


In [21]:
print("R2 Score = ", r2_score(y_test, y_pred3))
print('MSE = ', np.sqrt(mean_squared_error(y_test, y_pred3)))

R2 Score =  0.06318913794242409
MSE =  18681017.918920737


In [27]:
with open('classifier.pkl', 'wb') as file:
    clf = pickle.dump(knn, file)

In [28]:
X_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,2021.0,55419.0,2.59,68.80,35.98,196.85,72.96
1,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,2024.0,37131.0,0.59,83.06,0.34,274.20,42.93
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,2022.0,27735.0,6.40,42.86,50.44,118.36,80.72
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,2019.0,31105.0,4.77,93.04,20.52,110.55,73.07
4,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,2020.0,49246.0,6.07,51.07,47.75,137.42,71.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2022.0,23678.0,0.58,45.61,43.79,156.56,72.06
196,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,2023.0,35503.0,3.87,85.47,48.17,262.84,68.00
197,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,2021.0,24758.0,4.72,98.62,35.63,259.73,81.14
198,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,2023.0,30009.0,5.49,48.11,22.61,157.00,94.16


In [29]:
X_test.shape

(200, 27)