In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score
from sklearn.neighbors import KNeighborsRegressor
import pickle
from sklearn.pipeline import Pipeline

In [3]:
df = pd.read_csv('Expanded_Dataset.csv')

In [4]:
df.head()

Unnamed: 0,Year,Quarter,Product Model,5G Capability,Units Sold,Revenue ($),Market Share (%),Regional 5G Coverage (%),5G Subscribers (millions),Avg 5G Speed (Mbps),Preference for 5G (%),Region
0,2019,Q1,Galaxy S10,No,26396,4212951.0,1.04,57.36,39.55,293.1,55.87,Asia-Pacific
1,2019,Q1,Galaxy Note10,No,25671,7240266.0,2.82,85.8,42.58,67.46,37.26,Latin America
2,2019,Q1,Galaxy S20,No,16573,25608330.0,-0.03,47.02,3.78,77.25,84.66,Middle East & Africa
3,2019,Q1,Galaxy Note20,No,7177,21984420.0,0.84,25.7,23.41,105.27,40.03,North America
4,2019,Q1,Galaxy S21,No,45633,16342440.0,2.36,89.13,44.43,206.17,76.88,Latin America


In [5]:
X = df.drop(columns=['Revenue ($)'])
y = df['Revenue ($)']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [6]:
transformer = ColumnTransformer(transformers=[
    ('tnf1', OrdinalEncoder(categories=[['Q1', 'Q2', 'Q3', 'Q4']]), [1]),
    ('tnf2', OneHotEncoder(drop='first', handle_unknown='ignore'), [2, 10, 3])
], remainder='passthrough')

In [7]:
X_train = transformer.fit_transform(X_train)
X_train = pd.DataFrame(X_train)

In [8]:
X_test = transformer.fit_transform(X_test)
X_test = pd.DataFrame(X_test)

In [9]:
model = LinearRegression()
model.fit(X_train, y_train)

In [10]:
y_pred = model.predict(X_test)

In [11]:
print("R2 score is :", r2_score(y_test, y_pred))

R2 score is : 0.10642867950124157


In [12]:
print("R2 Score = ", r2_score(y_test, y_pred))
print('MSE = ', np.sqrt(mean_squared_error(y_test, y_pred)))

R2 Score =  0.10642867950124157
MSE =  16502448.385552337


In [13]:
R = Ridge(alpha=0.01)

In [14]:
R.fit(X_train, y_train)

In [15]:
y_pred1 = R.predict(X_test)

In [16]:
print("R2 Score = ", r2_score(y_test, y_pred1))
print('MSE = ', np.sqrt(mean_squared_error(y_test, y_pred1)))

R2 Score =  0.1064419554793945
MSE =  16502325.794913532


In [17]:
knn = KNeighborsRegressor(n_neighbors=1)

In [18]:
knn.fit(X_train, y_train)

In [19]:
y_pred2 = knn.predict(X_test)

In [20]:
print("R2 Score = ", r2_score(y_test, y_pred2))
print('MSE = ', np.sqrt(mean_squared_error(y_test, y_pred2)))

R2 Score =  0.8852205448151488
MSE =  5914470.252978718


In [21]:
L = Lasso(alpha=100)
L.fit(X_train, y_train)
y_pred3 = L.predict(X_test)

  model = cd_fast.enet_coordinate_descent(


In [22]:
print("R2 Score = ", r2_score(y_test, y_pred3))
print('MSE = ', np.sqrt(mean_squared_error(y_test, y_pred3)))

R2 Score =  0.10643954612098672
MSE =  16502348.043043267


In [23]:
with open('classifier.pkl', 'wb') as file:
    clf = pickle.dump(knn, file)

In [27]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,2024.0,28852.0,0.23,46.10,3.47,194.32,86.51
1,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,2021.0,28000.0,6.67,56.79,11.35,240.77,51.48
2,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,2019.0,29329.0,1.23,57.98,32.34,132.81,77.31
3,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2023.0,12074.0,1.67,65.23,47.14,106.03,70.52
4,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,2022.0,64883.0,4.60,61.16,31.32,154.51,92.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,2023.0,22044.0,5.12,79.73,37.98,63.50,81.51
796,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2023.0,28445.0,3.99,46.80,37.91,175.60,66.60
797,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,2021.0,50384.0,4.36,89.43,51.51,132.01,80.99
798,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,2023.0,56470.0,5.15,71.88,21.15,192.30,46.87


In [24]:
X_test.shape

(200, 27)

In [35]:
knn.predict(X_test)[56]

31929959.75

In [44]:
print("KNN Prediction for Index 56:", knn.predict([X_test.iloc[56]])[0])

KNN Prediction for Index 56: 31929959.75


In [49]:
#chat GPT (Only this section)
# Define the input data for the new phone
new_phone = pd.DataFrame([{
    'Year': 2025,
    'Quarter': 'Q1',
    'Product Model': 'Galaxy S24',
    '5G Capability': 'Yes',
    'Units Sold': 50000,
    'Market Share (%)': 3.5,
    'Regional 5G Coverage (%)': 75.2,
    '5G Subscribers (millions)': 100.5,
    'Avg 5G Speed (Mbps)': 200.3,
    'Preference for 5G (%)': 60.7,
    'Region': 'North America'
}])

# Apply the same transformation as training data
new_phone_transformed = transformer.transform(new_phone)

# Predict revenue using the best model (KNN in this case)
predicted_revenue = knn.predict(new_phone_transformed)

# Print the result
print(f"Predicted Revenue for the new phone: ${predicted_revenue[0]:,.2f}")

Predicted Revenue for the new phone: $8,902,985.24


