In [3]:
import numpy as np
import pandas as pd

In [4]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score,mean_absolute_error

In [5]:
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor,ExtraTreesRegressor, VotingRegressor, StackingRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor

In [6]:
# Load the dataset from a CSV file located in the current working directory.
df = pd.read_csv('Final_Dataset.csv')

In [7]:
# Predictors: every column of df except the target.
X = df.drop('Price', axis=1)
# Target: natural log of Price, so everything fitted and scored against `y`
# works in log-price units.
y = np.log(df['Price'])

In [8]:
# Peek at the first five feature rows (head() defaults to n=5).
X.head()

Unnamed: 0,Company,TypeName,Ram,Weight,Touchscreen,Ips,Ppi,Cpu brand,HDD,SSD,Gpu brand,Os
0,Apple,Ultrabook,8,1.37,0,1,226.983005,Intel Core i5,0,128,Intel,Mac
1,Apple,Ultrabook,8,1.34,0,0,127.67794,Intel Core i5,0,0,Intel,Mac
2,HP,Notebook,8,1.86,0,0,141.211998,Intel Core i5,0,256,Intel,Others/No OS/Linux
3,Apple,Ultrabook,16,1.83,0,1,220.534624,Intel Core i7,0,512,AMD,Mac
4,Apple,Ultrabook,8,1.37,0,1,226.983005,Intel Core i5,0,256,Intel,Mac


In [9]:
# Peek at the first five log-price targets (head() defaults to n=5).
y.head()

0    11.175745
1    10.776766
2    10.329931
3    11.814473
4    11.473093
Name: Price, dtype: float64

In [10]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the rows for evaluation; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=2,
)

In [11]:
def save_result(result, path='result.txt'):
    """Append one result entry to a plain-text log file.

    Parameters
    ----------
    result : str
        Text to record. A single newline is written after it.
    path : str, optional
        File to append to. Defaults to 'result.txt', resolved against the
        current working directory.

    Notes
    -----
    The file is opened in append mode, so calling this again (for example by
    re-running a cell) adds another entry instead of replacing earlier ones.
    """
    # An explicit encoding keeps the log byte-identical across platforms
    # instead of depending on the locale's default encoding.
    with open(path, 'a', encoding='utf-8') as file:
        file.write(result + '\n')


# Linear Regression

In [12]:
# Ordinary least squares on one-hot encoded categoricals.
# The integers are positional column indices into X; drop='first' removes one
# dummy per encoded column, and every other column is passed through unchanged.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)
step2 = LinearRegression()

# Preprocess, then regress
Linear_Regression_pipe = Pipeline(steps=[('step1', step1), ('step2', step2)])
Linear_Regression_pipe.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = Linear_Regression_pipe.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['LinearRegression:\n', r2_value, mae_value]))


r2_score = 0.8073

mae = 0.2102



# Ridge Regression

In [13]:
# Ridge regression (alpha=10) on one-hot encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)
step2 = Ridge(alpha=10)

# Preprocess, then regress
RIDGE_REGRESSION_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
RIDGE_REGRESSION_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = RIDGE_REGRESSION_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['Ridge Regression:\n', r2_value, mae_value]))


r2_score = 0.8127

mae = 0.2093



# Lasso Regression

In [14]:
# Lasso regression (alpha=0.001) on one-hot encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)
step2 = Lasso(alpha=0.001)

# Preprocess, then regress
LASSO_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
LASSO_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = LASSO_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['Lasso Regression:\n', r2_value, mae_value]))

r2_score = 0.8072

mae = 0.2111



# KNN

In [15]:
# k-nearest-neighbours regression (k=5) on one-hot encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
# NOTE(review): no scaling step precedes this distance-based model, so the
# passthrough numeric columns enter the distance on their raw scales --
# confirm that is intended.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)
step2 = KNeighborsRegressor(n_neighbors=5)

# Preprocess, then regress
KNN_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
KNN_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = KNN_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['KNN Regression:\n', r2_value, mae_value]))

r2_score = 0.8045

mae = 0.1991



# Decision Tree

In [16]:
# Single decision tree (seeded) on one-hot encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)
step2 = DecisionTreeRegressor(random_state=42)

# Preprocess, then regress
DECISION_TREE_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
DECISION_TREE_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = DECISION_TREE_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['Decision Tree Regression:\n', r2_value, mae_value]))


r2_score = 0.7806

mae = 0.2085



# SVM

In [17]:
# Support vector regression with an RBF kernel on one-hot encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
# NOTE(review): no scaling step precedes the RBF kernel, so the passthrough
# numeric columns are used on their raw scales -- confirm that is intended.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)
step2 = SVR(kernel='rbf')

# Preprocess, then regress
SVM_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
SVM_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = SVM_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['Support Vector Regression (SVR):\n', r2_value, mae_value]))


r2_score = 0.6111

mae = 0.2959



# Random Forest

In [18]:
# Random forest (100 trees, seeded) on one-hot encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)
step2 = RandomForestRegressor(n_estimators=100, random_state=42)

# Preprocess, then regress
RAND_FRST_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
RAND_FRST_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = RAND_FRST_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['Random Forest Regressor:\n', r2_value, mae_value]))


r2_score = 0.8857

mae = 0.1565



# Extra Trees

In [19]:
# Extra-trees ensemble (100 trees, seeded) on one-hot encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)
step2 = ExtraTreesRegressor(n_estimators=100, random_state=42)

# Preprocess, then regress
EXT_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
EXT_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = EXT_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['Extra Trees Regressor:\n', r2_value, mae_value]))

r2_score = 0.8687

mae = 0.1599



# AdaBoost

In [20]:
# AdaBoost (50 estimators, seeded) on one-hot encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)
step2 = AdaBoostRegressor(n_estimators=50, random_state=42)

# Preprocess, then regress
ADA_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
ADA_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = ADA_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['AdaBoost Regressor:\n', r2_value, mae_value]))

r2_score = 0.7996

mae = 0.2271



# Gradient Boosting

In [21]:
# Gradient boosting (100 estimators, learning rate 0.1, seeded) on one-hot
# encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)
step2 = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)

# Preprocess, then regress
GD_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
GD_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = GD_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['Gradient Boosting Regressor:\n', r2_value, mae_value]))

r2_score = 0.8672

mae = 0.1754



# XGBoost

In [22]:
# XGBoost (100 estimators, learning rate 0.1, seeded) on one-hot encoded
# categoricals. This is the pipeline that gets persisted with joblib further
# down, so it has to cope with inputs it did not see during training.
#
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
#
# handle_unknown='ignore': with the default ('error'), transform() -- and
# therefore predict() -- raises as soon as a category shows up that was absent
# from the training split. With 'ignore' such a value is encoded as all zeros
# (the same encoding as the dropped first category) and scikit-learn emits a
# warning instead of failing. Rows whose categories were all seen in training
# are encoded exactly as before.
step1 = ColumnTransformer(transformers=[
    ('col_tnf',
     OneHotEncoder(sparse_output=False, drop='first', handle_unknown='ignore'),
     [0, 1, 7, 10, 11])
], remainder='passthrough')

# Define the model
step2 = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Create a pipeline: preprocess, then regress
XG_PIPE = Pipeline([('step1', step1), ('step2', step2)])

# Train the model
XG_PIPE.fit(X_train, y_train)

# Make predictions on the held-out split
y_pred = XG_PIPE.predict(X_test)

# Calculate metrics (y is log-price, so both are on the log scale)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Print results; each string already ends in '\n', so a blank line follows it
print(r2_value)
print(mae_value)

# Save results
save_result('XGBoost Regressor:\n' + r2_value + mae_value)

r2_score = 0.8881

mae = 0.1545



# Voting Regression

In [23]:
# Voting ensemble over three linear models, a random forest and XGBoost,
# fitted on one-hot encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)

# Base models, configured like their standalone counterparts above
model1 = LinearRegression()
model2 = Ridge(alpha=10)
model3 = Lasso(alpha=0.001)
model4 = RandomForestRegressor(n_estimators=100, random_state=42)
model5 = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Name each base model and hand the list to the voting regressor
named_estimators = [
    ('lr', model1),
    ('ridge', model2),
    ('lasso', model3),
    ('rf', model4),
    ('xgb', model5),
]
step2 = VotingRegressor(estimators=named_estimators)

# Preprocess, then regress
VOTING_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
VOTING_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = VOTING_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['Voting Regressor:\n', r2_value, mae_value]))

r2_score = 0.8654

mae = 0.1771



# Stacking

In [24]:
# Stacked ensemble: five base regressors feed a gradient-boosting meta-model,
# all fitted on one-hot encoded categoricals.
# The integers are positional column indices into X; all remaining columns are
# passed through unchanged.
categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
step1 = ColumnTransformer(
    transformers=[('col_tnf', categorical_encoder, [0, 1, 7, 10, 11])],
    remainder='passthrough',
)

# Base models, configured like their standalone counterparts above
model1 = LinearRegression()
model2 = Ridge(alpha=10)
model3 = Lasso(alpha=0.001)
model4 = RandomForestRegressor(n_estimators=100, random_state=42)
model5 = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)

# Final estimator that combines the base models' outputs
meta_model = GradientBoostingRegressor(
    n_estimators=50,
    learning_rate=0.1,
    random_state=42,
)

# Name each base model and hand the list to the stacking regressor
named_estimators = [
    ('lr', model1),
    ('ridge', model2),
    ('lasso', model3),
    ('rf', model4),
    ('xgb', model5),
]
step2 = StackingRegressor(estimators=named_estimators, final_estimator=meta_model)

# Preprocess, then regress
STACKING_PIPE = Pipeline(steps=[('step1', step1), ('step2', step2)])
STACKING_PIPE.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = STACKING_PIPE.predict(X_test)
r2_value = 'r2_score = {:.4f}\n'.format(r2_score(y_test, y_pred))
mae_value = 'mae = {:.4f}\n'.format(mean_absolute_error(y_test, y_pred))

# Each string already ends in '\n', so print() leaves a blank line after it.
for metric_line in (r2_value, mae_value):
    print(metric_line)

# Append the same figures to the results log
save_result(''.join(['Stacking Regressor:\n', r2_value, mae_value]))


r2_score = 0.8867

mae = 0.1600



In [25]:
import joblib


In [27]:
# Save the trained model
import os

# joblib.dump writes straight to the given path and does not create missing
# parent directories, so make sure 'model/' exists first. exist_ok=True keeps
# this cell safe to re-run.
os.makedirs('model', exist_ok=True)
joblib.dump(XG_PIPE, 'model/xgboost_pipeline.pkl')
print("Model saved as xgboost_pipeline.pkl")


Model saved as xgboost_pipeline.pkl
