In [55]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [56]:
# Location of the laptop price dataset.
path_to_csv = "/content/laptops.csv"

# Read the CSV as Latin-1. The codec name is spelled with a plain ASCII hyphen;
# the previous spelling used a typographic en dash ("ISO-8859–1"), which relies
# on Python's lenient codec-name normalisation to resolve at all.
# index_col=False tells pandas not to use the first column as the index.
data = pd.read_csv(path_to_csv, encoding="ISO-8859-1", index_col=False)

In [57]:
# Discard the "Unnamed: 0" column (presumably a row index saved with the CSV;
# it is not used as a feature).
data.drop(columns='Unnamed: 0', inplace=True)

In [58]:
# Show the column dtypes before the conversion so the change is visible below.
print("Before -- \n\n",data.dtypes)

# Extract the numeric part of "Weight" as a float (for example "1.2kg" -> 1.2).
#  * raw string: "\d" inside a normal string literal is an invalid escape sequence
#  * "\." matches a literal decimal point (a bare "." would match any character)
#  * expand=False returns a Series instead of a one-column DataFrame
#  * astype(str) first, so the cell can be re-run after the column is already numeric
data['Weight'] = (
    data['Weight']
    .astype(str)
    .str.extract(r'(\d+\.\d+|\d+)', expand=False)
    .astype(float)
)

# Extract the numeric part of "Ram" as an int (for example "16GB" -> 16).
data['Ram'] = (
    data['Ram']
    .astype(str)
    .str.extract(r'(\d+)', expand=False)
    .astype(int)
)

print()
print("After --\n\n ",data.dtypes)

Before -- 

 Company              object
Product              object
TypeName             object
Inches              float64
ScreenResolution     object
Cpu                  object
Ram                  object
Memory               object
Gpu                  object
OpSys                object
Weight               object
Price_euros         float64
dtype: object

After --

  Company              object
Product              object
TypeName             object
Inches              float64
ScreenResolution     object
Cpu                  object
Ram                   int64
Memory               object
Gpu                  object
OpSys                object
Weight              float64
Price_euros         float64
dtype: object


In [59]:
# Euro -> rupee factor applied to every price
EUR_TO_INR = 86.93

# Replace 'Price_euros' with 'Price_rupees': pop() removes the euro column
# from the frame and returns its values, which are then scaled.
data['Price_rupees'] = data.pop('Price_euros') * EUR_TO_INR

In [60]:
# Names of all columns whose dtype is object (the categorical/text columns)
col = [name for name, dtype in data.dtypes.items() if dtype == 'O']

In [61]:
# Ordinal-encode every categorical column by mean price: the category with the
# lowest average 'Price_rupees' becomes 0, the next one 1, and so on.
# NOTE(review): the means are taken over the whole dataset, i.e. before the
# train/test split performed later, so target information from the test rows
# ends up in the encoded features.
for feature in col:
    mean_price = data.groupby(feature)['Price_rupees'].mean()
    categories_by_price = mean_price.sort_values().index
    rank_by_category = {category: rank for rank, category in enumerate(categories_by_price)}
    data[feature] = data[feature].map(rank_by_category)

In [62]:
# Display the encoded frame. pandas abbreviates long frames, so only the
# first and last rows are rendered with an ellipsis row in between.
data

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_rupees
0,13,559,3,13.3,26,85,8,13,80,8,1.37,116459.2517
1,13,333,3,13.3,13,64,8,14,45,8,1.34,78144.8542
2,5,107,1,15.6,15,58,8,20,54,2,1.86,49984.7500
3,13,559,3,15.4,36,111,16,30,95,8,1.83,220580.5285
4,13,559,3,13.3,26,99,8,20,84,8,1.37,156786.9480
...,...,...,...,...,...,...,...,...,...,...,...,...
1298,6,193,2,14.0,16,77,4,13,47,4,1.80,55461.3400
1299,6,473,2,13.3,22,77,16,30,47,4,1.30,130308.0700
1300,6,12,1,14.0,6,16,2,6,6,4,1.50,19906.9700
1301,5,243,1,15.6,6,77,6,12,32,4,2.19,66414.5200


In [63]:
# Sanity check after encoding: dtype of each column, then the number of
# missing values in each column.
column_dtypes = data.dtypes
missing_per_column = data.isna().sum()

print("DataTypes-", column_dtypes, "\n")
print("Is it null - \n", missing_per_column)

DataTypes- Company               int64
Product               int64
TypeName              int64
Inches              float64
ScreenResolution      int64
Cpu                   int64
Ram                   int64
Memory                int64
Gpu                   int64
OpSys                 int64
Weight              float64
Price_rupees        float64
dtype: object 

Is it null - 
 Company             0
Product             0
TypeName            0
Inches              0
ScreenResolution    0
Cpu                 0
Ram                 0
Memory              0
Gpu                 0
OpSys               0
Weight              0
Price_rupees        0
dtype: int64


In [64]:
# Column the models learn to predict
target_variable = 'Price_rupees'

# Feature matrix: every column except the target.
# Index.difference returns the remaining labels sorted, so the columns of X
# are in alphabetical order rather than the order they have in `data`.
feature_columns = data.columns.difference([target_variable])
X = data[feature_columns]

# Target vector
y = data[target_variable]

In [65]:
# Hold out 30% of the rows as the test set.
# random_state pins the shuffle, so the split (and every score computed from
# it below) is identical on each Restart & Run All instead of changing per run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

# Linear Regression

In [66]:
from sklearn.linear_model import LinearRegression

In [67]:
# Ordinary least-squares linear regression, fitted on the training split
lr_model = LinearRegression().fit(
    X=X_train,
    y=y_train,
)

In [68]:
# R^2 of the linear model on the rows it was trained on
lr_model.score(X=X_train, y=y_train)

0.8484387773238823

In [69]:
# R^2 of the linear model on the held-out rows
lr_model.score(X=X_test, y=y_test)

0.881981589265236

In [70]:
# Linear-regression price predictions for the held-out rows
y_pred_lr = lr_model.predict(X=X_test)

# Random Forest Regressor

In [71]:
from sklearn.ensemble import RandomForestRegressor

In [72]:
# Random forest fitted on the training split.
# The forest is built with randomness (bootstrap samples, feature sub-sampling),
# so random_state is fixed to make the fitted model and its scores repeatable.
rfr_model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

In [73]:
# R^2 of the random forest on the rows it was trained on
rfr_model.score(X=X_train, y=y_train)

0.988548133817297

In [74]:
# R^2 of the random forest on the held-out rows
rfr_model.score(X=X_test, y=y_test)

0.9408862641457855

In [75]:
# Random-forest price predictions for the held-out rows
y_pred_rfr = rfr_model.predict(X=X_test)

# Linear Support Vector Regression

In [76]:
from sklearn.svm import LinearSVR

In [77]:
# Linear support vector regression fitted on the training split.
# The solver shuffles the data using a pseudo-random generator, so
# random_state is fixed to make the fitted coefficients repeatable.
# NOTE(review): the features are passed in unscaled; linear SVMs are usually
# sensitive to feature scale - consider standardising first.
svrLIr_model = LinearSVR(random_state=42).fit(X_train, y_train)

In [78]:
# R^2 of the linear SVR on the rows it was trained on
svrLIr_model.score(X=X_train, y=y_train)

0.7797945716025813

In [79]:
# R^2 of the linear SVR on the held-out rows
svrLIr_model.score(X=X_test, y=y_test)

0.8461164386209666

In [80]:
# Linear-SVR price predictions for the held-out rows
y_pred_svrLIr = svrLIr_model.predict(X=X_test)

# K-Nearest Neighbors Regressor

In [81]:
from sklearn.neighbors import KNeighborsRegressor

In [82]:
# k-nearest-neighbours regressor with default settings, fitted on the training split
knr_model = KNeighborsRegressor().fit(
    X=X_train,
    y=y_train,
)

In [83]:
# R^2 of the k-NN regressor on the rows it was trained on
knr_model.score(X=X_train, y=y_train)

0.9422763541287311

In [84]:
# R^2 of the k-NN regressor on the held-out rows
knr_model.score(X=X_test, y=y_test)

0.9264598629549512