# Importing Libraries

In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split
from pandas.plotting import scatter_matrix
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

In [35]:
from sklearn.linear_model import LinearRegression
from lightgbm import LGBMRegressor
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

from sklearn.metrics import  r2_score,mean_absolute_error,mean_squared_error

# Loading Data

In [48]:
# Locations of the training and test CSV files.
train_path = "/train.csv"
test_path = "/test.csv"

# Read both splits into DataFrames.
d_train = pd.read_csv(train_path)
d_test = pd.read_csv(test_path)

# Drop the first column; it is just an index

In [49]:
# Remove the 'Id' column from both frames.
d_train = d_train.drop(columns=['Id'])
d_test = d_test.drop(columns=['Id'])

In [None]:
# Display summary statistics of the training frame.
d_train.describe()

In [50]:
# Replace the x, y and z columns with a single 'volume' feature (their
# product). The frame is rebuilt with a method chain rather than mutated in
# place, so the object d_train pointed to before this cell is left untouched.
# NOTE(review): d_test does not receive the same transformation in this cell;
# confirm it is handled before the fitted preprocessor is applied to it.
d_train = (
    d_train
    .assign(volume=d_train['x'] * d_train['y'] * d_train['z'])
    .drop(columns=['x', 'y', 'z'])
)
d_train.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,volume
0,1.06,Ideal,I,SI2,61.8,57.0,4270,176.48334
1,1.51,Premium,G,VVS2,60.9,58.0,15164,246.965796
2,0.32,Ideal,F,VS2,61.3,56.0,828,52.943373
3,0.53,Ideal,G,VS2,61.2,56.0,1577,86.422842
4,0.7,Premium,H,VVS2,61.0,57.0,2596,115.3152


In [51]:
# Target is the 'price' column; every remaining column is a feature.
y = d_train['price']
X = d_train.drop(columns=['price'])

In [52]:
# Split the feature columns by dtype: object columns are treated as
# categorical, everything else as numeric.
cate_cols = X.select_dtypes(include=['object']).columns
num_cols = X.select_dtypes(exclude=['object']).columns

# Explicit category orderings handed to the OrdinalEncoder; the position of
# each value in its list becomes its integer code.
# NOTE(review): presumably these are quality rankings — confirm the intended
# direction for each feature.
cut_unque = [
    'Fair',
    'Good',
    'Very Good',
    'Premium',
    'Ideal',
]
color_unque = [
    'D', 'E', 'F', 'G', 'H', 'I', 'J',
]
clarity_unque = [
    'I1', 'SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF',
]

In [53]:
# Numerical pipeline: median imputation followed by standard scaling.
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical pipeline: most-frequent imputation, ordinal encoding with the
# explicit category orders, then standard scaling of the resulting codes.
# NOTE(review): the categories list is positional — assumes cate_cols is
# ordered (cut, color, clarity); confirm.
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('ordinalencoder', OrdinalEncoder(categories=[cut_unque, color_unque, clarity_unque])),
    ('scaler', StandardScaler()),
])

# Route each column group through its own pipeline.
preprocessor = ColumnTransformer(transformers=[
    ('num_pipeline', num_pipeline, num_cols),
    ('cat_pipeline', cat_pipeline, cate_cols),
])

In [54]:

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the preprocessor on the training split only, then apply the fitted
# transformation to the held-out split.
X_train_arr = preprocessor.fit_transform(X_train)
feature_names = preprocessor.get_feature_names_out()
X_train = pd.DataFrame(X_train_arr, columns=feature_names)
X_test = pd.DataFrame(preprocessor.transform(X_test), columns=feature_names)


In [55]:
def evaluate(true, predicted):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predicted : array-like
        Predicted values, aligned with ``true``.

    Returns
    -------
    tuple
        ``(mae, rmse, r2_square)``: mean absolute error, root mean squared
        error and R^2 score, in that order.
    """
    mae = mean_absolute_error(true, predicted)
    # MSE is computed once; RMSE is derived from it.
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mse)
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

In [47]:
# Candidate regressors, keyed by the label printed in the report.
models = {
    'LinearRegression': LinearRegression(),
    'RandomForestRegressor': RandomForestRegressor(n_estimators=100, random_state=42),
    'DecisionTreeRegressor': DecisionTreeRegressor(random_state=42),
    'XGboost': xgb.XGBRegressor(),
}

# Result accumulators; trained_model_list is initialised but not filled here.
trained_model_list = []
model_list = []
r2_list = []

print(' Training Performance  Measurment')
print('---------' * 30)
print('\n')

# Fit each model on the training split and report its metrics on the
# held-out split.
# NOTE(review): the printed labels say "Training" but the metrics are
# computed from X_test / y_test.
for name, model in models.items():
    model.fit(X_train, y_train)

    # Predict on the held-out split and score the predictions.
    y_pred = model.predict(X_test)
    mae, rmse, r2_square = evaluate(y_test, y_pred)

    print(name)
    model_list.append(name)

    print('Model Training Performance')
    print("RMSE:", rmse)
    print("MAE:", mae)
    print("R2 score", r2_square * 100)

    r2_list.append(r2_square)

    print('---------' * 40)
    print('\n')

LinearRegression
Model Training Performance
RMSE: 1165.49881289608
MAE: 783.358931324257
R2 score 91.22548942357811
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


RandomForestRegressor
Model Training Performance
RMSE: 534.1989072734043
MAE: 266.71772138084754
R2 score 98.15666095867263
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


DecisionTreeRegressor
Model Traini

In [59]:
# Candidate regressors, keyed by the label printed in the report.
models = {
    'LinearRegression': LinearRegression(),
    'RandomForestRegressor': RandomForestRegressor(n_estimators=100, random_state=42),
    'DecisionTreeRegressor': DecisionTreeRegressor(random_state=42),
    'XGboost': xgb.XGBRegressor(),
}

# Result accumulators; trained_model_list is initialised but not filled here.
trained_model_list = []
model_list = []
r2_list = []

print(' Training Performance  Measurment')
print('---------' * 30)
print('\n')

# Fit each model on the training split and report its metrics on the
# held-out split.
# NOTE(review): the header says "Training" but the metrics are computed
# from X_test / y_test.
for name, model in models.items():
    model.fit(X_train, y_train)

    # Predict on the held-out split and score the predictions.
    y_pred = model.predict(X_test)
    mae, rmse, r2_square = evaluate(y_test, y_pred)

    print(name)
    model_list.append(name)

    print("RMSE:", rmse)
    print("MAE:", mae)
    print("R2 score", r2_square * 100)

    r2_list.append(r2_square)

    print('---------' * 20)
    print('\n')

 Training Performance  Measurment
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


LinearRegression
RMSE: 1203.0759001765455
MAE: 832.1894455997799
R2 score 90.6505667550266
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


RandomForestRegressor
RMSE: 539.6073144325477
MAE: 277.53211724395084
R2 score 98.11914685919369
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


DecisionTreeRegressor
RMSE: 700.4950241134173
MAE: 353.35808905881896
R2 score 96.83036437025694
------------------------------------------