In [33]:
import pandas as pd 


In [34]:
# Read the gemstone dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='gemstone.csv')

In [35]:
# Preview the first five rows to check how the CSV columns were parsed.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [36]:
# Remove the 'Unnamed: 0' helper column (a running row number in the preview
# above); it carries no information about the stone itself.
# errors='ignore' keeps this cell re-runnable: without it, executing the cell
# a second time raises KeyError because the column has already been dropped.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [37]:
# Separate the features from the regression target.
# X: every column except 'price'. There must be no trailing comma after the
# call -- a stray comma turns X into a 1-tuple wrapping the DataFrame.
X = df.drop(labels=['price'], axis=1)
# Y: the 'price' column, kept as a single-column DataFrame (double brackets).
Y = df[['price']]

In [38]:
# Display the target frame (the single 'price' column selected above).
Y

Unnamed: 0,price
0,326
1,326
2,327
3,334
4,335
...,...
53935,2757
53936,2757
53937,2757
53938,2757


In [39]:
# Guard: if X arrives wrapped in a tuple (e.g. from an assignment with a stray
# trailing comma), join its members column-wise into one DataFrame.
if isinstance(X, tuple):
    X = pd.concat(X, axis=1)

# Partition the feature names by dtype: object columns are treated as
# categorical, everything else as numerical.
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(exclude=['object']).columns

In [40]:
# Explicit category orderings for the three categorical features. They are
# handed to OrdinalEncoder(categories=...), so each value is encoded as its
# position in its list.
# NOTE(review): cut and clarity look ordered from lowest to highest grade,
# while color runs D -> J -- confirm the intended direction for color.
cut_categories = [
    'Fair',
    'Good',
    'Very Good',
    'Premium',
    'Ideal',
]
color_categories = [
    'D',
    'E',
    'F',
    'G',
    'H',
    'I',
    'J',
]
# The name is misspelled ("categores") but is referenced under this exact
# spelling by the encoder cell; rename both places together if it is fixed.
clarity_categores = [
    'I1',
    'SI2',
    'SI1',
    'VS2',
    'VS1',
    'VVS2',
    'VVS1',
    'IF',
]

In [41]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [42]:
# Numerical branch: fill missing values with the column median, then
# standardise each column.
num_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scalar', StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent label, then
# map labels to integers using the explicit orderings defined earlier.
# The categories list is positional, so it must line up with the order of
# categorical_cols (cut, color, clarity in the previewed frame).
cat_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('ordinalencoder', OrdinalEncoder(
        categories=[cut_categories, color_categories, clarity_categores],
    )),
])

# Route each column group through its own branch. The transformer names
# become the prefixes of the output feature names, so they are kept verbatim.
preprocessor = ColumnTransformer(transformers=[
    ('num_pipline', num_pipeline, numerical_cols),
    ('cat_pipeline', cat_pipeline, categorical_cols),
])

In [43]:
from sklearn.model_selection import train_test_split
# Hold out 34% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.34,
    random_state=36,
)

In [44]:
# Fit the preprocessor on the training rows only, then apply the already
# fitted transformer to the test rows.
# NOTE: both names are overwritten with the transformed frames, so re-run the
# split cell before executing this cell again.
X_train = pd.DataFrame(
    data=preprocessor.fit_transform(X_train),
    columns=preprocessor.get_feature_names_out(),
)
X_test = pd.DataFrame(
    data=preprocessor.transform(X_test),
    columns=preprocessor.get_feature_names_out(),
)

In [45]:
# Preview the first five transformed training rows.
X_train.head(n=5)

Unnamed: 0,num_pipline__carat,num_pipline__depth,num_pipline__table,num_pipline__x,num_pipline__y,num_pipline__z,cat_pipeline__cut,cat_pipeline__color,cat_pipeline__clarity
0,-0.794126,-0.173137,-1.546541,-0.835937,-0.799527,-0.811208,4.0,0.0,4.0
1,-0.098857,0.176559,-1.099331,0.135207,0.089192,0.172533,4.0,1.0,4.0
2,0.764963,0.456316,0.242299,0.856882,0.825303,0.875205,3.0,5.0,3.0
3,0.891375,-0.592773,-0.204911,1.079622,1.04075,0.945472,3.0,3.0,3.0
4,-1.046952,-0.103198,0.242299,-1.290326,-1.239398,-1.232811,3.0,2.0,5.0


In [46]:
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [48]:
# Baseline: ordinary least squares fitted on the preprocessed training data.
regression = LinearRegression()
regression.fit(X=X_train, y=y_train)

In [61]:
import numpy as np 
def evaluate_model(true, predicated):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predicated : array-like
        Predicted target values, passed to the metric functions alongside
        ``true``.

    Returns
    -------
    tuple
        ``(mae, rmse, r2_square)`` -- mean absolute error, root mean squared
        error and the R^2 score, in that order.
    """
    mae = mean_absolute_error(true, predicated)
    mse = mean_squared_error(true, predicated)
    # RMSE is the square root of the MSE value; the metric function itself
    # must be called first, not passed to np.sqrt.
    rmse = np.sqrt(mse)
    # Store the score under its own name: assigning to `r2_score` here would
    # make that name local and break the call to the imported function.
    r2_square = r2_score(true, predicated)
    return mae, rmse, r2_square

In [63]:
# Candidate linear regressors, each constructed with its default settings.
models = {
    'LinearRegression': LinearRegression(),
    'Lasso': Lasso(),
    'Ridge': Ridge(),
    'ElasticNet': ElasticNet()
}

# Parallel lists filled in the same order as `models`:
# fitted estimators, their names, and their R^2 scores.
trained_model_list = []
model_list = []
r2_list = []

for model_name, model in models.items():
    # Fit on the preprocessed training split.
    model.fit(X_train, y_train)

    # Predict on the held-out test split; every metric below is therefore a
    # test-set score, even though the printed heading says "Training".
    y_pred = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    # Take the root of the MSE explicitly instead of passing squared=False,
    # which newer scikit-learn releases no longer accept.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2_square = r2_score(y_test, y_pred)

    print(model_name)
    print('Model Training Performance:')
    print('RMSE:', rmse)
    print('MAE:', mae)
    print('R2 score:', r2_square * 100)

    model_list.append(model_name)
    r2_list.append(r2_square)
    trained_model_list.append(model)










LinearRegression
Model Training Performance:
RMSE: 1286.143649806104
MAE: 816.2164712894291
R2 score: 89.6845213883663
Lasso
Model Training Performance:
RMSE: 1241.6646611905503
MAE: 814.7847170246138
R2 score: 90.38566901489051
Ridge
Model Training Performance:
RMSE: 1283.8990737902946
MAE: 816.2447787556275
R2 score: 89.72049508668779
ElasticNet
Model Training Performance:
RMSE: 1639.0536992095444
MAE: 1080.2783847830772
R2 score: 83.24683785034892
