# Regularization 
1. Ridge(L2) - Penalty is applied on sqaure of coefficients
2. Lasso (L2) - Penalty is applied on absolute ovalues


In [1]:
from warnings import filterwarnings

filterwarnings("ignore")

# Step 1 - Data Ingestion

In [3]:
import pandas as pd

df = pd.read_csv("Cars93.csv", na_values=["", "NA"], keep_default_na=False)

# Step 3 - Separate X and Y weight

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94 entries, 0 to 93
Data columns (total 28 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   id                  94 non-null     int64  
 1   Manufacturer        94 non-null     object 
 2   Model               94 non-null     object 
 3   Type                94 non-null     object 
 4   Min.Price           94 non-null     float64
 5   Price               94 non-null     float64
 6   Max.Price           94 non-null     float64
 7   MPG.city            94 non-null     int64  
 8   MPG.highway         94 non-null     int64  
 9   AirBags             90 non-null     object 
 10  DriveTrain          94 non-null     object 
 11  Cylinders           94 non-null     object 
 12  EngineSize          94 non-null     float64
 13  Horsepower          94 non-null     int64  
 14  RPM                 94 non-null     int64  
 15  Rev.per.mile        94 non-null     int64  
 16  Man.trans.

In [5]:
df.drop_duplicates(keep="first")

Unnamed: 0,id,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,...,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight,Origin,Make
0,1,Acura,Integra,Small,12.9,15.9,18.8,25,31,,...,5,177,102,68,37,26.5,11.0,2705,non-USA,Acura Integra
1,2,Acura,Legend,Midsize,29.2,33.9,38.7,18,25,Driver & Passenger,...,5,195,115,71,38,30.0,15.0,3560,non-USA,Acura Legend
2,3,Audi,90,Compact,25.9,29.1,32.3,20,26,Driver only,...,5,180,102,67,37,28.0,14.0,3375,non-USA,Audi 90
3,4,Audi,100,Midsize,30.8,37.7,44.6,19,26,,...,6,193,106,70,37,31.0,17.0,3405,non-USA,Audi 100
4,5,BMW,535i,Midsize,23.7,30.0,36.2,22,30,Driver only,...,4,186,109,69,39,27.0,13.0,3640,non-USA,BMW 535i
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,89,Volkswagen,Eurovan,Van,16.6,19.7,22.7,17,21,,...,7,187,115,72,38,34.0,,3960,non-USA,Volkswagen Eurovan
89,90,Volkswagen,Passat,Compact,17.6,20.0,22.4,21,30,,...,5,180,103,67,35,31.5,14.0,2985,non-USA,Volkswagen Passat
90,91,Volkswagen,Corrado,Sporty,22.9,23.3,23.7,18,25,,...,4,159,97,66,36,26.0,15.0,2810,non-USA,Volkswagen Corrado
91,92,Volvo,240,Compact,21.8,22.7,23.5,21,28,Driver only,...,5,190,104,67,37,29.5,14.0,2985,non-USA,Volvo 240


In [6]:
X = df.drop(columns=["id", "Weight"])

In [7]:
Y = df[["Weight"]]

In [8]:
Y.head()

Unnamed: 0,Weight
0,2705
1,3560
2,3375
3,3405
4,3640


# Step 4 - Apply preprocessing on X

In [9]:
cat = list(X.columns[X.dtypes == "object"])
con = list(X.columns[X.dtypes != "object"])

In [10]:
cat

['Manufacturer',
 'Model',
 'Type',
 'AirBags',
 'DriveTrain',
 'Cylinders',
 'Man.trans.avail',
 'Origin',
 'Make']

In [11]:
con

['Min.Price',
 'Price',
 'Max.Price',
 'MPG.city',
 'MPG.highway',
 'EngineSize',
 'Horsepower',
 'RPM',
 'Rev.per.mile',
 'Fuel.tank.capacity',
 'Passengers',
 'Length',
 'Wheelbase',
 'Width',
 'Turn.circle',
 'Rear.seat.room',
 'Luggage.room']

In [12]:
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

In [None]:
num_pipe = make_pipeline(
    SimpleImputer(strategy="median"), 
    StandardScaler()
    )

In [14]:
cat_pipe = make_pipeline(
    SimpleImputer(strategy="constant", fill_value="NotAvail"),
    OneHotEncoder(handle_unknown="ignore", sparse_output=False, drop="first"),
)

In [None]:
pre = ColumnTransformer(
    [
        ("num", num_pipe, con), ("cat", cat_pipe, cat)
    ]).set_output(transform="pandas")

In [16]:
X_pre = pre.fit_transform(X)
X_pre.head()

Unnamed: 0,num__Min.Price,num__Price,num__Max.Price,num__MPG.city,num__MPG.highway,num__EngineSize,num__Horsepower,num__RPM,num__Rev.per.mile,num__Fuel.tank.capacity,...,cat__Make_Toyota Camry,cat__Make_Toyota Celica,cat__Make_Toyota Previa,cat__Make_Toyota Tercel,cat__Make_Volkswagen Corrado,cat__Make_Volkswagen Eurovan,cat__Make_Volkswagen Fox,cat__Make_Volkswagen Passat,cat__Make_Volvo 240,cat__Make_Volvo 850
0,-0.482591,-0.373436,-0.28084,0.468246,0.358751,-0.833407,-0.07309,1.682139,1.115958,-1.050235,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.3989,1.508761,1.541987,-0.789329,-0.777967,0.525656,1.084859,0.347784,-0.006346,0.420485,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.017985,1.006842,0.955751,-0.430022,-0.588514,0.137352,0.544483,0.347784,-0.117566,0.083445,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.583587,1.906114,2.082423,-0.609675,-0.588514,0.137352,0.544483,0.347784,0.398088,1.370325,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.764041,1.100952,1.312989,-0.070715,0.169298,0.816883,1.239252,0.681373,0.418309,1.370325,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Step 5 - Apply train test

In [17]:
from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(
    X_pre, Y, test_size=0.2, random_state=10
)

In [18]:
xtrain.head()

Unnamed: 0,num__Min.Price,num__Price,num__Max.Price,num__MPG.city,num__MPG.highway,num__EngineSize,num__Horsepower,num__RPM,num__Rev.per.mile,num__Fuel.tank.capacity,...,cat__Make_Toyota Camry,cat__Make_Toyota Celica,cat__Make_Toyota Previa,cat__Make_Toyota Tercel,cat__Make_Volkswagen Corrado,cat__Make_Volkswagen Eurovan,cat__Make_Volkswagen Fox,cat__Make_Volkswagen Passat,cat__Make_Volvo 240,cat__Make_Volvo 850
42,-0.378705,-0.20613,-0.061001,0.288592,0.358751,-0.445103,-0.07309,0.514578,0.54975,0.114085,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53,-0.713449,-0.823072,-0.876235,1.007206,1.306016,-0.833407,-0.787159,0.347784,-0.238896,-0.651915,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21,1.433529,1.048669,0.699273,-0.430022,-0.588514,0.622732,0.062004,-0.819777,-1.118539,-0.192315,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.325411,0.13894,-0.015202,-0.609675,-0.209608,1.108111,0.505884,-0.819777,-1.553306,0.420485,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
26,-0.263276,-0.404806,-0.500678,-0.250368,-0.399061,-0.153876,-0.845057,-0.819777,0.256536,-0.192315,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
ytrain.head()

Unnamed: 0,Weight
42,3040
53,2440
21,3570
6,3470
26,3080


In [20]:
xtest.head()

Unnamed: 0,num__Min.Price,num__Price,num__Max.Price,num__MPG.city,num__MPG.highway,num__EngineSize,num__Horsepower,num__RPM,num__Rev.per.mile,num__Fuel.tank.capacity,...,cat__Make_Toyota Camry,cat__Make_Toyota Celica,cat__Make_Toyota Previa,cat__Make_Toyota Tercel,cat__Make_Volkswagen Corrado,cat__Make_Volkswagen Eurovan,cat__Make_Volkswagen Fox,cat__Make_Volkswagen Passat,cat__Make_Volvo 240,cat__Make_Volvo 850
34,-0.494134,-0.572112,-0.610597,0.288592,0.169298,-0.639255,-0.555569,0.347784,0.003765,-0.345515,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
91,0.544726,0.337616,0.149677,-0.250368,-0.209608,-0.348027,-0.574869,0.18099,-0.249007,-0.253595,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.583587,1.906114,2.082423,-0.609675,-0.588514,0.137352,0.544483,0.347784,0.398088,1.370325,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
35,-0.297905,0.04483,0.314556,-1.328289,-1.725232,0.331504,0.023405,-0.819777,-0.521999,1.339685,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,0.152268,-0.11202,-0.317479,-0.430022,-0.209608,0.622732,0.177799,0.014195,-0.703995,0.420485,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
ytest.head()

Unnamed: 0,Weight
34,2710
91,2985
3,3405
35,3735
19,3515


# Step 6 - Model building
1. Linear regression
2. Ridge
3. Lasso 
4. How to tune alpha value -> Hyperparameter tuning

In [22]:
from sklearn.linear_model import LinearRegression

model1 = LinearRegression()
model1.fit(xtrain, ytrain)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [23]:
model1.score(xtrain, ytrain)

1.0

In [25]:
model1.score(xtest, ytest)

0.8975839752147634

In [26]:
from sklearn.linear_model import Ridge

model2 = Ridge(alpha=1.5)
model2.fit(xtrain, ytrain)

0,1,2
,alpha,1.5
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [27]:
model2.score(xtrain, ytrain)

0.996199901414883

In [28]:
model2.score(xtest, ytest)

0.9108000422653845

### Cross - Validation

In [29]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(model2, xtrain, ytrain, cv=5, scoring="r2")

In [30]:
scores

array([0.96657033, 0.96711143, 0.89324158, 0.95558109, 0.92468055])

In [31]:
scores.mean()

np.float64(0.9414369962055293)

# Hyperparameter tuning for Ridge


In [32]:
import numpy as np

In [33]:
params = {"alpha": np.arange(start=1, stop=60, step=0.5)}

In [34]:
params

{'alpha': array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  5.5,  6. ,
         6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5, 10. , 10.5, 11. , 11.5,
        12. , 12.5, 13. , 13.5, 14. , 14.5, 15. , 15.5, 16. , 16.5, 17. ,
        17.5, 18. , 18.5, 19. , 19.5, 20. , 20.5, 21. , 21.5, 22. , 22.5,
        23. , 23.5, 24. , 24.5, 25. , 25.5, 26. , 26.5, 27. , 27.5, 28. ,
        28.5, 29. , 29.5, 30. , 30.5, 31. , 31.5, 32. , 32.5, 33. , 33.5,
        34. , 34.5, 35. , 35.5, 36. , 36.5, 37. , 37.5, 38. , 38.5, 39. ,
        39.5, 40. , 40.5, 41. , 41.5, 42. , 42.5, 43. , 43.5, 44. , 44.5,
        45. , 45.5, 46. , 46.5, 47. , 47.5, 48. , 48.5, 49. , 49.5, 50. ,
        50.5, 51. , 51.5, 52. , 52.5, 53. , 53.5, 54. , 54.5, 55. , 55.5,
        56. , 56.5, 57. , 57.5, 58. , 58.5, 59. , 59.5])}

In [35]:
from sklearn.model_selection import GridSearchCV

ridge1 = Ridge()
gscv_ridge = GridSearchCV(ridge1, param_grid=params, cv=5, scoring="r2")
gscv_ridge.fit(xtrain, ytrain)

0,1,2
,estimator,Ridge()
,param_grid,"{'alpha': array([ 1. , ..., 59. , 59.5])}"
,scoring,'r2'
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,alpha,np.float64(4.5)
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [36]:
gscv_ridge.best_params_

{'alpha': np.float64(4.5)}

In [37]:
gscv_ridge.best_score_

np.float64(0.9422594470229775)

In [38]:
best_ridge = gscv_ridge.best_estimator_
best_ridge

0,1,2
,alpha,np.float64(4.5)
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [39]:
best_ridge.score(xtrain, ytrain)

0.9875433321291324

In [40]:
best_ridge.score(xtest, ytest)

0.9231151432599731

# Lasso Model

In [42]:
from sklearn.linear_model import Lasso

model3 = Lasso(alpha=0.1)
model3.fit(xtrain, ytrain)

  model = cd_fast.enet_coordinate_descent(


0,1,2
,alpha,0.1
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [43]:
model3.score(xtrain, ytrain)

0.9998964269230045

In [44]:
model3.score(xtest, ytest)

0.8711427635853297

# Hyperparameter tuning on lasso

In [45]:
params

{'alpha': array([ 1. ,  1.5,  2. ,  2.5,  3. ,  3.5,  4. ,  4.5,  5. ,  5.5,  6. ,
         6.5,  7. ,  7.5,  8. ,  8.5,  9. ,  9.5, 10. , 10.5, 11. , 11.5,
        12. , 12.5, 13. , 13.5, 14. , 14.5, 15. , 15.5, 16. , 16.5, 17. ,
        17.5, 18. , 18.5, 19. , 19.5, 20. , 20.5, 21. , 21.5, 22. , 22.5,
        23. , 23.5, 24. , 24.5, 25. , 25.5, 26. , 26.5, 27. , 27.5, 28. ,
        28.5, 29. , 29.5, 30. , 30.5, 31. , 31.5, 32. , 32.5, 33. , 33.5,
        34. , 34.5, 35. , 35.5, 36. , 36.5, 37. , 37.5, 38. , 38.5, 39. ,
        39.5, 40. , 40.5, 41. , 41.5, 42. , 42.5, 43. , 43.5, 44. , 44.5,
        45. , 45.5, 46. , 46.5, 47. , 47.5, 48. , 48.5, 49. , 49.5, 50. ,
        50.5, 51. , 51.5, 52. , 52.5, 53. , 53.5, 54. , 54.5, 55. , 55.5,
        56. , 56.5, 57. , 57.5, 58. , 58.5, 59. , 59.5])}

In [46]:
lasso1 = Lasso()
gscv_Lasso = GridSearchCV(lasso1, param_grid=params, cv=5, scoring="r2")
gscv_Lasso.fit(xtrain, ytrain)

0,1,2
,estimator,Lasso()
,param_grid,"{'alpha': array([ 1. , ..., 59. , 59.5])}"
,scoring,'r2'
,n_jobs,
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,alpha,np.float64(3.0)
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [47]:
gscv_Lasso.best_params_

{'alpha': np.float64(3.0)}

In [48]:
gscv_Lasso.best_score_

np.float64(0.9412601882547676)

In [49]:
best_lasso = gscv_Lasso.best_estimator_

In [50]:
best_lasso

0,1,2
,alpha,np.float64(3.0)
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [51]:
best_lasso.score(xtrain, ytrain)

0.9758175954180973

In [53]:
best_lasso.score(xtest, ytest)  # data science is all about experimenting

0.9069141669247586

# from above result i can say that Ridge model is best here because it has highest R2 score in test and cross validation

# Step 07- Evaluate best model in detail

In [54]:
from sklearn.metrics import (
    root_mean_squared_error,
    mean_absolute_error,
    mean_absolute_percentage_error,
    r2_score,
)

In [55]:
def evaluate_model(model, x, y):
    ypred = model.predict(x)

    mae = mean_absolute_error(y, ypred)
    rmse = root_mean_squared_error(y, ypred)
    mape = mean_absolute_percentage_error(y, ypred)
    r2 = r2_score(y, ypred)

    print(f"RMSE : {rmse:.2f}")
    print(f"MAE: {mae:.2f}")
    print(f"Mape: {mape: .2%}")
    print(f"r2_score: {r2:.2%}")

In [56]:
evaluate_model(best_ridge, xtrain, ytrain)

RMSE : 67.87
MAE: 51.16
Mape:  1.68%
r2_score: 98.75%


In [57]:
evaluate_model(best_ridge, xtest, ytest)

RMSE : 133.00
MAE: 106.62
Mape:  3.59%
r2_score: 92.31%


### Because both train and test results have r2score >=0.8 this is a good model can be used for out of sample prediction

# Step 8 - Out of sample prediction

In [58]:
xnew = pd.read_csv("sample.csv", na_values=["", "NA"], keep_default_na=False)

In [59]:
xnew

Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,...,Fuel.tank.capacity,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Origin,Make
0,Audi,100,Midsize,30.8,37.7,44.6,19,26,,Front,...,15.0,6,190,106,65,37,31.0,17.0,non-USA,Audi 100
1,Pontiac,Sunbird,Compact,9.4,11.1,12.8,23,31,,Front,...,15.2,5,181,101,66,39,25.0,13.0,USA,Pontiac Sunbird
2,Chevrolet,Lumina,Midsize,13.4,15.9,18.4,21,29,,Front,...,16.5,6,198,108,71,40,28.5,16.0,USA,Chevrolet Lumina
3,Mazda,RX-7,Sporty,32.5,32.5,32.5,17,25,Driver only,Rear,...,20.0,2,169,96,69,37,,,non-USA,Mazda RX-7
4,Volkswagen,Fox,Small,8.7,9.1,9.5,25,33,,Front,...,12.4,4,163,93,63,34,26.0,10.0,non-USA,Volkswagen Fox


In [60]:
pre

0,1,2
,transformers,"[('num', ...), ('cat', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'median'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,missing_values,
,strategy,'constant'
,fill_value,'NotAvail'
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,'first'
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'


In [61]:
xnew_pre = pre.transform(xnew)

In [62]:
xnew_pre

Unnamed: 0,num__Min.Price,num__Price,num__Max.Price,num__MPG.city,num__MPG.highway,num__EngineSize,num__Horsepower,num__RPM,num__Rev.per.mile,num__Fuel.tank.capacity,...,cat__Make_Toyota Camry,cat__Make_Toyota Celica,cat__Make_Toyota Previa,cat__Make_Toyota Tercel,cat__Make_Volkswagen Corrado,cat__Make_Volkswagen Eurovan,cat__Make_Volkswagen Fox,cat__Make_Volkswagen Passat,cat__Make_Volvo 240,cat__Make_Volvo 850
0,1.583587,1.906114,2.082423,-0.609675,-0.588514,0.137352,0.544483,0.347784,0.398088,-0.498715,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,-0.886592,-0.875355,-0.830436,0.108939,0.358751,-0.639255,-0.652065,-0.152599,0.66097,-0.437435,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.424877,-0.373436,-0.317479,-0.250368,-0.020155,-0.445103,-0.652065,-0.152599,0.519418,-0.039115,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.779816,1.362368,0.974071,-0.968982,-0.777967,-1.318786,2.146313,2.015728,-0.026568,1.033285,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.967393,-1.084488,-1.132713,0.468246,0.737657,-0.833407,-1.211741,0.347784,0.42842,-1.295355,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [63]:
preds = best_ridge.predict(xnew_pre)

In [64]:
preds

array([3309.97245052, 2660.31856078, 3074.10243046, 3215.45814382,
       2216.38174272])

In [65]:
xnew["Weight"] = preds.round(2)

In [66]:
xnew

Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,...,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Origin,Make,Weight
0,Audi,100,Midsize,30.8,37.7,44.6,19,26,,Front,...,6,190,106,65,37,31.0,17.0,non-USA,Audi 100,3309.97
1,Pontiac,Sunbird,Compact,9.4,11.1,12.8,23,31,,Front,...,5,181,101,66,39,25.0,13.0,USA,Pontiac Sunbird,2660.32
2,Chevrolet,Lumina,Midsize,13.4,15.9,18.4,21,29,,Front,...,6,198,108,71,40,28.5,16.0,USA,Chevrolet Lumina,3074.1
3,Mazda,RX-7,Sporty,32.5,32.5,32.5,17,25,Driver only,Rear,...,2,169,96,69,37,,,non-USA,Mazda RX-7,3215.46
4,Volkswagen,Fox,Small,8.7,9.1,9.5,25,33,,Front,...,4,163,93,63,34,26.0,10.0,non-USA,Volkswagen Fox,2216.38


In [68]:
xnew.to_csv("Ridgeresult.csv", index=False)

# Step 9 - Save the model objects with preprocessors

In [69]:
pre

0,1,2
,transformers,"[('num', ...), ('cat', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'median'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,missing_values,
,strategy,'constant'
,fill_value,'NotAvail'
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,'first'
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'


In [70]:
best_ridge

0,1,2
,alpha,np.float64(4.5)
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [71]:
import joblib as jl

In [72]:
jl.dump(pre, "pre.joblib")

['pre.joblib']

In [73]:
jl.dump(best_ridge, "RidgeModel.joblib")

['RidgeModel.joblib']

In [74]:
p = jl.load("pre.joblib")

In [75]:
p

0,1,2
,transformers,"[('num', ...), ('cat', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'median'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,missing_values,
,strategy,'constant'
,fill_value,'NotAvail'
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,'first'
,sparse_output,False
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'


In [76]:
r = jl.load("RidgeModel.joblib")

In [77]:
r

0,1,2
,alpha,np.float64(4.5)
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,
