In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression , Ridge, Lasso
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

# With a simple **train/test** split


## Label encoding

In [None]:
# Baseline: ordinary least squares on integer-coded categorical features,
# fitted on 80% of the rows and scored on the remaining 20%.
df = pd.read_csv("https://raw.githubusercontent.com/mujahidashraf/data/ab06025689b4d8904b6a3e330dc8012cf4316f92/data.csv")

# LabelEncoder is documented for target labels (y) only. OrdinalEncoder is the
# feature-side counterpart: it likewise numbers the categories in sorted order,
# handles both columns in a single fit, and keeps one category list per column
# in `categories_` (re-fitting a single LabelEncoder discarded the 'Car' mapping).
# NOTE: integer codes impose an arbitrary ordering that a linear model will
# treat as a numeric scale.
categorical_cols = ['Car', 'Model']
ordinal_encoder = OrdinalEncoder(dtype=np.int64)
encoded = ordinal_encoder.fit_transform(df[categorical_cols])
for position, column in enumerate(categorical_cols):
    df[column] = encoded[:, position]

X = df[['Car', 'Model', 'Volume', 'Weight']]
y = df['CO2']
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=10)

regr = LinearRegression()
regr.fit(X_train, y_train)

predictedCO2 = regr.predict(X_test)

# Error metrics on the held-out rows; RMSE is derived from MSE.
mae = mean_absolute_error(y_test, predictedCO2)
mse = mean_squared_error(y_test, predictedCO2)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, predictedCO2)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print(100*'_')



MAE: 5.705962317776956
MSE: 44.50925876067645
RMSE: 6.671525969422322
R2: 0.3163924740380867
____________________________________________________________________________________________________


## One-hot encoding


In [None]:
# Baseline with one-hot encoded categoricals: 'Car' and 'Model' are expanded
# into indicator columns, then ordinary least squares is fitted on 80% of the
# rows and scored on the remaining 20%.
df = pd.read_csv("https://raw.githubusercontent.com/mujahidashraf/data/ab06025689b4d8904b6a3e330dc8012cf4316f92/data.csv")

categorical_cols = ['Car', 'Model']
encoder = OneHotEncoder(sparse_output=False)  # dense ndarray instead of a sparse matrix

# fit_transform returns a bare array, so it is wrapped in a DataFrame that
# carries the encoder-generated column names.
one_hot_encoded_array = encoder.fit_transform(df[categorical_cols])
one_hot_encoded_df = pd.DataFrame(
    one_hot_encoded_array,
    columns=encoder.get_feature_names_out(categorical_cols),
)

# Swap the raw categorical columns for their indicator columns.
df = pd.concat([df.drop(columns=categorical_cols), one_hot_encoded_df], axis=1)

# Target is CO2; every other column is a feature.
y = df['CO2']
X = df.drop('CO2', axis=1)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=10
)

regr = LinearRegression().fit(X_train, y_train)
predictedCO2 = regr.predict(X_test)

# Error metrics on the held-out rows; RMSE is derived from MSE.
mae = mean_absolute_error(y_test, predictedCO2)
mse = mean_squared_error(y_test, predictedCO2)
r2 = r2_score(y_test, predictedCO2)
rmse = np.sqrt(mse)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print(100*'_')


MAE: 7.004939976173425
MSE: 60.52400234879123
RMSE: 7.779717369467302
R2: 0.0704256898673773
____________________________________________________________________________________________________


# Applying Lasso

## With label encoding


In [None]:
# Lasso on degree-10 polynomial features of the integer-coded data, fitted on
# every row. In-sample metrics are reported first, followed by a held-out R2.
df = pd.read_csv("https://raw.githubusercontent.com/mujahidashraf/data/ab06025689b4d8904b6a3e330dc8012cf4316f92/data.csv")

# LabelEncoder is documented for target labels (y) only. OrdinalEncoder is the
# feature-side counterpart: it likewise numbers the categories in sorted order,
# handles both columns in a single fit, and keeps one category list per column
# in `categories_` (re-fitting a single LabelEncoder discarded the 'Car' mapping).
categorical_cols = ['Car', 'Model']
ordinal_encoder = OrdinalEncoder(dtype=np.int64)
encoded = ordinal_encoder.fit_transform(df[categorical_cols])
for position, column in enumerate(categorical_cols):
    df[column] = encoded[:, position]

X = df[['Car', 'Model', 'Volume', 'Weight']]
y = df['CO2']

poly = PolynomialFeatures(degree=10)
co2_poly = poly.fit_transform(X)

lasso_alpha = 0.5  # regularization strength
l1 = Lasso(alpha=lasso_alpha)
l1.fit(co2_poly, y)

# In-sample predictions: the model is scored on the same rows it was fitted
# on, so these four metrics describe goodness of fit, not generalisation.
predictedCO2 = l1.predict(co2_poly)

mae = mean_absolute_error(y, predictedCO2)
mse = mean_squared_error(y, predictedCO2)
rmse = np.sqrt(mse)
r2 = r2_score(y, predictedCO2)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")

# Held-out check: refit an identically configured model on 80% of the rows and
# score it on the remaining 20%, so the in-sample R2 above can be compared
# against performance on unseen rows. Separate variable names keep `l1` and
# `predictedCO2` as the full-data fit.
poly_fit, poly_holdout, y_fit, y_holdout = train_test_split(
    co2_poly, y, train_size=0.8, random_state=10
)
holdout_model = Lasso(alpha=lasso_alpha).fit(poly_fit, y_fit)
holdout_r2 = r2_score(y_holdout, holdout_model.predict(poly_holdout))
print(f"Held-out R2: {holdout_r2}")
print(100*'_')



MAE: 1.3595620071902421
MSE: 4.6484805731416765
RMSE: 2.156033527833386
R2: 0.9139600562305715
____________________________________________________________________________________________________


  model = cd_fast.enet_coordinate_descent(


In [None]:
# Lasso on degree-2 polynomial features of the integer-coded data, fitted on
# 80% of the rows and scored on the remaining 20%.
df = pd.read_csv("https://raw.githubusercontent.com/mujahidashraf/data/ab06025689b4d8904b6a3e330dc8012cf4316f92/data.csv")

# LabelEncoder is documented for target labels (y) only. OrdinalEncoder is the
# feature-side counterpart: it likewise numbers the categories in sorted order,
# handles both columns in a single fit, and keeps one category list per column
# in `categories_` (re-fitting a single LabelEncoder discarded the 'Car' mapping).
categorical_cols = ['Car', 'Model']
ordinal_encoder = OrdinalEncoder(dtype=np.int64)
encoded = ordinal_encoder.fit_transform(df[categorical_cols])
for position, column in enumerate(categorical_cols):
    df[column] = encoded[:, position]

X = df[['Car', 'Model', 'Volume', 'Weight']]
y = df['CO2']

poly = PolynomialFeatures(degree=2)
co2_poly = poly.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(co2_poly, y, train_size=0.8, random_state=10)

l1 = Lasso(alpha=15)  # alpha is the regularization parameter
l1.fit(X_train, y_train)

predictedCO2 = l1.predict(X_test)

# Error metrics on the held-out rows; RMSE is derived from MSE.
mae = mean_absolute_error(y_test, predictedCO2)
mse = mean_squared_error(y_test, predictedCO2)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, predictedCO2)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print(100*'_')

MAE: 5.117954547576161
MSE: 39.265674509503256
RMSE: 6.266232880248168
R2: 0.39692748533520317
____________________________________________________________________________________________________


  model = cd_fast.enet_coordinate_descent(


## With one-hot encoding

In [None]:
# Lasso on degree-2 polynomial features of the one-hot encoded data, fitted on
# every row. In-sample metrics are reported first, followed by a held-out R2.
df = pd.read_csv("https://raw.githubusercontent.com/mujahidashraf/data/ab06025689b4d8904b6a3e330dc8012cf4316f92/data.csv")
encoder = OneHotEncoder(sparse_output=False)

# Fitting the encoder and transforming the data
one_hot_encoded_array = encoder.fit_transform(df[['Car', 'Model']])

# The transformed data is an array, so we need to convert it back to a DataFrame
one_hot_encoded_df = pd.DataFrame(one_hot_encoded_array, columns=encoder.get_feature_names_out(['Car', 'Model']))

# Concatenating the one-hot encoded columns to the original DataFrame
df = pd.concat([df, one_hot_encoded_df], axis=1).drop(['Car', 'Model'], axis=1)
X = df.drop(columns=['CO2'])
y = df['CO2']

poly = PolynomialFeatures(degree=2)
co2_poly = poly.fit_transform(X)

lasso_alpha = 0.1  # regularization strength
l1 = Lasso(alpha=lasso_alpha)
l1.fit(co2_poly, y)

# In-sample predictions: the model is scored on the same rows it was fitted
# on, so these four metrics describe goodness of fit, not generalisation.
predictedCO2 = l1.predict(co2_poly)

mae = mean_absolute_error(y, predictedCO2)
mse = mean_squared_error(y, predictedCO2)
rmse = np.sqrt(mse)
r2 = r2_score(y, predictedCO2)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")

# Held-out check: refit an identically configured model on 80% of the rows and
# score it on the remaining 20%, so the in-sample R2 above can be compared
# against performance on unseen rows. Separate variable names keep `l1` and
# `predictedCO2` as the full-data fit.
poly_fit, poly_holdout, y_fit, y_holdout = train_test_split(
    co2_poly, y, train_size=0.8, random_state=10
)
holdout_model = Lasso(alpha=lasso_alpha).fit(poly_fit, y_fit)
holdout_r2 = r2_score(y_holdout, holdout_model.predict(poly_holdout))
print(f"Held-out R2: {holdout_r2}")
print(100*'_')


MAE: 0.0027702630082787985
MSE: 1.0896759154513885e-05
RMSE: 0.003301023955458955
R2: 0.9999997983090323
____________________________________________________________________________________________________


  model = cd_fast.enet_coordinate_descent(


In [None]:
# Lasso on degree-2 polynomial features of the one-hot encoded data, fitted on
# 80% of the rows and scored on the remaining 20%.
df = pd.read_csv("https://raw.githubusercontent.com/mujahidashraf/data/ab06025689b4d8904b6a3e330dc8012cf4316f92/data.csv")

categorical_cols = ['Car', 'Model']
encoder = OneHotEncoder(sparse_output=False)  # dense ndarray instead of a sparse matrix

# fit_transform returns a bare array, so it is wrapped in a DataFrame that
# carries the encoder-generated column names.
one_hot_encoded_array = encoder.fit_transform(df[categorical_cols])
one_hot_encoded_df = pd.DataFrame(
    one_hot_encoded_array,
    columns=encoder.get_feature_names_out(categorical_cols),
)

# Swap the raw categorical columns for their indicator columns.
df = pd.concat([df.drop(columns=categorical_cols), one_hot_encoded_df], axis=1)

# Target is CO2; every other column is a feature.
y = df['CO2']
X = df.drop('CO2', axis=1)

# Expand to degree-2 polynomial terms before splitting.
poly = PolynomialFeatures(degree=2)
co2_poly = poly.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    co2_poly, y, train_size=0.8, random_state=10
)

# alpha is the regularization parameter
l1 = Lasso(alpha=1).fit(X_train, y_train)
predictedCO2 = l1.predict(X_test)

# Error metrics on the held-out rows; RMSE is derived from MSE.
mae = mean_absolute_error(y_test, predictedCO2)
mse = mean_squared_error(y_test, predictedCO2)
r2 = r2_score(y_test, predictedCO2)
rmse = np.sqrt(mse)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print(100*'_')

MAE: 6.635396219117624
MSE: 70.26924995879843
RMSE: 8.38267558472821
R2: -0.07924933941999024
____________________________________________________________________________________________________


  model = cd_fast.enet_coordinate_descent(


# Applying Ridge

## With label encoding

In [None]:
# Ridge on degree-2 polynomial features of the integer-coded data, fitted on
# 80% of the rows and scored on the remaining 20%.
df = pd.read_csv("https://raw.githubusercontent.com/mujahidashraf/data/ab06025689b4d8904b6a3e330dc8012cf4316f92/data.csv")

# LabelEncoder is documented for target labels (y) only. OrdinalEncoder is the
# feature-side counterpart: it likewise numbers the categories in sorted order,
# handles both columns in a single fit, and keeps one category list per column
# in `categories_` (re-fitting a single LabelEncoder discarded the 'Car' mapping).
categorical_cols = ['Car', 'Model']
ordinal_encoder = OrdinalEncoder(dtype=np.int64)
encoded = ordinal_encoder.fit_transform(df[categorical_cols])
for position, column in enumerate(categorical_cols):
    df[column] = encoded[:, position]

X = df[['Car', 'Model', 'Volume', 'Weight']]
y = df['CO2']

poly = PolynomialFeatures(degree=2)
co2_poly = poly.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(co2_poly, y, train_size=0.8, random_state=10)

l2 = Ridge(alpha=0.1)  # alpha is the regularization parameter
l2.fit(X_train, y_train)

predictedCO2 = l2.predict(X_test)

# Error metrics on the held-out rows; RMSE is derived from MSE.
mae = mean_absolute_error(y_test, predictedCO2)
mse = mean_squared_error(y_test, predictedCO2)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, predictedCO2)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print(100*'_')

MAE: 11.086146485938748
MSE: 159.5469149480504
RMSE: 12.631188184333665
R2: -1.4504445780358113
____________________________________________________________________________________________________


## With one-hot encoding

In [None]:
# Ridge on degree-2 polynomial features of the one-hot encoded data, fitted on
# 80% of the rows and scored on the remaining 20%.
df = pd.read_csv("https://raw.githubusercontent.com/mujahidashraf/data/ab06025689b4d8904b6a3e330dc8012cf4316f92/data.csv")

categorical_cols = ['Car', 'Model']
encoder = OneHotEncoder(sparse_output=False)  # dense ndarray instead of a sparse matrix

# fit_transform returns a bare array, so it is wrapped in a DataFrame that
# carries the encoder-generated column names.
one_hot_encoded_array = encoder.fit_transform(df[categorical_cols])
one_hot_encoded_df = pd.DataFrame(
    one_hot_encoded_array,
    columns=encoder.get_feature_names_out(categorical_cols),
)

# Swap the raw categorical columns for their indicator columns.
df = pd.concat([df.drop(columns=categorical_cols), one_hot_encoded_df], axis=1)

# Target is CO2; every other column is a feature.
y = df['CO2']
X = df.drop('CO2', axis=1)

# Expand to degree-2 polynomial terms before splitting.
poly = PolynomialFeatures(degree=2)
co2_poly = poly.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    co2_poly, y, train_size=0.8, random_state=10
)

# alpha is the regularization parameter
l2 = Ridge(alpha=1).fit(X_train, y_train)
predictedCO2 = l2.predict(X_test)

# Error metrics on the held-out rows; RMSE is derived from MSE.
mae = mean_absolute_error(y_test, predictedCO2)
mse = mean_squared_error(y_test, predictedCO2)
r2 = r2_score(y_test, predictedCO2)
rmse = np.sqrt(mse)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print(100*'_')

MAE: 6.1972069618007914
MSE: 48.03177506594763
RMSE: 6.930496018752743
R2: 0.26229095171090755
____________________________________________________________________________________________________
