<a href="https://colab.research.google.com/github/mehdiabbasidev/darsman-machine-learning/blob/main/Ridge_Lasso_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Data set download link:
https://drive.google.com/file/d/1YYUpJx5_xttmji1VEV6XFaSdkaNd8NDf/view?usp=sharing

## auto-mpg : داده ها مشخصات فنی خودروها است
* mpg: مایل به ازای هر گالن (مصرف سوخت)
* cylinders: تعداد سیلندرها
* displacement: حجم موتور
* horsepower: اسب بخار
* weight: وزن
* acceleration: شتاب
* model year: سال مدل
* origin: منشأ
* car name: نام خودرو

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the dataset can be
# read from a path under that folder.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
import seaborn as sns
from sklearn import preprocessing
%matplotlib inline

In [None]:
# Read the auto-mpg CSV from the Drive mount point, then preview the first rows.
csv_path = "/content/drive/MyDrive/datasets/auto-mpg.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Structural overview of the raw table.
# A notebook cell only renders its final expression automatically, so every
# inspection is passed to display() explicitly; otherwise all but the last
# one would be evaluated and silently thrown away.
display(df.shape)
display(df.columns)
display(df.dtypes)
display(df.duplicated().sum())
display(df.nunique())
display(df.cylinders.nunique())
df.info()  # prints its own report, no display() needed
display(df.describe())
display(df.isnull().sum())
display(df.corr(numeric_only=True))
display(df['cylinders'].value_counts())
display(df['origin'].value_counts().to_frame())

# 'horsepower' is handled as text here (.str accessor): count the entries that
# are not numeric strings, then locate the '?' placeholder cells.
display((df['horsepower'].str.isnumeric() == False).sum())
display((df['horsepower'] == '?').sum())
display((df == '?').sum())
display(df[df['horsepower'] == '?'])

# Shape of the sub-table for each origin code.
for origin_code in (1, 2, 3):
    display(df[df['origin'] == origin_code].shape)

(79, 9)

### Data Cleaning

In [None]:
# Turn every '?' placeholder cell into a real missing value (NaN).
df = df.replace(to_replace='?', value=np.nan)

In [None]:
# Remove the 'car name' column and preview the result.
# errors='ignore' keeps the cell safe to re-run: once the column is gone, a
# second execution is a no-op instead of raising KeyError.
df = df.drop(columns='car name', errors='ignore')
df.head()

In [None]:
# 'horsepower' still holds text values at this point (it contained '?' placeholders),
# and recent pandas versions refuse to compute the median of a string column.
# Convert it to numbers first; anything unparseable becomes NaN and is filled below.
df["horsepower"] = pd.to_numeric(df["horsepower"], errors="coerce")

# Fill each numeric column's missing cells with that column's own median.
df = df.fillna(df.median(numeric_only=True))

In [None]:
# Cast horsepower to integers, then list every column's dtype to confirm the result.
df = df.astype({"horsepower": int})
df.dtypes

### Data Visualization

In [None]:
# Annotated correlation heatmap showing only the lower triangle.
plt.figure(figsize=(15, 8))
corr = df.corr(numeric_only=True)
# Boolean upper-triangle mask (diagonal included). It is built from a matrix of
# ones rather than from the correlation values themselves, so a coefficient that
# happens to be exactly 0 is still hidden.
mask = np.triu(np.ones_like(corr, dtype=bool))
sns.heatmap(corr, square=True, mask=mask, annot=True, fmt='.2f')
plt.show()

In [None]:
# Bar chart of the number of cars per origin code, with the count printed on each bar.
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(x='origin', data=df, ax=ax)
ax.bar_label(ax.containers[0], label_type='edge')
ax.set_title("origin distribution")
ax.set_xlabel("Origin")
ax.set_ylabel("Car count")
plt.show()

In [None]:
# Point plot of mpg per model year, one series per origin code, without error bars.
fig, ax = plt.subplots(figsize=(10, 5))
sns.pointplot(data=df, x='model year', y='mpg', hue='origin', errorbar=None, ax=ax)
ax.set_title("model year - mpg based on origin")
ax.set_xlabel("model year")
ax.set_ylabel("mpg")
plt.show()

In [None]:
# Line plot of mpg against model year with enlarged axis labels.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=df, x='model year', y='mpg', ax=ax)
ax.set_xlabel("year", fontsize=15)
ax.set_ylabel("mpg", fontsize=15);

In [None]:
# Bar chart of the number of cars per cylinder count, with the count printed on each bar.
ax = sns.countplot(data=df, x='cylinders', color='#4287f5')
ax.bar_label(ax.containers[0], label_type='edge')
ax.set_xlabel("cylinder", fontsize=15)
ax.set_ylabel("cars count", fontsize=15)
plt.show()

In [None]:
# Scatter plot of mpg against horsepower.
ax = sns.scatterplot(data=df, x='horsepower', y='mpg')
ax.set_title("horsepower - mpg")
ax.set_xlabel("horsepower")
ax.set_ylabel("mpg")
plt.show()

### Create Model

In [None]:
# Features are every column except 'mpg'; the target is kept as a one-column DataFrame.
X = df.drop(columns='mpg')
y = df.loc[:, ['mpg']]
# Only the final expression of the cell (y.shape) is rendered.
X.shape
y.shape

(398, 1)

In [None]:
# Disabled experiment: standardise X and y with preprocessing.scale and wrap the
# results back into DataFrames that reuse the original column names.
# NOTE(review): the cells below fit on the unscaled X / y. Ridge and Lasso
# penalties are sensitive to feature scale, so confirm that skipping this step
# is intentional.
# X_scaled = preprocessing.scale(X)
# X_scaled = pd.DataFrame(X_scaled, columns=X.columns)

# y_scaled = preprocessing.scale(y)
# y_scaled = pd.DataFrame(y_scaled, columns=y.columns)

# X_scaled.shape
# y_scaled.shape

In [None]:
# Hold out 20% of the rows for testing; random_state=1 pins the shuffle.
split_parts = train_test_split(X, y, test_size=0.2, random_state=1)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Plain linear regression fitted on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Pair each feature name with its fitted coefficient. coef_ is indexed with [0]
# because the target was passed as a one-column frame, giving one row per target.
for col_name, coef_value in zip(X_train.columns, model.coef_[0]):
    print(f"Regression model coefficient for {col_name} =>\t\t\t{coef_value}")

print(f"intercept => {model.intercept_[0]}")

Regression model coefficient for cylinders =>			-0.27839835574965366
Regression model coefficient for displacement =>			0.017945885956041005
Regression model coefficient for horsepower =>			-0.015941309405825124
Regression model coefficient for weight =>			-0.006708749803368989
Regression model coefficient for acceleration =>			0.10799192624266944
Regression model coefficient for model year =>			0.7936838683224272
Regression model coefficient for origin =>			1.3343753006382393
intercept => -20.963138663631067


In [None]:
# Report the linear model's score on the training split, then on the test split.
for features, target in ((X_train, y_train), (X_test, y_test)):
    print(model.score(features, target))

0.8102080061039307
0.8641192964254736


In [None]:
# Ridge regression with alpha=0.2, fitted on the training split.
ridge = Ridge(alpha=0.2)
ridge.fit(X=X_train, y=y_train)

In [None]:
# Pair each feature name with its Ridge coefficient (coef_[0] is the single target's row).
for col, coef_value in zip(X_train.columns, ridge.coef_[0]):
    print (f"Ridge model coefficients for {col} =>\t\t {coef_value}")

Ridge model coefficients for cylinders =>		 -0.27738245215348023
Ridge model coefficients for displacement =>		 0.01791295987228325
Ridge model coefficients for horsepower =>		 -0.015921341983330867
Ridge model coefficients for weight =>		 -0.006709063358038304
Ridge model coefficients for acceleration =>		 0.10792099178213559
Ridge model coefficients for model year =>		 0.7936495848410888
Ridge model coefficients for origin =>		 1.332005770919568


In [None]:
# Lasso regression with alpha=0.5, fitted on the training split.
lasso = Lasso(alpha=0.5)
lasso.fit(X=X_train, y=y_train)


In [None]:
# Pair each feature name with its Lasso coefficient (coef_ is indexed directly by feature here).
for col, coef_value in zip(X_train.columns, lasso.coef_):
    print (f"Lasso model coefficients for {col} =>\t\t {coef_value}")

In [None]:
# Report the Ridge model's score on the training split, then on the test split.
for features, target in ((X_train, y_train), (X_test, y_test)):
    print(ridge.score(features, target))

0.8102079709043211
0.8641032432221173


In [None]:
# Report the Lasso model's score on the training split, then on the test split.
for features, target in ((X_train, y_train), (X_test, y_test)):
    print(lasso.score(features, target))

0.7990205472990484
0.8457778392190498


In [None]:
# Not used in this cell; the import is kept in case other cells rely on it.
from sklearn.model_selection import cross_val_score

# Sweep the Lasso regularisation strength from 0.1 to 0.95 in steps of 0.05 and
# print the train / test score for each value.
# NOTE(review): comparing alphas on the test split lets test data influence model
# selection; cross-validating on the training split is the usual alternative.
alphas = [round(num, 2) for num in np.arange(0.1, 1, 0.05)]
for alpha in alphas:
    # A separate name is used so the `lasso` model fitted with alpha=0.5 earlier
    # is not overwritten by the last model of this sweep.
    lasso_sweep = Lasso(alpha=alpha)
    lasso_sweep.fit(X_train, y_train)
    # Label each pair of scores so the output can be read without counting blocks.
    print(f"alpha = {alpha}")
    print(lasso_sweep.score(X_train, y_train))
    print(lasso_sweep.score(X_test, y_test))
    print("------------------")


0.8093625228398579
0.8602489000629899
------------------
0.8087050701175166
0.8591231755577021
------------------
0.8077846671661588
0.8576822416694705
------------------
0.8066013129215989
0.855925954808505
------------------
0.8051549123930659
0.8538544980289264
------------------
0.8036436847475892
0.8517245565270893
------------------
0.801915800119057
0.8493614526223694
------------------
0.7999575503790766
0.8467482252419274
------------------
0.7990205472990484
0.8457778392190498
------------------
0.7989424580228661
0.8461261254188808
------------------
0.7988570455154049
0.8464662252894531
------------------
0.7987640823984185
0.8467971088512196
------------------
0.798663680642399
0.8471193121051721
------------------
0.7985560106587499
0.847433507475465
------------------
0.7984407402426437
0.847738314974301
------------------
0.7983180292256133
0.848034396931177
------------------
0.7981878811500985
0.8483217815635349
------------------
0.7980502939416304
0.8486004551665924