<a href="https://colab.research.google.com/github/mehdiabbasidev/darsman-machine-learning/blob/main/ElasticNet_Ridge_Lasso_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Dataset download link

Auto MPG dataset (UCI Machine Learning Repository): https://archive.ics.uci.edu/dataset/9/auto+mpg


## auto-mpg : داده ها مشخصات فنی خودروها است
* mpg: مایل به ازای هر گالن (مصرف سوخت)
* cylinders: تعداد سیلندرها
* displacement: حجم موتور (جابجایی)
* horsepower: اسب بخار
* weight: وزن
* acceleration: شتاب
* model year: سال مدل
* origin: منشأ
* car name: نام خودرو

In [None]:
# Mount Google Drive into the runtime at /content/drive; the dataset CSV is
# read from a folder under that mount point in a later cell.
# NOTE(review): google.colab is imported here rather than with the other
# imports — presumably because the package only exists on Colab; confirm.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
import seaborn as sns
from sklearn import preprocessing
%matplotlib inline

In [None]:
# Read the auto-mpg CSV from the mounted Drive folder and preview the first rows.
df = pd.read_csv(filepath_or_buffer="/content/drive/MyDrive/datasets/auto-mpg.csv")
df.head(n=5)

In [None]:
# Exploratory overview of the raw frame.
# A notebook cell renders only its final bare expression, so each intermediate
# result is passed to display() (made available by IPython/Colab without an
# import); written as bare expressions, everything before the last line was
# computed and then silently dropped.
display(df.head())
display(df.shape)
display(df.columns)
display(df.dtypes)
display(df.duplicated().sum())
display(df.nunique())
display(df.cylinders.nunique())
df.info()  # prints its report itself, so it needs no display()
display(df.describe())
display(df.isnull().sum())
display(df.corr(numeric_only=True))
display(df['cylinders'].value_counts())
display(df['origin'].value_counts().to_frame())

# 'horsepower' is still text at this point: count its non-numeric entries and
# locate the '?' placeholders (per column, and the affected rows).
display((df['horsepower'].str.isnumeric()==False).sum())
display((df['horsepower']=='?').sum())
display((df=='?').sum())
display(df[df['horsepower']=='?'])

# Shape of the sub-frame for each origin code.
display(df[df['origin']==1].shape)
display(df[df['origin']==2].shape)
display(df[df['origin']==3].shape)

### Data Cleaning

In [None]:
# Turn the '?' placeholder into a real missing value so pandas treats it as NaN.
df = df.replace(to_replace='?', value=np.nan)

In [None]:
# Remove the free-text 'car name' column, then preview the remaining columns.
df = df.drop(columns=['car name'])
df.head(n=5)

In [None]:
# Impute missing values with each column's median.
# 'horsepower' was read as text (it held '?' placeholders), so it is converted
# to a numeric dtype first: recent pandas versions raise a TypeError when asked
# for the median of an object/string column instead of silently casting it.
# pd.to_numeric keeps NaN as NaN and raises on any other non-numeric token.
df = df.assign(horsepower=pd.to_numeric(df["horsepower"]))
# fillna with a Series of medians fills every column with its own median.
df = df.fillna(df.median(numeric_only=True))

In [None]:
# Cast 'horsepower' to an integer dtype, then list the dtype of every column.
# NOTE(review): astype(int) discards any fractional part, so a non-integer
# median imputed in the previous step would be truncated — confirm intended.
df["horsepower"] = df["horsepower"].astype(int)
df.dtypes

### Data Visualization

In [None]:
# Lower-triangle heatmap of the pairwise correlations between all columns.
plt.figure(figsize=(15, 8))
corr = df.corr()
# Build the mask from positions, not from the correlation values themselves:
# np.triu(corr) is only truthy where a coefficient is non-zero, so an
# upper-triangle correlation of exactly 0 would have been left unmasked.
mask = np.triu(np.ones_like(corr, dtype=bool))
sns.heatmap(corr, square=True, mask=mask, annot=True, fmt='.2f')
plt.show()

In [None]:
# Bar chart of the number of cars per origin code, with the count written on
# top of each bar.
ax = plt.figure(figsize=(10, 5)).add_subplot()
sns.countplot(x='origin', data=df, ax=ax)
ax.bar_label(ax.containers[0], label_type='edge')
ax.set(title="origin distribution", xlabel="Origin", ylabel="Car count")
plt.show()

In [None]:
# Point plot of mpg per model year, one series per origin code, drawn without
# error bars.
plt.figure(figsize=(10, 5))
sns.pointplot(
    data=df, x='model year', y='mpg', hue='origin', errorbar=None
).set(
    title="model year - mpg based on origin",
    xlabel="model year",
    ylabel="mpg",
)
plt.show()

In [None]:
# Line plot of mpg against model year with enlarged axis labels.
# The labels are set before plotting, as in the original cell order; the
# trailing semicolon hides the Axes repr.
plt.figure(figsize=(10, 5))
plt.xlabel(xlabel="year", fontsize=15)
plt.ylabel(ylabel="mpg", fontsize=15)
sns.lineplot(data=df, x='model year', y='mpg');

In [None]:
# Bar chart of the number of cars per cylinder count, labelled with the counts.
ax = sns.countplot(data=df, x='cylinders', color='#4287f5')
ax.bar_label(ax.containers[0], label_type='edge')
ax.set_xlabel("cylinder", fontsize=15)
ax.set_ylabel("cars count", fontsize=15)
plt.show()

In [None]:
# Scatter plot of mpg against horsepower.
sns.scatterplot(data=df, x='horsepower', y='mpg').set(
    title="horsepower - mpg",
    xlabel="horsepower",
    ylabel="mpg",
)
plt.show()

### Create Model

In [None]:
# Split the frame into the feature matrix X (every column except the target)
# and the target y. y is selected with a list of column names, so it stays a
# one-column DataFrame rather than a Series.
X = df.drop('mpg', axis=1)
y = df[['mpg']]
# Show both shapes as one tuple: written as two bare expressions, only the
# last one (y.shape) was rendered and X.shape was discarded.
X.shape, y.shape

In [None]:
# Hold out 20% of the rows as a test set; random_state fixes the shuffle so
# the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [None]:
# Fit a Ridge regression (alpha=0.2) on the training split; fit() returns the
# estimator itself, so the call can be chained and the fitted model shown.
# NOTE(review): the features are not standardised anywhere in this notebook,
# and penalised models are sensitive to feature scale — confirm intended.
ridge = Ridge(alpha=0.2).fit(X_train, y_train)
ridge

In [None]:
# Fit a Lasso regression (alpha=0.5) on the training split and show the
# fitted estimator.
lasso = Lasso(alpha=0.5)
lasso.fit(X=X_train, y=y_train)

In [None]:
# Fit an ElasticNet regression (alpha=0.5, l1_ratio=0.1) on the training
# split; fit() returns the estimator, which is then shown as the cell output.
elastic = ElasticNet(l1_ratio=0.1, alpha=0.5).fit(X_train, y_train)
elastic

In [None]:
# Ridge score on the training split, then on the held-out test split
# (one value per line).
print(
    ridge.score(X_train, y_train),
    ridge.score(X_test, y_test),
    sep="\n",
)

In [None]:
# Lasso score on the training split, then on the held-out test split
# (one value per line).
print(
    *(lasso.score(X_part, y_part) for X_part, y_part in ((X_train, y_train), (X_test, y_test))),
    sep="\n",
)

In [None]:
# ElasticNet score on the training split, then on the held-out test split
# (one value per line).
print(elastic.score(X=X_train, y=y_train))
print(elastic.score(X=X_test, y=y_test))