In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('seaborn-whitegrid')
plt.rcParams['figure.dpi'] = 300

In [2]:
df = sns.load_dataset('mpg')

In [3]:
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790,15.6,82,usa,ford mustang gl
394,44.0,4,97.0,52.0,2130,24.6,82,europe,vw pickup
395,32.0,4,135.0,84.0,2295,11.6,82,usa,dodge rampage
396,28.0,4,120.0,79.0,2625,18.6,82,usa,ford ranger


In [4]:
df.shape

(398, 9)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
mpg             398 non-null float64
cylinders       398 non-null int64
displacement    398 non-null float64
horsepower      392 non-null float64
weight          398 non-null int64
acceleration    398 non-null float64
model_year      398 non-null int64
origin          398 non-null object
name            398 non-null object
dtypes: float64(4), int64(3), object(2)
memory usage: 28.1+ KB


In [6]:
target = df['mpg']

In [7]:
df['horsepower'].isnull().value_counts()

False    392
True       6
Name: horsepower, dtype: int64

In [8]:
df.dropna(inplace=True)

In [9]:
df['horsepower'].isnull().value_counts()

False    392
Name: horsepower, dtype: int64

In [10]:
features = df[['horsepower','displacement','weight']]

In [11]:
features

Unnamed: 0,horsepower,displacement,weight
0,130.0,307.0,3504
1,165.0,350.0,3693
2,150.0,318.0,3436
3,150.0,304.0,3433
4,140.0,302.0,3449
...,...,...,...
393,86.0,140.0,2790
394,52.0,97.0,2130
395,84.0,135.0,2295
396,79.0,120.0,2625


In [12]:
features.isna().sum()

horsepower      0
displacement    0
weight          0
dtype: int64

In [13]:
target = df['mpg']

In [14]:
target.value_counts()

13.0    20
14.0    19
18.0    17
15.0    16
26.0    14
        ..
30.7     1
31.9     1
44.0     1
28.8     1
27.4     1
Name: mpg, Length: 127, dtype: int64

In [15]:
features.shape

(392, 3)

In [16]:
df.shape

(392, 9)

In [17]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features, target)

In [18]:
X_train.shape

(294, 3)

In [19]:
y_train.shape

(294,)

In [20]:
X_test.shape

(98, 3)

In [21]:
98/392*100

25.0

In [22]:
from sklearn.linear_model import LinearRegression

In [23]:
model=LinearRegression()

In [24]:
model.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [25]:
model.coef_

array([-0.04663276, -0.01009815, -0.00455092])

In [26]:
model.intercept_

43.765793564349806

In [27]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [28]:
predict = model.predict(X_test)

In [29]:
predict[:5]

array([20.69683654, 31.22143219, 13.9510498 , 26.41188183, 27.1115263 ])

In [30]:
y_test.to_numpy()[:5]

array([19.9, 29. , 14.5, 33.5, 31.6])

In [37]:
acc = r2_score(y_test, predict)
print(f"Accuracy: {acc*100:.2f}")

Accuracy: 72.79


In [38]:
from sklearn.preprocessing import StandardScaler

In [39]:
scale_model = StandardScaler()

In [40]:
scaled_features = scale_model.fit_transform(features)

In [41]:
scaled_features = pd.DataFrame(scaled_features, columns=['horsepower','displacement','weight'])

In [42]:
scaled_features[:5]

Unnamed: 0,horsepower,displacement,weight
0,0.664133,1.07729,0.62054
1,1.574594,1.488732,0.843334
2,1.184397,1.182542,0.540382
3,1.184397,1.048584,0.536845
4,0.924265,1.029447,0.555706


In [43]:
new_model = LinearRegression()

In [44]:
xtrain, xtest, ytrain, ytest = train_test_split(scaled_features, target)

In [45]:
xtrain.head()

Unnamed: 0,horsepower,displacement,weight
316,-0.376395,-0.578047,-0.31425
111,-0.506461,-0.692868,-0.78695
144,-0.766593,-0.999058,-1.005028
21,-0.376395,-0.836395,-0.645494
285,0.872238,1.4983,1.15218


In [46]:
ytrain.head()

318    29.8
112    19.0
146    28.0
21     24.0
287    16.5
Name: mpg, dtype: float64

In [47]:
new_model.fit(xtrain, ytrain)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [48]:
pred = new_model.predict(xtest)

In [49]:
pwd

'C:\\Users\\w10\\Desktop\\internbatch\\data science'

In [50]:
import pickle
fp=open('mil.pkl','wb')
pickle.dump(new_model,fp)

In [51]:
fp.close()