In [63]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline  import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_percentage_error

import joblib

In [9]:
# Load the two-column regression dataset; the path is relative to the notebook's
# working directory.
csv_path = 'linear_regression_data.csv'
data = pd.read_csv(csv_path)

In [17]:
# Dimensions of the loaded frame as (rows, columns).
data.shape

(100, 2)

In [19]:
# Per-column dtype and non-null counts, to check for missing values before modelling.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X       100 non-null    float64
 1   y       100 non-null    float64
dtypes: float64(2)
memory usage: 1.7 KB


In [33]:
# Feature matrix: selecting with a list keeps X two-dimensional (a DataFrame),
# which is the shape the scikit-learn estimators below are given.
feature_cols = ['X']
X = data[feature_cols]

In [35]:
# Regression target: a single column selected by name, so y is a Series.
target_col = 'y'
y = data[target_col]

In [37]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=98)
train_X, test_X, train_y, test_y = split_parts

In [39]:
# Two-step pipeline: standardize the feature, then fit ordinary least squares.
# Bundling the scaler with the model means the same scaling is re-applied
# automatically whenever the pipeline predicts.
scaler_step = ('scaler', StandardScaler())
model_step = ('model', LinearRegression())
pipeline = Pipeline(steps=[scaler_step, model_step])

In [41]:
# Fit every pipeline step on the training split only; the held-out rows are
# not passed in here.
pipeline.fit(X=train_X, y=train_y)

In [49]:
# Predict on both splits with the fitted pipeline so in-sample and held-out
# errors can be compared below.
train_pred, test_pred = (
    pipeline.predict(train_X),
    pipeline.predict(test_X),
)

In [51]:
# Echo the training-split predictions produced by pipeline.predict(train_X).
# NOTE(review): this prints the whole array; train_pred[:5] would keep the output short.
train_pred

array([8.44914413, 4.2022399 , 4.28577711, 5.25855125, 8.47056576,
       4.42244813, 6.91790666, 4.71331155, 5.72970025, 6.15969492,
       7.45719491, 7.84601821, 6.61273661, 4.95319141, 9.44435546,
       4.52411374, 8.95845813, 5.90038968, 7.02360132, 6.24884485,
       4.42923168, 5.11735494, 5.2790214 , 5.97592321, 9.64497192,
       6.91024578, 7.08195718, 9.09217044, 4.84844811, 4.53239539,
       6.00680211, 5.18879065, 4.66240065, 6.70101635, 8.62066585,
       8.78841083, 8.9754997 , 7.28450443, 4.31259039, 8.45454836,
       9.52709043, 5.60682937, 8.65505361, 7.07073411, 6.32725079,
       5.78679514, 9.284406  , 5.67653838, 8.52626585, 9.13440169,
       9.21478138, 9.54901186, 7.5054483 , 7.54110874, 8.09190419,
       5.89634795, 8.09864398, 6.56721726, 5.97510789, 4.49375038,
       8.39099179, 4.81423335, 7.49182419, 7.70756126, 4.94525848,
       8.21807114, 8.6942316 , 8.44746275, 6.20348744, 8.23131388,
       7.96642782, 5.2737077 , 4.58507793, 7.181452  , 6.79055

In [59]:
# In-sample fit quality. scikit-learn reports MAPE as a fraction rather than
# a percentage, so a value of 0.12 means roughly 12%.
train_mape = mean_absolute_percentage_error(train_y, train_pred)
train_r2 = r2_score(train_y, train_pred)
print("Train MAPE:", train_mape)
print("Train R2", train_r2)

Train MAPE: 0.11835536178081799
Train R2 0.7644057864152756


In [61]:
# Held-out fit quality, computed the same way as the training metrics.
# MAPE is a fraction rather than a percentage (0.09 means roughly 9%).
test_mape = mean_absolute_percentage_error(test_y, test_pred)
test_r2 = r2_score(test_y, test_pred)
print("Test MAPE:", test_mape)
print("Test R2", test_r2)

Test MAPE: 0.08787710194652996
Test R2 0.7902006549427103


In [67]:
# Persist the fitted pipeline (scaler + regression together) for deployment.
# NOTE(review): the filename is spelled 'pipline'; it is kept as-is because
# anything loading the artifact may depend on this exact name — confirm before renaming.
model_path = 'pipline_lr_deploy.pkl'
joblib.dump(pipeline, model_path)

['pipline_lr_deploy.pkl']

In [71]:
# Intercept of the fitted regression step. The 'scaler' step standardizes X
# before the 'model' step sees it, so (with StandardScaler's default centering)
# this is the prediction at the training-mean X, not at raw X = 0.
fitted_model = pipeline.named_steps['model']
fitted_model.intercept_

6.822655623615527

In [75]:
# Slope for the single feature. The regression is fitted on the output of the
# 'scaler' step, so this is the change in y per unit of standardized X
# (one training standard deviation with StandardScaler defaults), not per raw unit of X.
fitted_model = pipeline.named_steps['model']
fitted_model.coef_[0]

1.6648523735234784