In [1]:
import pandas as pd      
import numpy as np 
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pickle

In [2]:
def eval_metrics(actual, pred):
    """Print regression metrics comparing predictions with the true values.

    Parameters
    ----------
    actual : array-like
        True target values.
    pred : array-like
        Predicted target values, aligned element-wise with ``actual``.

    Returns
    -------
    None
        The R^2 score, MAE, MSE and RMSE are written to stdout only.
    """
    # Compute the MSE once and derive the RMSE from it instead of scoring twice.
    mse = mean_squared_error(actual, pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    score = r2_score(actual, pred)
    # print() is called for its side effect; its None result is not returned explicitly.
    print(" r2_score:", score, "\n", "mae:", mae, "\n", "mse:", mse, "\n", "rmse:", rmse)

In [3]:
# Load the advertising dataset from the working directory and preview its first rows.
df = pd.read_csv("Advertising.csv")
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [4]:
# Inspect column dtypes and non-null counts to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   radio      200 non-null    float64
 2   newspaper  200 non-null    float64
 3   sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


In [5]:
# Separate the predictor columns from the target column.
X = df.drop(columns="sales")
y = df["sales"]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit a linear regression model on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

# Persist the fitted model. The context manager closes the file handle as soon as
# the dump finishes instead of leaving it open until garbage collection.
filename = 'my_model'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

Unnamed: 0,TV,radio,newspaper,sales
count,200.0,200.0,200.0,200.0
mean,147.0425,23.264,30.554,14.0225
std,85.854236,14.846809,21.778621,5.217457
min,0.7,0.0,0.3,1.6
25%,74.375,9.975,12.75,10.375
50%,149.75,22.9,25.75,12.9
75%,218.825,36.525,45.1,17.4
max,296.4,49.6,114.0,27.0


In [7]:
# Score the fitted model on the held-out test split and print the metrics.
y_pred = model.predict(X_test)
eval_metrics(actual=y_test, pred=y_pred)

 r2_score: 0.8609466508230368 
 mae: 1.5116692224549084 
 mse: 3.79679723671522 
 rmse: 1.9485372043446387


## Deployment

Reload the pickled model and use it to predict sales for a single new observation.

In [8]:
# Reload the model that was pickled to 'my_model' earlier in this notebook.
# NOTE: unpickling can execute arbitrary code — only load files you created or trust.
# The context manager closes the file handle once loading is done.
with open('my_model', 'rb') as model_file:
    final_model = pickle.load(model_file)

In [9]:
# Feature names, in the column order of the training matrix X.
columns = X.columns.tolist()
columns

['TV', 'radio', 'newspaper']

In [10]:
# A single hand-written observation, keyed by the model's feature names.
my_dict = dict(TV=150, radio=25, newspaper=30)

In [11]:
# Build a one-row frame from the input dict. The plain constructor is the documented
# way to turn a list of records into a DataFrame, and passing `columns` pins the
# column order to the one captured from the training features.
# NOTE: this rebinds `df`, replacing the training DataFrame loaded at the top.
df = pd.DataFrame([my_dict], columns=columns)

In [12]:
# Display the one-row input frame to check it before predicting.
df

Unnamed: 0,TV,radio,newspaper
0,150,25,30


In [13]:
# Predict with the reloaded model; the printed result holds one value for the single input row.
prediction = final_model.predict(df)
print(prediction)

[14.50650223]


In [14]:
# Report the first (and only) prediction; int() drops the fractional part.
print(f"The estimated value of sales is {int(prediction[0])}. ")

The estimated value of sales is 14. 


In [15]:
# List the available conda environments (the active one is marked with *).
# The explicit %conda magic does not depend on IPython's automagic setting.
%conda env list

# conda environments:
#
base                     C:\Users\serda\anaconda3
DA                       C:\Users\serda\anaconda3\envs\DA
DL                       C:\Users\serda\anaconda3\envs\DL
ML                    *  C:\Users\serda\anaconda3\envs\ML
DL                       c:\Users\serda\anaconda3\envs\DL


Note: you may need to restart the kernel to use updated packages.


In [16]:
# Show the packages installed in the kernel's environment before installing requirements.
# The explicit %pip magic does not depend on IPython's automagic setting.
%pip list

Package                      Version
---------------------------- ---------------
absl-py                      2.1.0
annotated-types              0.6.0
anyascii                     0.3.2
asttokens                    2.2.1
astunparse                   1.6.3
attrs                        23.2.0
autocorrect                  2.6.1
backcall                     0.2.0
beartype                     0.17.2
Bottleneck                   1.3.7
certifi                      2024.2.2
charset-normalizer           3.3.2
chart-studio                 1.1.0
click                        8.1.7
colorama                     0.4.6
colorlover                   0.3.0
comm                         0.1.4
contourpy                    1.2.0
contractions                 0.1.73
cufflinks                    0.17.3
cycler                       0.11.0
daal4py                      2023.1.1
debugpy                      1.6.7.post1
decorator                    5.1.1
et-xmlfile                   1.1.0
executing                 

In [18]:
# Install the dependencies listed in requirements.txt into the kernel's environment.
# The explicit %pip magic does not depend on IPython's automagic setting and cannot
# be shadowed by a Python variable named `pip`.
%pip install -r requirements.txt

Collecting scikit-learn==1.2.2 (from -r requirements.txt (line 1))
  Using cached scikit_learn-1.2.2-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting streamlit==1.22.0 (from -r requirements.txt (line 2))
  Using cached streamlit-1.22.0-py2.py3-none-any.whl.metadata (7.3 kB)
Collecting altair<5,>=3.2.0 (from streamlit==1.22.0->-r requirements.txt (line 2))
  Using cached altair-4.2.2-py3-none-any.whl.metadata (13 kB)
Collecting blinker>=1.0.0 (from streamlit==1.22.0->-r requirements.txt (line 2))
  Using cached blinker-1.8.1-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools>=4.0 (from streamlit==1.22.0->-r requirements.txt (line 2))
  Using cached cachetools-5.3.3-py3-none-any.whl.metadata (5.3 kB)
Collecting protobuf<4,>=3.12 (from streamlit==1.22.0->-r requirements.txt (line 2))
  Using cached protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Collecting pympler>=0.9 (from streamlit==1.22.0->-r requirements.txt (line 2))
  Using cached Pympler-1.0.1-py3-none-any.whl

In [19]:
# Show the installed packages again to confirm what the requirements install added.
# The explicit %pip magic does not depend on IPython's automagic setting.
%pip list

Package                      Version
---------------------------- ---------------
absl-py                      2.1.0
altair                       4.2.2
annotated-types              0.6.0
anyascii                     0.3.2
asttokens                    2.2.1
astunparse                   1.6.3
attrs                        23.2.0
autocorrect                  2.6.1
backcall                     0.2.0
beartype                     0.17.2
blinker                      1.8.1
Bottleneck                   1.3.7
cachetools                   5.3.3
certifi                      2024.2.2
charset-normalizer           3.3.2
chart-studio                 1.1.0
click                        8.1.7
colorama                     0.4.6
colorlover                   0.3.0
comm                         0.1.4
contourpy                    1.2.0
contractions                 0.1.73
cufflinks                    0.17.3
cycler                       0.11.0
daal4py                      2023.1.1
debugpy                      1.6

In [1]:
# Launch the Streamlit app via a shell command.
# NOTE(review): this appears to hold the cell until the process is interrupted
# (the recorded output is ^C) — consider running it from a terminal instead.
!streamlit run my_app.py

^C
