# TASK 4: " Sales Prediction"

### Use the advertising dataset given and analyse the relationship between `'TV advertising'` and `'sales'` using a 
`regression model.`

In [1]:
# 1 Getting the data ready
import pandas as pd
import numpy as np

advertising_data = pd.read_csv("advertising_data.csv")
advertising_data

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,14.0
197,177.0,9.3,6.4,14.8
198,283.6,42.0,66.2,25.5


In [2]:
advertising_data.shape

(200, 4)

In [3]:
advertising_data.describe

<bound method NDFrame.describe of         TV  Radio  Newspaper  Sales
0    230.1   37.8       69.2   22.1
1     44.5   39.3       45.1   10.4
2     17.2   45.9       69.3   12.0
3    151.5   41.3       58.5   16.5
4    180.8   10.8       58.4   17.9
..     ...    ...        ...    ...
195   38.2    3.7       13.8    7.6
196   94.2    4.9        8.1   14.0
197  177.0    9.3        6.4   14.8
198  283.6   42.0       66.2   25.5
199  232.1    8.6        8.7   18.4

[200 rows x 4 columns]>

In [4]:
# Checking for any missing data values
advertising_data.isna().sum() # No missing values in our data

TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64

In [5]:
advertising_data.dtypes

TV           float64
Radio        float64
Newspaper    float64
Sales        float64
dtype: object

In [6]:
advertising_data = advertising_data.drop("Newspaper",axis=1)

In [7]:
advertising_data = advertising_data.drop("Radio",axis=1)
advertising_data

Unnamed: 0,TV,Sales
0,230.1,22.1
1,44.5,10.4
2,17.2,12.0
3,151.5,16.5
4,180.8,17.9
...,...,...
195,38.2,7.6
196,94.2,14.0
197,177.0,14.8
198,283.6,25.5


In [8]:
# Choosing the ML estimator/ Model for above data
from sklearn.linear_model import Ridge
model = Ridge()


#Split into x/y variables
x = advertising_data.drop("Sales" , axis=1)
y = advertising_data["Sales"]

# Train Test Split
np.random.seed(42)
from sklearn.model_selection import train_test_split
x_train , x_test , y_train , y_test = train_test_split(x,y,test_size=0.2)

# Fitting our model
model.fit(x_train,y_train)
score = model.score(x_test , y_test)
print(f"Performance score of this Ridge estimator in percentage is = {score*100} % ")

Performance score of this Ridge estimator in percentage is = 80.25613038860038 % 


### Predicting `Sales` using predict function

In [9]:
y_preds = model.predict(x_test)
y_preds[:5]

array([16.06747252, 17.84847345, 23.25805571,  7.65626565, 19.22999754])

In [10]:
df = pd.DataFrame(data={"Actual values":y_test , "Predicted values":y_preds})
df["Difference"] = df["Predicted values"] - df["Actual values"]
df.head()

Unnamed: 0,Actual values,Predicted values,Difference
95,16.9,16.067473,-0.832527
15,22.4,17.848473,-4.551527
30,21.4,23.258056,1.858056
158,7.3,7.656266,0.356266
128,24.7,19.229998,-5.470002


### Evaluating our model using evaluation metrics

In [11]:
from sklearn.metrics import r2_score , mean_absolute_error , mean_squared_error
np.random.seed(42)

# Evaluate using metrics
print("Regression metrics on the test set are : ")
print(f"R2 Score is = {r2_score(y_test , y_preds)}")
print(f"Mean Absolute Error is = {mean_absolute_error(y_test , y_preds)}")
print(f"Mean Squared Error is = {mean_squared_error(y_test , y_preds)}")
print("  ")

Regression metrics on the test set are : 
R2 Score is = 0.8025613038860039
Mean Absolute Error is = 1.9502947852927242
Mean Squared Error is = 6.1010728924882045
  
