In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv("/content/drive/MyDrive/datasets/advertising.csv")
df

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,14.0
197,177.0,9.3,6.4,14.8
198,283.6,42.0,66.2,25.5


In [4]:
df.isna().sum()

TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64

In [5]:
df.shape

(200, 4)

In [6]:
x = df.iloc[:,:-1].values
y = df.iloc[:,-1].values

In [7]:
ones_column = np.ones((x.shape[0], 1))
x = np.hstack((ones_column, x))

In [8]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)
x_train

array([[  1. , 116. ,   7.7,  23.1],
       [  1. , 177. ,   9.3,   6.4],
       [  1. ,  43.1,  26.7,  35.1],
       [  1. ,  62.3,  12.6,  18.3],
       [  1. , 224. ,   2.4,  15.6],
       [  1. ,  38.2,   3.7,  13.8],
       [  1. ,  70.6,  16. ,  40.8],
       [  1. , 147.3,  23.9,  19.1],
       [  1. , 104.6,   5.7,  34.4],
       [  1. ,  76.3,  27.5,  16. ],
       [  1. ,  78.2,  46.8,  34.5],
       [  1. , 168.4,   7.1,  12.8],
       [  1. ,   8.7,  48.9,  75. ],
       [  1. ,   7.8,  38.9,  50.6],
       [  1. ,  76.4,   0.8,  14.8],
       [  1. , 129.4,   5.7,  31.3],
       [  1. ,  73.4,  17. ,  12.9],
       [  1. , 289.7,  42.3,  51.2],
       [  1. ,  19.6,  20.1,  17. ],
       [  1. , 197.6,   3.5,   5.9],
       [  1. , 284.3,  10.6,   6.4],
       [  1. , 184.9,  21. ,  22. ],
       [  1. , 112.9,  17.4,  38.6],
       [  1. ,  23.8,  35.1,  65.9],
       [  1. , 290.7,   4.1,   8.5],
       [  1. ,  19.4,  16. ,  22.3],
       [  1. , 293.6,  27.7,   1.8],
 

In [9]:
x_train.shape

(160, 4)

In [10]:
def linear_regression(X,y,learning_rate,epochs):
  m, n =X.shape    # 160 4
  weights = np.zeros(n)  #[0,0,0,0]
  cost_list = []

  for i in range(epochs):
    predictions = np.dot(X, weights)
    error = predictions - y
    gradient = np.dot(X.T,error)/m
    weights -= learning_rate*gradient

    cost = (1/(2*m)) * np.sum(error**2)
    cost_list.append(cost)
  return weights, cost_list


In [11]:
learning_rate = 0.00001
epochs = 20000
weights, cost_list = linear_regression(x_train, y_train, learning_rate, epochs)
y_pred = np.dot(x_test, weights)

mse = np.mean((y_test - y_pred)**2)
rmse = np.sqrt(mse)
mae = np.mean(np.abs(y_test-y_pred))
variance = np.sum((y_test - np.mean(y_test))**2)
r2_score = 1-mse/variance


In [12]:
print("Mean Absolute erroe is ",mse)
print("Root mean squared erroe is ",rmse)
print("Mean absolute error is :",mae)
print("R2_score is: ",r2_score)

Mean Absolute erroe is  6.210207386055659
Root mean squared erroe is  2.492028769106741
Mean absolute error is : 1.9533916578030923
R2_score is:  0.9949757392244486


In [13]:
df1 = pd.DataFrame({
    "Actual value":y_test,
    "Predicted value":y_pred,
    "Erroe":y_test-y_pred
})

df1

Unnamed: 0,Actual value,Predicted value,Erroe
0,16.9,17.712376,-0.812376
1,22.4,22.234407,0.165593
2,21.4,25.607045,-4.207045
3,7.3,7.987801,-0.687801
4,24.7,22.274733,2.425267
5,12.6,12.258483,0.341517
6,22.3,22.174635,0.125365
7,8.4,5.877034,2.522966
8,16.5,16.100445,0.399555
9,16.1,16.387352,-0.287352
