In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='online.csv')

In [3]:
# Preview only the first rows instead of rendering the entire frame.
df.head(10)

Unnamed: 0,Marketing Spend,Administration,Transport,Area,Profit
0,114523.61,136897.8,471784.1,Dhaka,192261.83
1,162597.7,151377.59,443898.53,Ctg,191792.06
2,153441.51,101145.55,407934.54,Rangpur,191050.39
3,144372.41,118671.85,383199.62,Dhaka,182901.99
4,142107.34,91391.77,366168.42,Rangpur,166187.94
5,131876.9,99814.71,362861.36,Dhaka,156991.12
6,134615.46,147198.87,127716.82,Ctg,156122.51
7,130298.13,145530.06,323876.68,Rangpur,155752.6
8,120542.52,148718.95,311613.29,Dhaka,152211.77
9,123334.88,108679.17,304981.62,Ctg,149759.96


# Shape

In [4]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(50, 5)

# Checking Null

In [5]:
# Count missing values per column (isna is the canonical name; isnull is its alias).
df.isna().sum()

Marketing Spend    0
Administration     0
Transport          0
Area               0
Profit             0
dtype: int64

# Separate X and Y

In [6]:
# Feature matrix: every column except the 'Profit' target.
x = df.drop(columns=['Profit'])

In [7]:
# Preview the feature matrix (first rows only) to confirm 'Profit' was removed.
x.head(10)

Unnamed: 0,Marketing Spend,Administration,Transport,Area
0,114523.61,136897.8,471784.1,Dhaka
1,162597.7,151377.59,443898.53,Ctg
2,153441.51,101145.55,407934.54,Rangpur
3,144372.41,118671.85,383199.62,Dhaka
4,142107.34,91391.77,366168.42,Rangpur
5,131876.9,99814.71,362861.36,Dhaka
6,134615.46,147198.87,127716.82,Ctg
7,130298.13,145530.06,323876.68,Rangpur
8,120542.52,148718.95,311613.29,Dhaka
9,123334.88,108679.17,304981.62,Ctg


In [8]:
# Target vector: the 'Profit' column as a Series.
y = df.loc[:, 'Profit']

In [9]:
# Display the target Series (the 'Profit' column) to confirm the selection.
y

0     192261.83
1     191792.06
2     191050.39
3     182901.99
4     166187.94
5     156991.12
6     156122.51
7     155752.60
8     152211.77
9     149759.96
10    146121.95
11    144259.40
12    141585.52
13    134307.35
14    132602.65
15    129917.04
16    126992.93
17    125370.37
18    124266.90
19    122776.86
20    118474.03
21    111313.02
22    110352.25
23    108733.99
24    108552.04
25    107404.34
26    105733.54
27    105008.31
28    103282.38
29    101004.64
30     99937.59
31     97483.56
32     97427.84
33     96778.92
34     96712.80
35     96479.51
36     90708.19
37     89949.14
38     81229.06
39     81005.76
40     78239.91
41     77798.83
42     71498.49
43     69758.98
44     65200.33
45     64926.08
46     49490.75
47     42559.73
48     35673.41
49     14681.40
Name: Profit, dtype: float64

# One Hot Encoding

In [10]:
# One-hot encode the categorical 'Area' column into indicator columns.
# drop_first=True omits the first category so it acts as the implicit
# baseline (a row with all indicators at 0) and the remaining indicators
# are not perfectly collinear with the model intercept.
# dtype=int keeps the indicators as 0/1 integers on every pandas version;
# pandas >= 2.0 would otherwise return booleans.
city = pd.get_dummies(x['Area'], drop_first=True, dtype=int)

In [11]:
# Preview the indicator columns produced by the one-hot encoding (first rows only).
city.head(10)

Unnamed: 0,Dhaka,Rangpur
0,1,0
1,0,0
2,0,1
3,1,0
4,0,1
5,1,0
6,0,0
7,0,1
8,1,0
9,0,0


In [12]:
# Remove the raw categorical column now that it has been encoded.
# errors='ignore' keeps this cell idempotent: re-running it after 'Area'
# is already gone is a no-op instead of raising KeyError.
x = x.drop(columns='Area', errors='ignore')

In [13]:
# Preview the numeric-only feature matrix (first rows only) after dropping 'Area'.
x.head(10)

Unnamed: 0,Marketing Spend,Administration,Transport
0,114523.61,136897.8,471784.1
1,162597.7,151377.59,443898.53
2,153441.51,101145.55,407934.54
3,144372.41,118671.85,383199.62
4,142107.34,91391.77,366168.42
5,131876.9,99814.71,362861.36
6,134615.46,147198.87,127716.82
7,130298.13,145530.06,323876.68
8,120542.52,148718.95,311613.29
9,123334.88,108679.17,304981.62


# Concatenating the Dataset

In [14]:
# Append the one-hot indicator columns to the numeric features.
# Any indicator columns already present in x are dropped first so that
# re-running this cell does not silently add duplicate columns (which would
# feed duplicated features to the model).
x = pd.concat([x.drop(columns=city.columns, errors='ignore'), city], axis=1)

In [15]:
# Preview the final feature matrix (numeric columns plus area indicators), first rows only.
x.head(10)

Unnamed: 0,Marketing Spend,Administration,Transport,Dhaka,Rangpur
0,114523.61,136897.8,471784.1,1,0
1,162597.7,151377.59,443898.53,0,0
2,153441.51,101145.55,407934.54,0,1
3,144372.41,118671.85,383199.62,1,0
4,142107.34,91391.77,366168.42,0,1
5,131876.9,99814.71,362861.36,1,0
6,134615.46,147198.87,127716.82,0,0
7,130298.13,145530.06,323876.68,0,1
8,120542.52,148718.95,311613.29,1,0
9,123334.88,108679.17,304981.62,0,0


In [16]:
from sklearn.model_selection import train_test_split

In [17]:
# Hold out 25% of the rows for evaluation; random_state pins the shuffle
# so the same split is produced on every run.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

In [18]:
from sklearn.linear_model import LinearRegression

In [19]:
# Create a linear regression estimator with its default settings.
reg = LinearRegression()

In [20]:
# Fit the regression on the training split only; the test split stays unseen.
reg.fit(X=xtrain, y=ytrain)

LinearRegression()

In [21]:
# Show the held-out target values that the predictions will be compared against.
ytest

28    103282.38
11    144259.40
10    146121.95
41     77798.83
2     191050.39
27    105008.31
38     81229.06
31     97483.56
22    110352.25
4     166187.94
33     96778.92
35     96479.51
26    105733.54
Name: Profit, dtype: float64

In [22]:
# Predict profit for the held-out rows using the fitted model.
predict = reg.predict(X=xtest)

In [23]:
# Show the predicted profits for the test rows (one value per row of xtest).
predict

array([103501.0825284 , 128011.28068627, 126695.43891127,  70573.91718775,
       173381.96874259, 124238.07860872,  69298.09250304,  98399.41936876,
       116419.1480864 , 161430.98134847,  94740.73303076,  89920.22800514,
       105956.86065332])

In [27]:
# R-squared of the fitted model on the held-out split; this is the same
# quantity as r2_score(ytest, reg.predict(xtest)).
reg.score(X=xtest, y=ytest)

0.8840978623923471

# R-Squared Value

In [24]:
from sklearn.metrics import r2_score

In [25]:
# Coefficient of determination between the true and predicted test profits.
score = r2_score(y_true=ytest, y_pred=predict)

In [26]:
# Display the R-squared of the predictions on the test set.
score

0.8840978623923471