# Linear Regression Multiple Variables

In [2]:
import pandas as pd
import numpy as np
from sklearn import linear_model

In [3]:
df = pd.read_csv('homeprices.csv')
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [4]:
df.bedrooms.median()

4.0

In [5]:
df.bedrooms = df.bedrooms.fillna(df.bedrooms.median())

In [6]:
df.bedrooms

0    3.0
1    4.0
2    4.0
3    3.0
4    5.0
5    6.0
Name: bedrooms, dtype: float64

In [7]:
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000


In [8]:
reg = linear_model.LinearRegression()
reg.fit(df.drop('price',axis='columns'),df.price)

In [10]:
# Second method
reg.fit(df[['area','bedrooms','age']],df.price)

In [11]:
reg.coef_

array([  112.06244194, 23388.88007794, -3231.71790863])

In [12]:
reg.intercept_

221323.00186540437

In [13]:
reg.predict([[3000, 3, 40]])



array([498408.25158031])

In [17]:
112.06244194*3000 + 23388.88007794*3 + -3231.71790863*40 + 221323.00186540384

498408.25157402386

In [16]:
reg.predict([[4100, 6.0, 8]])



array([795258.55102673])

In [18]:
reg.predict([[2500, 4, 5]])



array([578876.03748933])

# Exercise

In exercise folder (same level as this notebook on github) there is hiring.csv. This file contains hiring statics for a firm such as experience of candidate, his written test score and personal interview score. Based on these 3 factors, HR will decide the salary. Given this data, you need to build a machine learning model for HR department that can help them decide salaries for future candidates. Using this predict salaries for following candidates,

2 yr experience, 9 test score, 6 interview score

12 yr experience, 10 test score, 10 interview score

In [20]:
# pip install word2number

Collecting word2number
  Downloading word2number-1.1.zip (9.7 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: word2number
  Building wheel for word2number (setup.py): started
  Building wheel for word2number (setup.py): finished with status 'done'
  Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5589 sha256=b52c644de7257a21c1807e2d4038a708657856caddfd0a3277921958e0d8e0dc
  Stored in directory: c:\users\kp121\appdata\local\packages\pythonsoftwarefoundation.python.3.11_qbz5n2kfra8p0\localcache\local\pip\cache\wheels\cd\ef\ae\073b491b14d25e2efafcffca9e16b2ee6d114ec5c643ba4f06
Successfully built word2number
Installing collected packages: word2number
Successfully installed word2number-1.1
Note: you may need to restart the kernel to use updated packages.


In [21]:
from word2number import w2n

In [22]:
d = pd.read_csv("hiring.csv")
d

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [23]:
d.experience = d.experience.fillna('zero')

In [24]:
d

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [25]:
d.experience = d.experience.apply(w2n.word_to_num)
d

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,,7,72000
7,11,7.0,8,80000


In [28]:
import math
median_test_score = math.floor(d['test_score(out of 10)'].mean())
median_test_score

7

In [27]:
# import math
# median_test_score = math.floor(d['test_score(out of 10)'].median())
# median_test_score

8

In [29]:
d['test_score(out of 10)'] = d['test_score(out of 10)'].fillna(median_test_score)
d

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,7.0,7,72000
7,11,7.0,8,80000


In [30]:
reg = linear_model.LinearRegression()
reg.fit(d[['experience','test_score(out of 10)','interview_score(out of 10)']],d['salary($)'])

In [31]:
reg.predict([[2,9,6]])



array([53713.86677124])

In [32]:
reg.predict([[12,10,10]])



array([93747.79628651])