In [1]:
%matplotlib
import matplotlib.pyplot as plt

Using matplotlib backend: module://matplotlib_inline.backend_inline


In [2]:
import pandas as pd
import numpy as np
from sklearn import linear_model

In [3]:
# Small housing dataset; the third row has no bedroom count (None).
df = pd.DataFrame(
    {
        "area": [2600, 3000, 3200, 3600, 4000],
        "bedrooms": [3, 4, None, 3, 5],
        "age": [20, 15, 18, 30, 8],
        "price": [550000, 565000, 610000, 595000, 760000],
    }
)

In [4]:
# Inspect the raw frame; row 2 was created with bedrooms=None.
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


### Handling Missing Values (Data Preprocessing)

We can fill the missing `bedrooms` value with the median of that column (rounded down to a whole number).

In [7]:
import math

# The known bedroom counts have a median of 3.5; floor it so the imputed
# value is a whole number of bedrooms.
median_bedrooms = math.floor(df["bedrooms"].median())
median_bedrooms

3

In [9]:
# Replace the missing bedroom count with the floored median.
df["bedrooms"] = df["bedrooms"].fillna(median_bedrooms)

In [10]:
# Re-display the frame to check the bedrooms column after fillna.
df

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,3.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000


### Training the Model (Creating Linear Regression Object)

In [11]:
# Fit price as a linear function of area, bedrooms and age.
reg = linear_model.LinearRegression()
reg.fit(df[["area", "bedrooms", "age"]], df["price"])

In [13]:
# Fitted coefficients, one per feature, in the column order passed to fit().
reg.coef_

array([   137.25, -26025.  ,  -6825.  ])

These are the coefficients m1, m2 and m3 of `area`, `bedrooms` and `age`, in that order.

In [14]:
# Fitted intercept (the constant term of the linear model).
reg.intercept_

np.float64(383724.9999999998)

In [20]:
# Price for a 3000 sq. ft., 3-bedroom, 40-year-old home.
# The model was fitted on a DataFrame, so pass one with the same column names;
# a bare nested list makes scikit-learn warn that X has no valid feature names.
reg.predict(pd.DataFrame([[3000, 3, 40]], columns=["area", "bedrooms", "age"]))



array([444400.])

In [21]:
# Price for a 2500 sq. ft., 4-bedroom, 5-year-old home.
# The model was fitted on a DataFrame, so pass one with the same column names;
# a bare nested list makes scikit-learn warn that X has no valid feature names.
reg.predict(pd.DataFrame([[2500, 4, 5]], columns=["area", "bedrooms", "age"]))



array([588625.])

## Exercise (Hiring Prediction)

In [59]:
# Load the hiring data; the path is relative to the notebook's working directory.
hdf = pd.read_csv("hiring.csv")

In [60]:
# Inspect the raw data: experience is spelled out in words, and some
# experience / test-score values are missing.
hdf

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [25]:
!pip install word2number

Collecting word2number
  Downloading word2number-1.1.zip (9.7 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: word2number
  Building wheel for word2number (setup.py): started
  Building wheel for word2number (setup.py): finished with status 'done'
  Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5590 sha256=91537fa49bc2a0a3cb7e86e4b1d81181208706e5dcb0b10815a8a1baef255e9f
  Stored in directory: c:\users\ryave\appdata\local\pip\cache\wheels\5b\79\fb\d25928e599c7e11fe4e00d32048cd74933f34a74c633d2aea6
Successfully built word2number
Installing collected packages: word2number
Successfully installed word2number-1.1


In [26]:
from word2number import w2n

### Changing a cell value using `iloc`

In [61]:
# Row 0 has no experience recorded; store the word "zero" in column 0
# (experience) so it can later be converted with w2n.word_to_num.
# NOTE(review): hdf["experience"].fillna("zero") would fill every missing entry
# without hard-coding row positions.
hdf.iloc[0,0] = "zero"

In [62]:
# Row 1 also has no experience recorded; store the word "zero" there too.
hdf.iloc[1,0] = "zero"

In [63]:
# Re-display the frame to check the experience column after the iloc assignments.
hdf

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [64]:
# Mean of the available test scores, rounded down to a whole number.
# Relies on `math` having been imported in an earlier cell.
testscore_mean = math.floor(hdf["test_score(out of 10)"].mean())

In [65]:
# Replace the missing test score with the floored mean held in testscore_mean.
hdf["test_score(out of 10)"] = hdf["test_score(out of 10)"].fillna(testscore_mean)

In [66]:
# Re-display the frame to check the test-score column after fillna.
hdf

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,7.0,7,72000
7,eleven,7.0,8,80000


In [67]:
# Convert the spelled-out experience values ("zero", "five", ...) to numbers.
hdf["experience"] = hdf["experience"].apply(w2n.word_to_num)

In [47]:
# Display the frame after converting experience to numbers.
# NOTE(review): the saved output of this cell is labelled In [47] and shows 8.0
# for row 6's test score, while the display before the conversion shows 7.0.
# It appears to come from an earlier run; re-run the cell to refresh it.
hdf

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,8.0,7,72000
7,11,7.0,8,80000


In [68]:
# Separate estimator for the hiring exercise, so the house-price model in
# `reg` is left untouched.
hreg = linear_model.LinearRegression()

In [69]:
# Fit salary against experience, test score and interview score.
hreg.fit(hdf[["experience", "test_score(out of 10)", "interview_score(out of 10)"]], hdf["salary($)"])

In [70]:
# Salary for 2 years of experience, test score 9, interview score 6.
# The model was fitted on a DataFrame, so pass one with the same column names;
# a bare nested list makes scikit-learn warn that X has no valid feature names.
hreg.predict(
    pd.DataFrame(
        [[2, 9, 6]],
        columns=["experience", "test_score(out of 10)", "interview_score(out of 10)"],
    )
)



array([53713.86677124])

In [71]:
# Salary for 12 years of experience, test score 10, interview score 10.
# The model was fitted on a DataFrame, so pass one with the same column names;
# a bare nested list makes scikit-learn warn that X has no valid feature names.
hreg.predict(
    pd.DataFrame(
        [[12, 10, 10]],
        columns=["experience", "test_score(out of 10)", "interview_score(out of 10)"],
    )
)



array([93747.79628651])