In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib
# Default size, in inches (width, height), for every figure created after this point.
matplotlib.rcParams["figure.figsize"] = (20, 10)

In [2]:
# Load the raw listings from the CSV export and preview the first rows.
df1 = pd.read_csv(filepath_or_buffer="datasetexcel.csv")
df1.head()

Unnamed: 0,Location,Number of Rooms,Price
0,"Jos Terminus, Ahmadu Bello Way, Jos",1,75000
1,"Rayfield Road, Jos",2,210000
2,"Rock Haven Street, Jos",3,250000
3,"Angwan Rukuba, Jos",3,130000
4,"Lamingo Road, Jos",2,220000


In [3]:
# Dimensions of the raw frame as (rows, columns).
df1.shape

(49, 3)

In [4]:
# One-hot encode the location: one indicator column per distinct location name.
# dtype=int pins the indicators to 0/1 integers; without it pandas >= 2.0 returns
# booleans, so the displayed table and the dtype mix of the feature matrix would
# depend on the installed pandas version.
dummies = pd.get_dummies(df1["Location"], dtype=int)
dummies.head(3)

Unnamed: 0,"Angwan Rukuba, Jos","Bauchi Ring Road, Jos","Busa Buji Street, Jos","Farin Gada Road, Jos","Jos Terminus, Ahmadu Bello Way, Jos","Lamingo Road, Jos","Old Airport Road, Jos","Rayfield Road, Jos","Rock Haven Street, Jos"
0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,1


In [5]:
# Place the location indicator columns to the right of the original columns.
df2 = pd.concat(
    [df1, dummies],
    axis=1,
)
df2.head(3)

Unnamed: 0,Location,Number of Rooms,Price,"Angwan Rukuba, Jos","Bauchi Ring Road, Jos","Busa Buji Street, Jos","Farin Gada Road, Jos","Jos Terminus, Ahmadu Bello Way, Jos","Lamingo Road, Jos","Old Airport Road, Jos","Rayfield Road, Jos","Rock Haven Street, Jos"
0,"Jos Terminus, Ahmadu Bello Way, Jos",1,75000,0,0,0,0,1,0,0,0,0
1,"Rayfield Road, Jos",2,210000,0,0,0,0,0,0,0,1,0
2,"Rock Haven Street, Jos",3,250000,0,0,0,0,0,0,0,0,1


In [6]:
# The text column is now represented by its indicator columns, so remove it.
df3 = df2.drop(columns=["Location"])
df3.head(3)

Unnamed: 0,Number of Rooms,Price,"Angwan Rukuba, Jos","Bauchi Ring Road, Jos","Busa Buji Street, Jos","Farin Gada Road, Jos","Jos Terminus, Ahmadu Bello Way, Jos","Lamingo Road, Jos","Old Airport Road, Jos","Rayfield Road, Jos","Rock Haven Street, Jos"
0,1,75000,0,0,0,0,1,0,0,0,0
1,2,210000,0,0,0,0,0,0,0,1,0
2,3,250000,0,0,0,0,0,0,0,0,1


In [7]:
# Dimensions after encoding the location and dropping the text column, as (rows, columns).
df3.shape


(49, 11)

In [8]:
# Feature matrix: every column except the target.
X = df3.drop(columns="Price")
X.head()

Unnamed: 0,Number of Rooms,"Angwan Rukuba, Jos","Bauchi Ring Road, Jos","Busa Buji Street, Jos","Farin Gada Road, Jos","Jos Terminus, Ahmadu Bello Way, Jos","Lamingo Road, Jos","Old Airport Road, Jos","Rayfield Road, Jos","Rock Haven Street, Jos"
0,1,0,0,0,0,1,0,0,0,0
1,2,0,0,0,0,0,0,0,1,0
2,3,0,0,0,0,0,0,0,0,1
3,3,1,0,0,0,0,0,0,0,0
4,2,0,0,0,0,0,1,0,0,0


In [9]:
# Target vector: the price column.
y = df3["Price"]
y.head()

0     75000
1    210000
2    250000
3    130000
4    220000
Name: Price, dtype: int64

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

In [11]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split (fit() returns the estimator itself),
# then report its score on the held-out split.
lr_clf = LinearRegression().fit(X_train, y_train)
lr_clf.score(X_test, y_test)

0.8364996027710523

In [12]:
def predict_price(location, noOfRooms, model=None, columns=None):
    """Estimate the price of a home from its location and number of rooms.

    Builds a single feature row laid out like the training matrix (room count
    in position 0, followed by one 0/1 indicator per location) and passes it
    to the regressor.

    Parameters
    ----------
    location : str
        Location name. Must match one of the location indicator columns
        exactly (same spelling and case).
    noOfRooms : int or float
        Number of rooms, written to the first feature.
    model : object, optional
        Any object exposing ``predict``. Defaults to the notebook-level
        ``lr_clf``.
    columns : sequence of str, optional
        Feature names in training order, room-count column first. Defaults
        to ``X.columns``.

    Returns
    -------
    The first element of ``model.predict(...)`` for the constructed row.

    Raises
    ------
    ValueError
        If ``location`` is not one of the location columns. Previously this
        surfaced as an opaque ``IndexError`` from indexing an empty
        ``np.where`` result, which also made the ``loc_index >= 0`` guard
        unreachable.
    """
    if model is None:
        model = lr_clf
    feature_names = list(X.columns if columns is None else columns)

    # Position 0 holds the room count; excluding it from the lookup means a
    # location string equal to that column's name can never overwrite it.
    location_names = feature_names[1:]
    if location not in location_names:
        raise ValueError(
            f"Unknown location {location!r}; expected one of {location_names}"
        )

    row = np.zeros(len(feature_names))
    row[0] = noOfRooms
    row[1 + location_names.index(location)] = 1

    # Predict on a named frame so an estimator fitted on a DataFrame receives
    # matching feature names instead of warning about a bare array.
    return model.predict(pd.DataFrame([row], columns=feature_names))[0]

In [13]:
# Spot check: predicted price for a 2-room home at Jos Terminus.
predict_price('Jos Terminus, Ahmadu Bello Way, Jos', 2)

152396.14243323446

In [14]:
# Spot check: predicted price for a 3-room home on Rayfield Road.
predict_price('Rayfield Road, Jos', 3)

283509.6439169139

In [15]:
import pickle

# Serialize the fitted regressor to disk so it can be loaded outside this notebook.
with open("homegram_home_prices_model.pickle", "wb") as model_file:
    pickle.dump(lr_clf, model_file)

In [16]:
import json

# Save the feature names, lower-cased and in the order of the feature matrix,
# next to the pickled model.
columns = {"data_columns": [name.lower() for name in X.columns]}
with open("columns.json", "w") as out_file:
    json.dump(columns, out_file)