# Predicting the price of a house when its location and area (in sqft) are given.

In [None]:
import pandas as pd

In [None]:
# Read the multi-location housing sheet into a DataFrame and display it.
df = pd.read_csv(
    '/content/multilocation - Sheet1.csv'
)
df

Unnamed: 0,area,sqft,price
0,R.S.Puram,850,8500000
1,R.S.Puram,1000,9200000
2,R.S.Puram,1100,9500000
3,R.S.Puram,1150,9650000
4,R.S.Puram,1250,9800000
5,R.S.Puram,1500,10250000
6,Thudiyalur,850,3300000
7,Thudiyalur,1000,3700000
8,Thudiyalur,1100,3850000
9,Thudiyalur,1150,3945000


# One-Hot Encoding technique
Since the data covers more than one location, we cannot train on it directly: the city/area names must first be converted into numerical values. We cannot simply label them as City1 = 1, City2 = 2, and so on, because the model would treat those numbers as ordered magnitudes rather than as categories. Instead, we use a technique called One-Hot Encoding.

One-Hot Encoding (OHE) creates dummy (0/1) columns, one for each distinct value of the encoded column; every row gets a 1 in the column that matches its value and a 0 in all the others.

For example, applying OHE to the area column of this data set produces the following:



In [None]:
# One-hot encode the area names: one indicator column per distinct area.
dummies = pd.get_dummies(df['area'])
dummies

Unnamed: 0,R.S.Puram,Saibaba Colony,Thudiyalur,Vadavalli,saravanampatti
0,1,0,0,0,0
1,1,0,0,0,0
2,1,0,0,0,0
3,1,0,0,0,0
4,1,0,0,0,0
5,1,0,0,0,0
6,0,0,1,0,0
7,0,0,1,0,0
8,0,0,1,0,0
9,0,0,1,0,0


In [None]:
# Place the indicator columns to the right of the original columns.
mergeddf = pd.concat([df, dummies], axis=1)
mergeddf

Unnamed: 0,area,sqft,price,R.S.Puram,Saibaba Colony,Thudiyalur,Vadavalli,saravanampatti
0,R.S.Puram,850,8500000,1,0,0,0,0
1,R.S.Puram,1000,9200000,1,0,0,0,0
2,R.S.Puram,1100,9500000,1,0,0,0,0
3,R.S.Puram,1150,9650000,1,0,0,0,0
4,R.S.Puram,1250,9800000,1,0,0,0,0
5,R.S.Puram,1500,10250000,1,0,0,0,0
6,Thudiyalur,850,3300000,0,0,1,0,0
7,Thudiyalur,1000,3700000,0,0,1,0,0
8,Thudiyalur,1100,3850000,0,0,1,0,0
9,Thudiyalur,1150,3945000,0,0,1,0,0


In [None]:
# Display the merged frame again; this cell performs no new computation.
mergeddf

Unnamed: 0,area,sqft,price,R.S.Puram,Saibaba Colony,Thudiyalur,Vadavalli,saravanampatti
0,R.S.Puram,850,8500000,1,0,0,0,0
1,R.S.Puram,1000,9200000,1,0,0,0,0
2,R.S.Puram,1100,9500000,1,0,0,0,0
3,R.S.Puram,1150,9650000,1,0,0,0,0
4,R.S.Puram,1250,9800000,1,0,0,0,0
5,R.S.Puram,1500,10250000,1,0,0,0,0
6,Thudiyalur,850,3300000,0,0,1,0,0
7,Thudiyalur,1000,3700000,0,0,1,0,0
8,Thudiyalur,1100,3850000,0,0,1,0,0
9,Thudiyalur,1150,3945000,0,0,1,0,0


In [None]:
# Drop the text column 'area' (it is now encoded) and one indicator column,
# 'Saibaba Colony'; a row whose remaining indicators are all 0 stands for it.
finaldf = mergeddf.drop(columns=['area', 'Saibaba Colony'])
finaldf

Unnamed: 0,sqft,price,R.S.Puram,Thudiyalur,Vadavalli,saravanampatti
0,850,8500000,1,0,0,0
1,1000,9200000,1,0,0,0
2,1100,9500000,1,0,0,0
3,1150,9650000,1,0,0,0
4,1250,9800000,1,0,0,0
5,1500,10250000,1,0,0,0
6,850,3300000,0,1,0,0
7,1000,3700000,0,1,0,0
8,1100,3850000,0,1,0,0
9,1150,3945000,0,1,0,0


In [None]:
# Feature matrix: every column of finaldf except the target 'price'.
X = finaldf.drop(columns='price')

In [None]:
# Number of feature columns (sqft plus the area indicator columns).
X.shape[1]

5

In [None]:
# Target vector: the price column.
y = finaldf['price']

# Building the Machine Learning model

In [None]:
from sklearn import linear_model

In [None]:
# Create an (unfitted) ordinary linear regression model.
reg=linear_model.LinearRegression()

In [None]:
# Fit the model on the feature matrix X and the price target y.
reg.fit(X,y)

LinearRegression()

In [None]:
# Feature order follows X's columns:
# [sqft, R.S.Puram, Thudiyalur, Vadavalli, saravanampatti].
# This row is a 2000 sqft house with the Thudiyalur indicator set.
reg.predict([[2000,0,1,0,0]])



array([6288310.00562114])

In [None]:
# Feature order follows X's columns:
# [sqft, R.S.Puram, Thudiyalur, Vadavalli, saravanampatti].
# This row is a 2000 sqft house with the saravanampatti indicator set.
reg.predict([[2000,0,0,0,1]])



array([6668310.00562114])

In [None]:
# Feature order follows X's columns:
# [sqft, R.S.Puram, Thudiyalur, Vadavalli, saravanampatti].
# All indicators are 0 here, which encodes the area whose indicator column
# ('Saibaba Colony') was dropped before training.
reg.predict([[2000,0,0,0,0]])



array([10872476.6722878])

# Creating a predictive system

In [None]:
import numpy as np

In [None]:
def predictprice(location,sqft,model=None,columns=None):
    """Predict the price of a house from its location and area in sqft.

    Builds a single feature row laid out like the training columns: slot 0
    holds the area, the indicator slot named ``location`` is set to 1 and
    every other slot stays 0, e.g. ``[sqft, 0, 0, 1, 0]``. The row is then
    passed to ``model.predict``.

    Parameters
    ----------
    location : str
        Area name. It must equal an indicator column name exactly
        (the comparison is case-sensitive) for that indicator to be set.
    sqft : int or float
        Area of the house in square feet.
    model : object with a ``predict`` method, optional
        Fitted regressor. Defaults to the notebook-level ``reg``.
    columns : sequence of str, optional
        Feature names in training order, with the area column first.
        Defaults to ``X.columns``.

    Returns
    -------
    The first element of what ``model.predict`` returns for the one row.

    Notes
    -----
    A location that has no indicator column leaves every indicator at 0.
    That is the encoding of the area whose dummy column was dropped before
    training, so such a call no longer raises ``IndexError``; the flip side
    is that a misspelt location is priced like that baseline area.
    """
    # Resolve the notebook globals only when the caller did not supply them.
    model = reg if model is None else model
    columns = list(X.columns if columns is None else columns)

    # Start from an all-zero row, one slot per training feature.
    x = np.zeros(len(columns))
    x[0] = sqft

    # Search only the indicator slots (index 1 onwards) so that a location
    # string equal to the name of column 0 can never overwrite the area.
    if location in columns[1:]:
        x[columns.index(location, 1)] = 1

    return model.predict([x])[0]


In [None]:
# Sanity check of the lookup used in predictprice: the position of the
# 'R.S.Puram' column within X's columns.
np.where(X.columns=='R.S.Puram')[0][0]


1

In [None]:
# Ask for a location and an area, then print the predicted price.
# Surrounding whitespace is stripped from the location so that a stray space
# typed at the prompt does not stop it from matching its column name.
location=input("Enter the location").strip()
sqft=int(input("Enter the sqft"))
predictedprice=predictprice(location,sqft)
print(predictedprice)

Enter the locationR.S.Puram
Enter the sqft950
8979608.768971331




In [None]:
# Scratch lookup: position of the last entered `location` within X's columns.
# The [0][0] indexing raises IndexError when no column has that name.
locindex=np.where(X.columns==location)[0][0]

# Saving the model to a file (for download)

In [None]:
import pickle
# Serialize the fitted regressor to a binary file in the working directory.
with open('Multilocationmodelds2', 'wb') as model_file:
    pickle.dump(reg, model_file)

# Saving the column names to a file (for download)

In [None]:
import json

In [None]:
# Store the feature names, lower-cased and in training order, as JSON.
columns = {'Data_columns': [name.lower() for name in X.columns]}
with open('columns.json', 'w') as columns_file:
    json.dump(columns, columns_file)