In [2]:
import pandas as pd
import seaborn as sbs
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score,mean_squared_error
import os

In [3]:
os.chdir(r"D:\machine learning")
data = pd.read_csv("cairo_house.csv")
data

Unnamed: 0,Square_Meter,Age_of_house,Total_rooms,Total_Bath,Floor,City,Price
0,75,1,1,1,1,Helwan,1674939.9
1,100,4,2,1,3,El_Zamalek,3579991.5
2,125,3,3,2,4,Helwan,2504969.8
3,115,2,3,1,9,Shobra,2435038.5
4,79,5,1,1,5,El_Maadi,1984928.5
...,...,...,...,...,...,...,...
57,191,5,4,2,7,Shobra,3624928.7
58,210,2,5,3,4,El_Maadi,4110090.0
59,161,2,3,2,2,Helwan,3044968.8
60,190,2,3,2,6,Helwan,3480038.2


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Square_Meter  62 non-null     int64  
 1   Age_of_house  62 non-null     int64  
 2   Total_rooms   62 non-null     int64  
 3   Total_Bath    62 non-null     int64  
 4   Floor         62 non-null     int64  
 5   City          62 non-null     object 
 6   Price         62 non-null     float64
dtypes: float64(1), int64(5), object(1)
memory usage: 3.5+ KB


In [5]:
data.describe()

Unnamed: 0,Square_Meter,Age_of_house,Total_rooms,Total_Bath,Floor,Price
count,62.0,62.0,62.0,62.0,62.0,62.0
mean,147.016129,2.451613,2.983871,1.806452,4.612903,3272029.0
std,45.745014,1.237096,1.234636,0.62302,2.569396,867313.0
min,75.0,1.0,1.0,1.0,1.0,1674940.0
25%,105.5,1.0,2.0,1.0,3.0,2624970.0
50%,148.5,2.0,3.0,2.0,4.0,3310000.0
75%,188.75,3.0,4.0,2.0,6.75,3838835.0
max,250.0,5.0,5.0,3.0,9.0,5189939.0


In [6]:
data["City"]

0         Helwan
1     El_Zamalek
2         Helwan
3         Shobra
4       El_Maadi
         ...    
57        Shobra
58      El_Maadi
59        Helwan
60        Helwan
61        Shobra
Name: City, Length: 62, dtype: object

In [7]:
data["City"].unique()

array(['Helwan', 'El_Zamalek', 'Shobra', 'El_Maadi'], dtype=object)

In [8]:
data_city = pd.get_dummies(data["City"],dtype="int64")
data_city

Unnamed: 0,El_Maadi,El_Zamalek,Helwan,Shobra
0,0,0,1,0
1,0,1,0,0
2,0,0,1,0
3,0,0,0,1
4,1,0,0,0
...,...,...,...,...
57,0,0,0,1
58,1,0,0,0
59,0,0,1,0
60,0,0,1,0


In [10]:
data = pd.concat([data,data_city],axis=1)
data

Unnamed: 0,Square_Meter,Age_of_house,Total_rooms,Total_Bath,Floor,City,Price,El_Maadi,El_Zamalek,Helwan,Shobra
0,75,1,1,1,1,Helwan,1674939.9,0,0,1,0
1,100,4,2,1,3,El_Zamalek,3579991.5,0,1,0,0
2,125,3,3,2,4,Helwan,2504969.8,0,0,1,0
3,115,2,3,1,9,Shobra,2435038.5,0,0,0,1
4,79,5,1,1,5,El_Maadi,1984928.5,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
57,191,5,4,2,7,Shobra,3624928.7,0,0,0,1
58,210,2,5,3,4,El_Maadi,4110090.0,1,0,0,0
59,161,2,3,2,2,Helwan,3044968.8,0,0,1,0
60,190,2,3,2,6,Helwan,3480038.2,0,0,1,0


In [11]:
data = data.drop(["City"],axis=1)
data

Unnamed: 0,Square_Meter,Age_of_house,Total_rooms,Total_Bath,Floor,Price,El_Maadi,El_Zamalek,Helwan,Shobra
0,75,1,1,1,1,1674939.9,0,0,1,0
1,100,4,2,1,3,3579991.5,0,1,0,0
2,125,3,3,2,4,2504969.8,0,0,1,0
3,115,2,3,1,9,2435038.5,0,0,0,1
4,79,5,1,1,5,1984928.5,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...
57,191,5,4,2,7,3624928.7,0,0,0,1
58,210,2,5,3,4,4110090.0,1,0,0,0
59,161,2,3,2,2,3044968.8,0,0,1,0
60,190,2,3,2,6,3480038.2,0,0,1,0


In [12]:
Y = data["Price"]
X = data.drop("Price",axis=1)
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.2,random_state=10)

In [14]:
model = LinearRegression()
model.fit(X_train,Y_train)

In [15]:
model.score(X_test,Y_test)

0.9901863411044746

In [16]:
model.coef_

array([  12610.3167779 ,  -26116.05384856,   90733.41679575,
         44948.43812978,    8750.87833391, -266429.29847992,
       1149264.44536374, -503813.58169097, -379021.56519285])

In [17]:
model.intercept_

1148156.4762605876

In [18]:
s = int(input())
a = int(input())
r = int(input())
b = int(input())
f = int(input())
h = int(input())
z = int(input())
sh = int(input())
m = int(input())
model.predict([[s,a,r,b,f,h,z,sh,m]])

150
1
3
2
1
0
0
1
0




array([2880622.36238689])