# Regression Project

**Estimate the housie prices with the information given**

### Importing Needed modules

In [4]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, PolynomialFeatures
from sklearn.metrics import r2_score

### Reading Dataset
- **Area**
- **Room**
- **Parking**
- **Warehouse**
- **Elevator**
- **Address**
- **Price**

In [6]:
df = pd.read_csv("1632300362534233.csv")
df.head()

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,63,1,True,True,True,Shahran,1850000000.0,61666.67
1,60,1,True,True,True,Shahran,1850000000.0,61666.67
2,79,2,True,True,True,Pardis,550000000.0,18333.33
3,95,2,True,True,True,Shahrake Qods,902500000.0,30083.33
4,123,2,True,True,True,Shahrake Gharb,7000000000.0,233333.33


## Optimizing Dataset

### Converting address from string type to numeric type using LabelEncoder from sklearn

In [7]:
le = LabelEncoder()

df["Address"] = le.fit_transform(df["Address"])
df.head()

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,63,1,True,True,True,156,1850000000.0,61666.67
1,60,1,True,True,True,156,1850000000.0,61666.67
2,79,2,True,True,True,117,550000000.0,18333.33
3,95,2,True,True,True,152,902500000.0,30083.33
4,123,2,True,True,True,150,7000000000.0,233333.33


### Removing irregular **Area** values

In [8]:
df["Area"] = df["Area"].str.replace(",", "")
df["Area"] = df["Area"].astype("int64")
df = df[df["Area"] < 2000]

## converting **Parking**, **Warehouse** and **Elevator** availability into Binary form

In [9]:
df = df.replace({True : 1,
                 False : 0})

## Splitting dataset into **Train** and **Test** parts

In [47]:
msk = np.random.rand(len(df))  < 0.8

train = df[msk]
test  = df[~msk]

## Creating arrays to train our model

In [48]:
train_x = np.asanyarray(train[["Area","Room","Parking","Warehouse","Elevator","Address"]])
train_y = np.asanyarray(train["Price(USD)"])

test_x = np.asanyarray(train[["Area","Room","Parking","Warehouse","Elevator","Address"]])
test_y = np.asanyarray(train["Price(USD)"])

# Polynomial regression

poly = PolynomialFeatures(degree=2)
train_x_poly = poly.fit_transform(train_x)

## Training Model

In [49]:
regr = LinearRegression()
regr.fit(train_x_poly, train_y)

print(f"coefficient : {regr.coef_}")
print(f"Intercept : {regr.intercept_}")

coefficient : [-1.87976605e-08  1.65588215e+03 -1.55500455e+05 -1.49019802e+04
  4.20976467e+03  2.85732088e+04 -3.22896669e+02 -3.38266619e+00
  1.01493878e+03 -1.61220322e+03 -5.73504745e+01  1.53073333e+03
  1.04018582e+01 -3.04482735e+03  5.35485336e+04  3.80735050e+04
 -3.27965126e+04 -3.73641417e+02 -1.49019802e+04  4.04749231e+04
  1.65506305e+04  2.71908120e+02  4.20976467e+03 -3.92454380e+04
 -4.43360332e+02  2.85732088e+04 -7.66227158e+02  3.98975237e+00]
Intercept : 33745.704192271834


## Model **Evaluation** with the Test data

In [50]:
test_x_poly = poly.fit_transform(test_x)
test_y_ = regr.predict(test_x_poly)

print("MAE: %.2f" % np.mean(np.absolute(test_y_ - test_y)))
print("MSE: %.2f" % np.mean((test_y_ - test_y) ** 2))
print("R2 : %.2f" % r2_score(test_y,test_y_ ) )

MAE: 81564.18
MSE: 21459692591.92
R2 : 0.71




## By Sina Kazemi