# **Import Libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import warnings

# Silence every warning category for the rest of the session.
warnings.simplefilter(action='ignore')

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# Banner marking the end of the setup cell.
print("=" * 100)
print("Ready to launch!!!!")
print("=" * 100)

# **Load Data**

In [None]:
# Read the house-price CSV into a DataFrame.
house_data = pd.read_csv(filepath_or_buffer='../input/house-price-tehran-iran/housePrice.csv')

# **Data Preprocessing**

In [None]:
# Preview the first five rows of the raw data.
house_data.head(n=5)

In [None]:
# Preview the last five rows of the raw data.
house_data.tail(n=5)

In [None]:
# Show each column's dtype as a one-column table.
pd.DataFrame(house_data.dtypes)

In [None]:
# Fraction of missing values per column (0.0 means no missing values).
house_data.isnull().mean().to_frame()

> **Okay, the amount of missing data is small: `Address` is missing in a fraction of 0.006611 of the rows (about 0.66%, i.e. roughly 20–30 rows). Let's fill those gaps with the value that appears most often.**

In [None]:
# Impute missing addresses with the most frequent address (the mode).
most_common_address = house_data['Address'].mode()[0]
house_data['Address'] = house_data['Address'].fillna(most_common_address)

In [None]:
# Re-check the fraction of missing values per column after imputation.
house_data.isnull().mean()

> **Okay, everything looks fine now, and let's encode and change the type of our data.**

In [None]:
from sklearn.preprocessing import LabelEncoder # Let's encode the data

# Single LabelEncoder instance; it is re-fitted on each column it is applied to below.
encoder = LabelEncoder()

In [None]:
# Make the whole frame numeric (float64):
#   * object columns  -> integer category codes via LabelEncoder
#   * bool columns    -> 0 / 1
#   * integer columns -> float64 (this includes the codes and 0/1 flags produced above)
# NOTE(review): if an object column actually holds numbers stored as text, label encoding
# replaces the magnitudes with arbitrary category codes — confirm that is intended.
for column in house_data.columns:
    column_dtype = house_data[column].dtype
    if column_dtype == 'object':
        # fit_transform already returns the codes; a separate transform() pass is redundant.
        house_data[column] = encoder.fit_transform(list(house_data[column].values))
    elif column_dtype == 'bool':
        house_data[column] = house_data[column].astype('int64')

for column in house_data.columns:
    # is_integer_dtype matches every integer width, whereas comparing against 'int' only
    # matches the platform's default integer and can silently skip columns.
    if pd.api.types.is_integer_dtype(house_data[column]):
        house_data[column] = house_data[column].astype('float64')

In [None]:
# Preview the first five rows after encoding.
house_data.head(n=5)

In [None]:
# Preview the last five rows after encoding.
house_data.tail(n=5)

In [None]:
# Show each column's dtype after the conversion step.
pd.DataFrame(house_data.dtypes)

> **Okay, now the data is ready.**

# **Divide Data**

In [None]:
# Features: every column except the two price columns.
X = house_data.drop(columns=['Price', 'Price(USD)'])
# Target: the price in USD.
y = house_data['Price(USD)']

In [None]:
# Summarise the feature/target shapes and the sum of the target column.
x_shape, y_shape, total_price = X.shape, y.shape, y.sum()
f"Shape of X : {x_shape}, Shape of y : {y_shape}, Price : {total_price}"

In [None]:
# Histogram of the feature columns, 30 bins.
plt.hist(X, bins=30)
plt.show()

> **As you can see, there are many outliers.**

In [None]:
# Histogram of the target, 30 bins.
plt.hist(y, bins=30)
plt.show()

# **Modelling**

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline

In [None]:
# Baseline: KNN with default settings, fitted directly on the unscaled features.
model = KNeighborsRegressor()
model.fit(X, y)

# Pipeline: standardise the features, then 1-nearest-neighbour regression.
pipeline_steps = [
    ('scaler', StandardScaler()),
    ('model', KNeighborsRegressor(n_neighbors=1)),
]
pipe = Pipeline(steps=pipeline_steps)

pipe.fit(X, y)

In [None]:
# Predict with the fitted pipeline on X, the same data the pipeline was fitted on.
pred = pipe.predict(X)

# **Model Evaluate**

In [None]:
# scikit-learn metrics take (y_true, y_pred) in that order. MSE is symmetric, but R^2 is
# normalised by the variance of its first argument, so swapping the two changes the score.
# NOTE: `pred` was produced from the data the pipeline was fitted on, so these are
# training-set scores, not an estimate of performance on unseen data.
print(f"MSE : {mean_squared_error(y, pred)}")
print(f'R-2 : {r2_score(y, pred)}')

In [None]:
# Predicted Value & Actual Value
test = pd.DataFrame({'Predicted value':pred, 'Actual value':y})
fig= plt.figure(figsize=(16,8))
test = test.reset_index()
test = test.drop(['index'],axis=1)
plt.plot(test[:50])
plt.legend(['Actual value','Predicted value'])

In [None]:
# Collect actual and predicted prices side by side and write them to prediction.csv.
output = pd.DataFrame(data={'Train Actual Price': y, 'Train Predicted Price ': pred})

output.to_csv(path_or_buf='prediction.csv', index=False)

In [None]:
# Read the exported file back and show its first ten rows.
prediction = pd.read_csv(filepath_or_buffer='./prediction.csv')
prediction.head(n=10)

# **Make A Predictive System**

In [None]:
# First five rows of the feature matrix, for reference when building an input row.
X.head(n=5)

### **Desc :**

- Area
- Room
- Parking : {1.0: True, 0.0: False}
- Warehouse : {1.0: True, 0.0: False}
- Elevator : {1.0: True, 0.0: False}
- Address (encoded as a number)

input_data = (Area, Room, Parking, Warehouse, Elevator, Address)

In [None]:
# One house to price. Values must be given in the same order as the columns of X.
input_data = (219.0, 2.0, 1.0, 1.0, 1.0, 117.0) # input your data here

# Changing the data type to numpy array
change_input = np.asarray(input_data)

# Reshape the numpy array as we are predicting for one instance
reshape_array = change_input.reshape(1, -1)

# Wrap the row in a DataFrame carrying X's column names: the estimators were fitted on a
# DataFrame, and this raises immediately if the number of values does not match X's columns.
input_frame = pd.DataFrame(reshape_array, columns=X.columns)

# Predict with `pipe`, the scaled pipeline that was evaluated above, rather than the
# unscaled baseline `model`, so the prediction comes from the model whose scores were reported.
preds = pipe.predict(input_frame)
print(f'The price of this house is : {preds}$')

> **That's it! don't forget to give me feedback and upvote if you like it! thanks in advance!**