In [3]:
import pandas as pd
import scipy as sc
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import r2_score

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns
import matplotlib as mpl
from matplotlib import pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

from sklearn.externals.joblib import dump, load
import pickle
#open a folder for app files
import os
# Create the output folder for the web-app files. exist_ok=True makes the cell
# safe to re-run and avoids the check-then-create race of exists() + mkdir().
os.makedirs("rentwebapp", exist_ok=True)

### import rent df

In [2]:
# Load the cleaned rent listings.
# NOTE(review): absolute, machine-specific path - the notebook only runs where this file exists.
rent_csv_path = r'C:\Users\moshi\Downloads\notebooks\Project\2.Data cleaning\rent_clean.csv'
rent_df = pd.read_csv(rent_csv_path, sep=',', low_memory=False)

In [3]:
# Remove the columns that are not used as model inputs.
# Re-running this cell without reloading rent_df raises KeyError (columns already gone).
unused_cols = ["Unnamed: 0", "sub_area", 'street', 'post_updated_date']
rent_df = rent_df.drop(columns=unused_cols)

## Linear Regression model

In [4]:
# Separate the target column from the feature columns.
target_col = 'price'
y = rent_df[target_col]
X = rent_df.drop(columns=[target_col])

### Label encoding and feature scaling

In [5]:
# Integer-code each column independently; the encoder is refitted for every column.
# NOTE(review): this is applied to every column of X, not only to text columns -
# confirm that re-coding any numeric features this way is intended.
label_encoder = LabelEncoder()
X = X.apply(label_encoder.fit_transform)

In [6]:
# Standardise every feature column, one column at a time.
cols = X.columns.tolist()
for col_name in cols:
    X[col_name] = scale(X[col_name])

### Modeling and scores

In [7]:
# Train/test split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [8]:
# Fit a linear regression on the training split (fit() returns the estimator itself),
# then predict both splits so train and test scores can be compared.
linear_model = LinearRegression().fit(X_train, y_train)

y_train_pred = linear_model.predict(X_train)
y_pred = linear_model.predict(X_test)

In [9]:
# The values printed are R^2 scores (the labels call them "Accuracy").
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_pred)

print("r2 score with LinearRegression model")
print("Accuracy on training data: ", train_r2)
print("Accuracy on test data: ", test_r2)

r2 score with LinearRegression model
Accuracy on training data:  0.36658969421250776
Accuracy on test data:  0.36854194069939394


## Lasso model

### One-hot encoding and scaling of categorical features

In [10]:
# Every column except the target is a candidate feature.
featurs = rent_df.drop(columns=['price'])

In [11]:
# Expand the features into dummy (one-hot) columns and keep the underlying array.
combine_dummies = pd.get_dummies(data=featurs)
result = combine_dummies.values

In [12]:
# Standardise the dummy-encoded matrix: learn the statistics, then apply them.
scaler = StandardScaler().fit(result)
result = scaler.transform(result)

In [13]:
# Feature matrix (cut to the number of rows in rent_df) and target array.
n_rows = rent_df.shape[0]
X = result[:n_rows]
y = rent_df['price'].values

### Modeling and scores

In [14]:
# Same seeded split as for the linear model, now on the dummy-encoded features.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [15]:
# Fit a Lasso regression with default settings, then predict both splits.
lasso_model = Lasso().fit(X_train, y_train)

y_train_pred = lasso_model.predict(X_train)
y_pred = lasso_model.predict(X_test)

In [16]:
# The values printed are R^2 scores (the labels call them "Accuracy").
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_pred)

print("r2 score with Lasso model")
print("Accuracy on training data: ", train_r2)
print("Accuracy on test data: ", test_r2)

r2 score with Lasso model
Accuracy on training data:  0.677749765764358
Accuracy on test data:  0.6796979702371588


## Random Forest Regressor

### Encoding categorical values

In [7]:
# Build the random-forest inputs: all columns but the target, with only the
# two listed categorical columns expanded into dummy columns.
featurs = rent_df.drop(columns=['price'])
categ_col = ["property_type", "city"]
featurs_dum = pd.get_dummies(data=featurs, columns=categ_col)
result = featurs_dum.values

### Scaling

In [8]:
# Fit ONE scaler and use that same instance for the training matrix.
# `origin_scale_rf` is the object saved for the web app, so the app applies
# exactly the scaling the model was trained with.
# Fitting from `featurs_dum.values` (not from `result`, which this cell overwrites)
# keeps the cell idempotent: re-running it no longer refits the saved scaler on
# already-scaled data.
origin_scale_rf = StandardScaler().fit(featurs_dum.values)
scaler = origin_scale_rf  # old name kept so later cells that refer to `scaler` still work
result = origin_scale_rf.transform(featurs_dum.values)

In [9]:
# Feature matrix (cut to the number of rows in rent_df) and target array.
n_rows = rent_df.shape[0]
X = result[:n_rows]
y = rent_df['price'].values

### Hyperparameter tuning

In [None]:
from sklearn.model_selection import GridSearchCV

# Search space for the random forest.
param_grid = {
    'bootstrap': [True],
    'max_depth': [10, 20, 40, 70],
    # None = use all features, which is what 'auto' meant for regressors;
    # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3.
    'max_features': [None],
    'min_samples_leaf': [2, 4],
    'min_samples_split': [2, 4],
    'n_estimators': [100, 150, 300, 600]
    }

# Split the random-forest feature matrix here, before searching. Otherwise the
# search runs on whatever X_train / y_train an earlier model's cell left in the kernel.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

rf = RandomForestRegressor()
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, 
                          cv = 3, n_jobs = -1, verbose = 2)
grid_search.fit(X_train,y_train)

##grid_search.best_params_ result
# NOTE(review): max_depth 90 and n_estimators 400 below are not values in the grid
# above - presumably recorded from an earlier version of the grid; re-run to refresh.
#   {'bootstrap': True,
#    'max_depth': 90,
#    'max_features': 'auto',
#    'min_samples_leaf': 2,
#    'min_samples_split': 2,
#    'n_estimators': 400}

### Modeling and scores

In [10]:
# Seeded train/test split of the random-forest feature matrix.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [11]:
#rf_model=grid_search.best_estimator_
# Fixed hyper-parameters are used instead of the grid-search estimator above.
# random_state pins the forest's random sampling, so the scores printed below and
# the model pickled for the web app are the same on every run.
rf_model=RandomForestRegressor(bootstrap= True,max_depth=50,max_features=16,min_samples_leaf=2,min_samples_split= 2,n_estimators=400,random_state=42)
rf_model.fit(X_train,y_train)

y_pred = rf_model.predict(X_test)
y_train_pred = rf_model.predict(X_train)

# The values printed are R^2 scores (the labels call them "Accuracy").
print("r2 score with RendomForestReg model")
print("Accuracy on training data: " , r2_score(y_train, y_train_pred))
print("Accuracy on test data: ", r2_score(y_test, y_pred))

r2 score with RendomForestReg model
Accuracy on training data:  0.7477783344159517
Accuracy on test data:  0.7136565749340338


### add predicted prices to original df

In [None]:
# Predict a price for every row of X (training and test rows alike)
# and wrap the predictions in a one-column frame.
final_labels = rf_model.predict(X)
final_result = pd.Series(final_labels, name='RentPrice').to_frame()

In [16]:
# Attach the predictions to the listings (pandas aligns the two on their index).
predicted_prices = final_result['RentPrice']
rent_df["predicted price"] = predicted_prices

In [17]:
# Compare the mean of the actual prices with the mean of the model's predictions.
price_avg = rent_df["price"].mean()
pred_price_avg = rent_df["predicted price"].mean()

rounded_true_avg = round(price_avg, 2)
rounded_pred_avg = round(pred_price_avg, 2)
print("True prices average:", rounded_true_avg)
print("Predicted prices average:", rounded_pred_avg)

True prices average: 3799.63
Predicted prices average: 3800.02


### web app

In [21]:
# Save the files the web app needs: the model, the fitted scaler and the dummy-column layout.

# Write the model through a context manager so the file handle is flushed and
# closed deterministically instead of being left to the garbage collector.
with open("rentwebapp/model_new.pickle", 'wb') as model_file:
    pickle.dump(rf_model, model_file)

dump(origin_scale_rf, 'rentwebapp/std_scaler.bin', compress=True)

# A single row is enough: it carries the names and order of the dummy-encoded columns.
tmpdummies=featurs_dum[:1]
# Forward slash, matching the two paths above: 'rentwebapp\dum_col.csv' relied on the
# invalid escape '\d' and only named a file inside the folder on Windows.
tmpdummies.to_csv('rentwebapp/dum_col.csv', index = False)

In [1]:
#connect to server
import getpass
import os

import anvil.server

# The uplink key is a credential, so it must not live in the notebook.
# Read it from the ANVIL_UPLINK_KEY environment variable, or prompt for it
# (without echo) when the variable is not set.
# NOTE(review): the key that used to be hard-coded here has been exposed and should be rotated.
anvil_uplink_key = os.environ.get("ANVIL_UPLINK_KEY") or getpass.getpass("Anvil uplink key: ")
anvil.server.connect(anvil_uplink_key)

Connecting to wss://anvil.works/uplink
Anvil websocket open
Authenticated OK


In [4]:
@anvil.server.callable
def predict_rent(property_type,rooms,city,floor,elevator,parking,conditioner,renovated,window_borders,sun_boiler,bomb_shelter,storeroom):
    """Predict the rent price of a single listing with the saved model.

    Registered as an Anvil server callable. The listing is turned into a
    one-row frame, encoded and scaled with the artefacts stored in
    ``rentwebapp/`` and passed to the pickled model.

    Parameters
    ----------
    property_type, city
        Categorical values; expanded into dummy columns.
    rooms, floor
        Passed through unchanged (presumably numeric - confirm against the web form).
    elevator, parking, conditioner, renovated, window_borders, sun_boiler,
    bomb_shelter, storeroom
        Flags; each is converted with ``int()``.

    Returns
    -------
    int
        The predicted price, truncated to an integer.
    """
    listing = pd.DataFrame(
        {"property_type":property_type,
         "rooms":rooms,
         "city":city,
         "floor":floor,
         "elevator":int(elevator),
         "parking":int(parking),
         "conditioner":int(conditioner),
         "renovated":int(renovated),
         "window_borders":int(window_borders),
         "sun_boiler":int(sun_boiler),
         "bomb_shelter":int(bomb_shelter),
         "storeroom":int(storeroom)
        },
        index=[0],
    )

    # Encode exactly the columns that were dummy-encoded for training, so a
    # string-typed "rooms"/"floor" is never turned into dummies and silently dropped.
    encoded = pd.get_dummies(listing, columns=["property_type", "city"])

    # Align with the training-time column layout; dummy columns absent from this
    # listing (including categories never seen in training) are filled with 0.
    train_columns = pd.read_csv('rentwebapp/dum_col.csv', sep=',').columns
    encoded = encoded.reindex(columns=train_columns, fill_value=0)

    # Forward-slash paths match the ones the files were saved under; the previous
    # 'rentwebapp\std_scaler.bin' relied on the invalid escape '\s'.
    std_scaler = load('rentwebapp/std_scaler.bin')
    scaled = std_scaler.transform(encoded.values)

    # NOTE: unpickling executes arbitrary code - only load a model file you created yourself.
    with open('rentwebapp/model_new.pickle', 'rb') as model_file:
        saved_model = pickle.load(model_file)

    pred_price = saved_model.predict(scaled)
    # predict() returns a one-element array; index it explicitly, because int()
    # on an array with ndim > 0 is deprecated in NumPy.
    return int(pred_price[0])