In [None]:
# importing libraries 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Loading the dataset

In [None]:

df = pd.read_csv("../input/brasilian-houses-to-rent/houses_to_rent.csv")
df.head()

In [None]:
df.drop('Unnamed: 0',axis=1,inplace=True)

In [None]:
df.describe(include='O')

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df.shape

# Data Preprocessing / Data Analysis

In [None]:
# Frequency table for each discrete column, produced by one loop instead of
# six copy-pasted cells. A header line labels each table because everything is
# now printed from a single cell.
for column in ['rooms', 'bathroom', 'parking spaces', 'animal', 'furniture', 'floor']:
    print(f"--- {column} ---")
    print(df[column].value_counts(), end="\n\n")

In [None]:
df.replace('-',np.nan,inplace=True)

In [None]:
df.head()

In [None]:
# creation of new attribute
df['New_hoa'] = df['hoa'].apply(lambda x: x[2:])
df['New_hoa'].head()

In [None]:
df['New_rent_amount'] = df['rent amount'].apply(lambda x: x[2:])
df['New_property_tax'] = df['property tax'].apply(lambda x: x[2:])
df['New_fire_insurance'] = df['fire insurance'].apply(lambda x: x[2:])
df['New_total'] = df['total'].apply(lambda x: x[2:])

In [None]:
col = ['rent amount','property tax','fire insurance','total']
df.drop(df[col],axis=1,inplace=True)

In [None]:
df.drop('hoa',axis=1,inplace=True)

In [None]:
df.head()

In [None]:
df.replace(np.nan,0,inplace=True)

In [None]:
df['floor'].dtype
df['floor'] = df['floor'].astype('int')

In [None]:
df.head()

In [None]:
animal_value = pd.get_dummies(df['animal'],drop_first=True)
furniture_value = pd.get_dummies(df['furniture'],drop_first=True)

In [None]:
df = pd.concat([df,animal_value,furniture_value],axis=1)

In [None]:
df.head()

In [None]:
df.drop('animal',axis=1,inplace=True)
df.drop('furniture',axis=1,inplace=True)

In [None]:
df.head()

In [None]:
df.info()

In [None]:
# df['New_hoa'] = df['New_hoa'].astype('int')
df['New_hoa'].replace(to_replace=',',value='',regex=True,inplace=True)

In [None]:
df['New_hoa'].replace(to_replace=',',value='',regex=True,inplace=True)

In [None]:
df['New_rent_amount'].replace(to_replace=',',value='',regex=True,inplace=True)
df['New_property_tax'].replace(to_replace=',',value='',regex=True,inplace=True)
df['New_fire_insurance'].replace(to_replace=',',value='',regex=True,inplace=True)
df['New_total'].replace(to_replace=',',value='',regex=True,inplace=True)

In [None]:
df['New_hoa'].replace(to_replace='m info',value='0',inplace=True)

In [None]:
df['New_hoa'].replace(to_replace='cluso',value='0',inplace=True)


In [None]:
df.isin(['cluso']).any()

In [None]:
df['New_property_tax'].replace(to_replace='cluso',value='0',inplace=True)

In [None]:
df = df.astype(dtype=np.int64)

In [None]:
df.head()

In [None]:
df['city'].value_counts()

# Exploratory Data Analysis

In [None]:
sns.countplot(df['rooms'])

In [None]:
sns.countplot(df['bathroom'])

In [None]:
sns.countplot(df['parking spaces'])

# Splitting the data

In [None]:

X = df.drop('city',axis=1)
y = df['city']

In [None]:
print(X.shape,",",y.shape)

In [None]:
from imblearn.over_sampling import RandomOverSampler

In [None]:
rs = RandomOverSampler()

In [None]:
X,y = rs.fit_resample(X,y)

In [None]:
print(X.shape)
print(y.shape)

In [None]:
X.head()

In [None]:
y.head()

In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [None]:
scaler.fit(X)
X = scaler.transform(X)

In [None]:
pd.DataFrame(X)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

In [None]:
print(X_train.shape,",",y_train.shape)
print(X_test.shape,",",y_test.shape)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [None]:
L_model = LogisticRegression()
S_model = SVC()
M_model = MLPClassifier(hidden_layer_sizes=(16,16))

In [None]:
L_model.fit(X_train,y_train)
S_model.fit(X_train,y_train)
M_model.fit(X_train,y_train)

In [None]:
print(L_model.score(X_test,y_test))
print(S_model.score(X_test,y_test))
print(M_model.score(X_test,y_test))

In [None]:
from sklearn.metrics import confusion_matrix,classification_report

In [None]:
y_predL = L_model.predict(X_test)
y_predS = S_model.predict(X_test)
y_predM = M_model.predict(X_test)

In [None]:
print(confusion_matrix(y_test,y_predL))

In [None]:
confusion_matrix(y_test,y_predS)

In [None]:
confusion_matrix(y_test,y_predM)

In [None]:
print(classification_report(y_test,y_predL))

In [None]:
print(classification_report(y_test,y_predS))

In [None]:
print(classification_report(y_test,y_predM))

In [None]:
from sklearn.ensemble import RandomForestClassifier
RD = RandomForestClassifier()

In [None]:
RD.fit(X_train,y_train)

In [None]:
y_predR = RD.predict(X_test) 

In [None]:
print(RD.score(X_test,y_test))

In [None]:
print(confusion_matrix(y_test,y_predR))

In [None]:
print(classification_report(y_test,y_predR))