## Importing package

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score as acc
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split as tts
from sklearn.svm import SVC as svc
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

## Importing Dataset

In [2]:
dataset = pd.read_csv('Main.csv')

In [3]:
dataset.columns

Index(['Unnamed: 0', 'Price', 'Area', 'Location', 'No. of Bedrooms', 'Resale',
       'MaintenanceStaff', 'Gymnasium', 'SwimmingPool', 'LandscapedGardens',
       'JoggingTrack', 'RainWaterHarvesting', 'IndoorGames', 'ShoppingMall',
       'Intercom', 'SportsFacility', 'ATM', 'ClubHouse', 'School',
       '24X7Security', 'PowerBackup', 'CarParking', 'StaffQuarter',
       'Cafeteria', 'MultipurposeRoom', 'Hospital', 'WashingMachine',
       'Gasconnection', 'AC', 'Wifi', 'Children'splayarea', 'LiftAvailable',
       'BED', 'VaastuCompliant', 'Microwave', 'GolfCourse', 'TV',
       'DiningTable', 'Sofa', 'Wardrobe', 'Refrigerator', 'city'],
      dtype='object')

In [4]:
le1 = LabelEncoder()
le2 = LabelEncoder()

In [5]:
dataset['location'] = le1.fit_transform(dataset['Location'])
dataset['city1'] = le2.fit_transform(dataset['city'])

In [6]:
X = dataset[['Area', 'No. of Bedrooms', 'location',
       'MaintenanceStaff', 'Gymnasium', 'SwimmingPool', 'LandscapedGardens',
       'JoggingTrack', 'RainWaterHarvesting', 'IndoorGames', 'ShoppingMall',
       'Intercom', 'SportsFacility', 'ATM', 'ClubHouse', 'School',
       '24X7Security', 'PowerBackup', 'CarParking', 'StaffQuarter',
       'Cafeteria', 'MultipurposeRoom', 'Hospital',
       'Gasconnection', "Children'splayarea", 'LiftAvailable', 'VaastuCompliant', 'GolfCourse', 'Wardrobe']].values

In [7]:
y = dataset['city1'].values

## Training-Testing Split

In [8]:
X_train, X_test, y_train, y_test = tts(X, y, train_size=0.8, random_state=0)

## Modeling

In [9]:
classifier1 = RandomForestClassifier()
classifier2 = svc(kernel='rbf', C=50, gamma='auto')
classifier3 = LogisticRegression(penalty='l2', C=50)
classifier4 = KNeighborsClassifier()

In [10]:
classifier1.fit(X_train, y_train)

RandomForestClassifier()

In [11]:
classifier2.fit(X_train, y_train)

SVC(C=50, gamma='auto')

In [12]:
classifier3.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression(C=50)

In [13]:
classifier4.fit(X_train, y_train)

KNeighborsClassifier()

## Model Evaluation

In [14]:
y_pred1 = classifier1.predict(X_test)
y_pred2 = classifier2.predict(X_test)
y_pred3 = classifier3.predict(X_test)
y_pred4 = classifier4.predict(X_test)

In [15]:
print("RandomForest: ",acc(y_pred1, y_test)) 
print("SVC: ",acc(y_pred2, y_test)) 
print("LogisticRegression: ",acc(y_pred3, y_test))
print("KNeigborClassifier: ",acc(y_pred4, y_test))

RandomForest:  0.8845745487638405
SVC:  0.7796147429091461
LogisticRegression:  0.31548612164416806
KNeigborClassifier:  0.7309267404823298


## Ensembling part

In [16]:
le2.classes_

array(['Bangalore', 'Chennai', 'Delhi', 'Hyderabad', 'Kolkata', 'Mumbai'],
      dtype=object)

Bangalore: 0<br/>
Chennai: 1
<br/>Delhi: 2
<br/>Hyderabda: 3
<br/>Kolkata: 4
<br/>Mumbai: 5

In [17]:
city = []
n = len(y_test)
y_main_pred = []
for i in range(n):
    city = [0, 0, 0, 0, 0, 0]
    city[y_pred1[i]] += 1
    city[y_pred2[i]] += 1
    city[y_pred4[i]] += 1
    
    y_main_pred.append(city.index(max(city)))

In [18]:
acc(y_main_pred, y_test)

0.8524192325193387