In [None]:
import numpy as np
import pandas as pd

import seaborn as sns
from sklearn.linear_model import LogisticRegression
import statsmodels.api as sn 
import statsmodels.discrete.discrete_model as sm
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier


In [None]:
df= pd.read_csv("House-Price.csv", header=0)
df.head()

In [None]:
df.describe()

In [None]:
sns.boxplot(y="n_hos_beds", data=df)

In [None]:
sns.boxplot(y="n_hot_rooms", data=df)

In [None]:
sns.countplot(x="airport", data=df)

In [None]:
sns.countplot(x="waterbody", data=df)

In [None]:
sns.countplot(x="bus_ter", data=df)

In [None]:
# 99th percentile of n_hot_rooms; later cells use it as the reference value
# for inspecting and capping extreme values.
# The percentile is computed once: only the last expression of a cell is
# displayed, so the repeated bare calls were evaluated and thrown away.
uv = np.percentile(df.n_hot_rooms, [99])[0]
uv

In [None]:
# Rows whose n_hot_rooms lies above the 99th-percentile value uv.
df[df["n_hot_rooms"] > uv]

In [None]:
# Cap n_hot_rooms at 3 * uv.
# A single .loc call with a row mask and a column label writes into df itself.
# The chained form `df.n_hot_rooms[mask] = value` assigns through an
# intermediate Series: it raises SettingWithCopyWarning and, with pandas
# copy-on-write enabled, leaves df unchanged.
n_hot_rooms_cap = 3 * uv
df.loc[df["n_hot_rooms"] > n_hot_rooms_cap, "n_hot_rooms"] = n_hot_rooms_cap

In [None]:
# 1st percentile of rainfall.
# Computed once; the duplicate percentile call and the bare row filter that
# used to sit in this cell were evaluated and discarded (only the last
# expression of a cell is displayed).
lv = np.percentile(df.rainfall, [1])[0]

# NOTE(review): lv is computed but no visible cell uses it to floor rainfall
# the way uv caps n_hot_rooms - confirm whether a lower-bound treatment was
# intended here.
df.describe()

In [None]:
# Fill missing values in every numeric column with that column's mean.
# numeric_only=True is needed because df still contains the string-valued
# categorical columns at this point (they are dummy-encoded further down);
# since pandas 2.0 a plain df.mean() raises TypeError on such a frame.
# This also covers n_hos_beds, so a separate fill for that column is redundant.
df = df.fillna(df.mean(numeric_only=True))

In [None]:
df['avg_dist'] = (df.dist1 + df.dist2 + df.dist3 + df.dist4)/4

In [None]:
df.describe()

In [None]:
del df['dist3']
del df['dist1']
del df['dist2']
del df['dist4']
del df['bus_ter']

In [None]:
df.head()

In [None]:
# One-hot encode the remaining categorical columns.
# dtype=int forces 0/1 integer indicators. pandas >= 2.0 produces bool
# indicators by default; mixed with float columns they turn the design matrix
# into object dtype, which the statsmodels Logit fits below reject.
df = pd.get_dummies(df, dtype=int)

In [None]:
del df['airport_NO']

del df['waterbody_None']

In [None]:
df.head()

## Logistic Regression


In [None]:
# Single-predictor logistic regression: model Sold from price alone.
X = df[['price']]  # 2-D feature frame (one column)
# 1-D target. Passing the one-column DataFrame df[['Sold']] makes scikit-learn
# emit a DataConversionWarning about a column-vector y.
y = df['Sold']

clf_lrs = LogisticRegression()
clf_lrs.fit(X, y)

In [None]:
clf_lrs.intercept_


In [None]:
X_const = sn.add_constant(X)

X_const.head()

In [None]:
logit = sm.Logit(y,X_const).fit()

In [None]:
logit.summary()





## Logistic Regression with Multiple Predictors

In [None]:
X = df.loc[:, df.columns != 'Sold']
y = df['Sold']

In [None]:
clf_lr = LogisticRegression()

In [None]:
clf_lr.fit(X,y)

In [None]:
clf_lr.coef_

In [None]:
clf_lr.intercept_

In [None]:
X_cons = sn.add_constant(X)

In [None]:
logit = sm.Logit(y,X_cons).fit()

In [None]:
logit.summary()

## Prediction and Confusion Matrix

In [None]:
clf_lr.predict_proba(X)

In [None]:
y_pred = clf_lr.predict(X)

In [None]:
y_pred

In [None]:
y_pred_03 = (clf_lr.predict_proba(X)[:,1] >= 0.3).astype(bool)

In [None]:
y_pred_03


In [None]:
confusion_matrix(y,y_pred)

In [None]:
confusion_matrix(y,y_pred_03)

## Performance Metrics
## Precision
## Recall
## AUC (ROC)

In [None]:
precision_score(y,y_pred)

In [None]:
recall_score(y,y_pred)

In [None]:
# ROC AUC measures how well the model ranks rows across all thresholds, so it
# must be given continuous scores. Passing the hard labels from predict()
# collapses the ROC curve to a single operating point and misstates the AUC.
# Column 1 of predict_proba is the probability of the second class.
roc_auc_score(y, clf_lr.predict_proba(X)[:, 1])


In [None]:
##Linear Discriminant Analysis
clf_lda = LinearDiscriminantAnalysis()

In [None]:
clf_lda.fit(X,y)

In [None]:
y_pred_lda = clf_lda.predict(X)

In [None]:
confusion_matrix(y,y_pred_lda)


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0)

In [None]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
clf_lr = LogisticRegression()
clf_lr.fit(X_train, y_train)

In [None]:
y_test_pred =clf_lr.predict(X_test)

In [None]:
confusion_matrix(y_test,y_test_pred)

In [None]:
accuracy_score(y_test, y_test_pred)


In [None]:
## K-Nearest Neighbors

In [None]:
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)

In [None]:
scaler = preprocessing.StandardScaler().fit(X_test)
X_test_s = scaler.transform(X_test)
X_test_s

In [None]:
clf_knn_1 = KNeighborsClassifier(n_neighbors=1)
clf_knn_1.fit(X_train_s, y_train)

In [None]:
confusion_matrix(y_test, clf_knn_1.predict(X_test_s))

In [None]:
accuracy_score(y_test, clf_knn_1.predict(X_test_s))

## K-Nearest Neighbors with k = 3

In [None]:
clf_knn_3 = KNeighborsClassifier(n_neighbors=3)
clf_knn_3.fit(X_train_s, y_train)

In [None]:
confusion_matrix(y_test, clf_knn_3.predict(X_test_s))

In [None]:
accuracy_score(y_test, clf_knn_3.predict(X_test_s))

In [None]:

## KNeighbors Classifier: grid search over n_neighbors

In [None]:
params ={'n_neighbors': [1,2,3,4,5,6,7,8,9,10]}
grid_search_cv = GridSearchCV(KNeighborsClassifier(), params)
grid_search_cv.fit(X_train_s, y_train)



In [None]:
grid_search_cv.best_params_

In [None]:
optimised_KNN = grid_search_cv.best_estimator_

In [None]:
y_test_pred = optimised_KNN.predict(X_test_s)

In [None]:
confusion_matrix(y_test, y_test_pred)

In [None]:
accuracy_score(y_test, y_test_pred)