In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('australia covid.csv')

In [None]:
# Show the freshly loaded DataFrame as this cell's output.
data

In [None]:
# Summary statistics from describe(), transposed so each described column is a row.
data.describe().transpose()

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Inspect the dtype of every column.
data.dtypes

In [None]:
# Binary target: 0 when the Recovered value is exactly 0, otherwise 1.
recover = [0 if count == 0 else 1 for count in data.Recovered]
        

In [None]:
# Attach the 0/1 flags built from `Recovered` as a new `Recover` column (mutates `data` in place).
data['Recover']=recover

In [None]:
# Row count per Recover class, used by the bar chart that follows.
x = data.Recover.value_counts()

In [None]:
# Set the theme before creating the figure: set_style only affects axes
# created afterwards, so calling it after plotting leaves this chart unstyled.
sns.set_style('darkgrid')
plt.figure(figsize=(3,4))
# x and y are standalone value_counts() vectors, not columns of a frame, so no
# `data=` argument is passed (recent seaborn versions reject vectors whose
# length differs from `data`).
sns.barplot(x=x.index, y=x.values)
plt.xlabel('Recover')
plt.ylabel('Count')
plt.title('Class balance')
plt.show()

In [None]:
# Exact row count of each Recover class.
data['Recover'].value_counts()

In [None]:
# Show the DataFrame again, now including the `Recover` column.
data

### Resampling

In [None]:
from sklearn.utils import resample

In [None]:
# Partition the rows by target class: Recover == 1 versus Recover == 0.
data_recovered = data.loc[data['Recover'] == 1]
data_not_recovered = data.loc[data['Recover'] == 0]

In [None]:
# Draw 343 rows, with replacement, from the Recover == 1 rows; the fixed
# random_state keeps the draw repeatable.
# NOTE(review): 343 is hard-coded — presumably the row count of the other
# class; confirm, or derive it from len(data_not_recovered).
data_nt_upsampled = resample(
    data_recovered,
    replace=True,
    n_samples=343,
    random_state=123,
)

In [None]:
# Stack the Recover == 0 rows on top of the resampled Recover == 1 rows.
# Original index labels are kept (ignore_index is not set).
data_upsampled = pd.concat([data_not_recovered, data_nt_upsampled], axis=0)

In [None]:
# Row count of each Recover class after resampling.
data_upsampled['Recover'].value_counts()

In [None]:
# Show the combined, resampled DataFrame.
data_upsampled

In [None]:
# Set the theme before creating the figure so it applies to this chart.
sns.set_style('darkgrid')
# Row count per Recover class in the resampled frame.
y = data_upsampled['Recover'].value_counts()
plt.figure(figsize=(3,4))
# Only the value_counts() vectors are plotted; no `data=` frame is passed
# because x and y are not columns of any frame (recent seaborn versions reject
# vectors whose length differs from `data`).
sns.barplot(x=y.index, y=y.values)
plt.xlabel('Recover')
plt.ylabel('Count')
plt.title('Class balance after resampling')
plt.show()

In [None]:
# Remove the identifier/date columns and the raw `Recovered` count that the
# `Recover` target was derived from.
# Rebinding (instead of inplace=True) together with errors='ignore' makes the
# cell safe to re-run: a second execution no longer raises KeyError for
# columns that are already gone.
data_upsampled = data_upsampled.drop(
    columns=['Province/State','Country/Region','Date','Recovered','WHO Region'],
    errors='ignore',
)

In [None]:
# Show the resampled DataFrame after the column drop.
data_upsampled

In [None]:
# Feature matrix (five numeric columns) and the binary target.
X = data_upsampled.loc[:, ['Lat','Long','Confirmed','Deaths','Active']]
y = data_upsampled.Recover

In [None]:
# Hold out 30% of the rows for evaluation; the seed fixes the partition.
# NOTE(review): the rows were resampled with replacement before this split,
# so copies of the same row can land in both train and test — confirm this
# is acceptable for the reported scores.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.3, random_state=123
)

In [None]:
# Baseline model: k-nearest neighbours with k fixed at 6, fitted on the training split.
# NOTE(review): StandardScaler is imported at the top of the notebook but the
# features are passed in unscaled — confirm that is intended.
knn_clf = KNeighborsClassifier(n_neighbors=6)
knn_clf.fit(X=train_X, y=train_y)

In [None]:
# Classification report for the k=6 model on the held-out split.
print(
    metrics.classification_report(
        y_true=test_y,
        y_pred=knn_clf.predict(test_X),
    )
)

In [None]:
# Candidate neighbour counts 1..8 (np.arange excludes the upper bound).
tuned_parameters = [{'n_neighbors': np.arange(1, 9)}]

# 10-fold cross-validated search over k, ranked by accuracy.
# This rebinds `knn_clf`, replacing the baseline classifier object.
knn_clf = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=tuned_parameters,
    cv=10,
    scoring='accuracy',
)

In [None]:
# Run the search on the training split only.
knn_clf.fit(X=train_X, y=train_y)

In [None]:
# Best score found by the grid search.
knn_clf.best_score_

In [None]:
# Parameter combination selected by the grid search.
knn_clf.best_params_

In [None]:
# Classification report on the held-out split, predicting through the
# current `knn_clf` object.
print(
    metrics.classification_report(
        y_true=test_y,
        y_pred=knn_clf.predict(test_X),
    )
)

In [None]:
# Confusion matrix on the held-out split (true labels first, predictions second).
knn_cm = metrics.confusion_matrix(
    y_true=test_y,
    y_pred=knn_clf.predict(test_X),
)
knn_cm

In [None]:
# Tick labels for classes 0 and 1, shared by both axes so they stay consistent
# (fixes the misspelled labels and their stray leading spaces).
class_names = ["Not Recovered", "Recovered"]
# Confusion-matrix cells are integer counts, so annotate them with 'd'
# instead of a two-decimal float format.
sns.heatmap(
    knn_cm,
    annot=True,
    fmt='d',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Render explicitly so the cell output is the figure, not a Text repr.
plt.show()

In [None]:
# Overall accuracy on the held-out split.
print(
    'Accuracy score:',
    metrics.accuracy_score(y_true=test_y, y_pred=knn_clf.predict(test_X)),
)