In [3]:
%pip install pyod scikit-learn

import pandas as pd
from pyod.models.knn import KNN
from sklearn.preprocessing import StandardScaler






In [4]:
# Load the per-country COVID totals and key the rows by ISO country code
# so later joins and lookups align on 'iso_code'.
covidtotals = pd.read_csv('data/covidtotals.csv').set_index('iso_code')

In [5]:
# Columns used for the analysis: 'location' is a label kept for reporting,
# the remaining five are the numeric features fed to the detector.
analysisvars = ['location', 'total_cases_pm', 'total_deaths_pm', 'pop_density', 'median_age', 'gdp_per_capita']

# Keep only complete rows; any missing value in the selected columns drops the country.
covidanalysis = covidtotals[analysisvars].dropna()

# Standardize the numeric features (everything except the 'location' label)
# so that no single variable dominates the distance computation.
standardizer = StandardScaler()
covidanalysisstand = standardizer.fit_transform(covidanalysis.drop(columns='location'))

In [6]:
# Nearest-neighbour outlier detector; contamination=0.1 asks for roughly
# the top 10% of observations (by outlier score) to be labelled as outliers.
clf = KNN(contamination=0.1)
clf_name = 'KNN'

# Fit on the standardized features; left as the last expression so the
# cell displays the fitted estimator's parameters.
clf.fit(covidanalysisstand)

KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0)

In [8]:
# Pull the training-set results off the fitted detector:
# labels_ holds the 0/1 outlier flags, decision_scores_ the raw outlier scores.
y_pred, y_scores = clf.labels_, clf.decision_scores_


In [9]:
# Pair each country's outlier flag with its score, indexed the same way as
# covidanalysis so the two frames can be joined later.
pred = pd.DataFrame(
    zip(y_pred, y_scores),
    columns=['outliers', 'scores'],
    index=covidanalysis.index,
)

# Reproducible spot check of ten countries.
pred.sample(n=10, random_state=2)


Unnamed: 0_level_0,outliers,scores
iso_code,Unnamed: 1_level_1,Unnamed: 2_level_1
BHR,1,2.69427
BRA,0,0.75214
ZWE,0,0.210114
BGR,1,1.619251
CHN,0,0.935879
BGD,1,1.52091
GRD,0,0.681296
UZB,0,0.36932
MMR,0,0.370487
ECU,0,0.584605


In [10]:
# How many countries fall in each class (0 = inlier, 1 = outlier).
pred['outliers'].value_counts()

outliers
0    162
1     18
Name: count, dtype: int64

In [11]:
# Score range within each class; the gap between the inlier max and the
# outlier min shows where the labelling threshold falls.
(
    pred
    .groupby(['outliers'])[['scores']]
    .agg(['min', 'median', 'max'])
)

Unnamed: 0_level_0,scores,scores,scores
Unnamed: 0_level_1,min,median,max
outliers,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
0,0.07821,0.600947,1.397616
1,1.421653,1.653276,11.940163


In [12]:
# Attach flags and scores to the country data, keep only the flagged
# countries, and list the ten with the highest outlier scores.
(
    covidanalysis
    .join(pred)
    .loc[
        pred['outliers'] == 1,
        ['location', 'total_cases_pm', 'total_deaths_pm', 'scores'],
    ]
    .sort_values(by='scores', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,location,total_cases_pm,total_deaths_pm,scores
iso_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SGP,Singapore,531183.841,346.637,11.940163
QAT,Qatar,190908.716,256.017,3.037338
BHR,Bahrain,473167.024,1043.31,2.69427
LUX,Luxembourg,603439.463,1544.161,2.485225
PER,Peru,133238.998,6507.656,2.265343
BRN,Brunei,763475.441,396.435,2.259517
MDV,Maldives,356423.66,603.286,1.984898
MLT,Malta,227422.824,1687.628,1.961672
ARE,United Arab Emirates,113019.214,248.805,1.687302
BGR,Bulgaria,195767.887,5703.518,1.619251
