# Random Forests Classifier for Diabetic Retinopathy Detection

In [2]:
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from skimage.io import imread
from skimage.feature import hog

from load_dataset import load_dataset

import warnings
warnings.filterwarnings('ignore')

### Loading and Dimensionality Reduction of Data

In [4]:
train_x, test_x, train_y, test_y = load_dataset(5000)

lda = LinearDiscriminantAnalysis()
lda.fit(train_x, train_y)

train_x = lda.transform(train_x)
test_x = lda.transform(test_x)

### Training of Classifier

In [5]:
rfc = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='log2', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

rfc.fit(train_x, train_y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='log2', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [6]:
preds = rfc.predict(test_x)

## Accuracy Score

In [7]:
print(metrics.accuracy_score(test_y, preds))

0.994


## Classification Report

In [8]:
print(metrics.classification_report(test_y, preds))

             precision    recall  f1-score   support

          0       0.99      1.00      1.00       737
          1       0.98      0.94      0.96        69
          2       0.99      0.99      0.99       150
          3       1.00      1.00      1.00        24
          4       1.00      1.00      1.00        20

avg / total       0.99      0.99      0.99      1000



## Confusion Matrix

In [9]:
print(metrics.confusion_matrix(test_y, preds))

[[736   1   0   0   0]
 [  3  65   1   0   0]
 [  1   0 149   0   0]
 [  0   0   0  24   0]
 [  0   0   0   0  20]]


## Prediction from an Image

In [12]:
img = imread('./data/processed/9980_left.jpeg')

fd = hog(img, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1,1), visualise=False)

fd.shape = (1, -1)

fd = lda.transform(fd)

print('Predicted: {}'.format(rfc.predict(fd)))

Predicted: [0]
