In [5]:
# provisioning
import sklearn as sk
import numpy as np
import pandas as pd
import os
import cv2
import random

#visualization
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.image import imread
import seaborn as sns

#prediction
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from sklearn.model_selection import GridSearchCV

%matplotlib inline

In [6]:
dir = 'organic_and_recyclable' # 'dataset'

In [7]:
features = ['O', 'R'] # ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash'] 
data = []

In [8]:
def create_training_data():
    for feature in features:
        path = os.path.join(dir, feature)
        label =  features.index(feature)
        
        for image in os.listdir(path):
            image_path = os.path.join(path, image)

            # cv2.IMREAD_COLOR: It specifies to load a color image. 
            # Any transparency of image will be neglected. It is the default flag. 
            # Alternatively, we can pass integer value 1 for this flag.
            # cv2.IMREAD_GRAYSCALE: It specifies to load an image in grayscale mode. 
            # Alternatively, we can pass integer value 0 for this flag.
            # cv2.IMREAD_UNCHANGED: It specifies to load an image as such including alpha channel. 
            # Alternatively, we can pass integer value -1 for this flag.
            waste_image = plt.imread(image_path)
#             print(waste_image.shape)
#             imgs = np.reshape(waste_image, (200,200))
            try:
                # normalize image
                waste_image =  np.resize(waste_image, (200,200))
                
                images = np.array(waste_image).flatten()
                # # confirm pixel range is 0-255
                # print('Data Type: %s' % images.dtype)
                # print('Min: %.3f, Max: %.3f' % (images.min(), images.max()))
                
                # convert from integers to floats
                images = images.astype('float32')
                # normalize to the range 0-1
                images /= 255.0
                # # confirm the normalization
                # print('Min: %.3f, Max: %.3f' % (images.min(), images.max()))
                data.append([images, label])
            except Exception as e:
                pass
create_training_data()


In [9]:
# Shuffle the data so that the data is not have 75% organic and 25% recycle data for example.
# When that's the case the machine will not learn and perform good. 
# So, if we shuffle, we can have a random % of organic and recycle data
random.shuffle(data)

In [10]:
print(len(data))

5780


In [11]:
X = []
y = []

for d, feature in data[0:1000]:    
    X.append(d)
    y.append(feature)

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [13]:
param_grid = { 'C':[0.1,1,100,1000],'kernel':['rbf','poly','sigmoid','linear'],'degree':[1,2,3,4,5,6],'gamma': [1, 0.1, 0.01, 0.001, 0.0001]}
gridSC = GridSearchCV(SVC(),param_grid)
gridSC.fit(X_train,y_train)
print(gridSC.best_score_)
print(gridSC.best_params_) # best_score_ best_params_

0.9442857142857143
{'C': 0.1, 'degree': 1, 'gamma': 1, 'kernel': 'rbf'}


In [14]:
grid_predictions = gridSC.predict(X_test)
print("Accuracy in percentage:", metrics.accuracy_score(y_test, grid_predictions)*100)
print(classification_report(y_test, grid_predictions,  labels=np.unique(y)))

Accuracy in percentage: 96.66666666666667
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        10
           1       0.97      1.00      0.98       290

    accuracy                           0.97       300
   macro avg       0.48      0.50      0.49       300
weighted avg       0.93      0.97      0.95       300



  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
from sklearn import metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [27]:
tn, fp, fn, tp = confusion_matrix(y_test, grid_predictions).ravel()
print(tn, fp, fn, tp)

cm = confusion_matrix(y_test, grid_predictions)
print(cm)

0 10 0 290
[[  0  10]
 [  0 290]]
