# Deep Learning Project - Template for test accuracy

In [2]:
import numpy as np
import pandas as pd
import keras
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import os
import matplotlib.pyplot as plt

## Predicting test set images

In [3]:
# Load a saved model
model = keras.models.load_model('model_all_data_3ConvLayers_down_changed_architecture_100_100_16to30')

In [6]:
directory = r'/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic_signs'
files_list = []
for filename in os.listdir(directory):
    if filename.endswith(".JPG") or filename.endswith(".png"):
        #print(os.path.join(directory, filename))
        files_list.append(os.path.join(directory, filename))
    else:
        continue
files_list

['/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic_signs/IMG_0564.JPG',
 '/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic_signs/IMG_0558.JPG',
 '/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic_signs/IMG_0559.JPG',
 '/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic_signs/IMG_0565.JPG',
 '/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic_signs/IMG_0571.JPG',
 '/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic_signs/IMG_0598.JPG',
 '/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic_signs/IMG_0573.JPG',
 '/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic_signs/IMG_0600 copy.JPG',
 '/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic

In [13]:
# test_dir = "/Users/franz/Desktop/DL Project/Test Folder/"
test_dir = '/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/lisbon_traffic_signs'

test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(100, 100),
    batch_size=20,
    shuffle=False,
    class_mode="categorical",
    color_mode="grayscale"
)

test_generator.reset() 
pred_scores = model.predict_generator(test_generator)
class_pred = np.argmax(pred_scores, axis=-1)

Found 61 images belonging to 1 classes.


In [14]:
class_pred

array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5])

In [107]:
# Get the true values and compute accuracy

class_true = pd.read_csv("/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/Test Folder/gt_test.csv", ";")["ClassId"]

print("Test set accuracy:", accuracy_score(class_pred, class_true))

Test set accuracy: 0.8699920823436262


In [None]:
# Compute confusion matrix

#!pip install git+http://github.com/scikit-learn/scikit-learn.git

# multilabel_confusion_matrix(class_pred, class_true)
cm = pd.DataFrame(confusion_matrix(class_true, class_pred))

In [None]:
# Double check accuracy
#np.trace(cm) / cm.sum().sum()

In [None]:
# Save the confusion matrix to an excel file
cm.to_excel('cm_all_data_dropout_0_9_15_epochs.xlsx')

## Analysing the results

In [2]:
res = pd.read_csv("cm_all_data_dropout_0_9_20_epochs.csv", ";").drop(columns="Unnamed: 0")
number_per_class = res.sum(axis=1)

true_pos = []
for i in range(43):
    true_pos.append(res.iloc[i,i])

analysis = pd.DataFrame(true_pos, number_per_class).reset_index().rename(columns={"index":"actual_number", 0:"true_positives"})
analysis["acc"] = analysis["true_positives"] / analysis["actual_number"]

In [3]:
#setting the training directory (the training images can be found there, already sorted by class into folders)
base_training_dir = "/Users/franz/Desktop/DL Project/Train/Final_Training/Images"

#setting the directory where the selected training and validation images will be stored in
created_dir = "/Users/franz/Desktop/DL Project/Selected"

#storing all the folder names that belong to the respective classes
all_classes = sorted(i for i in os.listdir(base_training_dir) if i.startswith("0"))

In [4]:
#getting the number of images within each class of the training data
amount_per_class = {}

for i in range(len(all_classes)):
    directory = base_training_dir + "/" + all_classes[i]
    amount_per_class[i] = len(sorted(i for i in os.listdir(directory) if i.startswith("0")))

amount_per_class_df = pd.DataFrame.from_dict(amount_per_class, orient='index').rename(columns={0:"amount"})
amount_per_class_df.index.name = 'class'

In [6]:
res = pd.concat([analysis, amount_per_class_df],axis = 1).rename(columns={"amount":"class_size_in_training"})
res.to_excel('cm_all_data_dropout_0_9_20_epochs_analysed.xlsx')
res

Unnamed: 0,actual_number,true_positives,acc,class_size_in_training
0,60,21,0.35,210
1,720,675,0.9375,2220
2,750,657,0.876,2250
3,450,351,0.78,1410
4,660,582,0.881818,1980
5,630,509,0.807937,1860
6,150,106,0.706667,420
7,450,329,0.731111,1440
8,450,364,0.808889,1410
9,480,462,0.9625,1470


In [None]:
res.corr()

#### 20 epochs

In [102]:
# Provide the name of this model
name = '_all_data_dropout_0_9_weights_20_epochs'

# Load a saved model
model = keras.models.load_model('model' + name)

# Define the test set directory
#test_dir = "/Users/franz/Desktop/DL Project/Test Folder/"
test_dir = '/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/Test Folder'

# Define the generator
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(150, 150),
        batch_size=20,
        shuffle=False,
        class_mode=None)
test_generator.reset()

# Predict
pred_scores = model.predict_generator(test_generator)
class_pred = np.argmax(pred_scores, axis=-1)


# Get the true values and compute, save and print accuracy
class_true = pd.read_csv("/Users/philippmetzger/Documents/GitHub/Deep_Learning_Project_Group_10/Data/Test Folder/gt_test.csv", ";")["ClassId"]
acc = accuracy_score(class_pred, class_true)
accs[name] = acc
print("Test set accuracy:", acc)

# Get the confusion matrix
cm = pd.DataFrame(confusion_matrix(class_true, class_pred))

# Double check accuracy
#print('Accuracy double checked:', np.trace(cm) / cm.sum().sum())

# Print the confusion matrix and save it to an excel file
save_path = 'cm' + name + '.xlsx'
cm.to_excel(save_path)

Found 12630 images belonging to 1 classes.




Test set accuracy: 0.8669833729216152
Accuracy double checked: 0.8669833729216152


In [103]:
accs

{'_all_data_dropout_0_9_weights_5_epochs': 0.8235946159936659,
 '_all_data_dropout_0_9_weights_10_epochs': 0.8517022961203484,
 '_all_data_dropout_0_9_weights_15_epochs': 0.8586698337292161,
 '_all_data_dropout_0_9_weights_20_epochs': 0.8669833729216152}