In [1]:
#imports


from sklearn.datasets import load_files
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn.metrics import confusion_matrix

from keras.utils import np_utils
from keras.preprocessing import image
from keras.utils.vis_utils import plot_model
from keras.callbacks import ModelCheckpoint
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential  


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

from glob import glob
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from google.colab import drive

In [2]:
def drive_atuh_for_dataset():
  """Authenticate the Colab user and build a PyDrive client.

  Returns:
    The ``GoogleDrive`` client created from the application-default
    credentials. Keep the returned object and pass it to whatever code needs
    to download files; nothing is stored at notebook level by this function.
  """
  # Authenticate and create the PyDrive client.
  # This only needs to be done once per notebook.
  auth.authenticate_user()
  gauth = GoogleAuth()
  gauth.credentials = GoogleCredentials.get_application_default()
  # Use a distinct local name: ``drive`` is also the ``google.colab.drive``
  # module imported at the top of the notebook, and a local called ``drive``
  # would silently vanish when the function returns.
  drive_client = GoogleDrive(gauth)
  return drive_client

In [3]:
def mount_drive():
  """Mount the user's Google Drive inside the Colab filesystem."""
  mount_point = '/content/gdrive'
  drive.mount(mount_point)

In [5]:
def download_and_unzip_dataset(drive_client=None):
  """Download the dataset archive from Google Drive and extract it.

  The archive is saved as ``driver.zip`` in the current working directory and
  extracted there, overwriting files that already exist.

  Args:
    drive_client: object exposing PyDrive's ``CreateFile`` method (a
      ``GoogleDrive`` client). When omitted, the notebook-level name ``drive``
      is used; that only works if ``drive`` has been rebound to a PyDrive
      client, because the ``google.colab.drive`` module has no ``CreateFile``.

  Raises:
    AttributeError: if the object used as client has no ``CreateFile`` method.
  """
  import zipfile  # local import: keeps the cell runnable on its own

  client = drive if drive_client is None else drive_client
  if not hasattr(client, 'CreateFile'):
    raise AttributeError(
        'download_and_unzip_dataset needs a PyDrive GoogleDrive client; '
        'got %r, which has no CreateFile method.' % (client,))

  file_id = '1dyWEXzKoQs0O8JGnrTQhgE5oESYixAD-' # URL id.
  archive_name = 'driver.zip'
  downloaded = client.CreateFile({'id': file_id})
  downloaded.GetContentFile(archive_name)

  # Extract with the standard library rather than a `!unzip` shell escape:
  # it is plain Python and never stops to ask whether to overwrite files
  # when the cell is re-run.
  with zipfile.ZipFile(archive_name) as archive:
    archive.extractall()

In [6]:
# define function to load datasets
def load_dataset(path, num_classes=10):
    """Collect image file paths and one-hot targets from a class-per-folder tree.

    Args:
        path: directory handed to ``sklearn.datasets.load_files``.
        num_classes: width of the one-hot encoding passed to
            ``to_categorical``. Defaults to 10, the value previously
            hard-coded here.

    Returns:
        tuple: ``(files, targets)`` where ``files`` is an array built from the
        ``'filenames'`` entry and ``targets`` is the one-hot encoding of the
        ``'target'`` entry.
    """
    # Only the filenames and targets are used, so skip reading every file's
    # bytes into memory; the images themselves are loaded later from the paths.
    data = load_files(path, load_content=False)
    files = np.array(data['filenames'])
    targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return files, targets



In [8]:
def print_image_data(train_files, valid_files, class_names=None):
  """Print how many images and categories the dataset splits contain.

  Args:
    train_files: sized collection of training image paths.
    valid_files: sized collection of validation image paths.
    class_names: sized collection of category names. When omitted, the
      notebook-level variable ``names`` is used, so existing two-argument
      calls keep working (and still require ``names`` to be defined).
  """
  if class_names is None:
    # Backward-compatible fallback to the notebook global.
    class_names = names

  n_train = len(train_files)
  n_valid = len(valid_files)

  # print statistics about the dataset
  print('There are %s total images.\n' % (n_train + n_valid))
  print('There are %d training images.' % n_train)
  print('There are %d total training categories.' % len(class_names))
  print('There are %d validation images.' % n_valid)

In [None]:
def data_exploration(df=None, csv_path="/content/driver_imgs_list.csv"):
  """Print a quick textual summary of the image listing.

  Args:
    df: DataFrame with a ``classname`` column. If anything other than a
      DataFrame is passed (including ``None``), the listing is read from
      ``csv_path`` instead.
    csv_path: CSV file to read when ``df`` is not a DataFrame.
  """
  ##Data Exploration
  # Use the caller's frame when one is supplied; previously the argument was
  # always overwritten by a fresh read of the CSV.
  if not isinstance(df, pd.DataFrame):
    df = pd.read_csv(csv_path, header='infer')
  print(df['classname'].head(3))
  # Summary of the second column by position.
  print(df.iloc[:,1].describe())
  print("\n Image Counts")
  print(df['classname'].value_counts(sort=False))

In [9]:
def data_visualization(df):
  #Visualization
  # Pretty display for notebooks
  %matplotlib inline

  nf = df['classname'].value_counts(sort=False)
  labels = df['classname'].value_counts(sort=False).index.tolist()
  y = np.array(nf)
  width = 1/1.5
  N = len(y)
  x = range(N)

  fig = plt.figure(figsize=(20,15))
  ay = fig.add_subplot(211)

  plt.xticks(x, labels, size=15)
  plt.yticks(size=15)

  ay.bar(x, y, width, color="blue")

  plt.title('Bar Chart',size=25)
  plt.xlabel('classname',size=15)
  plt.ylabel('Count',size=15)

  plt.show()

In [10]:
def path_to_tensor(img_path, target_size=(64, 64)):
    """Load one image file and return it as a single-item batch.

    Args:
        img_path: path of the image file to load.
        target_size: size forwarded to ``image.load_img``. Defaults to
            ``(64, 64)``, the value previously hard-coded here.

    Returns:
        The array from ``image.img_to_array`` with a new leading axis, i.e. a
        4D tensor; with the default size this is expected to be
        ``(1, 64, 64, 3)``.
    """
    img = image.load_img(img_path, target_size=target_size)
    x = image.img_to_array(img)
    # convert 3D tensor to 4D tensor and return 4D tensor
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    """Convert several image paths into one stacked array.

    Each path is converted with ``path_to_tensor`` (progress shown via
    ``tqdm``) and the per-image arrays are stacked along the first axis.
    """
    batch = []
    for single_path in tqdm(img_paths):
        batch.append(path_to_tensor(single_path))
    return np.vstack(batch)

In [11]:
def print_accuracy_plots(fit_output):
  """Plot training/validation loss and accuracy curves, one panel each.

  Args:
    fit_output: object whose ``history`` mapping holds the per-epoch lists
      ``'loss'``, ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'``.
  """
  history = fit_output.history
  # Derive the epoch axis from the data instead of assuming a fixed number of
  # epochs, and number epochs from 1 so ticks line up with the points.
  epochs = np.arange(1, len(history['loss']) + 1)

  fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))

  ax1.plot(epochs, history['loss'], color='b', label="Training loss")
  ax1.plot(epochs, history['val_loss'], color='r', label="validation loss")
  ax1.set_xticks(epochs)
  ax1.set_yticks(np.arange(0, 1, 0.1))
  ax1.set_xlabel('Epoch')
  ax1.set_ylabel('Loss')
  # Each axes needs its own legend; plt.legend() alone only decorates the
  # most recently used axes.
  ax1.legend(loc='best', shadow=True)

  ax2.plot(epochs, history['accuracy'], color='b', label="Training accuracy")
  ax2.plot(epochs, history['val_accuracy'], color='r',label="Validation accuracy")
  ax2.set_xticks(epochs)
  ax2.set_xlabel('Epoch')
  ax2.set_ylabel('Accuracy')
  ax2.legend(loc='best', shadow=True)

  plt.tight_layout()
  plt.show()

In [12]:
def print_confusion_matrix(confusion_matrix, class_names, figsize = (10,7), fontsize=14):
    """Render a confusion matrix as an annotated seaborn heatmap.

    Args:
        confusion_matrix: square table of integer values; rows and columns
            are labelled with ``class_names``.
        class_names: labels used for both the index and the columns.
        figsize: size passed to ``plt.figure``.
        fontsize: font size of the tick labels on both axes.

    Raises:
        ValueError: if seaborn rejects the data for the integer format.
    """
    frame = pd.DataFrame(confusion_matrix, index=class_names, columns=class_names)
    plt.figure(figsize=figsize)
    try:
        heatmap = sns.heatmap(frame, annot=True, fmt="d")
    except ValueError:
        raise ValueError("Confusion matrix values must be integers.")
    # Row labels stay horizontal, column labels are tilted by 45 degrees.
    for axis, angle in ((heatmap.yaxis, 0), (heatmap.xaxis, 45)):
        axis.set_ticklabels(axis.get_ticklabels(), rotation=angle, ha='right', fontsize=fontsize)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [13]:
def print_heatmap(n_labels, n_predictions, class_names):
    """Plot a row-normalised (percentage) confusion matrix heatmap.

    Args:
        n_labels: 2D array of true labels, one row per sample; the class is
            taken as the argmax of each row.
        n_predictions: 2D array of predicted scores, one row per sample; the
            class is taken as the argmax of each row.
        class_names: names of the classes, in class-index order. Its length
            fixes the size of the matrix.
    """
    true_idx = n_labels.argmax(axis=1)
    pred_idx = n_predictions.argmax(axis=1)

    # Pin the label set so the matrix always matches ``class_names`` even when
    # some class never occurs in this batch.
    matrix = confusion_matrix(true_idx, pred_idx,
                              labels=np.arange(len(class_names)))

    # Per-row percentages; rows with no true samples stay at 0 instead of
    # producing NaN from a division by zero.
    row_sum = matrix.sum(axis=1, keepdims=True)
    percentages = np.divide(
        matrix * 100,
        row_sum,
        out=np.zeros(matrix.shape, dtype=float),
        where=row_sum != 0,
    )

    # The heatmap uses an integer format, so truncate to whole percentages.
    c = percentages.astype(dtype = np.uint8)
    print_confusion_matrix(c, class_names, figsize=(18,10), fontsize=20)

In [14]:
def print_metrics(ypred,valid_targets):
  """Print accuracy and weighted precision, recall and F1.

  Args:
    ypred: 2D array of predicted scores, one row per sample.
    valid_targets: 2D array of true labels, one row per sample.
  """
  # Collapse both inputs to class indices (argmax of each row).
  predicted_idx = np.argmax(ypred,axis=1)
  true_idx = np.argmax(valid_targets,axis=1)

  weighted = {'average': 'weighted'}
  # (message template, scoring function, extra keyword arguments)
  #   precision: tp / (tp + fp)
  #   recall:    tp / (tp + fn)
  #   f1:        2 tp / (2 tp + fp + fn)
  reports = (
      ('Accuracy: %f', accuracy_score, {}),
      ('Precision: %f', precision_score, weighted),
      ('Recall: %f', recall_score, weighted),
      ('F1 score: %f', f1_score, weighted),
  )
  for template, scorer, extra in reports:
    print(template % scorer(true_idx, predicted_idx, **extra))