In [None]:
#/kaggle/input/state-farm-distracted-driver-detection/sample_submission.csv
#/kaggle/input/state-farm-distracted-driver-detection/driver_imgs_list.csv
#/kaggle/input/state-farm-distracted-driver-detection/imgs/train/c4/img_16261.jpg



In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image, ImageDraw, ImageEnhance
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Display the installed TensorFlow version so the environment is recorded with the run.
tf.__version__

In [None]:
# Root of the Kaggle competition dataset; every input path below is derived from it.
DATA_DIR = "/kaggle/input/state-farm-distracted-driver-detection"

# Example submission file shipped with the dataset.
sample_path = f"{DATA_DIR}/sample_submission.csv"
# CSV index of the training images; later cells read its 'subject', 'classname' and 'img' columns.
imgs_list_path = f"{DATA_DIR}/driver_imgs_list.csv"
# Training images, stored as "<class>/<file>" beneath this directory.
train_path = f"{DATA_DIR}/imgs/train"

## 1. Check data distribution

In [None]:
# Load the image index; later cells read its 'subject', 'classname' and 'img' columns.
driver_imgs_list = pd.read_csv(imgs_list_path)
# Preview the first rows.
driver_imgs_list.head()

In [None]:
# List the entries of the training directory (images are later read from "<class>/<file>" beneath it).
os.listdir(train_path)

In [None]:
def pair_sort(className,values):
    """Sort two parallel sequences in place, ascending by ``values``.

    ``className[i]`` and ``values[i]`` are treated as one pair; after the call
    the pairs are ordered by their value. Pairs with equal values keep their
    original relative order (the sort is stable).

    Args:
        className: Mutable sequence (list or NumPy array) of labels. Its
            length decides how many pairs are sorted.
        values: Mutable sequence of sort keys with at least ``len(className)``
            elements; any elements beyond that length are left untouched.

    Returns:
        The tuple ``(className, values)`` -- the same objects that were passed
        in, now reordered.
    """
    count = len(className)
    if count < 2:
        # Nothing to reorder; also keeps empty inputs from being indexed.
        return className,values

    # Permutation that sorts the first `count` values; sorted() is stable,
    # just like the adjacent-swap loop this replaces.
    order = sorted(range(count), key=lambda idx: values[idx])

    # Materialise both orderings before writing back so no element is read
    # after it has been overwritten.
    sorted_names = [className[idx] for idx in order]
    sorted_values = [values[idx] for idx in order]

    # Slice assignment keeps the caller's objects (and their types) intact.
    className[:] = sorted_names
    values[:count] = sorted_values

    return className,values

In [None]:
# Distinct class labels found in the image index.
class_names = np.unique(driver_imgs_list['classname'])
# Count every class in a single pass instead of re-filtering the frame once per class.
class_counts = driver_imgs_list['classname'].value_counts()
class_image_list = [int(class_counts[current_class]) for current_class in class_names]

# Order the bars from the least to the most frequent class
# (pair_sort reorders both sequences together).
class_names,class_image_list=  pair_sort(class_names,class_image_list)

fig, ax = plt.subplots(figsize=(15, 5), dpi=80, facecolor='w', edgecolor='k')
fig.suptitle('Number of images per Class')
ax.bar(class_names,class_image_list,color=(0.2, 0.4, 0.6, 0.6))
ax.set_xlabel('Class')
ax.set_ylabel('Number of images')
plt.show()

In [None]:
# Distinct subject identifiers found in the image index.
sub_names = np.unique(driver_imgs_list['subject'])
# Count every subject in a single pass instead of re-filtering the frame once per subject.
subject_counts = driver_imgs_list['subject'].value_counts()
sub_image_list = [int(subject_counts[current_sub]) for current_sub in sub_names]
# Order the bars by image count (pair_sort reorders both sequences together).
sub_names,sub_image_list=  pair_sort(sub_names,sub_image_list)

fig, ax = plt.subplots(figsize=(15, 10), dpi=80, facecolor='w', edgecolor='k')

# One horizontal bar per subject.
y_pos = np.arange(len(sub_names))
ax.barh(y_pos, sub_image_list,color=(0.2, 0.4, 0.6, 0.6))

# Label each bar with its subject identifier.
ax.set_yticks(y_pos)
ax.set_yticklabels(sub_names)
ax.set_xlabel('Number of images')
ax.set_ylabel('Subject')
fig.suptitle('Number of images per subject')

plt.show()

In [None]:
# Side length, in pixels, of the square images used throughout the notebook.
img_width = img_height = 128
# (width, height) pair handed to the image resize calls.
input_image = (img_width, img_height)
# Network input: the image size plus three colour channels.
model_input_shape = input_image + (3,)
batch_size = 16

def load_image(path):
    """Load one training image as a fixed-size RGB pixel array.

    Args:
        path: Location of the image relative to ``train_path``
            (callers pass ``"<class>/<file name>"``).

    Returns:
        numpy.ndarray holding the image after conversion to RGB and resizing
        to ``input_image`` (width, height).
    """
    read_path = train_path+"/"+path
    # The context manager releases the file handle even if decoding fails.
    with Image.open(read_path) as image:
        # Force three channels so grayscale/RGBA/palette files still match
        # model_input_shape; resize() returns a new in-memory image, so it
        # stays valid after the file is closed.
        resized = image.convert("RGB").resize(input_image)

    return np.asarray(resized)

In [None]:
def show_images(image_ids,class_names):
    """Display images side by side in a single row, each titled with its class.

    Args:
        image_ids: Image paths relative to the training directory, in the form
            accepted by ``load_image``.
        class_names: Titles for the images; ``class_names[i]`` is shown above
            ``image_ids[i]``.
    """
    num_of_images = len(image_ids)
    if num_of_images == 0:
        # plt.subplots cannot build a grid with zero columns.
        return

    pixels = [load_image(path) for path in image_ids]

    fig, axes = plt.subplots(
        1,
        num_of_images,
        # One row of 5x5-inch panels: the width grows with the image count,
        # the height does not.
        figsize=(5 * num_of_images, 5),
        # Always return a 2-D array of Axes, so a single image is indexable too.
        squeeze=False,
    )

    row_axes = axes[0]
    for i, image_pixels in enumerate(pixels):
        row_axes[i].imshow(image_pixels)
        row_axes[i].axis("off")
        row_axes[i].set_title(class_names[i])

## 2. Plot class images

In [None]:
# Pick the first listed image of every class as its example, as "<class>/<file>".
sub_names_imgs = []
for current_class in class_names:
    class_files = driver_imgs_list.loc[driver_imgs_list['classname'] == current_class, 'img']
    sub_names_imgs.append(current_class + "/" + class_files.values[0])

# Show the examples in two rows: the first five classes, then the rest.
show_images(sub_names_imgs[:5], class_names[:5])
show_images(sub_names_imgs[5:], class_names[5:])

## 3. Split and load Train/Validation

In [None]:
# Training image directory (same value as the train_path assigned near the top of the notebook).
train_path = "/kaggle/input/state-farm-distracted-driver-detection/imgs/train"
# Directory of the test images that are predicted for the submission.
test_path = "/kaggle/input/state-farm-distracted-driver-detection/imgs/test"

In [None]:
x_train = []
y_train = []

x_val = []
y_val = []

# Fraction of each class's images used for training; the remainder is validation.
split_rate = 0.8
for current_class in class_names:
    select_df = driver_imgs_list[driver_imgs_list['classname'] == current_class ]
    image_list = (select_df['img'].values)
    # The split is positional: the leading split_rate share of the class's rows
    # (in CSV order) trains, the trailing share validates.
    train_amount = int( len(image_list)*split_rate)
    train_list = image_list[:train_amount]
    val_list = image_list[train_amount:]

    # Integer class index (e.g. "c3" -> 3), so the labels are already numeric
    # when they are one-hot encoded.
    class_index = int(current_class.replace('c',''))

    # Same loading logic for both splits; only the destination lists differ.
    for file_list, images, labels in ((train_list, x_train, y_train),
                                      (val_list, x_val, y_val)):
        for filename in file_list:
            images.append(load_image(current_class+"/"+filename))
            labels.append(class_index)


## 4. Encode Labels

In [None]:
# Stack the per-image arrays into one array per split.
x_train = np.asarray(x_train)
x_val = np.asarray(x_val)
# One-hot encode the class indices into 10-column target matrices.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_val =tf.keras.utils.to_categorical(y_val, num_classes=10)
print("Train x Shape: ",x_train.shape)
# x_val is the validation split, so it is reported as such (not as "Test").
print("Validation x Shape: ",x_val.shape)


In [None]:
print("Train y Shape: ",y_train.shape)
# y_val holds the validation targets, so it is reported as such (not as "Test").
print("Validation y Shape: ",y_val.shape)

In [None]:
# Random augmentation applied on the fly to the training batches.
datagenerator = ImageDataGenerator(
    rotation_range=40,  # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range = 0.2, # Randomly zoom image 
    width_shift_range=0.3,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.3,  # randomly shift images vertically (fraction of total height)
    # NOTE(review): mirroring may be wrong if any classes differ only by
    # left/right side -- confirm against the class definitions.
    horizontal_flip = True
)
# datagenerator.fit(x_train) is intentionally not called: fit() only computes
# statistics for featurewise_center / featurewise_std_normalization /
# zca_whitening, none of which is enabled here, so it would just spend time and
# memory on a float copy of the training set.

## 5. Create Model


In [None]:
# ImageNet-pretrained ResNet50 without its classification top; a new head is attached to its output.
base_model = tf.keras.applications.resnet.ResNet50(
    input_shape=model_input_shape,
    weights='imagenet',
    include_top=False,
)
base_model.summary()

In [None]:
# Classification head: flatten the backbone output, apply 50% dropout, then
# emit one softmax probability per class.
flattened = tf.keras.layers.Flatten()(base_model.output)
x = tf.keras.layers.Dropout(0.5)(flattened)
output = tf.keras.layers.Dense(len(class_names), activation=tf.nn.softmax)(x)

model = tf.keras.models.Model(inputs=base_model.inputs, outputs=output)

# The head already applies softmax, so the loss receives probabilities
# (from_logits=False).
cross_entropy = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss=cross_entropy,
    metrics=['accuracy'],
)

model.summary()

In [None]:
num_epochs = 2
batchSize = 8
# Halve the learning rate (down to min_lr) when validation accuracy stops improving.
# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy' (the key the history plots read); the former
# 'val_acc' is never logged, which left this callback unable to trigger.
# Note: with patience=3 and num_epochs=2 the reduction still cannot fire until
# more epochs are trained.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', 
                                            patience=3, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.000001)

In [None]:
# Augmented training batches; the validation data is passed through unaugmented.
train_batches = datagenerator.flow(x_train, y_train, batch_size=batchSize)

history = model.fit(
    train_batches,
    validation_data=(x_val, y_val),
    # Whole batches only: any remainder smaller than batchSize is not counted.
    steps_per_epoch=len(x_train) // batchSize,
    epochs=num_epochs,
    callbacks=[learning_rate_reduction],
    verbose=1,
)

## 6. Model Evaluation

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# One panel per metric, each with a labelled training and validation curve so
# the two lines can be told apart.
for axis, metric, title in ((ax[0], 'accuracy', 'Accuracy'),
                            (ax[1], 'loss', 'Loss')):
    axis.set_title(title)
    axis.plot(history.history[metric], label='train')
    axis.plot(history.history['val_' + metric], label='validation')
    axis.set_xlabel('Epoch')
    axis.legend()

In [None]:
# Persist the trained model to "test_model.h5" in the working directory.
model.save("test_model.h5")

In [None]:
# Reload the model that was just saved; the path is built once and reused
# instead of repeating the file name literal.
model_path = os.path.join('', 'test_model.h5')
model = tf.keras.models.load_model(model_path)


In [None]:
test_path = "/kaggle/input/state-farm-distracted-driver-detection/imgs/test"
# Empty buffer for the test images. Note that the name x_train is reused here
# and no longer refers to the training data.
x_train = []
# File names found in the test directory.
read_path = os.listdir(test_path)
# Independent copy of the names, kept for the submission's 'img' column.
test_ids = list(read_path)

In [None]:

# Start from an empty buffer so re-running this cell does not append duplicates.
x_train = []
# Iterate over test_ids and keep the per-file path in its own variable: the
# loop used to overwrite read_path (the list it was iterating), so running the
# cell a second time walked over the characters of a string.
for file_index, file in enumerate(test_ids, start=1):
    image_path = test_path+"/"+file
    # The context manager closes the file handle once the image is decoded.
    with Image.open(image_path) as image:
        x_train.append(np.asarray(image.resize(input_image)))
    # Periodic progress instead of one printed line per file.
    if file_index % 5000 == 0 or file_index == len(test_ids):
        print(file_index, "/", len(test_ids), "test images loaded")
    


In [None]:
# Stack the list of test images into a single array for model.predict.
x_train = np.array(x_train)

In [None]:
# Inspect the shape of the stacked test array.
x_train.shape

In [None]:
def create_submission(predictions, test_id, suffix="test_result", out_dir="subm"):
    """Write class probabilities to ``<out_dir>/submission_<suffix>.csv``.

    The CSV has the ten probability columns ``c0`` ... ``c9`` followed by an
    ``img`` column, and no index column.

    Args:
        predictions: 2-D array-like with one row per image and ten columns,
            stored under the headers ``c0`` ... ``c9`` in that order.
        test_id: Image identifiers, one per prediction row, stored in ``img``.
        suffix: Text inserted into the file name. The default keeps the
            original ``submission_test_result.csv`` name.
        out_dir: Directory that receives the file; created if it is missing.
            The default keeps the original ``subm`` directory.
    """
    class_columns = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']
    result1 = pd.DataFrame(predictions, columns=class_columns)
    # Align the identifiers with the prediction rows by position.
    result1['img'] = pd.Series(test_id, index=result1.index)

    # exist_ok avoids the check-then-create race of isdir() followed by mkdir()
    # and also handles nested directories.
    os.makedirs(out_dir, exist_ok=True)
    sub_file = os.path.join(out_dir, 'submission_' + suffix + '.csv')
    result1.to_csv(sub_file, index=False)

In [None]:
# Predict class probabilities for the test images (held in x_train at this
# point) and write them, with the matching file names, to the submission CSV.
test_result = model.predict(x_train, batch_size=128, verbose=1)
create_submission(predictions=test_result, test_id=test_ids)

In [None]:
# Load the competition's sample submission to see the expected layout.
sample = pd.read_csv(sample_path)
sample.head()

In [None]:
# Read back the file written by create_submission to check its contents.
test_result_path = os.path.join('subm', 'submission_test_result.csv')
# NOTE: this rebinds `sample`, which previously held the competition's sample submission.
sample = pd.read_csv(test_result_path)
sample.head()