In [1]:
import tensorflow as tf
import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import BatchNormalization, Conv2D, MaxPool2D, UpSampling2D, GlobalMaxPool2D, GlobalAveragePooling2D, Conv2DTranspose, concatenate
from tensorflow.keras.layers import Dense, Dropout, Activation, Reshape, Flatten, Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical, plot_model

from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications import NASNetMobile, Xception, DenseNet121, MobileNetV2, InceptionV3, InceptionResNetV2, vgg16, resnet50, inception_v3, xception, DenseNet201
from tensorflow.keras.applications.vgg16 import VGG16

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import metrics
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import sklearn
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import jaccard_score

from scipy import stats

import seaborn as sns

import skimage
from skimage.transform import rotate

from tqdm import tqdm
from datetime import datetime

import numpy as np
import os
import cv2
import pandas as pd
# import imutils
import random
from PIL import Image
import matplotlib.pyplot as plt

import pickle
import torch

# Load Clip Filename

In [1]:
def get_video_filename(filenames):
    """Build a lookup from a numeric timestamp string to the nearest video clip path.

    Every entry found in the given directories is expected to end with a
    14-character integer followed by a 4-character suffix (``path[-18:-4]``
    is parsed with ``int``) -- presumably ``YYYYMMDDHHMMSS`` plus an
    extension such as ``.mp4``; confirm against the dataset.  Each clip
    claims the integer keys ``stamp - 50 .. stamp + 49``; when two clips
    claim the same key, the clip whose stamp is strictly closer wins.

    Parameters
    ----------
    filenames : iterable of str
        Directories to scan with ``os.listdir``.

    Returns
    -------
    callable
        ``f(key) -> str``: the clip path registered for ``key`` or the
        string ``'Error'`` when no clip covers it.
    """
    date2clip = {}
    # Stamp of the clip currently assigned to each key, kept alongside the
    # path so it does not have to be re-parsed on every comparison.
    assigned_stamp = {}

    for directory in filenames:
        for entry in os.listdir(directory):
            if entry == '.ipynb_checkpoints':
                continue

            path = directory + '/' + entry
            try:
                stamp = int(path[-18:-4])
            except ValueError:
                # Not a timestamp-named file; skip it instead of aborting
                # the whole scan.
                continue

            # NOTE(review): the window is plain integer arithmetic on the
            # stamp, so it does not roll over minute/hour boundaries the way
            # real timestamps do -- confirm this is acceptable.
            for i in range(stamp - 50, stamp + 50):
                key = str(i)
                if key not in date2clip or abs(i - stamp) < abs(i - assigned_stamp[key]):
                    date2clip[key] = path
                    assigned_stamp[key] = stamp

    def clip(filename, date2clip):
        """Return the clip path for ``filename`` or ``'Error'`` if unknown."""
        try:
            return date2clip[filename]
        except (KeyError, TypeError):
            # KeyError: no clip covers this key.
            # TypeError: unhashable key; keep the original best-effort result.
            return 'Error'

    return lambda x: clip(x, date2clip)

# Prepare Data

In [1]:
def get_data(condition):
    """Load the tabular data for one split and attach the matching video path.

    Parameters
    ----------
    condition : str
        ``'train'`` concatenates every CSV found in the ``train_csv``
        directory; ``'test'`` reads ``test.csv``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Ids`` (the renamed ``'Unnamed: 0'`` column), without
        ``s_equipment_control``, and with ``file_datetime`` holding the clip
        path returned by ``get_video_filename`` (``'Error'`` when no clip
        matches).  For ``'train'``, rows whose ``has_actual_output`` equals
        ``'Corrupted Video'`` are removed.

    Raises
    ------
    ValueError
        If ``condition`` is neither ``'train'`` nor ``'test'``.
    """
    if condition == 'train':
        train_dir = '../input/super-ai-engineer-denso-lasi/train_csv'
        # Read all parts first and concatenate once; concatenating inside the
        # loop re-copies the accumulated frame on every iteration.
        parts = [pd.read_csv(train_dir + '/' + path) for path in os.listdir(train_dir)]
        df = pd.concat(parts) if parts else pd.DataFrame({})
    elif condition == 'test':
        df = pd.read_csv('../input/super-ai-engineer-denso-lasi/test.csv')
    else:
        raise ValueError("condition must be 'train' or 'test', got %r" % (condition,))

    # Remove the unused column and give the id column a proper name.
    df = df.drop(columns=['s_equipment_control'])
    df = df.rename(columns={'Unnamed: 0': 'Ids'})

    # Strip '-', ':' and ' ' from d_datetime so it can be used as a lookup key
    # for the clip filenames.
    df['file_datetime'] = df['d_datetime'].replace({'-':'', ':':'', ' ':''}, regex=True)

    # Replace each key with the path of the nearest clip.
    video_filenames = ['../input/denso-videos/denso-video', '../input/densotest']
    df['file_datetime'] = df['file_datetime'].apply(get_video_filename(video_filenames))

    df.index = df['Ids']
    df = df.drop(columns='Ids')

    if condition == 'train':
        corrupted = df.loc[df['has_actual_output'] == 'Corrupted Video'].index
        df = df.drop(index=corrupted)

    return df

In [1]:
# Build the training frame and the test frame with the same loader.
train, test = (get_data(split) for split in ('train', 'test'))

In [1]:
# Show the training frame returned by get_data('train') as the cell output.
train

In [1]:
# Show the test frame returned by get_data('test') as the cell output.
test

In [1]:
def get_image(df, condition='train'):
    """Read the first frames of a row's video clip and crop a fixed window.

    Parameters
    ----------
    df : mapping-like row
        A single record (as passed by ``DataFrame.apply(axis=1)``); only
        ``df['file_datetime']`` is read and handed to ``cv2.VideoCapture``.
    condition : str
        ``'test'`` returns the crop of the first frame only; any other value
        returns the crops of the first 5 frames stacked into one array.

    Returns
    -------
    numpy.ndarray or str
        The cropped RGB frame(s), or the string ``'Error'`` when a frame
        cannot be read.
    """
    n_frames = 5

    # The crop is taken on the array's first axis (rows 30:240) and second
    # axis (columns 500:620); the historical x/y names referred to those axes.
    x_crop_start, x_crop_end = 30, 240
    y_crop_start, y_crop_end = 500, 620

    cap = cv2.VideoCapture(df['file_datetime'])
    try:
        frames = []
        for _ in range(n_frames):
            ret, frame = cap.read()
            if not ret:
                return 'Error'

            frame = np.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            try:
                crop = frame[x_crop_start:x_crop_end, y_crop_start:y_crop_end, :]
            except IndexError:
                # Frame does not have the expected three axes.
                return 'Error'

            if condition == 'test':
                return crop
            frames.append(crop)

        return np.array(frames)
    finally:
        # Always hand the decoder/file handle back, including on the early
        # returns above.
        cap.release()
    
def get_image2df(df, condition='train'):
    """Add an ``image`` column with the cropped frames and drop unreadable rows.

    ``get_image`` is applied to every row; rows for which it returned the
    string ``'Error'`` are removed.  The frame is modified in place and also
    returned, so both ``get_image2df(df)`` and ``df = get_image2df(df)`` work.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose rows are accepted by ``get_image``.
    condition : str
        Forwarded to ``get_image``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the ``image`` column added.
    """
    df['image'] = df.apply(lambda x: get_image(x, condition), axis=1)

    # Test for the sentinel explicitly: most cells hold NumPy arrays, and
    # comparing an array with a string (`df['image'] == 'Error'`) is not a
    # reliable scalar test.
    is_error = df['image'].apply(lambda image: isinstance(image, str) and image == 'Error')
    df.drop(index=df.index[is_error.to_numpy(dtype=bool)], inplace=True)
    return df

# Attach the cropped frames to every training row; get_image2df drops the rows
# whose image came back as 'Error'.
train = get_image2df(train)

In [1]:
# Preview the first frame of the first few clips labelled 'Yes'.
number = 5
fig = plt.figure(figsize=(50, 100))
rows = train[train['has_actual_output'] == 'Yes'].index
for position, row in enumerate(tqdm(rows[:number]), start=1):
    im = np.array(train.loc[row, 'image'])
    fig.add_subplot(1, number, position)
    plt.imshow(im[0])

In [1]:
# Preview the first frame of the first few clips labelled 'No'.
number = 5
fig = plt.figure(figsize=(50, 100))
rows = train[train['has_actual_output'] == 'No'].index
for position, row in enumerate(tqdm(rows[:number]), start=1):
    im = np.array(train.loc[row, 'image'])
    fig.add_subplot(1, number, position)
    plt.imshow(im[0])

# Washing Machine Training

# Model

In [1]:
# Take the model input resolution from the first frame of the first available
# clip rather than from a hard-coded row id, which fails as soon as that row
# is missing or was dropped as unreadable.
# NOTE(review): assumes every cropped frame has the same shape -- the later
# np.array(X) stacking relies on that as well.
im = np.array(train['image'].iloc[0][0])
x_resolution, y_resolution, _ = im.shape
x_resolution, y_resolution

In [1]:
# NOTE(review): the device scope is active while this cell builds the model
# for .summary(); get_model() calls made outside this `with` block are not
# covered by it -- confirm that is intended.
with tf.device('/device:GPU:0'):
    def get_model():
        """Build and compile the binary frame classifier.

        Two Conv2D(5x5, relu) -> MaxPool2D(2) -> BatchNormalization stages
        (16 then 32 filters) feed two Dense -> BatchNormalization -> relu ->
        Dropout(0.2) stages (32 then 16 units) and a single sigmoid output.
        The input shape is (x_resolution, y_resolution, 3), read from the
        notebook globals.  Compiled with binary cross-entropy, Adam and the
        accuracy metric.
        """
        inputs = Input(shape=(x_resolution, y_resolution, 3))

        # Convolutional feature extractor.
        net = inputs
        for n_filters in (16, 32):
            net = Conv2D(n_filters, kernel_size=(5,5), activation='relu')(net)
            net = MaxPool2D(pool_size=2)(net)
            net = BatchNormalization()(net)

        # Fully connected head.
        net = Flatten()(net)
        for n_units in (32, 16):
            net = Dense(n_units)(net)
            net = BatchNormalization()(net)
            net = Activation('relu')(net)
            net = Dropout(0.2)(net)

        logit = Dense(1)(net)
        outputs = Activation('sigmoid')(logit)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

        return model

    get_model().summary()
plot_model(get_model(),show_shapes=True)

# Prepare Data For Training Model

In [1]:
# Flatten the clips into per-frame samples: every frame stored in a row's
# `image` becomes one sample and inherits that row's label ('No' -> 0,
# 'Yes' -> 1).
labels = train['has_actual_output'].replace({'No':0, 'Yes':1}, regex=True).to_numpy()
X, y = [], []
for clip_frames, label in zip(train['image'], labels):
    X.extend(clip_frames)
    y.extend(label for _ in clip_frames)

# Scale pixel values by 1/255.
X = np.array(X)/255.
y = np.array(y)
X.shape, y.shape

# Training Process

In [1]:
# Training hyper-parameters and the checkpoint location.
batch_size = 32
earlystopping = 50
best_model = 'best_model.h5'

# Hold out 20% of the frames for validation (fixed seed for the split).
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=42, test_size=0.2)

# Both callbacks track the same quantity: the highest validation accuracy.
monitoring = dict(monitor='val_accuracy', mode='max', verbose=1)
EarlyStopper = EarlyStopping(patience=earlystopping, **monitoring)
Checkpoint = ModelCheckpoint(best_model, save_best_only=True, **monitoring)

for title, split in (('Train Size :', X_train), ('Validation Size :', X_valid)):
    print(title, split.shape[0])

In [1]:
# Train a fresh model; early stopping and best-checkpoint saving are driven by
# the callbacks configured in the previous cell.
model = get_model()

model.fit(
    x=X_train,
    y=y_train,
    epochs=200,
    batch_size=batch_size,
    validation_data=(X_valid, y_valid),
    callbacks=[EarlyStopper, Checkpoint],
    verbose=1,
)

In [1]:
# Reload the checkpoint saved by ModelCheckpoint and score it on the training
# split, the validation split and the full data set.
model = load_model(best_model)
splits = ((X_train, y_train), (X_valid, y_valid), (X, y))
(loss_train, acc_train), (loss_valid, acc_valid), (loss_all, acc_all) = [
    model.evaluate(features, targets, verbose=0) for features, targets in splits
]

print('Train Loss :', loss_train)
print('Train Accuracy :', acc_train*100)
print()
print('Valid Loss :', loss_valid)
print('Valid Accuracy :', acc_valid*100)
print()
print('All Loss :', loss_all)
print('All Accuracy :', acc_all*100)

# Submission Model

In [1]:
# Train the submission model on every available frame (no validation split,
# no callbacks) for a fixed number of epochs.
model_submission = get_model()

model_submission.fit(
    x=X,
    y=y,
    epochs=100,
    batch_size=batch_size,
    verbose=1,
)

In [1]:
# Score the submission model on the training split, the validation split and
# the full data set.
splits = ((X_train, y_train), (X_valid, y_valid), (X, y))
(loss_train, acc_train), (loss_valid, acc_valid), (loss_all, acc_all) = [
    model_submission.evaluate(features, targets, verbose=0) for features, targets in splits
]

print('Train Loss :', loss_train)
print('Train Accuracy :', acc_train*100)
print()
print('Valid Loss :', loss_valid)
print('Valid Accuracy :', acc_valid*100)
print()
print('All Loss :', loss_all)
print('All Accuracy :', acc_all*100)

# Predict

In [1]:
# Per the original note, ids 44845 and 48788 come back as 'Error'; get_image2df
# drops such rows, so they receive no prediction here.
test = get_image2df(test, 'test')
X_test = np.array(list(test['image']))/255.

# Threshold the sigmoid output at 0.5 to obtain the 'No' / 'Yes' label.
probabilities = model_submission.predict(X_test)
actual_output = ['No' if row[0] < 0.5 else 'Yes' for row in probabilities]
actual_output[:20]

In [1]:
# Rows for the clips marked as errors: they are given the label 'No' so that
# the submission still contains an entry for them.
error_ids = [44845, 48788]
df_error = pd.DataFrame({
    'Ids': error_ids,
    'actual_output': ['No' for _ in error_ids],
})
df_error

In [1]:
# One row per test clip that produced an image, labelled with the model's
# prediction.
submission = pd.DataFrame({'Ids': test.index})
submission['actual_output'] = actual_output

# Appending Error Clip
# DataFrame.append was removed in pandas 2.0; pd.concat with ignore_index=True
# produces the same result.
submission = pd.concat([submission, df_error], ignore_index=True)

submission = submission.sort_values(['Ids'])
submission.to_csv('actual_output.csv', index=False)

In [1]:
import math

# Draw every test image in a roughly square grid, titled with the label that
# was written to the submission for that id.
size = int(math.sqrt(len(test.index)))+1
fig = plt.figure(figsize=(50, 100))
for slot, idx in enumerate(tqdm(test.index), start=1):
    im = np.array(test.loc[idx, 'image'])
    fig.add_subplot(size, size, slot)
    matching_rows = submission[submission['Ids'] == idx]
    print_output = matching_rows['actual_output'].to_numpy()[0]
    plt.title(print_output, fontsize=70)
    plt.imshow(im)