# Project 4
Chen Liang

The target of this project is to detect 'hitting wall or something'. Dataset used is 'something-something-v2'. Since the output will be a probability of 'hitting' or not, the model is not designed to detect all categories in this dataset.

## Data Preprocessing

### Select Files from Json

In [11]:
import pandas as pd

In [12]:
df=pd.read_json('something-something-v2-train.json')
dfv=pd.read_json('something-something-v2-validation.json')

In [13]:
df.head(3)

Unnamed: 0,id,label,template,placeholders
0,78687,holding potato next to vicks vaporub bottle,Holding [something] next to [something],"[potato, vicks vaporub bottle]"
1,42326,spreading margarine onto bread,Spreading [something] onto [something],"[margarine, bread]"
2,100904,putting pen on a surface,Putting [something] on a surface,[pen]


In [14]:
dfhit=df[df['template']=='Hitting [something] with [something]']
dfnothit=df[df['template']!='Hitting [something] with [something]']
dfvhit=dfv[dfv['template']=='Hitting [something] with [something]']
dfvnothit=dfv[dfv['template']!='Hitting [something] with [something]']

In [15]:
dfhit.shape

(1738, 4)

In [16]:
dfvhit.shape

(235, 4)

### Generate Images from Video

In [17]:
from joblib import Parallel, delayed
from video_to_img import video_to_img
import numpy as np

#### Hit

In [18]:
num_processes = 12
_=Parallel(n_jobs=num_processes)(delayed(video_to_img)\
                               ('data/'+str(video_id)+'.webm','frames/hit/',is_merged=True,merge_size=3) for video_id in dfhit['id'])

#### Not Hit
Randomly choose same amount of video from dfnothit

In [19]:
num_processes = 12
_=Parallel(n_jobs=num_processes)(delayed(video_to_img)\
                               ('data/'+str(video_id)+'.webm','frames/nothit/',is_merged=True,merge_size=3)\
                                 for video_id in dfnothit.sample(n = dfhit.shape[0])['id'])

#### Validation

In [20]:
num_processes = 12
_=Parallel(n_jobs=num_processes)(delayed(video_to_img)\
                               ('data/'+str(video_id)+'.webm','frames/validation/hit/',is_merged=True,merge_size=3) for video_id in dfvhit['id'])

In [21]:
num_processes = 12
_=Parallel(n_jobs=num_processes)(delayed(video_to_img)\
                               ('data/'+str(video_id)+'.webm','frames/validation/nothit/',is_merged=True,merge_size=3)\
                                 for video_id in dfvnothit.sample(n = dfvhit.shape[0])['id'])

### Load and Organize Data

<b>Note:</b> due to limited memory on laptop, only part of data is used for training

In [14]:
from os import listdir
from os.path import isfile, join
import cv2
import numpy as np

In [12]:
frame_path='frames/'
hit_fnames=[join(frame_path+'hit/', f) for f in listdir(frame_path+'hit/') if isfile(join(frame_path+'hit/', f))]
nothit_fnames=[join(frame_path+'nothit/', f) for f in listdir(frame_path+'nothit/') if isfile(join(frame_path+'nothit/', f))]
fnames = hit_fnames+nothit_fnames

In [None]:
len(fnames)

In [None]:
frame_path_val='frames/validation/'
hitv_fnames=[join(frame_path_val+'hit/', f) for f in listdir(frame_path_val+'hit/') if isfile(join(frame_path_val+'hit/', f))]
nothitv_fnames=[join(frame_path_val+'nothit/', f) for f in listdir(frame_path_val+'nothit/') if isfile(join(frame_path_val+'nothit/', f))]
fnamesv = hitv_fnames+nothitv_fnames

In [None]:
len(fnamesv)

In [None]:
fnamesv[0]

#### Train

In [15]:
Xt=np.empty([30000,240,320,3],dtype='uint8')
i=0
for f in hit_fnames[0:15000]:
    Xt[i,:,:,:]=cv2.imread(f)
    i+=1
for f in nothit_fnames[0:15000]:
    Xt[i,:,:,:]=cv2.imread(f)
    i+=1

In [16]:
yt=[1]*15000+[0]*15000
yt=np.array(yt)

In [17]:
Xt.shape

(30000, 240, 320, 3)

In [18]:
Xv=np.empty([2000,240,320,3],dtype='uint8')
i=0
for f in hit_fnames[15000:16000]:
    Xv[i,:,:,:]=cv2.imread(f)
    i+=1
for f in nothit_fnames[15000:16000]:
    Xv[i,:,:,:]=cv2.imread(f)
    i+=1

In [19]:
yv=[1]*1000+[0]*1000
yv=np.array(yv)

In [33]:
cv2.imshow('qwe',Xt[29999,:,:,:])
cv2.waitKey(0)
cv2.destroyAllWindows()


In [34]:
#fnamesv=fnamesv[:100]
Xv=np.empty([len(fnames),240,320,3],dtype='uint8')
i=0
for f in fnames:
    Xt[i,:,:,:]=cv2.imread(f)
    i+=1
#     if Xt is None:
#         Xt=cv2.imread(f)
#     else:
#         print(Xt.shape)
#         print(cv2.imread(f).shape)
#         Xt=np.vstack((Xt,cv2.imread(f)))
    #images.append(cv2.imread(f))
#Xt=np.stack(images,axis=0)

MemoryError: Unable to allocate 28.7 GiB for an array with shape (133721, 240, 320, 3) and data type uint8

## Model Training

In [1]:
import matplotlib.pyplot as plt
import scipy
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from scipy import stats
import sklearn.metrics as metrices
import seaborn as sns

In [2]:
input_shape=(240, 320, 3)

In [9]:
baseModel = tf.keras.applications.ResNet50(weights="imagenet", include_top=False,
	input_tensor=layers.Input(shape=(240, 320, 3)))
# construct the head of the model that will be placed on top of the
# the base model
headModel = baseModel.output
headModel = layers.AveragePooling2D(pool_size=(7, 7))(headModel)
headModel = layers.Flatten(name="flatten")(headModel)
headModel = layers.Dense(512, activation="relu")(headModel)
headModel = layers.Dropout(0.5)(headModel)
headModel = layers.Dense(1, activation="softmax")(headModel)
# place the head FC model on top of the base model (this will become
# the actual model we will train)
model = tf.keras.models.Model(inputs=baseModel.input, outputs=headModel)
# loop over all layers in the base model and freeze them so they will
# *not* be updated during the training process
for layer in baseModel.layers:
	layer.trainable = False

In [46]:
def create_model():
    model1=models.Sequential()
    model1.add(layers.Conv2D(64,kernel_size=(3,3),strides=(2,2),activation='relu',input_shape=input_shape))
    model1.add(layers.MaxPooling2D((2, 2)))
    model1.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model1.add(layers.MaxPooling2D((2, 2)))
    model1.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model1.add(layers.MaxPooling2D((2, 2)))
    model1.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model1.add(layers.MaxPooling2D((2, 2)))
    model1.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model1.add(layers.MaxPooling2D((2, 2)))
    #model1.add(layers.Dropout(0.25))
    model1.add(layers.Flatten())
    model1.add(layers.Dense(128, activation='relu'))
    model1.add(layers.Dense(1, activation='softmax'))
    return model1

In [47]:
model=create_model()

In [20]:
opt = tf.keras.optimizers.SGD(clipnorm=1.)

model.compile(optimizer='adadelta',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(Xt, yt, epochs=75, 
                    validation_data=(Xv, yv))

Train on 30000 samples, validate on 2000 samples
Epoch 1/75
Epoch 2/75

ResourceExhaustedError: 2 root error(s) found.
  (0) Resource exhausted:   OOM when allocating tensor with shape[32,240,320,3] and type uint8 on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[{{node GatherV2}}]]
	 [[IteratorGetNext]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[IteratorGetNext/_4]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (1) Resource exhausted:   OOM when allocating tensor with shape[32,240,320,3] and type uint8 on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[{{node GatherV2}}]]
	 [[IteratorGetNext]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

0 successful operations.
0 derived errors ignored. [Op:__inference_distributed_function_41452]

Function call stack:
distributed_function -> distributed_function -> distributed_function -> distributed_function
