In [3]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import math
import random
from collections import defaultdict
import keras
from tsn import TSN

Using TensorFlow backend.


In [24]:
# from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())

In [5]:
#Create folder for storing the frames
 
splitfiledir = r"..\ucfTrainTestlist"
splitfile = "mytest.txt"
splitname = splitfile.split('.')[0]

In [8]:
def storeFramesAndFlows(framesdir,splitfiledir,splitfile,splitname):
    
    #Read splitfile
    lines = open(os.path.join(splitfiledir,splitfile),"r")
    for line in lines:
        arr = line.split(" ")
        vidclass = arr[1] 
        line = arr[0].split("/")
        action = line[0]
        filename = line[1]
#         if filename.split("_")[2] not in ('g08','g09'):
#             continue
        actionpath = os.path.join(framesdir,splitname,action)
        framepath = os.path.join(actionpath,filename,"frames")
        flowpath = os.path.join(actionpath,filename,"flows")
        
        #Create folder for Action
        if not os.path.exists(actionpath):
            os.mkdir(actionpath)
        
        #Create folder for videofile
        if not os.path.exists(os.path.join(actionpath,filename)):
            os.mkdir(os.path.join(actionpath,filename))
        
        #Create folder for frames
        if not os.path.exists(framepath):
            os.mkdir(framepath)
            
        #Create folder for flows
        if not os.path.exists(flowpath):
            os.mkdir(flowpath)

        #Read video and collect frames, flows
        vidcap = cv2.VideoCapture(os.path.join(path,action,filename))
        
#         print (int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)))
        count = 0 
        prevFrame = None
        nextFrame = None
        while True:
            success,image = vidcap.read()
            #Resize image to remain consistent with BN Inception model
            if not success:
                break
            image = cv2.resize(image,(224,224))
            frame = "frame_%d.jpg"%count
            flow_x = "flow_x_%d.jpg"%count
            flow_y = "flow_y_%d.jpg"%count
            framename = os.path.join(framepath,frame)
            flowname_x = os.path.join(flowpath,flow_x)
            flowname_y = os.path.join(flowpath,flow_y)
            if count == 0:
                prevFrame = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
                cv2.imwrite(framename,image)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                count += 1
                continue
            
            nextFrame = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
            cv2.imwrite(framename,image)
            optical_flow = cv2.optflow.DualTVL1OpticalFlow_create()
            flow = optical_flow.calc(prevFrame, nextFrame, None)
            prevFrame = nextFrame
            flow[...,0] = cv2.normalize(flow[...,0],None,0,255,cv2.NORM_MINMAX)
            flow[...,1] = cv2.normalize(flow[...,1],None,0,255,cv2.NORM_MINMAX)
            cv2.imwrite(flowname_x,flow[...,0])
            cv2.imwrite(flowname_y,flow[...,1])

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            count += 1
            
        filename = os.path.join(actionpath,filename,"info.txt")
        #Store the frames count in txt file
        rate = open(filename,"w")
        rate.write("frames:"+str(count))
        rate.write("\n")
        rate.write("class:"+vidclass)
        rate.close()

        #Close the video object
        vidcap.release()

    print ("Stored")

In [6]:
#Store Frames
framesdir = r"../FramesFlows"

# path = r"../../UCF-101"
path = r"E:\capstone_adbi_data\UCF-101"

if not os.path.exists(os.path.join(framesdir,splitname)):
    os.mkdir(os.path.join(framesdir,splitname))
    storeFramesAndFlows(framesdir,splitfiledir,splitfile,splitname)
else:
    print ("Frames stored already!")

Frames stored already!


In [10]:
myFrameNumber = 50
cap = cv2.VideoCapture(os.path.join(path,"ApplyEyeMakeup","v_ApplyEyeMakeup_g01_c01.avi"))

# get total number of frames
totalFrames = cap.get(cv2.CAP_PROP_FRAME_COUNT)

# check for valid frame number
if myFrameNumber >= 0 & myFrameNumber <= totalFrames:
    # set frame position
    cap.set(cv2.CAP_PROP_POS_FRAMES,myFrameNumber)

_, frame = cap.read()
frame = cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
print (frame.shape)

(240, 320)


In [29]:
def getFF(classpath):
    frames = list()
    xflows = list()
    yflows = list()
    #Stack up frames and flows
    min_frames = float('inf')
    min_flows = float('inf')
    y_true = None
    for folder in os.listdir(classpath):
        folderpath = os.path.join(classpath,folder)
        # infopath = folderpath\info.txt
        infopath = os.path.join(folderpath,"info.txt")
        # Read info.txt for extracting class
        f = open(infopath,"r")
        y_true = int(f.readlines()[1].strip().split(':')[1])

        # Collect frames
        imgpath = os.path.join(folderpath,"frames")
        flowspath = os.path.join(folderpath,"flows")

        allframes = os.listdir(imgpath)
        allflows = os.listdir(flowspath)
        min_frames = min(len(allframes),min_frames)

        #Sort frames sequentially
        allframes.sort(key = lambda x: int(x.split('_')[1].split('.')[0]))

        #Sort flows sequentially
        allxflows = list(filter(lambda k: k.split('_')[1] == 'x',allflows))
        allyflows = list(filter(lambda k: k.split('_')[1] == 'y',allflows))
        allxflows.sort(key = lambda x: int(x.split('_')[2].split('.')[0]))
        allyflows.sort(key = lambda x: int(x.split('_')[2].split('.')[0]))
        min_flows = min(len(allxflows),min_flows)

        stack = list()
        for frame in allframes:
            img = cv2.imread(os.path.join(imgpath,frame))
            grayimg = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
            grayimg = np.expand_dims(grayimg,axis = 2)
            stack.append(grayimg)
        frames.append(np.array(stack))
        
        
        #Collect flows
        xstack = list()
        ystack = list()
        for xflow,yflow in zip(allxflows,allyflows):
            xfl = cv2.imread(os.path.join(flowspath,xflow))
            yfl = cv2.imread(os.path.join(flowspath,yflow))
            grayxfl = cv2.cvtColor(xfl,cv2.COLOR_BGR2GRAY)
            grayyfl = cv2.cvtColor(yfl,cv2.COLOR_BGR2GRAY)
            grayxfl = np.expand_dims(grayxfl,axis = 2)
            grayyfl = np.expand_dims(grayyfl,axis = 2)
            xstack.append(grayxfl)
            ystack.append(grayyfl)
        xflows.append(np.array(xstack))
        yflows.append(np.array(ystack))

    return frames, xflows, yflows, y_true


In [None]:
dfpath = r"../FramesFlows/trainlist01"
frames_dict = dict()
xflows_dict = dict()
yflows_dict = dict()
for each in os.listdir(dfpath):
    classpath = os.path.join(dfpath,each)
    frames, xflows, yflows, class_name = getFF(classpath)
    frames_dict[class_name] = frames
    xflows_dict[class_name] = xflows
    yflows_dict[class_name] = yflows


In [13]:
#Dictionary key is the class
#Second index represents video number in that class (starting from 0)
#Shape shows NO_OF_FRAMES*HEIGHT_OF_IMAGE*WIDTH_OF_IMAGE
frames_dict[2][0].shape

(149, 224, 224, 1)

In [10]:
#Decided to keep number of segments as 3 for now (consistent with the paper)
segments = 3

In [14]:
def one_hot_encode(data, classes = 101):
    """
    :param data: data to be one hot encoded
    :return: np array with one hot encoding
    """
    labels = np.zeros((data.size, classes))
    labels[np.arange(data.size), data - 1] = 1
    return labels

def getSegments(frames_dict,segments=3):
    #Get k(equal to segments) random samples(snippets) from frames
    framesSegments = defaultdict(list)
    ySegments = defaultdict(list)
    np.random.seed(0)
    for class_name in frames_dict.keys():
        videos = frames_dict[class_name]
        for video in videos:
            vsegs = np.array_split(video,segments)
            for x in range(len(vsegs)):
                idx = np.random.randint(vsegs[x].shape[0])
                #Append random snippet to segments dictionary
                framesSegments[x].append(vsegs[x][idx])
                ySegments[x].append(class_name)
    
    #One hot encoding
    for x in range(segments):
        ySegments[x] = one_hot_encode(np.array(ySegments[x]))

    return framesSegments, ySegments

In [15]:
framesSegments, yFramesTrueSegments = getSegments(frames_dict)
xFlowsSegments, yFlowsTrueSegments = getSegments(xflows_dict)
yFlowsSegments, yFlowsTrueSegments = getSegments(yflows_dict)

In [16]:
#key is segment number(0,1...segments) and second index in the image
framesSegments[0][1].shape
#key is segment number(0,1,...segments) and second index returns one hot encoded class for the respective image
# yFramesTrueSegments[2][4]

(224, 224, 1)

In [17]:
def getMixedSegments(x,y):
    final = dict()
    for key in x.keys():
        mylist = list()
        for a,b in zip(x[key],y[key]):
            new = np.stack((a,b),axis = 2)
            new = np.squeeze(new,axis = 3)
            mylist.append(new)
        final[key] = mylist
    return final

In [18]:
flows = getMixedSegments(xFlowsSegments, yFlowsSegments)

In [19]:
#We need separate networks (equal to segments) working in parallel 
#Parameters for BN-Inception network
#We need Inputs: a tensor of shape [batch_size, height, width, channels].
imageHeight = 224
imageWidth = 224
colorChannels = 1
num_classes = 101
learning_rate = 0.001
# X = tf.placeholder(tf.float32,(None,imageHeight,imageWidth,colorChannels))
# y = tf.placeholder(tf.float32,(None,num_classes))

In [38]:
class dataGenerator(keras.utils.Sequence):

    def __init__(self, listpath, ffpath, batch_size, ftype = "frames", segments = 3, test=False):
        self.filenames = list()
        self.labels = list()
        self.listpath = listpath
        self.batch_size = batch_size
        self.folderpath = ffpath
        self.ftype = ftype
        self.segments = segments
        
        with open(self.listpath,"r") as f:
            for line in f.readlines():
                arr = line.split(" ")
                self.filenames.append(arr[0])
                self.labels.append(int(arr[1].strip()))
        
    def __len__(self):
        return len(self.image_filenames)//self.batch_size

    def __getitem__(self, idx):
        batch_x = self.filenames[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size:(idx + 1) * self.batch_size]
        
        if self.ftype == "frames":
            X = defaultdict(list)
            Y = defaultdict(list)
            for index,each in enumerate(batch_x):
                infopath = os.path.join(self.folderpath,each,"info.txt")
                imgpath = os.path.join(self.folderpath,each,self.ftype)
                f = open(infopath,"r")
                total_frames = int(f.readlines()[0].strip().split(':')[1])
                f.close()
                idxs = []
                base = total_frames//self.segments
                low = 0
                for _ in range(segments):
                    high = min(low + base, total_frames)
                    idxs.append(np.random.randint(low, high,1)[0])
                    low = high + 1 
                frames = getFrames(idxs, imgpath)
                for i in range(segments):
                    X[i].append(frames[i])
                    Y[i].append(batch_y[index])
            
        return [np.array(X[key]) for key in X.keys()],[np.array(Y[key]) for key in Y.keys()]
    
    
    def getFrames(self,idxs, imgpath):

        stack = list()
        for i in idxs:
            framename = "frame_"+str(i)+".jpg"
            img = cv2.imread(os.path.join(imgpath,framename))
            grayimg = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
            grayimg = np.expand_dims(grayimg,axis = 2)
            stack.append(grayimg)
            
        return np.array(stack)

In [39]:
listpath = "../ucfTrainTestlist"
ffpath = "../FramesFlows"
filename = "custom.txt"
dg = dataGenerator(os.path.join(folderpath,filename),,128)

In [40]:
x = dg.__getitem__(0)

FileNotFoundError: [Errno 2] No such file or directory: '../ucfTrainTestlist\\custom.txt\\ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01.avi\\info.txt'

In [22]:
#COMPILE MODEL
model = TSN()

Instructions for updating:
Colocations handled automatically by placer.


In [25]:
model.compile(optimizer= keras.optimizers.Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [26]:
model.fit([np.array(framesSegments[0]),np.array(framesSegments[1]),np.array(framesSegments[2]),np.array(flows[0]),np.array(flows[1]),np.array(flows[2])],yFramesTrueSegments[0],batch_size = 3, epochs = 1)

Instructions for updating:
Use tf.cast instead.
Epoch 1/1


KeyboardInterrupt: 