## dependencies

In [10]:
import matplotlib.pyplot as plt
from skimage import data
from skimage import filters
from skimage import exposure
from skimage import io
from skimage import color
from skimage import morphology
from skimage import measure
from scipy import ndimage

import gc

import time

import numpy as np
import cv2
import ntpath

from os import listdir
import os
from os.path import isfile, isdir, join

## constants & one-time initialisation

In [11]:
# Side length of the square structuring element passed to
# morphology.binary_erosion in Frame.computeMasks.
EROSION_SIZE = 15
# Side length of the square structuring element passed to
# morphology.binary_dilation in Frame.computeMasks.
DILATION_SIZE = 15

In [12]:
# Shared SIFT detector/extractor, created once and reused for every frame.
# OpenCV >= 4.4 exposes SIFT in the main module; older builds only provide it
# through the contrib `xfeatures2d` module, so fall back to that when needed.
if hasattr(cv2, 'SIFT_create'):
    sift = cv2.SIFT_create()
else:
    sift = cv2.xfeatures2d.SIFT_create()
# Shared brute-force descriptor matcher (default settings) used by similaritySIFT.
bruteForceMatcher = cv2.BFMatcher()

## Classes

In [13]:
class Frame:
    """A single image (recording frame or reference scene) plus derived data.

    On construction the image is loaded twice (scikit-image and OpenCV), a
    greyscale copy is derived and the comparison mask is computed. SIFT
    keypoints and descriptors are computed only on demand through
    ``computeSIFTDescriptors``.

    Attributes set by ``__init__``:
        filePath, fileName   -- location and base name of the image file
        imageSkimage, grey   -- scikit-image bitmap and its greyscale version
        imageCV              -- the same file as loaded by OpenCV
        maskFinger           -- int mask of the largest bright component
        maskScreen           -- currently always None (see computeMasks)
        kp, desc             -- SIFT keypoints/descriptors, None until computed
        group                -- frames that were grouped under this frame
        assignedScene        -- scene assigned to this frame, None until set
        thisSceneNumber      -- scene number if this frame is itself a scene
    """

    def __init__(self, filePath):
        """Load the image stored at ``filePath`` and compute its masks."""
        # load bitmaps
        self.filePath = filePath
        self.fileName = ntpath.basename(filePath)
        self.imageSkimage = io.imread(filePath)
        # `rgb2gray` is the canonical spelling; the `rgb2grey` alias is
        # deprecated and missing from recent scikit-image releases.
        self.grey = color.rgb2gray(self.imageSkimage)
        self.imageCV = cv2.imread(filePath)
        self.group = []

        # compute masks
        finger_mask, screen_mask = self.computeMasks()
        self.maskFinger = finger_mask
        self.maskScreen = screen_mask

        self.kp = None  # compute these when necessary
        self.desc = None  # compute these when necessary

        self.assignedScene = None

        self.thisSceneNumber = None

    def dropWeight(self):
        """Release the bitmaps and masks so their memory can be reclaimed.

        Keypoints, descriptors, file names and scene assignments are kept.
        """
        self.imageSkimage = None
        self.grey = None
        self.imageCV = None
        self.maskFinger = None
        self.maskScreen = None
        gc.collect()

    def computeMasks(self):
        """Locate the area usable for image comparison.

        The greyscale image is thresholded with Otsu's method, dilated and
        eroded with square structuring elements, and reduced to its largest
        connected component with holes filled.

        Returns:
            (finger_mask, screen_mask): ``finger_mask`` is an int array with
            1 inside the selected component and 0 elsewhere (all zeros when no
            component is found). ``screen_mask`` is always None because the
            convex-hull computation it would require takes too much time.
        """
        # Otsu
        otsu_tresh = filters.threshold_otsu(self.grey)
        mask = (self.grey > otsu_tresh)

        # Dilation followed by erosion
        mask = morphology.binary_dilation(mask, morphology.square(DILATION_SIZE)).astype(int)
        mask = morphology.binary_erosion(mask, morphology.square(EROSION_SIZE))

        # Select largest component (screen)
        labelled = measure.label(mask)
        regions = measure.regionprops(labelled)
        if regions:
            largest_region_props = max(regions, key=lambda r: r.area)
            finger_mask = (labelled == largest_region_props.label).astype(int)
            finger_mask = ndimage.binary_fill_holes(finger_mask).astype(int)
        else:
            # No foreground component at all: max() over an empty list would
            # raise ValueError, so return an empty mask instead.
            finger_mask = np.zeros(labelled.shape, dtype=int)

        # Get final masks (ignore fingers as much as possible).
        # morphology.convex_hull_image(mask) would give the screen mask but
        # takes too much time, so it is left unset.
        screen_mask = None

        return finger_mask, screen_mask

    def computeSIFTDescriptors(self):
        """Compute SIFT keypoints/descriptors inside ``maskFinger``.

        The result is stored in ``self.kp`` / ``self.desc`` and also returned.
        Requires the bitmaps and mask to be present, i.e. it must be called
        before ``dropWeight``.
        """
        gray = cv2.cvtColor(self.imageCV, cv2.COLOR_BGR2GRAY)
        self.kp, self.desc = sift.detectAndCompute(gray, mask=self.maskFinger.astype(np.uint8))
        return self.kp, self.desc

    def displayMasks(self):
        """Plot the source image, finger mask and screen mask side by side.

        Each item keeps its fixed position in the 1x3 grid; items that are
        None (``maskScreen`` always is, and everything is after
        ``dropWeight``) are skipped instead of being handed to ``imshow``.
        """
        panels = (self.imageSkimage, self.maskFinger, self.maskScreen)
        plt.figure(figsize=(9, 4))
        for position, panel in enumerate(panels, start=1):
            if panel is None:
                continue
            plt.subplot(1, 3, position)
            plt.imshow(panel, cmap='gray', interpolation='nearest')
            plt.axis('off')

        plt.tight_layout()
        plt.show()

        
class EmptyFrame(Frame):
    """Placeholder frame standing for "no scene" (scene number 0).

    ``Frame.__init__`` is intentionally not called because it loads an image
    from disk. Every attribute a regular Frame carries is still initialised
    to a neutral value, so code that handles frames generically does not hit
    an AttributeError on the placeholder.
    """

    def __init__(self):
        self.filePath = None
        self.fileName = 'none'
        self.imageSkimage = None
        self.grey = None
        self.imageCV = None
        self.group = []
        self.maskFinger = None
        self.maskScreen = None
        self.kp = None
        self.desc = None
        self.assignedScene = None
        self.thisSceneNumber = 0

# Shared instance assigned to frames that match none of the scenes.
emptyFrame = EmptyFrame()

## routines

In [14]:
def similaritySIFT(frame, scene):
    """Return the share of ``frame`` descriptors that match ``scene`` well.

    Each descriptor of ``frame`` is matched against its two nearest
    descriptors in ``scene``; a match counts as good when the nearest one is
    closer than 0.75 times the second nearest.

    Args:
        frame: object whose ``desc`` holds the query descriptors.
        scene: object whose ``desc`` holds the descriptors to match against.

    Returns:
        good / total as a float in [0, 1]; 0.0 when either side has no
        descriptors or the matcher returns no matches.
    """
    # Without descriptors on both sides there is nothing to match (desc may
    # be None or empty when no keypoints were found inside the mask).
    if frame.desc is None or scene.desc is None:
        return 0.0
    if len(frame.desc) == 0 or len(scene.desc) == 0:
        return 0.0

    matches = bruteForceMatcher.knnMatch(frame.desc, scene.desc, k=2)
    total = 0
    good = 0
    for candidates in matches:
        total += 1
        # With fewer than two neighbours the ratio test cannot be evaluated;
        # count the descriptor but never as a good match.
        if len(candidates) < 2:
            continue
        nearest, second = candidates[0], candidates[1]
        if nearest.distance < 0.75*second.distance:
            good += 1
    return good/total if total else 0.0

In [None]:
def similaritySSIM(frame, scene):
    """Return the SSIM score between ``frame`` and ``scene`` under one mask.

    Both greyscale images are multiplied by the mask of ``frame`` before
    being compared with ``measure.compare_ssim``.
    """
    # The frame's mask is applied to both images on purpose, so that the
    # same pixels are compared on each side.
    comparison_mask = frame.maskFinger
    frame_pixels = frame.grey * comparison_mask
    scene_pixels = scene.grey * comparison_mask
    return measure.compare_ssim(frame_pixels, scene_pixels, None)

# Main experiment

## Scene occurrence computation script:

In [None]:
# SELECT EXPERIMENT SAMPLE
#sample_path = 'experiment-samples/extra-small'
#sample_path = 'experiment-samples/trivial'
sample_path = 'experiment-samples-nogit/medium'


# LOAD AND PREPROCESS SCENES (defined by the experiment analyst)
# Each scene image is named after its scene number (e.g. "3.png").

scenes_path = sample_path + '/scenes'
# listdir() returns entries in arbitrary order; sort for reproducible runs.
scene_files = sorted((scenes_path+'/'+f) for f in listdir(scenes_path) if isfile(join(scenes_path, f)))
scenes = []
for path in scene_files:
    sceneFrame = Frame(path)
    sceneFrame.computeSIFTDescriptors()
    sceneFrame.thisSceneNumber = int(os.path.splitext(sceneFrame.fileName)[0])
    print('loaded scene '+str(sceneFrame.thisSceneNumber))
    scenes.append(sceneFrame)

# PROCESS EACH RECORDING
recordings_path = sample_path + '/recordings'
recordings = sorted(d for d in listdir(recordings_path) if isdir(join(recordings_path, d)))
recordingsResults = []  # one list of Frame objects per recording
for recording in recordings:
    recording_path = recordings_path + '/' + recording
    # Grouping and orphan filtering compare each frame with its neighbours, so
    # frames must be processed in temporal order. listdir() does not guarantee
    # any order; sorting by name assumes zero-padded frame numbers -- TODO confirm.
    frame_files = sorted((recording_path+'/'+f) for f in listdir(recording_path) if isfile(join(recording_path, f)))
    frames = []
    previousFrame = None  # head of the current group of near-identical frames
    recordingsResults.append(frames)
    # PROCESS EACH FRAME
    for path in frame_files:
        #PREPROCESS FRAME
        frame = Frame(path) # Preprocessing takes place here
        print('loaded frame ' + frame.fileName)
        frames.append(frame)

        # FRAME GROUPING: a frame nearly identical to the current group head
        # inherits the head's scene and skips the expensive scene matching.
        if previousFrame is not None:
            simSSIM = similaritySSIM(frame, previousFrame)
            if simSSIM > 0.95:
                frame.dropWeight() # drop bitmaps, memory preservation
                frame.assignedScene = previousFrame.assignedScene
                previousFrame.group.append(frame)
                #purposefully, we keep the previous frame at the head of the group
                continue

        # COMPUTE SIMILARITY AND ASSIGN SCENE
        bestSum = 0
        bestScene = emptyFrame
        frame.computeSIFTDescriptors()
        for scene in scenes:
            simSIFT = similaritySIFT(frame, scene)
            simSSIM = similaritySSIM(frame, scene)
            combined = simSIFT + simSSIM
            # A scene is a candidate only if it is convincing enough
            # (combined score above 1.15 or SIFT score above 0.45); among the
            # candidates the one with the highest combined score wins.
            if bestSum < combined and (1.15 < combined or 0.45 < simSIFT):
                bestSum = combined
                bestScene = scene
        frame.assignedScene = bestScene

        # Tidy up (preventing memory leaks) by removing unnecessary blobs
        if previousFrame is not None:
            previousFrame.dropWeight()

        # very last thing before we proceed to next frame
        previousFrame = frame

    # The last group head is never followed by a new head, so its bitmaps
    # would otherwise stay in memory for as long as the results are kept.
    if previousFrame is not None:
        previousFrame.dropWeight()

    # FILTER ORPHANS
    # Uses the already-corrected assignment of the preceding frame, because
    # frames are updated in place while iterating.
    for frameNumber, currentFrame in enumerate(frames):
        # the first and the last frame lack one of their neighbours
        if frameNumber == 0 or frameNumber == len(frames)-1:
            continue
        previousScene = frames[frameNumber-1].assignedScene
        currentScene  = currentFrame.assignedScene
        nextScene     = frames[frameNumber+1].assignedScene
        # Rule 1: a frame between two frames of the same scene joins that scene
        if (currentScene is not previousScene) and (previousScene is nextScene):
            currentFrame.assignedScene = previousScene
            print('Rule 1 applied on frame '+str(frameNumber))
            continue
        # Rule 2: a scene seen on a single frame only is treated as no scene
        if (currentScene is not previousScene) and (currentScene is not nextScene) and (currentScene is not emptyFrame):
            currentFrame.assignedScene = emptyFrame
            print('Rule 2 applied on frame '+str(frameNumber))
    


loaded scene 1
loaded scene 10
loaded scene 11
loaded scene 12
loaded scene 2
loaded scene 3
loaded scene 4
loaded scene 5
loaded scene 6
loaded scene 7
loaded scene 8
loaded scene 9
loaded frame pabk_01_01000.png
loaded frame pabk_01_01001.png
loaded frame pabk_01_01002.png
loaded frame pabk_01_01003.png
loaded frame pabk_01_01004.png
loaded frame pabk_01_01005.png
loaded frame pabk_01_01006.png


## Evaluation sequence

In [None]:
import pandas as pd

# Load the ground truth: a mapping from frame file name to scene number,
# with the label 'none' stored as scene 0.
# `sheet_name` is the current keyword; the older `sheetname` spelling is no
# longer accepted by pandas.
labels = pd.read_excel(sample_path+'/labels.xlsx', sheet_name='labels')
labels['scene'] = pd.to_numeric(labels['scene'].replace('none','0'))
labels = labels.set_index('frame')
labels = (labels.to_dict())['scene']

# Count frames whose assigned scene number equals the labelled one.
totalCount = 0
totalCorrect = 0
for frames in recordingsResults:
    for frame in frames:
        totalCount += 1
        # a frame without any assignment counts as incorrect
        if frame.assignedScene is None:
            continue
        if frame.assignedScene.thisSceneNumber == labels[frame.fileName]:
            totalCorrect += 1

# An empty sample would otherwise raise ZeroDivisionError.
totalAccuracy = totalCorrect / totalCount if totalCount else 0.0

print('total:    ' + str(totalCount))
print('correct:  ' + str(totalCorrect))
print('accuracy: ' + str(totalAccuracy))
