In [1]:
import numpy as np
import cv2 as cv
import datetime
from sklearn import cluster,datasets
from scipy.stats import multivariate_normal
from sklearn.cluster import KMeans
from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import random
from collections import namedtuple, Counter

In [2]:
def file_reading(file_path):
    """Read the annotation file listing abnormal time ranges.

    Every non-blank line must have the form ``<start>-<end>``; whitespace
    around the line and around each side of the dash is stripped.

    Parameters
    ----------
    file_path : str
        Path of the annotation text file.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(stimes, etimes)``: start strings and end strings in file order.

    Raises
    ------
    ValueError
        If a non-blank line does not split into exactly two parts on ``-``.
    """
    stimes=[]
    etimes=[]
    # `with` closes the handle even when a malformed line raises below
    with open(file_path,"r") as ann_file:
        for raw_line in ann_file:
            line = raw_line.strip()
            if not line:
                # blank lines (e.g. a trailing newline at end of file) carry no range
                continue
            s1,s2=line.split("-")
            stimes.append(s1.strip())
            etimes.append(s2.strip())
    return stimes,etimes

In [3]:
def compare(stime,etime,abnormal_stimes,abnormal_etimes):
    """Tell whether the window [stime, etime] touches any annotated range.

    Parameters
    ----------
    stime, etime : datetime.timedelta
        Start and end of the window being checked.
    abnormal_stimes, abnormal_etimes : sequence of str
        Parallel sequences of ``MM:SS`` strings giving each annotated range.

    Returns
    -------
    int
        ``1`` as soon as one annotated range matches, otherwise ``-1``.
    """
    def _to_delta(text):
        # parse "MM:SS" and re-express it as an offset from 00:00
        clock = datetime.strptime(text, '%M:%S').time()
        return timedelta(hours=clock.hour, minutes=clock.minute, seconds=clock.second)

    for idx, start_text in enumerate(abnormal_stimes):
        lower = _to_delta(start_text)
        upper = _to_delta(abnormal_etimes[idx])

        lies_within = stime >= lower and etime <= upper    # window inside the range
        spans_lower = stime < lower and etime > lower      # window crosses the range start
        spans_upper = stime < upper and etime > upper      # window crosses the range end
        if lies_within or spans_lower or spans_upper:
            return 1
    return -1

In [4]:
class labeling_objects:
    """Plain record holding a clip number, its time window and its label."""

    def __init__(self,clip_no,stime,etime,label):
        # store the four constructor arguments unchanged under the same names
        self.clip_no, self.stime = clip_no, stime
        self.etime, self.label = etime, label

In [5]:
def video_input(path,ann_file_path):
    """Cut a video into clips of optical-flow magnitudes and label every clip.

    Each frame is converted to grayscale and resized to a tenth of its width
    and height; dense Farneback optical flow is computed between consecutive
    frames. A clip is closed on every 50th read, so the first clip holds 49
    flow fields and later full clips hold 50; whatever remains when the video
    ends becomes a final, shorter clip.

    Each closed clip is labelled with ``compare`` over a 2-second window
    (presumably 50 frames at 25 fps -- TODO confirm against the video); the
    trailing partial clip is given a 1-second window.

    Parameters
    ----------
    path : str
        Path of the video file handed to ``cv.VideoCapture``.
    ann_file_path : str
        Path of the annotation file parsed by ``file_reading``.

    Returns
    -------
    tuple
        ``(mag_arr_all_clips, label_objects_array)`` where the first item is a
        1-D object ndarray with one ``(frames_in_clip, H, W)`` magnitude array
        per clip, and the second is the list of ``labeling_objects``, one per
        clip, in the same order.

    Raises
    ------
    IOError
        If no frame at all can be read from ``path``.
    """
    abnormal_stimes,abnormal_etimes = file_reading(ann_file_path)

    cap = cv.VideoCapture(path)
    try:
        ret, frame1 = cap.read()
        if not ret:
            # without this guard cvtColor would be handed None
            raise IOError("could not read the first frame of video: " + str(path))

        #reading first frame
        prvs_gray = cv.cvtColor(frame1,cv.COLOR_BGR2GRAY)
        width=prvs_gray.shape[1]
        height = prvs_gray.shape[0]
        small_size = (int(width/10),int(height/10))

        prvs = cv.resize(prvs_gray,small_size,interpolation = cv.INTER_AREA)

        #intializing all values
        cnt=0
        flow_array = []
        flow_array_array = [] #for storing all clips
        secs = 0
        clip = 0
        label_objects_array = []

        while True:
            ret, frame2 = cap.read()
            cnt=cnt+1
            # the clip is closed before `ret` is checked, so a read that fails
            # exactly on a multiple of 50 still flushes the flows gathered so far
            if cnt%50 == 0:
                flow_array_array.append(flow_array)
                flow_array=[]
                clip+=1
                #adding labels here after one clip is recorded.
                secs = secs+2
                stime = timedelta(seconds = secs-2)
                etime = timedelta(seconds = secs)
                label = compare(stime,etime,abnormal_stimes,abnormal_etimes)
                label_objects_array.append(labeling_objects(clip,stime,etime,label))

            if not ret:
                break

            #converting frame into gray and resizing
            gray2 = cv.cvtColor(frame2,cv.COLOR_BGR2GRAY)
            next_small = cv.resize(gray2,small_size,interpolation = cv.INTER_AREA)

            #calculating optical flow giving two consecutive frames as input
            flow = cv.calcOpticalFlowFarneback(prvs,next_small, None, 0.5, 3, 15, 3, 5, 1.2, 0)

            # appending to the flow array
            flow_array.append(flow)

            #changing current frame as previous frame
            prvs = next_small
    finally:
        # always hand the capture back, including when a frame fails to decode
        cap.release()

    if len(flow_array)!= 0:
        # leftover flows that did not fill a whole clip
        flow_array_array.append(flow_array)
        clip+=1
        secs = secs+1
        stime = timedelta(seconds = secs-1)
        etime = timedelta(seconds = secs)
        label = compare(stime,etime,abnormal_stimes,abnormal_etimes)
        label_objects_array.append(labeling_objects(clip,stime,etime,label))
    flow_array_length = len(flow_array_array)
    print("Total no of clips",flow_array_length)
    print("total frames",cnt)

    # Clips differ in length, so np.asarray on the list would build a ragged
    # array (an error on recent NumPy). Fill an object array slot by slot.
    mag_arr_all_clips = np.empty(flow_array_length, dtype=object)
    for j, clip_flows in enumerate(flow_array_array):
        # cartToPolar returns (magnitude, angle); only the magnitude is kept
        mag_arr_all_clips[j] = np.asarray(
            [cv.cartToPolar(flow[...,0],flow[...,1])[0] for flow in clip_flows]
        )
    return mag_arr_all_clips,label_objects_array

In [6]:
#Function for distribution formation from the mag array of a clip
def func_distribution_formation(arr):
    """Turn a stack of 2-D frames into one value series per pixel.

    Parameters
    ----------
    arr : sequence of 2-D arrays (or a 3-D array)
        ``T`` frames that all share the shape ``(height, width)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(height * width, T)``. Row ``r * width + c`` holds
        the values of pixel ``(r, c)`` across the frames, in frame order.

    Raises
    ------
    IndexError
        If ``arr`` contains no frames.
    """
    height,width=arr[0].shape
    frames = np.asarray(arr)
    # (T, H, W) -> (T, H*W) -> (H*W, T); this replaces a Python loop over every
    # pixel and frame. copy() returns an independent, C-contiguous array.
    return frames.reshape(len(frames), int(height)*int(width)).T.copy()

In [7]:
class data_objects:
    """Plain record holding a clip number, its time window, its weights and its label."""

    def __init__(self,clip_no,stime,etime,weights_data,label):
        # store the five constructor arguments unchanged under the same names
        self.clip_no, self.stime, self.etime = clip_no, stime, etime
        self.weights_data, self.label = weights_data, label

In [8]:
"""
#Function to run video input,distribution formation and input to GMM and get updated weights as output
def run(path,ann_file_path):
    mag_arr_all_clips,loa = video_input(path,ann_file_path)
    full_dist=[]
    for index in range(len(mag_arr_all_clips)):
        dist_arr = func_distribution_formation(mag_arr_all_clips[index])
        full_dist.append(dist_arr)
    return full_dist
"""

'\n#Function to run video input,distribution formation and input to GMM and get updated weights as output\ndef run(path,ann_file_path):\n    mag_arr_all_clips,loa = video_input(path,ann_file_path)\n    full_dist=[]\n    for index in range(len(mag_arr_all_clips)):\n        dist_arr = func_distribution_formation(mag_arr_all_clips[index])\n        full_dist.append(dist_arr)\n    return full_dist\n'

In [9]:
class MMGibbsSampling:
    """Gibbs sampler for a Gaussian mixture with one fixed variance for all clusters.

    State kept on the instance:
      * ``assignment``    -- current cluster id of every data point
      * ``pi``            -- current mixture weights
      * ``cluster_means`` -- current mean of every cluster
      * ``suffstats``     -- per-cluster ``(theta, N)``: mean and count of the
                             points currently assigned to the cluster

    One sweep is ``update_assignment()``, ``update_mixture_weights()``,
    ``update_cluster_means()`` in that order.

    Parameters
    ----------
    data : sequence of float
        Observations to cluster.
    n_clusters : int
        Number of mixture components.
    weights : sequence of float
        Initial mixture weights, one per cluster.
    means : sequence of float
        Initial cluster means, one per cluster.
    alpha : float, default 1.0
        Total concentration of the symmetric Dirichlet prior on the weights.
    """

    # (theta, N): mean of the points assigned to a cluster, and how many there are
    _SuffStat = namedtuple('SuffStat', 'theta N')

    def __init__(self,data,n_clusters,weights,means,alpha=1.0):
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.data = data
        self.cluster_ids = range(n_clusters)
        self.cluster_variance = 4.0   # shared likelihood variance
        self.hyper_mean = 0           # prior mean of every cluster mean
        self.hyper_variance = 1       # prior variance of every cluster mean
        self.pi = weights
        self.cluster_means = means
        self.suffstats = [None for i in range(n_clusters)]
        # start from a uniformly random assignment
        self.assignment = [random.choice(self.cluster_ids) for _ in data]
        self.update_suffstats()

    def update_suffstats(self):
        """Recompute ``(theta, N)`` for every cluster from the current assignment.

        Clusters with no points get ``(0.0, 0)`` rather than keeping ``None``
        or a stale entry from an earlier sweep.
        """
        values = np.asarray(self.data)
        labels = np.asarray(self.assignment)
        for cluster_id in self.cluster_ids:
            members = values[labels == cluster_id]
            count = len(members)
            # theta is multiplied by N wherever it is used, so 0.0 is inert when N == 0
            theta = members.mean() if count else 0.0
            self.suffstats[cluster_id] = self._SuffStat(theta, count)

    def log_assignment_score(self,data_id, cluster_id):
        """Return ``log pi[cluster] + log N(x | mean[cluster], cluster_variance)``."""
        x = self.data[data_id]
        theta = self.cluster_means[cluster_id]
        var = self.cluster_variance
        log_pi = np.log(self.pi[cluster_id])
        # scipy's `scale` is a standard deviation, hence the square root
        return log_pi + stats.norm.logpdf(x, theta, np.sqrt(var))

    def assigment_probs(self,data_id):
        """Return the normalised cluster probabilities for one data point."""
        scores = np.array([self.log_assignment_score(data_id,cid) for cid in self.cluster_ids])
        # subtract the max before exponentiating so that very negative log
        # scores do not all underflow to 0 and produce 0/0
        scores = np.exp(scores - scores.max())
        return scores / scores.sum()


    def sample_assignment(self,data_id):
        """Draw a cluster id for one data point from ``assigment_probs``."""
        p = self.assigment_probs(data_id)
        return np.random.choice(self.cluster_ids, p=p)


    def update_assignment(self):
        """Resample the cluster of every data point, then refresh ``suffstats``."""
        for data_id, _ in enumerate(self.data):
            self.assignment[data_id] = self.sample_assignment(data_id)
        self.update_suffstats()

    def sample_mixture_weights(self):
        """Draw mixture weights from Dirichlet(N_k + alpha / n_clusters)."""
        ss = self.suffstats
        concentration = [ss[cid].N + self.alpha / self.n_clusters for cid in self.cluster_ids]
        return stats.dirichlet(concentration).rvs(size=1).flatten()

    def update_mixture_weights(self):
        """Replace ``pi`` with a fresh draw from ``sample_mixture_weights``."""
        self.pi = self.sample_mixture_weights()

    def sample_cluster_mean(self,cluster_id):
        """Draw one cluster mean from its Normal posterior (Normal prior, known variance)."""
        cluster_var = self.cluster_variance
        hp_mean = self.hyper_mean
        hp_var = self.hyper_variance
        ss = self.suffstats[cluster_id]

        # precision-weighted blend of the prior mean and the cluster's data mean
        numerator = hp_mean / hp_var + ss.theta * ss.N / cluster_var
        denominator = (1.0 / hp_var + ss.N / cluster_var)
        posterior_mu = numerator / denominator
        posterior_var = 1.0 / denominator
        return stats.norm(posterior_mu, np.sqrt(posterior_var)).rvs()

    def update_cluster_means(self):
        """Resample the mean of every cluster."""
        self.cluster_means = [self.sample_cluster_mean(cid)
                                    for cid in self.cluster_ids]

In [10]:
#Function to run video input,distribution formation and input to GMM and get updated weights as output
def run(path,ann_file_path,n_components=4,n_sweeps=3,background_weight=0.1):
    """Run the whole pipeline: video -> per-pixel series -> Gibbs-sampled mixture weights.

    For every clip returned by ``video_input`` and every pixel series produced
    by ``func_distribution_formation``, a ``MMGibbsSampling`` sampler with
    ``n_components + 1`` clusters is built once and advanced ``n_sweeps``
    sweeps. The extra cluster starts with mean 0 and weight
    ``background_weight`` (the background component). The final mixture
    weights of all pixels are concatenated into one flat vector per clip.

    Parameters
    ----------
    path : str
        Path of the input video.
    ann_file_path : str
        Path of the annotation file.
    n_components : int, default 4
        Number of non-background mixture components per pixel.
    n_sweeps : int, default 3
        Number of Gibbs sweeps (assignment, weights, means) per pixel.
    background_weight : float, default 0.1
        Initial weight of the background component; the remainder is split
        evenly over the other components.

    Returns
    -------
    list of data_objects
        One record per clip carrying clip number, start, end, the flattened
        weight vector and the label.
    """
    mag_arr_all_clips,loa = video_input(path,ann_file_path)

    k = n_components
    # same starting weights for every pixel; last entry is the background weight
    initial_weights = np.append(np.full(k, (1-background_weight)/k), background_weight)

    data_object_array=[]
    for index in range(len(mag_arr_all_clips)):
        print("ith clip ",index)
        updated_weights=[]
        dist_arr = func_distribution_formation(mag_arr_all_clips[index])
        for i in range(dist_arr.shape[0]):
            # seed the means with values drawn from the series itself, plus 0 for background
            means = np.append(np.random.choice(dist_arr[i].flatten(),k), 0)
            # build the sampler once so that each sweep continues from the last
            # one instead of restarting from the initial weights and means
            gibbs_object = MMGibbsSampling(dist_arr[i],k+1,initial_weights.copy(),means)
            for _ in range(n_sweeps):
                gibbs_object.update_assignment()
                gibbs_object.update_mixture_weights()
                gibbs_object.update_cluster_means()
            updated_weights.append(gibbs_object.pi)
        clip_info = loa[index]
        data_object_array.append(data_objects(clip_info.clip_no,clip_info.stime,clip_info.etime,np.asarray(updated_weights).flatten(),clip_info.label))
    return data_object_array

In [None]:
# Run the full pipeline on the traffic-junction video.
# NOTE(review): both locations are absolute, machine-specific paths.
path = "E:\\Study\\Sem Project\\Data\\traffic-junction.avi"  # input video
ann_file_path = "E:\\Study\\Sem Project\\Data\\abnormal_times.txt"  # abnormal time ranges
total_data_objects = run(path=path, ann_file_path=ann_file_path)

Total no of clips 1327
total frames 66324
ith clip  0
ith clip  1
ith clip  2
ith clip  3
ith clip  4
ith clip  5
ith clip  6
ith clip  7
ith clip  8
ith clip  9
ith clip  10
ith clip  11
ith clip  12
ith clip  13
ith clip  14
ith clip  15
ith clip  16
ith clip  17
ith clip  18
ith clip  19
ith clip  20
ith clip  21
ith clip  22
ith clip  23
ith clip  24
ith clip  25
ith clip  26
ith clip  27
ith clip  28
ith clip  29
ith clip  30
ith clip  31
ith clip  32
ith clip  33
ith clip  34
ith clip  35
ith clip  36
ith clip  37
ith clip  38
ith clip  39
ith clip  40
ith clip  41
ith clip  42
ith clip  43
ith clip  44
ith clip  45
ith clip  46
ith clip  47
ith clip  48
ith clip  49
ith clip  50
ith clip  51
ith clip  52
ith clip  53
ith clip  54
ith clip  55
ith clip  56
ith clip  57
ith clip  58
ith clip  59
ith clip  60
ith clip  61
ith clip  62
ith clip  63
ith clip  64
ith clip  65
ith clip  66
ith clip  67
ith clip  68
ith clip  69
ith clip  70
ith clip  71
ith clip  72
ith clip  73
ith cl

ith clip  591
ith clip  592
ith clip  593
ith clip  594
ith clip  595
ith clip  596
ith clip  597
ith clip  598
ith clip  599
ith clip  600
ith clip  601
ith clip  602
ith clip  603
ith clip  604
ith clip  605
ith clip  606
ith clip  607
ith clip  608
ith clip  609
ith clip  610
ith clip  611
ith clip  612
ith clip  613
ith clip  614
ith clip  615
ith clip  616
ith clip  617
ith clip  618
ith clip  619
ith clip  620
ith clip  621
ith clip  622
ith clip  623
ith clip  624
ith clip  625
ith clip  626
ith clip  627
ith clip  628
ith clip  629
ith clip  630
ith clip  631
ith clip  632
ith clip  633
ith clip  634
ith clip  635
ith clip  636
ith clip  637
ith clip  638
ith clip  639
ith clip  640
ith clip  641
ith clip  642
ith clip  643
ith clip  644
ith clip  645
ith clip  646
ith clip  647
ith clip  648
ith clip  649
ith clip  650
ith clip  651
ith clip  652
ith clip  653
ith clip  654
ith clip  655
ith clip  656
ith clip  657
ith clip  658
ith clip  659
ith clip  660
ith clip  661
ith cl

In [None]:
# Report how many clip records exist, then dump the first five of them.
print("total no of objects",len(total_data_objects))
for idx in range(5):
    record = total_data_objects[idx]
    print("clip no=",record.clip_no)
    print("start time=",record.stime)
    print("end time=",record.etime)
    print("weights =",record.weights_data)
    print("label = ",record.label,"\n")

In [None]:
import csv
# One CSV row per clip: clip number, start, end, label, then every weight value.
with open("E:\\Study\\Sem Project\\Data\\Data_exm.csv",'w',newline='') as file:
    writer = csv.writer(file)
    for record in total_data_objects:
        row = [record.clip_no, record.stime, record.etime, record.label]
        weights = record.weights_data
        row.extend(weights[pos] for pos in range(len(weights)))
        writer.writerow(row)

In [None]:
"""
def plot_clusters(data,assignment):
    gby = pd.DataFrame({
            'data': data, 
            'assignment': assignment}
        ).groupby(by='assignment')['data']
    hist_data = [gby.get_group(cid).tolist() 
                 for cid in gby.groups.keys()]
    plt.hist(hist_data,bins=20,histtype='stepfilled', alpha=.5 )
"""