In [2]:
import struct
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cv2
import pandas as pd
import csv
import os
from os import listdir
from os.path import isfile, join
from pathlib import Path

In [3]:
def find(name, path):
    for root, dirs, files in os.walk(path):
        if name in files:
            return os.path.join(root, name)
        
def find2(name, path):
    for root, dirs, files in os.walk(path):
        if name in files:
            return root, name

# create readable structure from txt file
def create_frame_info(fi_name):
    with open(fi_name) as f:
        frame_info = f.readlines()
    return frame_info

# create a list of ids present in a readable structure
def get_ids_from_txt(frame_info):
    ids = []
    for line in frame_info:
        a, rest = line.split(",", 1)
        ids.append(int(a))
    return(ids)

#get set difference
def get_difference(a, b):
    return list(set(a).difference(set(b)))

def remove_rows(dframe, idlist):
    dframe = dframe.set_index("detection_id")
    dframe = dframe.drop(idlist, axis=0)
    return dframe

# get all points from a contour and store as a list of tuples
def get_contour_points(contour):
    cont = contour[2:-1]
    tuples = cont.split(";")
    l = []
    for item in tuples:
        a, b = item.split(" ")
        l.append((int(a), int(b)))
    return l

def process_image(img, x, y, cont_points):
    mask = np.zeros((100,100))
    xdiff = x - 10
    ydiff = y - 10
    xs = []
    ys = []
    for (a,b) in cont_points:
        v = a - xdiff
        u = b - ydiff
        xs.append(u)
        ys.append(v)
        img[u,v] = (0,0,255)
        mask[u,v] = 1
    return img, mask, xs, ys

def normalizedRGB(img):
    newimg=np.zeros((img.shape))
    for i, row in enumerate(img):
        for j, col in enumerate(row):
            b,g,r = col
            s = b+g+r
            newimg[i,j] = (b//s), (g//s), (r//s)
    return newimg        

def com(img, startpoint, mask):
    (xavg, yavg) = startpoint
    
    nextleft = (xavg, yavg-1)
    nextright = (xavg, yavg+1)
    nextup = (xavg+1, yavg)
    nextdown = (xavg-1, yavg)
    
    if (mask[nextleft]==0):
        mask[nextleft] = 1
        com(img, nextleft, mask)
    if (mask[nextright]==0):
        mask[nextright] = 1
        com(img, nextright, mask)
    if (mask[nextup]==0):
        mask[nextup] = 1
        com(img, nextup, mask)
    if (mask[nextdown]==0):
        mask[nextdown] = 1
        com(img, nextdown, mask)

In [4]:
with open('csvnames.txt', 'r') as f:
    csvfiles = f.readlines()

In [5]:
# static paths to csvs, videos, frame infos, and npys
path_to_csvs = "/media/sam/Sam\'s Drive/SUBSET/SQL/ALLYEARS/"
videos_dir = "/media/sam/My Passport/FISH4KNOWLEDGE/f4k_extracted_image/output/summaries/"
frame_info_dir = "/media/sam/My Passport/FISH4KNOWLEDGE/f4k_extracted_image/output/summaries/"
npy_dir = "/media/sam/My Passport/FISH4KNOWLEDGE/final/"

In [None]:
print("Filename\t\t\t\t\tStatus\t\t% Done\tFrames saved".format())
total = 4521
done = 0
index_error = []
others = []
saved_imgs = 0
for csvname in csvfiles:
    try:
        # format name to get full path to csv
        csv_path = path_to_csvs + csvname[:-1]

        # create initial dataframe
        df = pd.read_csv(csv_path)
        # get video ID from current df
        videoID = df.iloc[0][4][1:]
        # get names for files
        video_name_fix = "summary_" + videoID + ".avi"
        npy_name_fix = videoID + ".RESULT.npy"
        frame_info_fix = "frame_info_" + videoID + ".txt"

        # get paths to video, frame info and npy
        videopath, videoname = find2(video_name_fix, videos_dir)
        video_path = videopath + "/" + videoname
        frame_info_path = find(frame_info_fix, frame_info_dir)
        npy_path = find(npy_name_fix, npy_dir)

        # get ids from frame info file and from df, remove rows from df that dont exist in frame info
        frame_info_file = create_frame_info(frame_info_path)
        frame_info_ids = get_ids_from_txt(frame_info_file)
        df_ids = df["detection_id"].tolist()
        ids_to_remove = get_difference(df_ids, frame_info_ids)
        clean_df = remove_rows(df, ids_to_remove)

        # load npy file, this is used together with video to produce images
        npy = np.load(npy_path)
        vidcap = cv2.VideoCapture(video_path)
        success,image = vidcap.read()
        # counts every iteration
        count = 0
        # initialize df where we will save stats
        stats = pd.DataFrame(columns=["B", "G", "R", "nB", "nG", "nR"])
        stats.index.name = "detection_id"

        while success:
            if (npy[count]):
                index = str(clean_df.index[count])
                # save original image
                imgpath = "output/img/"+videoID+"#"+index+".jpg"
                contour = clean_df.iloc[count][13]
                x = clean_df.iloc[count][5]
                y = clean_df.iloc[count][6]
                contour_points = get_contour_points(contour)
                cont_img, mask, xs, ys = process_image(image, x, y, contour_points)
                # save image with contour
                contpath = "output/a/"+videoID+"#"+index+".jpg"
        #         plt.imshow(cont_img)
        #         plt.show()


                xavg = int(np.mean(xs))
                yavg = int(np.mean(ys))
                # set coordinates for CoM point
                startpoint = (xavg, yavg)

                # compute CoM-based shape
                try:
                    com(cont_img, startpoint, mask)
                    cv2.imwrite(imgpath, image)
                    cv2.imwrite(contpath, cont_img)
                except IndexError:
                    print("{}\tIndexE\t\t{:2.2f}% left\t{}".format(csvname[:-1], percent, saved_imgs))
                    count+= 1
                    success, image = vidcap.read()
                    continue
                # extract specific channels
                channel0 = image[:,:,0]
                channel1 = image[:,:,1]
                channel2 = image[:,:,2]
                # compute averages
                c0avg = np.mean(mask*channel0)
                c1avg = np.mean(mask*channel1)
                c2avg = np.mean(mask*channel2)
                # compute normalized image
                normRGB = normalizedRGB(image)
                # extract normalized RGB channels
                normc0 = normRGB[:,:,0]
                normc1 = normRGB[:,:,1]
                normc2 = normRGB[:,:,2]
                # compute averages
                normc0avg = np.mean(mask*normc0)
                normc1avg = np.mean(mask*normc1)
                normc2avg = np.mean(mask*normc2)
                # save to dataframe
                stats.loc[index] = [c0avg, c1avg,c2avg, normc0avg, normc1avg, normc2avg]
                saved_imgs+=1
            count+= 1
            success, image = vidcap.read()

        csvpath = "output/csv/"+videoID+".csv"
        stats.to_csv(csvpath)
        percent = 100 - (done / total)
        print("{}\tDone\t\t{:2.2f}% left\t{}".format(csvname[:-1], percent, saved_imgs))
    #     except IndexError:
    #         index_error.append(csvname)
    #         print("!!!: {} index error.\t{:2.2f}% left".format(csvname[:-1], percent))
    #         continue
    except:
        others.append(csvname)
        print("{}\tUnknownE\t\t{:2.2f}% left\t{}".format(csvname[:-1], percent, saved_imgs))

Filename					Status		% Done	Frames saved




data_siteHoBiHu_camera1_201008230800.csv	Done		100.00% left	3
data_siteHoBiHu_camera1_201008240800.csv	Done		100.00% left	9
IndexError in a65000c5e6f0e67bebb7c16a6d5f6269#201008260800, frame 28
data_siteHoBiHu_camera1_201008260800.csv	Done		100.00% left	52
IndexError in ecfd05e98afd954be60d87384896a2f4#201008270800, frame 19
IndexError in ecfd05e98afd954be60d87384896a2f4#201008270800, frame 20
IndexError in ecfd05e98afd954be60d87384896a2f4#201008270800, frame 21
IndexError in ecfd05e98afd954be60d87384896a2f4#201008270800, frame 22
IndexError in ecfd05e98afd954be60d87384896a2f4#201008270800, frame 27
IndexError in ecfd05e98afd954be60d87384896a2f4#201008270800, frame 35
data_siteHoBiHu_camera1_201008270800.csv	Done		100.00% left	83
data_siteHoBiHu_camera1_201008280800.csv	Done		100.00% left	107
data_siteHoBiHu_camera1_201008290800.csv	Done		100.00% left	228
data_siteHoBiHu_camera1_201009020800.csv	Done		100.00% left	251
IndexError in 4960e98115ad3ff5eb26f1ac989f92f8#201009030800, frame 2

In [2]:
print("Filename\t\t\t\t\tStatus\t\t% Done\tFrames saved".format())

Filename					Status		% Done	Frames saved


In [None]:
# format name to get full path to csv
csv_path = path_to_csvs + broken_file[:-1]

# create initial dataframe
df = pd.read_csv(csv_path)
# get video ID from current df
videoID = df.iloc[0][4][1:]
# get names for files
video_name_fix = "summary_" + videoID + ".avi"
npy_name_fix = videoID + ".RESULT.npy"
frame_info_fix = "frame_info_" + videoID + ".txt"

# get paths to video, frame info and npy
videopath, videoname = find2(video_name_fix, videos_dir)
video_path = videopath + "/" + videoname
frame_info_path = find(frame_info_fix, frame_info_dir)
npy_path = find(npy_name_fix, npy_dir)

# get ids from frame info file and from df, remove rows from df that dont exist in frame info
frame_info_file = create_frame_info(frame_info_path)
frame_info_ids = get_ids_from_txt(frame_info_file)
df_ids = df["detection_id"].tolist()
ids_to_remove = get_difference(df_ids, frame_info_ids)
clean_df = remove_rows(df, ids_to_remove)

# load npy file, this is used together with video to produce images
npy = np.load(npy_path)
vidcap = cv2.VideoCapture(video_path)
success,image = vidcap.read()
# counts every iteration
count = 0
# initialize df where we will save stats
stats = pd.DataFrame(columns=["B", "G", "R", "nB", "nG", "nR"])
stats.index.name = "detection_id"

while success:
    if (npy[count]):
        index = str(clean_df.index[count])
        # save original image
        imgpath = "output/img/"+videoID+"#"+index+".jpg"
        cv2.imwrite(imgpath, image)
        contour = clean_df.iloc[count][13]
        x = clean_df.iloc[count][5]
        y = clean_df.iloc[count][6]
        contour_points = get_contour_points(contour)
        cont_img, mask, xs, ys = process_image(image, x, y, contour_points)
        # save image with contour
        contpath = "output/a/"+videoID+"#"+index+".jpg"
#         plt.imshow(cont_img)
#         plt.show()


        xavg = int(np.mean(xs))
        yavg = int(np.mean(ys))
        # set coordinates for CoM point
        startpoint = (xavg, yavg)
        cont_img[xavg, yavg] = (255, 0, 0)
        
        
        cv2.imwrite(contpath, cont_img)
        # compute CoM-based shape
#         com(cont_img, startpoint, mask)

#         # extract specific channels
#         channel0 = image[:,:,0]
#         channel1 = image[:,:,1]
#         channel2 = image[:,:,2]
#         # compute averages
#         c0avg = np.mean(mask*channel0)
#         c1avg = np.mean(mask*channel1)
#         c2avg = np.mean(mask*channel2)
#         # compute normalized image
#         normRGB = normalizedRGB(image)
#         # extract normalized RGB channels
#         normc0 = normRGB[:,:,0]
#         normc1 = normRGB[:,:,1]
#         normc2 = normRGB[:,:,2]
#         # compute averages
#         normc0avg = np.mean(mask*normc0)
#         normc1avg = np.mean(mask*normc1)
#         normc2avg = np.mean(mask*normc2)
#         # save to dataframe
#         stats.loc[index] = [c0avg, c1avg,c2avg, normc0avg, normc1avg, normc2avg]

    count+= 1
    success, image = vidcap.read()

# csvpath = "output/csv/"+videoID+".csv"
# stats.to_csv(csvpath)
# percent = 100 - (done / total)
# print("{} done.\t\t\t{:2.2f}% left".format(csvname[:-1], percent))
# except IndexError:
# index_error.append(csvname)
# print("!!!: {} index error.\t{:2.2f}% left".format(csvname[:-1], percent))
# continue
# except:
# others.append(csvname)
# print("!!!: {} unknown reason.\t{:2.2f}% left".format(csvname[:-1], percent))

In [None]:
# idx 18

In [None]:
vidcap = cv2.VideoCapture(video_path)
success,image = vidcap.read()
count = 0
frames = 0
while success:
    if (npy[count]):
        frames+=1
        if(frames==19):
            plt.imshow(image)
            plt.show()
            cv2.imwrite("sad.jpg", image)
    count+=1
    success, image = vidcap.read()
print(frames)

In [None]:
df.loc[28]

In [None]:
c = df.loc[28][" contour"]
contour_points2 = get_contour_points(c)
cont_img, mask, xs, ys = process_image(image, x, y, contour_points)