In [1]:
import struct
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cv2
import pandas as pd
import csv
import os
from os import listdir
from os.path import isfile, join
from pathlib import Path
import math
from collections import Counter
import time

In [2]:
def find(name, path):
    for root, dirs, files in os.walk(path):
        if name in files:
            return os.path.join(root, name)
        
def find2(name, path):
    for root, dirs, files in os.walk(path):
        if name in files:
            return root, name

# create readable structure from txt file
def create_frame_info(fi_name):
    with open(fi_name) as f:
        frame_info = f.readlines()
    return frame_info

# create a list of ids present in a readable structure
def get_ids_from_txt(frame_info):
    ids = []
    for line in frame_info:
        a, rest = line.split(",", 1)
        ids.append(int(a))
    return(ids)

#get set difference
def get_difference(a, b):
    return list(set(a).difference(set(b)))

def remove_rows(dframe, idlist):
    dframe = dframe.set_index("detection_id")
    dframe = dframe.drop(idlist, axis=0)
    return dframe

# get all points from a contour and store as a list of tuples
def get_contour_points(contour):
    cont = contour[2:-1]
    tuples = cont.split(";")
    l = []
    for item in tuples:
        a, b = item.split(" ")
        l.append((int(a), int(b)))
    return l

def process_image(img, x, y, cont_points):
    mask = np.zeros((100,100), dtype=int)
    xdiff = x - 10
    ydiff = y - 10
    xs = []
    ys = []
    for (a,b) in cont_points:
        v = a - xdiff
        u = b - ydiff
        xs.append(u)
        ys.append(v)
        img[u,v] = (0,0,255)
        mask[u,v] = 1
    return img, mask, xs, ys

def normalizedRGB(img):
    newimg=np.zeros((img.shape))
    for i, row in enumerate(img):
        for j, col in enumerate(row):
            b,g,r = col
            s = b+g+r+0.0000000001
            newimg[i,j] = (b/s), (g/s), (r/s)
    return newimg        

def com(img, startpoint, mask):
    (xavg, yavg) = startpoint
    
    nextleft = (xavg, yavg-1)
    nextright = (xavg, yavg+1)
    nextup = (xavg+1, yavg)
    nextdown = (xavg-1, yavg)
    
    if (mask[nextleft]==0):
        mask[nextleft] = 1
        com(img, nextleft, mask)
    if (mask[nextright]==0):
        mask[nextright] = 1
        com(img, nextright, mask)
    if (mask[nextup]==0):
        mask[nextup] = 1
        com(img, nextup, mask)
    if (mask[nextdown]==0):
        mask[nextdown] = 1
        com(img, nextdown, mask)

In [3]:
def countlines(file):
    with open(file) as f:
        summ = sum(1 for line in f)
    return summ

In [4]:
mypath = "/home/sam/dissertation/output/hsrgc_unknown"
#videonames: 604d101362fce8d83d99ec1cfebab8bb#201012120800.csv
#csvnames: data_siteHoBiHu_camera1_201008230800.csv
videonames = [f for f in listdir(mypath) if isfile(join(mypath, f))]
#total unknown detections: 735794

In [9]:
path_to_csvs = "/home/sam/dissertation/output/hsrgc_unknown"
videos_dir = "/media/sam/My Passport/FISH4KNOWLEDGE/f4k_extracted_image/output/summaries/"
frame_info_dir = "/media/sam/My Passport/FISH4KNOWLEDGE/f4k_extracted_image/output/summaries/"
npy_dir = "/media/sam/My Passport/FISH4KNOWLEDGE/final/"
path_to_orig_csvs = "/media/sam/Sam\'s Drive/SUBSET/SQL/ALLYEARS/"

In [14]:
print("Filename\t\t\t\t\tStatus\t\t% Total\tThis df".format())
total = 0
for csvname in csvnames:
    csv_path = path_to_orig_csvs + csvname

    df = pd.read_csv(csv_path)
    videoID = df.iloc[0][4][1:]

    video_joined_name = videoID + ".csv"
    if (video_joined_name in videonames):
        video_name_fix = "summary_" + videoID + ".avi"
        npy_name_fix = videoID + ".RESULT.npy"
        frame_info_fix = "frame_info_" + videoID + ".txt"

        # get paths to video, frame info and npy
        videopath, videoname = find2(video_name_fix, videos_dir)
        video_path = videopath + "/" + videoname
        frame_info_path = find(frame_info_fix, frame_info_dir)
        npy_path = find(npy_name_fix, npy_dir)

        # get ids from frame info file and from df, remove rows from df that dont exist in frame info
        frame_info_file = create_frame_info(frame_info_path)
        frame_info_ids = get_ids_from_txt(frame_info_file)
        df_ids = df["detection_id"].tolist()
        ids_to_remove = get_difference(df_ids, frame_info_ids)
        clean_df = remove_rows(df, ids_to_remove)

        # load npy file, this is used together with video to produce images
        npy = np.load(npy_path)
        vidcap = cv2.VideoCapture(video_path)
        success,image = vidcap.read()

        count = 0
        saved_imgs = 0
        while success:
                if (npy[count] and df.iloc[count][' specie_id']==0):
                    index = str(clean_df.index[count])
                    imgpath = "output/new_img/"+videoID+"#"+index+".jpg"
                    cv2.imwrite(imgpath, image)
                    total += 1
                    saved_imgs += 1
                count+= 1
                success, image = vidcap.read()

    print("{}\tDone\t\t{:2.2f}% left\t{}".format(csvname, total, saved_imgs))

Filename					Status		% Total	This df
data_siteNPP-3_camera3_201302130800.csv	Done		4241.00% left	4241
data_siteNPP-3_camera3_201302140800.csv	Done		5180.00% left	939
data_siteNPP-3_camera3_201302150800.csv	Done		5180.00% left	939
data_siteNPP-3_camera3_201302160800.csv	Done		5180.00% left	939
data_siteNPP-3_camera3_201302180800.csv	Done		5948.00% left	768
data_siteNPP-3_camera3_201302200800.csv	Done		5948.00% left	768
data_siteNPP-3_camera4_201302050800.csv	Done		5948.00% left	768
data_siteNPP-3_camera4_201302060800.csv	Done		6166.00% left	218
data_siteNPP-3_camera4_201302070800.csv	Done		6166.00% left	218
data_siteNPP-3_camera4_201302080800.csv	Done		6166.00% left	218
data_siteNPP-3_camera4_201302090800.csv	Done		6166.00% left	218
data_siteNPP-3_camera4_201302100800.csv	Done		6333.00% left	167
data_siteNPP-3_camera4_201302110800.csv	Done		6463.00% left	130
data_siteNPP-3_camera4_201302120800.csv	Done		6706.00% left	243
data_siteNPP-3_camera4_201302130800.csv	Done		7394.00% left	688
da

data_siteNPP-3_camera1_201206280800.csv	Done		16633.00% left	92
data_siteNPP-3_camera1_201206290800.csv	Done		16661.00% left	28
data_siteNPP-3_camera1_201207010800.csv	Done		16661.00% left	28
data_siteNPP-3_camera1_201207030800.csv	Done		16661.00% left	28
data_siteNPP-3_camera1_201207060800.csv	Done		16688.00% left	27
data_siteNPP-3_camera1_201207070800.csv	Done		16688.00% left	27
data_siteNPP-3_camera1_201207090800.csv	Done		16735.00% left	47
data_siteNPP-3_camera1_201207110800.csv	Done		16735.00% left	47
data_siteNPP-3_camera1_201207120800.csv	Done		17182.00% left	447
data_siteNPP-3_camera1_200910040800.csv	Done		17205.00% left	23
data_siteNPP-3_camera1_201001190800.csv	Done		17205.00% left	23
data_siteNPP-3_camera1_201002050800.csv	Done		17205.00% left	23
data_siteNPP-3_camera1_201002180800.csv	Done		17205.00% left	23
data_siteNPP-3_camera1_201003110800.csv	Done		17205.00% left	23
data_siteNPP-3_camera1_201004030800.csv	Done		17205.00% left	23
data_siteNPP-3_camera1_201006040800.csv

data_siteHoBiHu_camera3_201012010800.csv	Done		30420.00% left	20
data_siteHoBiHu_camera3_201012020800.csv	Done		30427.00% left	7
data_siteHoBiHu_camera2_201103270800.csv	Done		30907.00% left	480
data_siteHoBiHu_camera2_201103280800.csv	Done		30907.00% left	480
data_siteHoBiHu_camera2_201103290800.csv	Done		31070.00% left	163
data_siteHoBiHu_camera2_201103300800.csv	Done		31251.00% left	181
data_siteHoBiHu_camera2_201103310800.csv	Done		31251.00% left	181
data_siteHoBiHu_camera2_201104010800.csv	Done		31684.00% left	433
data_siteHoBiHu_camera2_201104020800.csv	Done		31684.00% left	433
data_siteHoBiHu_camera2_201104030800.csv	Done		31684.00% left	433
data_siteHoBiHu_camera2_201104040800.csv	Done		31684.00% left	433
data_siteHoBiHu_camera2_201104060800.csv	Done		31730.00% left	46
data_siteHoBiHu_camera2_201104070800.csv	Done		32125.00% left	395
data_siteHoBiHu_camera2_201104080800.csv	Done		32125.00% left	395
data_siteHoBiHu_camera2_201104090800.csv	Done		32125.00% left	395
data_siteHoBiH

data_siteNPP-3_camera2_201107300800.csv	Done		48189.00% left	366
data_siteNPP-3_camera2_201107310800.csv	Done		48279.00% left	90
data_siteNPP-3_camera2_201108010800.csv	Done		48279.00% left	90
data_siteNPP-3_camera2_201108020800.csv	Done		48279.00% left	90
data_siteNPP-3_camera2_201108030800.csv	Done		48279.00% left	90
data_siteNPP-3_camera1_201004070800.csv	Done		48279.00% left	90
data_siteNPP-3_camera1_201004080800.csv	Done		48279.00% left	90
data_siteNPP-3_camera1_201004090800.csv	Done		48279.00% left	90
data_siteNPP-3_camera4_201209120800.csv	Done		48279.00% left	90
data_siteNPP-3_camera4_201209130800.csv	Done		49260.00% left	981
data_siteNPP-3_camera4_201209150800.csv	Done		49260.00% left	981
data_siteNPP-3_camera4_201209160800.csv	Done		49260.00% left	981
data_siteNPP-3_camera4_201209180800.csv	Done		49260.00% left	981
data_siteNPP-3_camera4_201209190800.csv	Done		50027.00% left	767
data_siteNPP-3_camera4_201209200800.csv	Done		52399.00% left	2372
data_siteNPP-3_camera4_201209210

data_siteNPP-3_camera4_201303200800.csv	Done		68104.00% left	67
data_siteNPP-3_camera4_201303210800.csv	Done		68648.00% left	544
data_siteNPP-3_camera4_201303220800.csv	Done		68648.00% left	544
data_siteNPP-3_camera4_201303230800.csv	Done		69545.00% left	897
data_siteNPP-3_camera4_201303240800.csv	Done		70706.00% left	1161
data_siteNPP-3_camera4_201303250800.csv	Done		71551.00% left	845
data_siteNPP-3_camera4_201303280800.csv	Done		71551.00% left	845
data_siteNPP-3_camera4_201303290800.csv	Done		71811.00% left	260
data_siteNPP-3_camera4_201303310800.csv	Done		71811.00% left	260
data_siteNPP-3_camera4_201304010800.csv	Done		72207.00% left	396
data_siteNPP-3_camera4_201304020800.csv	Done		72574.00% left	367
data_siteNPP-3_camera4_201304030800.csv	Done		72574.00% left	367
data_siteNPP-3_camera4_201304050800.csv	Done		73198.00% left	624
data_siteNPP-3_camera4_201304060800.csv	Done		74072.00% left	874
data_siteNPP-3_camera1_201109300800.csv	Done		74072.00% left	874
data_siteNPP-3_camera1_20

data_siteNPP-3_camera3_201110020800.csv	Done		102230.00% left	1136
data_siteNPP-3_camera3_201110030800.csv	Done		102230.00% left	1136
data_siteNPP-3_camera3_201110040800.csv	Done		102525.00% left	295
data_siteNPP-3_camera3_201110060800.csv	Done		102525.00% left	295
data_siteNPP-3_camera3_201110070800.csv	Done		102525.00% left	295
data_siteNPP-3_camera3_201110080800.csv	Done		102525.00% left	295
data_siteNPP-3_camera3_201110090800.csv	Done		102525.00% left	295
data_siteNPP-3_camera3_201110120800.csv	Done		103616.00% left	1091
data_siteNPP-3_camera4_201101220800.csv	Done		103616.00% left	1091
data_siteNPP-3_camera4_201102140800.csv	Done		103616.00% left	1091
data_siteNPP-3_camera4_201103050800.csv	Done		103616.00% left	1091
data_siteNPP-3_camera4_201103200800.csv	Done		103616.00% left	1091
data_siteNPP-3_camera4_201104140800.csv	Done		103616.00% left	1091
data_siteNPP-3_camera4_201105260800.csv	Done		103616.00% left	1091
data_siteNPP-3_camera4_201106110800.csv	Done		103616.00% left	1091


data_siteNPP-3_camera3_201212090800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201212110800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201212140800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201212160800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201212180800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201102070800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201102080800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201102090800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201102100800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201102110800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201102120800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201102140800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201102150800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201102160800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_201102170800.csv	Done		124648.00% left	89
data_siteNPP-3_camera3_20

data_siteLanYu_camera1_201105300800.csv	Done		137657.00% left	2127
data_siteLanYu_camera1_201107140800.csv	Done		137657.00% left	2127
data_siteLanYu_camera1_201107150800.csv	Done		137657.00% left	2127
data_siteNPP-3_camera4_201107180800.csv	Done		137657.00% left	2127
data_siteNPP-3_camera4_201107190800.csv	Done		137657.00% left	2127
data_siteNPP-3_camera4_201107200800.csv	Done		137657.00% left	2127
data_siteNPP-3_camera4_201107210800.csv	Done		137657.00% left	2127
data_siteNPP-3_camera4_201107220800.csv	Done		137657.00% left	2127
data_siteNPP-3_camera4_201107230800.csv	Done		137657.00% left	2127
data_siteNPP-3_camera4_201107240800.csv	Done		137657.00% left	2127
data_siteNPP-3_camera4_201107250800.csv	Done		137657.00% left	2127
data_siteNPP-3_camera4_201107260800.csv	Done		137657.00% left	2127


ParserError: Error tokenizing data. C error: Expected 15 fields in line 41782, saw 29


In [12]:
csvnames.index(csvname)

640

In [13]:
csvnames = csvnames[640:]