# Logging

In [1]:
import logging
import sys
import time

# create a logger and set level to debug
# NOTE: getLogger() without a name returns the root logger, so DEBUG records
# emitted by third-party libraries also pass through the handlers below.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Re-running this cell used to stack one more file/stdout handler pair on the
# logger each time, so every message was printed (and written) repeatedly.
# The handlers are named so that a previous pair can be found and replaced.
_SPC_HANDLER_NAMES = ("spc_ml_file", "spc_ml_stdout")
for _old_handler in [h for h in logger.handlers if h.get_name() in _SPC_HANDLER_NAMES]:
    logger.removeHandler(_old_handler)
    _old_handler.close()  # releases the previous log file handle

# set the logger to output to both stdout and a file
# (the file name carries the integer Unix time at which this cell ran)
output_file_handler = logging.FileHandler("ml_run_%d.log"%time.time())
output_file_handler.set_name("spc_ml_file")
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.set_name("spc_ml_stdout")

# for the file, include the timestamp as well
formatter = logging.Formatter('%(asctime)s - %(message)s')
output_file_handler.setFormatter(formatter)

# add both stdout and file handlers to the logger
logger.addHandler(output_file_handler)
logger.addHandler(stdout_handler)

In [2]:
# Mark the start of the run; the message goes to every handler attached to `logger`.
logger.debug("SPC ML Run beginning of execution.")

SPC ML Run beginning of execution.


# Status File Handling

In [3]:
# Paths of the plain-text flag files used by the status helpers in this notebook.
# NOTE(review): these are absolute, machine-specific paths; consider deriving them
# from a single configurable base directory so the notebook runs elsewhere.

# read by check_spici_status(): the notebook waits while this file contains '1'
spici_status_file = 'C:/Users/tonma/My Drive/Scripps/Jaffe Lab/Zooplankton ML/spici/spici_status.txt'
# written by ml_status()
# NOTE(review): unlike the other two files this one is not under 'spici/' — confirm that is intended
ml_status_file = 'C:/Users/tonma/My Drive/Scripps/Jaffe Lab/Zooplankton ML/ml_status.txt'
# read by check_execution(): the main loop keeps running while this file contains '1'
execution_file = 'C:/Users/tonma/My Drive/Scripps/Jaffe Lab/Zooplankton ML/spici/execution_status.txt'

In [4]:
def check_status(fname):
    '''Tell whether a status file currently holds the flag '1'.

    fname = path of the status file to read.

    Returns True only when the entire file content is exactly '1'; any other
    content (including an empty file or a trailing newline) gives False.
    Raises OSError if the file cannot be opened or read.
    '''
    # `with` closes the handle even when read() raises, which the manual
    # open()/close() pair did not guarantee
    with open(fname, "r") as f:
        text = f.read()
    return text == '1'

def update_status(fname,stat):
    '''Overwrite a status file with a new flag value.

    fname = path of the status file (created if missing, truncated otherwise),
    stat = string to store, written as-is with no newline appended.

    Raises OSError if the file cannot be opened or written.
    '''
    # `with` closes the handle even when write() raises, which the manual
    # open()/close() pair did not guarantee
    with open(fname, "w") as f:
        f.write(stat)

def ml_status(stat):
    '''Store `stat` (a string flag) in the ML status file `ml_status_file`.'''
    update_status(fname=ml_status_file, stat=stat)

In [5]:
def check_spici_status():
    '''Block until the SPICI status file no longer reads '1'.

    The file is polled with a one-second sleep between reads, and a
    "waiting" message is logged on every 60th poll (starting with the first).
    Returns nothing.
    '''
    polls = 0  # number of completed one-second waits
    while True:
        if not check_status(spici_status_file):
            break
        time.sleep(1)
        whole_minutes, leftover = divmod(polls, 60)
        if leftover == 0:  # roughly once per minute
            logger.debug("Waiting for SPICI script: %d mins."%whole_minutes)
        polls += 1

def check_execution():
    '''Report whether the execution status file currently holds '1'.'''
    keep_running = check_status(execution_file)
    return keep_running

# ML Implementation

## Setup

In [6]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from torch.autograd import Variable
from PIL import Image
import glob

Note: NumExpr detected 32 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
NumExpr defaulting to 8 threads.
matplotlib data path: C:\Users\tonma\anaconda3\envs\spc_ml\lib\site-packages\matplotlib\mpl-data
CONFIGDIR=C:\Users\tonma\.matplotlib
interactive is False
platform is win32


CACHEDIR=C:\Users\tonma\.matplotlib
Using fontManager instance from C:\Users\tonma\.matplotlib\fontlist-v330.json
Loaded backend module://matplotlib_inline.backend_inline version unknown.
Loaded backend module://matplotlib_inline.backend_inline version unknown.


In [7]:
# Report which CUDA device (index 0) PyTorch sees, if any.
if not torch.cuda.is_available():
    print("No GPU is detected.")
else:
    gpu_name = torch.cuda.get_device_name(0)
    print('Default GPU Device: {}'.format(gpu_name))

Default GPU Device: Quadro RTX 4000


In [8]:
model_filename = 'models/pt_20220222_13C.pth'
# run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# SECURITY NOTE: torch.load unpickles the file and can execute arbitrary code;
# only load checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects fully pickled modules such as this one — confirm against the
# installed version before upgrading.
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model=torch.load(model_filename, map_location=device)
# make sure the weights live on the same device the inputs are sent to
model = model.to(device)
# inference mode: Dropout is disabled and BatchNorm uses its running statistics
model.eval()

# number of output classes; sizes the per-class count array
# NOTE(review): presumably must match the model's output width — confirm
n_class = 13

In [9]:
# Preprocessing applied to every image before it is fed to the model:
# resize the shorter side to 256 px, take the central 224x224 crop, convert to
# a float tensor, then normalise each of the three channels.
# NOTE(review): the mean/std values match the ImageNet statistics commonly used
# with torchvision's pretrained models — presumably the same ones used when
# this model was trained; confirm.
val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

def predict_image(image,transform):
    '''Classify one image with the module-level `model` on `device`.

    image = PIL image,
    transform = Pytorch transform.

    Returns (index, prob): `index` is the position of the largest model
    output and `prob` is the largest softmax probability (both numpy scalars).
    Any exception raised by the transform or the model propagates to the caller.
    '''
    # add a leading batch dimension of size 1 and move the tensor to the model's device
    batch = transform(image).float().unsqueeze(0).to(device)
    # inference only: skip autograd bookkeeping so no graph is built per image
    # NOTE(review): assumes `model` was already switched to eval mode by the caller
    with torch.no_grad():
        output = model(batch)
    output_cpu = output.detach().cpu()
    index = output_cpu.numpy().argmax()
    probs = torch.nn.functional.softmax(output_cpu, dim=1)
    return index, np.max(probs.numpy())

In [10]:
def recordData(fname,dateStr,dataArray):
    '''Append one comma-delimited row "<dateStr>,<v0>,<v1>,..." to a text file.

    fname = path of the output file (created if it does not exist),
    dateStr = string written as the first field, used as-is,
    dataArray = 1-D numpy array; each element is formatted with '%d'.

    Raises OSError if the file cannot be opened or written.
    '''
    # convert numpy array to comma-delimited string
    x_arrstr = np.char.mod('%d', dataArray)
    x_str = ",".join(x_arrstr)
    # combine with date
    dataStr = dateStr +','+ x_str +'\n'
    # `with` closes the handle even when write() raises, which the manual
    # open()/close() pair did not guarantee
    with open(fname, "a") as f:
        f.write(dataStr)

In [11]:
def inferenceFolder(folder,n_class):
    '''Classify every image in a folder and count the predictions per class.

    folder = path prefix that is concatenated directly with '*jpg', so it
             must end with a path separator (e.g. 'spici/images/'),
    n_class = number of classes, i.e. the length of the returned array.

    Returns a uint32 numpy array of length n_class where element k is the
    number of images predicted as class k. Images that fail to open or to
    be classified are skipped, counted and reported in the log.
    '''
    image_list = glob.glob(folder+'*jpg')
    image_num = len(image_list)

    corrupt_n = 0 # number of files that could not be processed

    # initialize output array
    result = np.zeros(n_class,dtype='uint32')

    # handle the case where there is no image
    # simply return an array of zero
    if image_num==0:
        logger.debug("Found no images on this date. Skip inferencing.")
        return result

    # inferencing
    start = time.time()
    logger.debug("Inferencing %d images."%image_num)
    logger.debug("####################")
    for i, image_path in enumerate(image_list):
        # progress display: a header every 10000 images, one '#' every 500
        if i%10000==0:
            print("")
            print("Image %d/%d"%(i,image_num))
        if i%500==0:
            print("#",end="")
        try:
            # the context manager releases the file handle even on failure
            with Image.open(image_path) as im:
                ind,_ = predict_image(im,val_transform)
            # add to the result array
            result[ind] = result[ind]+1
        except Exception as err:
            # `except Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit stop the run; the cause is logged so that real
            # failures are not silently reported as "corrupted" files
            corrupt_n = corrupt_n+1
            logger.debug("Skipping image %s: %r", image_path, err)

    elapsed = time.time()-start

    print("")
    logger.debug("Finished inferencing %d images in %.2f s."%(image_num,elapsed))
    # a coarse clock can report zero elapsed time when every file fails quickly
    rate = image_num/elapsed if elapsed > 0 else float('inf')
    logger.debug("Average inferencing time = %.3f images/s"%rate)
    if corrupt_n > 0:
        logger.debug("Found %d corrupted image files."%(corrupt_n))
    return result

## Execution

In [12]:
# folder prefix handed to inferenceFolder (must end with '/')
data_folder = 'spici/images/'
# one result file per notebook run, named with the integer Unix start time
result_file = 'results/spc_ml_result_%d.csv'%time.time()

# spc_ml runs as long as execution still true
while(check_execution()):

    # first check spici flag (blocks while the SPICI status file reads '1')
    check_spici_status()
    # read current date
    with open('spici/spici_date.txt', "r") as f:
        text = f.read()
    # the text before the first comma is used as the date in the log message
    spici_date = text.split(',')[0]

    # tell the SPICI script that the ML script is busy
    ml_status("1") # set flag to 1
    logger.debug("Start inferencing images from %s."%spici_date)

    result = inferenceFolder(data_folder,n_class)

    # NOTE(review): the full file content `text` (not just `spici_date`) is
    # written as the first field(s) of the row; a trailing newline in the date
    # file would split the row — confirm the file format.
    recordData(result_file,text,result)

    ml_status("0") # set flag back to 0 after done
    time.sleep(15) # wait 15 s for SPICI script to register the new flag

Waiting for SPICI script: 0 mins.
Start inferencing images from 2018-06-30 00:00:00.
Inferencing 5446 images.
####################

Image 0/5446
###########
Finished inferencing 5446 images in 57.46 s.
Average inferencing time = 94.781 images/s
Start inferencing images from 2018-06-30 03:00:00.
Inferencing 2334 images.
####################

Image 0/2334
#####
Finished inferencing 2334 images in 22.00 s.
Average inferencing time = 106.103 images/s
Start inferencing images from 2018-06-30 06:00:00.
Inferencing 2246 images.
####################

Image 0/2246
#####
Finished inferencing 2246 images in 21.36 s.
Average inferencing time = 105.139 images/s
Start inferencing images from 2018-06-30 09:00:00.
Inferencing 4908 images.
####################

Image 0/4908
##########
Finished inferencing 4908 images in 48.03 s.
Average inferencing time = 102.186 images/s
Start inferencing images from 2018-06-30 12:00:00.
Inferencing 3861 images.
####################

Image 0/3861
########
Finished inf