In [None]:
import os
# Expose only GPU index 0 to CUDA; this is set before torch is imported in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
import h5py
from IPython.display import display
import numpy as np
from os.path import join as pj
import pandas as pd
import random
import sys
import torch
from tqdm import tqdm
import visdom

# Logger
from IO.logger import Logger
# Data Augmentation
from dataset.classification.loader import create_validation_split, load_validation_data, create_train_data
# Model
from model.resnet.utils import define_weight
from model.resnet.predict import test_classification
# Evaluation
from evaluation.classification.evaluate import accuracy, confusion_matrix
# Statistics
from evaluation.classification.statistics import compute_each_size_df, compute_all_size_df
# Visualize
from evaluation.classification.visualize import create_confusion_matrix, plot_df_distrib_size

# Train Config

In [None]:
class args:
    # Experiment configuration, used as a plain namespace: attributes are read
    # straight from the class (e.g. args.bs) and the class is passed to Logger.

    # experiment_name (also used as the leaf directory of the output paths)
    experiment_name = "resnet101_b20_r45_lr1e-5_aaaaa"
    # backbone to train; choice ["ResNet18", "ResNet34", "ResNet50", "ResNet101"]
    # Defined before the paths so that outputs land in a per-model directory.
    model_name = "ResNet101"
    # data split
    train_ratio = 0.8 # unused parameters
    test_ratio = 0.2  # also determines the number of cross-validation folds (1 / test_ratio)
    # paths
    # Single place to edit when the project is checked out somewhere else.
    project_root = "/home/tanida/workspace/Insect_Phenology_Detector"
    all_data_path = pj(project_root, "data/all_classification_data/classify_insect_std_resize_aquatic")
    model_root = pj(project_root, "output_model/classification", model_name, experiment_name)
    figure_root = pj(project_root, "figure/classification", model_name, experiment_name)
    # class names to visualize
    #labels =  ['Aquatic_insects', 'Other_insects']
    labels =  ['Diptera', 'Ephemeridae', 'Ephemeroptera',
               'Lepidoptera', 'Plecoptera', 'Trichoptera']
    # train config
    bs = 20        # mini-batch size
    lr = 1e-5      # Adam learning rate
    nepoch = 1     # number of training epochs per fold
    rotate = 45    # forwarded to create_train_data
    pretrain = True
    # test config
    save_fig = True
    save_df = True
    # visdom
    visdom = True
    port = 8097

# Load Model

In [None]:
# Model
if args.model_name=="ResNet18":
    from model.resnet.resnet18 import ResNet18
elif args.model_name=="ResNet34":
    from model.resnet.resnet34 import ResNet34
elif args.model_name=="ResNet50":
    from model.resnet.resnet50 import ResNet50
elif args.model_name=="ResNet101":
    from model.resnet.resnet101 import ResNet101
else:
    print("error! write correct model name!")

# Train

In [None]:
def train(model, xtr, ytr, bs=10, lr=1e-4, nepoch=200, visdom=False):
    """Train `model` in place with class-weighted cross entropy and Adam.

    After every epoch the accuracy on the test split is computed and the
    summed training loss and test accuracy are written to stdout (and to
    visdom when requested).

    NOTE: besides its arguments this function reads notebook-level globals:
    `xte` / `yte` for the per-epoch evaluation and, when `visdom` is true,
    `vis`, `win_train_loss`, `win_test_acc` and `visualize`.

    Args:
        model: network to optimise; batches are moved to CUDA before the
            forward pass.
        xtr: training inputs, indexable with a list of sample indices.
        ytr: training labels, indexable the same way as `xtr`.
        bs: mini-batch size. A trailing partial batch is skipped.
        lr: learning rate for Adam.
        nepoch: number of passes over the training data.
        visdom: if true, append the per-epoch loss/accuracy to the visdom
            windows.

    Returns:
        None.
    """
    # calculate counts and count_sum
    _, counts = np.unique(ytr, return_counts=True)
    counts_sum = int(counts.sum())

    # define weight and create loss function
    ce = torch.nn.CrossEntropyLoss(define_weight(counts))

    # define optimizer
    opt = torch.optim.Adam(model.parameters(), lr=lr)

    t = tqdm(range(nepoch),leave=False)
    # training
    for epoch in t:
        # Re-enter train mode every epoch: the evaluation at the end of the
        # previous epoch may have switched the model to eval mode.
        model.train()

        total_loss = 0.0
        # fresh random permutation of the sample indices for this epoch
        index = random.sample(range(counts_sum),counts_sum)
        t.set_description("epoch=%s" % (epoch))
        # "+ 1" keeps the last *full* batch when counts_sum is a multiple of
        # bs; a trailing partial batch is still dropped.
        for start in range(0, counts_sum - bs + 1, bs):
            batch_idx = index[start:start+bs]
            x = xtr[batch_idx].cuda()
            y = ytr[batch_idx].cuda()
            opt.zero_grad()
            out = model(x)
            loss = ce(out, y)
            loss.backward()
            opt.step()
            # Detach so the running sum does not hold on to autograd history.
            total_loss += loss.detach()

        te_acc = accuracy(model, xte, yte, bs)
        # float() handles both the tensor sum and the plain 0.0 left when no
        # batch was processed.
        total_loss = float(total_loss)
        if visdom:
            visualize(vis, epoch, total_loss, win_train_loss)
            visualize(vis, epoch, te_acc, win_test_acc)
        sys.stdout.write("\rtotal_loss=%f, te_acc=%f" % (total_loss,te_acc))
        sys.stdout.flush()

# Set Visdom

In [None]:
if args.visdom:
    # Create visdom client on the configured port
    vis = visdom.Visdom(port=args.port)

    # Window for the per-epoch training loss, seeded with a single (0, 0) point
    win_train_loss = vis.line(
        X=np.array([0]),
        Y=np.array([0]),
        opts=dict(
            title='train_loss',
            xlabel='epoch',
            ylabel='loss',
            width=800,
            height=400
        )
    )
    # Window for the per-epoch test accuracy, seeded with a single (0, 0) point
    win_test_acc = vis.line(
        X=np.array([0]),
        Y=np.array([0]),
        opts=dict(
            title='test_accuracy',
            xlabel='epoch',
            ylabel='accuracy',
            width=800,
            height=400
        )
    )

In [None]:
def visualize(vis, phase, visualized_data, window):
    """Append one (phase, visualized_data) point to an existing visdom line plot.

    Args:
        vis: object exposing a visdom-style ``line`` method.
        phase: x coordinate of the new point (the epoch in this notebook).
        visualized_data: y coordinate of the new point.
        window: handle of the plot window to append to.
    """
    x_point = np.array([phase])
    y_point = np.array([visualized_data])
    vis.line(X=x_point, Y=y_point, win=window, update='append')

### Save args

In [None]:
# Hand the configuration class to the project Logger and save it.
# NOTE(review): where and in what format it is written is decided inside IO.logger — confirm there.
args_logger = Logger(args)
args_logger.save()

# Cross Validation

In [None]:
# Weights are written to <model_root>/final.pth; figures and CSVs go to figure_root.
model_save_path = pj(args.model_root, "final.pth")
# exist_ok=True makes the cell safely re-runnable without a separate existence check.
os.makedirs(args.model_root, exist_ok=True)
os.makedirs(args.figure_root, exist_ok=True)

In [None]:
# Number of cross-validation folds implied by the test ratio (0.2 -> 5 folds).
valid_num = int(1.0/args.test_ratio)
# Open read-only: the file is only read here, so never risk creating or modifying it.
with h5py.File(args.all_data_path, "r") as f:
    X = f["X"][:]
    Y = f["Y"][:]
# Per-class sample counts over the whole dataset (used later by the confusion-matrix plot).
_, ntests = np.unique(Y, return_counts=True)
train_idxs, test_idxs = create_validation_split(Y, args.test_ratio)

result = []
validation_matrix = None   # sum of the per-fold confusion matrices
x_folds, y_folds = [], []  # test inputs / labels of every fold, in fold order
for valid_count in range(valid_num):
    xtr, ytr, xte, yte = load_validation_data(X, Y, train_idxs[valid_count], test_idxs[valid_count])

    xtr, ytr = create_train_data(xtr, ytr, args.rotate)

    # A fresh model is built for every fold.
    if args.model_name=="ResNet18":
        model = ResNet18(len(args.labels), pretrain=args.pretrain).cuda()
    elif args.model_name=="ResNet34":
        model = ResNet34(len(args.labels), pretrain=args.pretrain).cuda()
    elif args.model_name=="ResNet50":
        model = ResNet50(len(args.labels), pretrain=args.pretrain).cuda()
    elif args.model_name=="ResNet101":
        model = ResNet101(len(args.labels), pretrain=args.pretrain).cuda()
    else:
        # Stop instead of training a stale or undefined `model`.
        raise ValueError("error! write correct model name! got %r" % (args.model_name,))
    train(model, xtr, ytr, bs=args.bs, lr=args.lr, nepoch=args.nepoch, visdom=args.visdom)
    # NOTE: every fold writes to the same file, so only the last fold's weights remain on disk.
    torch.save(model.state_dict(), model_save_path)

    matrix = confusion_matrix(model, xte, yte, args.labels, bs=args.bs)
    df = pd.DataFrame(matrix)
    display(df)
    # Accumulate out of place so the first fold's matrix is never mutated.
    validation_matrix = matrix if validation_matrix is None else validation_matrix + matrix
    x_folds.append(xte.cpu().numpy())
    y_folds.append(yte.cpu().numpy())

    result.extend(test_classification(model, xte))

# Concatenate once after the loop instead of re-copying the growing arrays every fold.
x_all = np.concatenate(x_folds)
y_all = np.concatenate(y_folds)

In [None]:
# Confusion matrix summed over all folds, shown as a table and optionally saved as CSV.
df = pd.DataFrame(validation_matrix)
if args.save_df is True:
    df.to_csv(pj(args.figure_root, "validation_matrix.csv"))
df

In [None]:
# Plot the summed confusion matrix; ntests holds the per-class sample counts of the full dataset.
create_confusion_matrix(validation_matrix, ntests, args.labels, args.figure_root, save=args.save_fig)

In [None]:
# Statistics built from the predictions of all folds (result) together with the
# matching test inputs (x_all) and labels (y_all); optionally saved as CSV.
each_df = compute_each_size_df(result, x_all, y_all)
if args.save_df is True:
    each_df.to_csv(pj(args.figure_root, "each_size_df.csv"))
each_df

In [None]:
# Summary table derived from each_df; optionally saved as CSV.
all_df = compute_all_size_df(each_df)
if args.save_df is True:
    all_df.to_csv(pj(args.figure_root, "all_size_df.csv"))
all_df

In [None]:
# Plot the summary table; the figure is written under figure_root when save_fig is set.
plot_df_distrib_size(all_df, args.figure_root, save=args.save_fig)

### Load and Test model

In [None]:
# Re-create the architecture selected in args.model_name so saved weights can be loaded into it.
if args.model_name == "ResNet18":
    model = ResNet18(len(args.labels)).cuda()
elif args.model_name == "ResNet34":
    model = ResNet34(len(args.labels)).cuda()
elif args.model_name == "ResNet50":
    model = ResNet50(len(args.labels)).cuda()
elif args.model_name == "ResNet101":
    model = ResNet101(len(args.labels)).cuda()
else:
    # Stop here rather than loading weights into a stale or undefined `model`.
    raise ValueError("error! write correct model name! got %r" % (args.model_name,))

In [None]:
# Load the weights written by the cross-validation cell (<model_root>/final.pth).
model.load_state_dict(torch.load(pj(args.model_root, "final.pth")))

In [None]:
# NOTE(review): `create_dataset_from_all_data` is not imported or defined in the visible notebook,
# and `args` declares no `train_data_path` / `test_data_path`; this cell looks like a leftover
# from an earlier revision and will fail as written — TODO confirm and restore or remove.
create_dataset_from_all_data(args.all_data_path, args.train_data_path, args.test_data_path, args.test_ratio)

In [None]:
# NOTE(review): `load_data` is not imported or defined in the visible notebook, and `args`
# declares no `train_data_path` / `test_data_path` — TODO confirm the intended loader.
xtr, ytr, xte, yte, ntests = load_data(args.train_data_path, args.test_data_path)

In [None]:
# Confusion matrix of the reloaded model on xte / yte, then plotted (and saved when save_fig is set).
# NOTE(review): if the data-loading cell above does not run, xte / yte / ntests are whatever the
# cross-validation cell left behind (last fold's test split, full-dataset class counts).
matrix = confusion_matrix(model, xte, yte, args.labels, bs=args.bs)
create_confusion_matrix(matrix, ntests, args.labels, args.figure_root, save=args.save_fig)