<a href="https://colab.research.google.com/github/lzichi/Thin-Materials-ML/blob/main/RUN_2d_Evaluate_ResNet_(2022Summer).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive (Colab only); the paths used later in
# this notebook all live under '/content/drive/Shared drives/2d'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 2d Codes

In [None]:
import os, argparse, time, random
from functools import partial
from shutil import copyfile

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
import torch.optim as optim
import torchvision.models as models
from PIL import Image

from tqdm.notebook import tqdm

import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/torchvision.
warnings.filterwarnings("ignore")

In [None]:
import os

def makedirs(*dirnames):
    """Create every directory in ``dirnames``, including missing parents.

    Directories that already exist are left untouched. ``exist_ok=True`` is
    used instead of a separate ``os.path.exists`` check so that a directory
    created by another process between the check and the creation call does
    not raise a spurious ``FileExistsError``.

    Args:
        *dirnames: Any number of directory paths to create.

    Raises:
        FileExistsError: If a path already exists but is not a directory.
    """
    for dirname in dirnames:
        os.makedirs(dirname, exist_ok=True)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed Python's `random`, NumPy and PyTorch (including all CUDA devices) with
# the same fixed value.
# NOTE: train_test_split() re-seeds NumPy's global RNG with its own `seed` argument.
random.seed(41)
np.random.seed(41)
torch.manual_seed(41)
torch.cuda.manual_seed_all(41)


In [None]:
class FlakeDataset(Dataset):
    """Image dataset built from a dataframe of image paths and labels.

    For every row the material name is parsed from the file name: the part of
    the file name before the first ``.`` is taken, then the text after its last
    ``-``, and finally anything from ``_crop`` onwards is removed. Paths that
    contain ``aug_`` are tagged ``'augment'``; all other paths are tagged
    ``'raw'``.

    Args:
        df: DataFrame with a ``paths`` column (image file paths) and a
            ``labels`` column (numeric labels). Rows are paired positionally,
            so the dataframe index may be arbitrary.
        raw_only: When true, rows tagged ``'augment'`` are dropped.
        transform: Callable applied to the loaded RGB PIL image. Defaults to
            ``transforms.ToTensor()``.
        material: Optional collection of material names. When given, a row is
            kept only if its parsed material name is a substring of at least
            one entry.

    Attributes:
        path: List of kept image paths.
        labels: Float tensor with one label per kept image.
        materials: NumPy array with the parsed material name per kept image.
        dat_type: NumPy array of ``'raw'`` / ``'augment'`` tags.
    """

    def __init__(self, df, raw_only, transform=None, material=None):
        paths, labels, materials, dat_type = [], [], [], []
        # Iterate over both columns together so that labels are matched by
        # position; indexing `df.labels[idx]` with an enumerate counter is a
        # label-based lookup and breaks on a non-default index.
        for path, label in zip(df['paths'], df['labels']):
            file = path.split('/')[-1].split('.')[0].split('-')[-1]
            if '_crop' in file:
                file = file.split('_crop')[0]

            # Substring match against the requested materials (if any).
            if material and not any(file in mat for mat in material):
                continue

            dat_type_i = 'augment' if 'aug_' in path else 'raw'
            if raw_only and dat_type_i == 'augment':
                continue

            dat_type.append(dat_type_i)
            paths.append(path)
            labels.append(label)
            materials.append(file)

        self.path = paths
        self.labels = torch.tensor(labels).float()
        self.materials = np.array(materials)
        self.dat_type = np.array(dat_type)

        if not transform:
            transform = transforms.Compose([transforms.ToTensor()])
        self.transform = transform

    def __len__(self):
        """Return the number of images kept after filtering."""
        return len(self.path)

    def __getitem__(self, i):
        """Load one sample.

        Returns:
            Tuple ``(image, label, material, dat_type)`` where ``image`` is the
            transformed image cast to float and ``label`` is a 1-element tensor.
        """
        if torch.is_tensor(i):
            i = i.tolist()

        # Open inside a context manager so the file handle is released as soon
        # as the pixel data has been converted, instead of waiting for GC.
        with Image.open(self.path[i]) as handle:
            img = handle.convert("RGB")
        img = self.transform(img)
        label = self.labels[[i]]  # list index keeps a trailing dimension of size 1
        material = self.materials[i]
        dat_type = self.dat_type[i]
        return img.float(), label, material, dat_type
    

In [None]:
def train_test_split(data, test_portion=0.75, train_subset_portion=1., seed=1,
                     expected_n_originals=332):
    """Split ``data`` into train and test frames, grouped by original image.

    Rows are grouped by the original image they derive from (the ``names``
    value with its ``raw_`` / ``aug_<id>_`` prefix removed), so an image and
    its augmentations always land on the same side of the split.

    * ``int((1 - test_portion) * n_originals)`` original images are drawn
      (without replacement) for training; all remaining originals form the
      test set.
    * When ``train_subset_portion < 1`` only
      ``int(train_subset_portion * n_train_originals)`` of the training
      originals are kept; the rest are discarded.

    Note:
        The function calls ``np.random.seed(seed)`` and therefore resets
        NumPy's global random state.

    Args:
        data: DataFrame with a ``names`` column.
        test_portion: Fraction of original images reserved for testing.
        train_subset_portion: Fraction of training originals to keep.
        seed: Seed for NumPy's global RNG.
        expected_n_originals: Sanity check on the number of distinct original
            images; pass ``None`` to skip the check.

    Returns:
        ``(train_data, test_data)``; both have the helper column removed and
        their index reset (the old index is kept as an ``index`` column).

    Raises:
        AssertionError: If ``names`` is missing or the number of original
            images differs from ``expected_n_originals``.
    """
    np.random.seed(seed)
    assert 'names' in data.columns, '`names` column is not found in df.'

    def _original_name(name):
        # 'aug' names carry two prefix tokens, 'raw' names one, others none.
        is_aug = 'aug' in name
        n_prefix = int(is_aug or 'raw' in name) + int(is_aug)
        return '_'.join(name.split('_')[n_prefix:])

    full_data = data.copy()
    full_data['original_name'] = full_data.names.apply(_original_name)
    original_imgs = np.unique(full_data['original_name'])
    if expected_n_originals is not None:
        assert original_imgs.shape[0] == expected_n_originals, (
            f'expected {expected_n_originals} original images, '
            f'found {original_imgs.shape[0]}.')

    train_img = set(np.random.choice(original_imgs,
                                     int((1 - test_portion) * len(original_imgs)),
                                     False))
    train_idx = full_data.original_name.apply(lambda x: x in train_img)
    train_data = full_data[train_idx]
    test_data = full_data[~train_idx]

    if train_subset_portion < 1:
        # Use a subset of the training originals to train the model.
        train_imgs = np.unique(train_data['original_name'])
        train_subimg = set(np.random.choice(train_imgs,
                                            int(train_subset_portion * len(train_imgs)),
                                            False))
        train_subidx = train_data.original_name.apply(lambda x: x in train_subimg)
        train_data = train_data[train_subidx]

    # `columns=` instead of a positional axis: pandas 2.x no longer accepts
    # `drop(label, 1)`.
    train_data = train_data.drop(columns='original_name').reset_index()
    test_data = test_data.drop(columns='original_name').reset_index()

    return train_data, test_data


def load_train_test(data, train_subset_portion):
    """Split ``data`` and wrap both halves in shuffled ``DataLoader`` objects.

    The split reserves 25% of the original images for testing. Images are
    resized to 224x224 and converted to tensors, and only the three MoSe2
    material variants listed below are kept. Both loaders use a batch size of
    128, shuffle their samples and pin memory.

    Args:
        data: DataFrame accepted by ``train_test_split``.
        train_subset_portion: Fraction of the training originals to keep.

    Returns:
        ``(train_loader, test_loader)``.
    """
    train_df, test_df = train_test_split(
        data, test_portion=.25, train_subset_portion=train_subset_portion)

    # Settings shared by the train and test datasets.
    dataset_kwargs = dict(
        raw_only=False,
        transform=transforms.Compose([
            transforms.Resize(size=(224, 224)),
            transforms.ToTensor()]),
        material=['MoSe2_on_Si', 'MoSe2_on_si_PDMS', 'MoSe2_on_PDMS'],
    )
    loader_kwargs = dict(batch_size=128, shuffle=True, pin_memory=True)

    # Build both datasets first, then both loaders.
    trainset, testset = (FlakeDataset(frame, **dataset_kwargs)
                         for frame in (train_df, test_df))
    train_loader = DataLoader(trainset, **loader_kwargs)
    test_loader = DataLoader(testset, **loader_kwargs)

    return train_loader, test_loader


def load_model(path):
    """Build a ResNet-18 with a single-logit head and load weights from ``path``.

    Args:
        path: Path to a checkpoint holding a ``state_dict`` for this network.

    Returns:
        The network, moved to the module-level ``device``, with the checkpoint
        weights loaded.
    """
    # Called without `pretrained=`: that keyword is deprecated in newer
    # torchvision releases, and the default already builds the network
    # without pretrained weights.
    net = models.resnet18()
    # Replace the classification head with a single output unit.
    net.fc = nn.Linear(net.fc.in_features, 1)
    net = net.to(device)

    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted location.
    state_dict = torch.load(path, map_location=device)
    net.load_state_dict(state_dict)

    return net


def evaluate(net, *dataloaders, return_per_material=False):
    """Compute the binary classification accuracy of ``net`` on each loader.

    A prediction is positive when the network output (logit) is greater than
    zero. Each loader must yield ``(inputs, labels, material, dat_type)``
    batches.

    Args:
        net: Model to evaluate; it is switched to eval mode.
        *dataloaders: Any number of data loaders.
        return_per_material: When true, additionally return the accuracy per
            material for each loader.

    Returns:
        A list with one overall accuracy per loader (``nan`` for a loader that
        yields no samples). When ``return_per_material`` is true, a tuple
        ``(accuracies, per_material)`` where ``per_material`` is a list of
        ``{material: accuracy}`` dicts (``nan`` for materials that do not
        occur).
    """
    materials = ('MoSe2_on_si_PDMS', 'MoSe2_on_Si', 'MoSe2_on_PDMS')
    nan = float('nan')
    accus = []
    per_material = []

    net.eval()
    with torch.no_grad():
        for dataloader in dataloaders:
            sep_corr = dict.fromkeys(materials, 0.)
            sep_total = dict.fromkeys(materials, 0.)
            correct, total = 0, 0
            with tqdm(dataloader, desc='Evaluation') as t:
                for inputs, labels, material, dat_type in t:
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    outputs = net(inputs)

                    pred = (outputs > 0)
                    correct += (pred == labels).sum().item()
                    total += labels.shape[0]

                    # Per-material tallies for this batch.
                    material = np.array(material)
                    for mat in materials:
                        pred_mat = pred[material == mat]
                        label_mat = labels[material == mat]
                        sep_corr[mat] += (pred_mat == label_mat).sum().item()
                        sep_total[mat] += label_mat.shape[0]
                    t.set_postfix(accu=correct / total)

            # Guard against loaders that yield no samples.
            accus.append(correct / total if total else nan)
            per_material.append({
                mat: (sep_corr[mat] / sep_total[mat] if sep_total[mat] else nan)
                for mat in materials})

    if return_per_material:
        return accus, per_material
    return accus


In [None]:
# Evaluate every checkpoint under `result_path` on its training split.
# Each sub-directory is named 'quantized<k>'; '-1' means "no quantization".
result_path = '/content/drive/Shared drives/2d/results/to_compare'
data_root = '/content/drive/Shared drives/2d/data'

train_accus = {}
test_accus = {}

ks = []  # NOTE(review): never filled in this cell
for experiment in os.listdir(result_path):

    # Finished evaluation
    if experiment in ['quantized5', 'quantized10', 'quantized11']:
        continue

    if 'quantized' not in experiment:
        continue

    experiment_path = os.path.join(result_path, experiment)
    k = experiment.replace('quantized', '')

    # Load data
    if k == '-1':
        # -1: no quantization, data were stored differently.
        dataset_name = 'pad_augment_data_final'
    else:
        dataset_name = f'quantized{k}_pad_augment_data'
    # NOTE(review): read_pickle unpickles the file; only read trusted data.
    data = pd.read_pickle(os.path.join(data_root, dataset_name + '.pkl'))
    # Re-root every image path onto the mounted drive, keeping only the file name.
    image_dir = f'{data_root}/{dataset_name}/'
    data['paths'] = data['paths'].apply(
        lambda x, image_dir=image_dir: image_dir + x.split('/')[-1])

    # A distinct loop variable: the original reused the outer loop's name here.
    for checkpoint in os.listdir(experiment_path):
        if 'sub' in checkpoint:
            # e.g. 'resnet18_sub0.25.torch' -> 0.25
            train_subset_portion = float(checkpoint.split('sub')[1].replace('.torch', ''))
        else:
            train_subset_portion = 1.
        checkpoint_path = os.path.join(experiment_path, checkpoint)
        print(f'Evaluate: {checkpoint_path}')
        net = load_model(checkpoint_path)
        train_loader, test_loader = load_train_test(data, train_subset_portion)

        # Test-set evaluation is currently disabled:
        # test_accu = evaluate(net, test_loader)[0]
        # test_accus[f'{k}_sub{train_subset_portion:4.2f}'] = test_accu

        train_accu = evaluate(net, train_loader)[0]
        train_accus[f'{k}_sub{train_subset_portion:4.2f}'] = train_accu

        # Adjacent f-strings instead of backslash continuations, which put the
        # source indentation into the printed line.
        print(f'Finish: quantized: {k}, '
              f'train_subset_portion: {train_subset_portion:4.2f}, '
              f'train_accuracy: {train_accu:4.3f}')


Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized16/resnet18.torch


Evaluation:   0%|          | 0/61 [00:00<?, ?it/s]

Finish: quantized: 16,                     train_subset_portion: 1.00,                     train_accuracy: 0.995                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized15/resnet18.torch


Evaluation:   0%|          | 0/61 [00:00<?, ?it/s]

Finish: quantized: 15,                     train_subset_portion: 1.00,                     train_accuracy: 0.999                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized15/resnet18_sub0.10.torch


Evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

Finish: quantized: 15,                     train_subset_portion: 0.10,                     train_accuracy: 1.000                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized15/resnet18_sub0.25.torch


Evaluation:   0%|          | 0/16 [00:00<?, ?it/s]

Finish: quantized: 15,                     train_subset_portion: 0.25,                     train_accuracy: 0.991                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized15/resnet18_sub0.50.torch


Evaluation:   0%|          | 0/31 [00:00<?, ?it/s]

Finish: quantized: 15,                     train_subset_portion: 0.50,                     train_accuracy: 1.000                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized20/resnet18.torch


Evaluation:   0%|          | 0/61 [00:00<?, ?it/s]

Finish: quantized: 20,                     train_subset_portion: 1.00,                     train_accuracy: 0.995                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized20/resnet18_sub0.10.torch


Evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

Finish: quantized: 20,                     train_subset_portion: 0.10,                     train_accuracy: 1.000                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized20/resnet18_sub0.25.torch


Evaluation:   0%|          | 0/16 [00:00<?, ?it/s]

Finish: quantized: 20,                     train_subset_portion: 0.25,                     train_accuracy: 0.999                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized20/resnet18_sub0.50.torch


Evaluation:   0%|          | 0/31 [00:00<?, ?it/s]

Finish: quantized: 20,                     train_subset_portion: 0.50,                     train_accuracy: 1.000                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized-1/resnet18.torch


Evaluation:   0%|          | 0/61 [00:00<?, ?it/s]

Finish: quantized: -1,                     train_subset_portion: 1.00,                     train_accuracy: 1.000                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized-1/resnet18_sub0.50.torch


Evaluation:   0%|          | 0/31 [00:00<?, ?it/s]

Finish: quantized: -1,                     train_subset_portion: 0.50,                     train_accuracy: 1.000                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized-1/resnet18_sub0.25.torch


Evaluation:   0%|          | 0/16 [00:00<?, ?it/s]

Finish: quantized: -1,                     train_subset_portion: 0.25,                     train_accuracy: 0.990                    
Evaluate: /content/drive/Shared drives/2d/results/to_compare/quantized-1/resnet18_sub0.10.torch


Evaluation:   0%|          | 0/6 [00:00<?, ?it/s]

Finish: quantized: -1,                     train_subset_portion: 0.10,                     train_accuracy: 0.961                    



`Finish: quantized: -1, train_accuracy: 0.9997408990801917, test_accuracy: 0.7419354838709677`

`Finish: quantized: 5, train_accuracy: 0.9983158440212463, test_accuracy: 0.6999611348620287`

`Finish: quantized: 10, train_accuracy: 0.9994817981603835, test_accuracy: 0.7675864749319861`

`Finish: quantized: 11, train_accuracy: 0.9963725871226843, test_accuracy: 0.7384376214535562`

`Finish: quantized: 15, train_accuracy: 0.9985749449410546, test_accuracy: 0.7769141080450835`

`Finish: quantized: 16, train_accuracy: 0.9948179816038347, test_accuracy: 0.7609794014768753`

`Finish: quantized: 20, train_accuracy: 0.9948179816038347, test_accuracy: 0.7675864749319861`
