In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import Sampler
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
import pandas as pd
import random
import torch.nn.functional as F
import matplotlib.pyplot as plt


In [10]:
# fix random seeds for reproducibility
SEED = 123
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
random.seed(SEED)
np.random.seed(SEED)

def worker_init_fn(worker_id):                                                          
    np.random.seed(np.random.get_state()[1][0] + worker_id)


In [11]:
# support set means train set
# query set means test set
episode = 600
train_N_way = 5
train_K_shot = 1
train_N_query = 15

val_N_way = 5
val_K_shot = 1
val_N_query = 15


In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
filenameToPILImage = lambda x: Image.open(x)
# mini-Imagenet dataset
class MiniDataset(Dataset):
    def __init__(self, csv_path, data_dir):
        self.data_dir = data_dir
        self.data_df = pd.read_csv(csv_path).set_index("id")

        self.transform = transforms.Compose([
            filenameToPILImage,
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])

    def __getitem__(self, index):
        path = self.data_df.loc[index, "filename"]
        label = self.data_df.loc[index, "label"]
        image = self.transform(os.path.join(self.data_dir, path))
        return image, label

    def __len__(self):
        return len(self.data_df)

In [14]:
class GeneratorSampler(Sampler):
    def __init__(self, episode_file_path):
        episode_df = pd.read_csv(episode_file_path).set_index("episode_id")
        self.sampled_sequence = episode_df.values.flatten().tolist()

    def __iter__(self):
        return iter(self.sampled_sequence) 

    def __len__(self):
        return len(self.sampled_sequence)

In [15]:
class Convnet(nn.Module):
    def __init__(self, in_chaneels=3, hid_channels=64, out_channels=64):
        super().__init__()
        self.encoder = nn.Sequential(
            conv_block(in_chaneels, hid_channels),
            conv_block(hid_channels, hid_channels),
            conv_block(hid_channels, hid_channels),
            conv_block(hid_channels, out_channels),
        )
    def forward(self, x):
        x = self.encoder(x)
        return x.view(x.size(0), -1)

def conv_block(in_chaneels, out_channels):
    bn = nn.BatchNorm2d(out_channels)
    return nn.Sequential(
        nn.Conv2d(in_chaneels, out_channels, 3, padding=1),
        bn,
        nn.ReLU(),
        nn.MaxPool2d(2)
    )

In [16]:
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(1600, 512),
            # nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, 1)
        )
        # self.num_layers = num_layers
        # h = [hidden_dim] * (num_layers - 1)
        # self.layers = nn.ModuleList(nn.Linear(n, k)
        #                             for n, k in zip([input_dim] + h, h + [output_dim]))
    def forward(self,  x):
        # for i, layer in enumerate(self.layers):
        #     x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
        return self.fc(x)


In [17]:
dist = MLP(1600, 512, 1, 2).to(device)

In [18]:
print(dist)

MLP(
  (fc): Sequential(
    (0): Linear(in_features=1600, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=512, out_features=1, bias=True)
  )
)


In [20]:
class train_sampler(Sampler):
    def __init__(self, csv_path, episode):
        data_df = pd.read_csv(csv_path).set_index("id")
        clses = data_df['label'].unique()
        img_choose = []
        self.img_choose = []
        for i in range(episode):
            choosed_support = []
            choosed_query = []

            choosed_cls = random.sample(range(len(clses)),train_N_way)
            for j in range(train_N_way):
                choosed_sq = random.sample(data_df[data_df['label']==clses[choosed_cls[j]]].index.tolist(),train_N_query+train_K_shot)

                choosed_query.append(choosed_sq[train_K_shot:])
                choosed_support.append(choosed_sq[:train_K_shot])

            flat_choosed_query =  [item for sublist in choosed_query for item in sublist]
            flat_choosed_support = [item for sublist in choosed_support for item in sublist]
            
            img_choose.append(flat_choosed_support + flat_choosed_query)

        self.img_choose = [item for sublist in img_choose for item in sublist]
        
    def __iter__(self):
        return iter(self.img_choose) 

    def __len__(self):
        return len(self.img_choose)

In [21]:
train_csv, train_data_dir = 'hw4_data/mini/train.csv', 'hw4_data/mini/train'
train_dataset = MiniDataset(train_csv, train_data_dir)
train_loader = DataLoader(
    train_dataset,
    batch_size=train_N_way*(train_N_query + train_K_shot),
    pin_memory=False,
    worker_init_fn=worker_init_fn,
    sampler=train_sampler('hw4_data/mini/train.csv',episode)
)


In [22]:
# val_csv, val_data_dir = 'hw4_data/mini/val.csv','hw4_data/mini/val'
# val_dataset = MiniDataset(val_csv, val_data_dir)
# val_loader = DataLoader(
#         val_dataset, batch_size=val_N_way * (val_N_query + val_K_shot),
#         pin_memory=False, worker_init_fn=worker_init_fn,
#         sampler=GeneratorSampler('hw4_data/mini/val_testcase.csv'))

In [23]:
val_csv, val_data_dir = 'hw4_data/mini/val.csv','hw4_data/mini/val'
val_dataset = MiniDataset(val_csv, val_data_dir)
val_loader = DataLoader(
        val_dataset, 
        batch_size=val_N_way * (val_N_query + val_K_shot),
        pin_memory=False, 
        worker_init_fn=worker_init_fn,
        sampler=train_sampler('hw4_data/mini/val.csv',episode))

In [24]:
def train(model, epoch):
    # eq 1e-4 cs 1e-3
    optimizer = optim.Adam([*model.parameters(),*dist.parameters()], lr=1e-4)
    # dist_optimizer = optim.Adam(dist.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()
    # dist_criterion = nn.CrossEntropyLoss()
    for ep in range(epoch):
        train_loss = 0
        correct = 0
        total = 0
        episodic_acc = []
        model.train()
        for i, (data, target) in enumerate(tqdm(train_loader)):
            # if(i==1):break
            # split data into support and query data
            support_input = data[:train_N_way * train_K_shot,:,:,:]
            query_input   = data[train_N_way * train_K_shot:,:,:,:]

            # create the relative label (0 ~ N_way-1) for query data
            label_encoder = {target[i * train_K_shot] : i for i in range(train_N_way)}
            query_label = torch.cuda.LongTensor([label_encoder[class_name] for class_name in target[train_N_way * train_K_shot:]])

            # TODO: extract the feature of support and query data
            # shape:torch.Size([N-way*K-shot, 1600]) torch.Size([N-way*N-query, 1600])
            support_feature = model(support_input.to(device))
            query_feature = model(query_input.to(device))
            support_feature = support_feature.view(train_N_way,train_K_shot,1600).mean(dim=1)

            # TODO: calculate the prototype for each class according to its support data
            # ref:https://discuss.pytorch.org/t/calculating-eucledian-distance-between-two-tensors/85141
            query_to_clses = None
            for i in query_feature:
                i = i.view(1,-1)
                # parametric function
                reg = None
                for j in support_feature:
                    dis = dist(i*j)

                    if reg is not None:
                        reg =torch.cat((reg,dis), 0)
                    else:
                        reg = dis
                reg = reg.view(1,-1)

                # cosine similarity
                # reg = -F.cosine_similarity(support_feature,i).view(1,-1)

                # Euclidean distance
                # reg = ((support_feature - i)**2).sum(1).view(1,-1)

                if query_to_clses is not None:
                    query_to_clses = torch.cat((query_to_clses, reg), 0)
                else:
                    query_to_clses = reg

            query_softmin = F.softmin(query_to_clses,dim=1)

            loss = criterion(query_softmin, query_label)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_ = len(query_to_clses)
            total += total_

            # TODO: classify the query data depending on the its distense with each prototype
            correct_ = query_to_clses.argmin(dim=1).eq(query_label).sum().item()
            correct += correct_
            train_loss += loss
            episodic_acc.append(correct_/total_)

        episodic_acc = np.array(episodic_acc)
        mean = episodic_acc.mean()
        std = episodic_acc.std()
    
        print(f'Epoch:{ep+1}\tAccuracy: {mean * 100:.2f} +- { 1.96 * std / (600)**(1/2) * 100:.2f} %\tLoss:{loss:.4f}')
        predict_1_3(model)
        
        torch.save(model, f'cspth/cs_{ep}_{mean * 100:.4f}.pth')
        
        train_acc_his.append(mean * 100)
        train_loss_his.append(loss)


        

In [25]:
def predict(model):
    with torch.no_grad():
        ans = None
        model.eval()
        # each batch represent one episode (support data + query data)
        for i, (data, target) in enumerate(tqdm(val_loader)):
            # split data into support and query data
            support_input = data[:val_N_way * val_K_shot,:,:,:] 
            query_input   = data[val_N_way * val_K_shot:,:,:,:]

            # create the relative label (0 ~ N_way-1) for query data
            label_encoder = {target[i * val_K_shot] : i for i in range(val_N_way)}
            query_label = torch.cuda.LongTensor([label_encoder[class_name] for class_name in target[val_N_way * val_K_shot:]])

            # TODO: extract the feature of support and query data
            # shape:torch.Size([N-way*K-shot, 1600]) torch.Size([N-way*N-query, 1600])
            support_feature = model(support_input.to(device))
            query_feature = model(query_input.to(device))
            support_feature = support_feature.view(val_N_way,val_K_shot,1600).mean(dim=1)

            # TODO: calculate the prototype for each class according to its support data
            # ref:https://discuss.pytorch.org/t/calculating-eucledian-distance-between-two-tensors/85141
            query_to_clses = None
            for i in query_feature:
                i = i.view(1,-1)
                # parametric function
                reg = None
                for j in support_feature:
                    dis = dist(i*j)

                    if reg is not None:
                        reg =torch.cat((reg,dis), 0)
                    else:
                        reg = dis
                reg = reg.view(1,-1)

                # cosine similarity
                # reg = -F.cosine_similarity(support_feature,i).view(1,-1)

                # Euclidean distance
                # reg = ((support_feature - i)**2).sum(1).view(1,-1)

                if query_to_clses is not None:
                    query_to_clses = torch.cat((query_to_clses, reg), 0)
                else:
                    query_to_clses = reg


            a = query_to_clses.argmin(dim=1).view(1,-1)
            # TODO: classify the query data depending on the its distense with each prototype
            if ans is not None:
                ans = torch.cat((ans, a), 0)
            else:
                ans = a
        pd.DataFrame(
            ans.tolist(),
            columns=[f'query{i}'for i in range(val_N_query*val_N_way)],
        ).to_csv('out.csv', index_label='episode_id')
        eval('out.csv')
    

In [26]:
def predict_1_3(model):
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        ans = None
        val_loss = 0
        correct = 0
        total = 0
        episodic_acc = []
        model.eval()
        # each batch represent one episode (support data + query data)
        for i, (data, target) in enumerate(tqdm(val_loader)):
            # split data into support and query data
            support_input = data[:val_N_way * val_K_shot,:,:,:] 
            query_input   = data[val_N_way * val_K_shot:,:,:,:]

            # create the relative label (0 ~ N_way-1) for query data
            label_encoder = {target[i * val_K_shot] : i for i in range(val_N_way)}
            query_label = torch.cuda.LongTensor([label_encoder[class_name] for class_name in target[val_N_way * val_K_shot:]])

            # TODO: extract the feature of support and query data
            # shape:torch.Size([N-way*K-shot, 1600]) torch.Size([N-way*N-query, 1600])
            support_feature = model(support_input.to(device))
            query_feature = model(query_input.to(device))
            support_feature = support_feature.view(val_N_way,val_K_shot,1600).mean(dim=1)

            # TODO: calculate the prototype for each class according to its support data
            # ref:https://discuss.pytorch.org/t/calculating-eucledian-distance-between-two-tensors/85141
            query_to_clses = None
            for i in query_feature:
                i = i.view(1,-1)
                reg = ((support_feature - i)**2).sum(1).view(1,-1)
                if query_to_clses is not None:
                    query_to_clses = torch.cat((query_to_clses, reg), 0)
                else:
                    query_to_clses = reg
            
            query_softmin = F.softmin(query_to_clses,dim=1)
            loss = criterion(query_softmin, query_label)
            total_ = len(query_to_clses)
            total += total_
            correct_ = query_to_clses.argmin(dim=1).eq(query_label).sum().item()
            correct += correct_
            val_loss += loss
            episodic_acc.append(correct_/total_)
            a = query_to_clses.argmin(dim=1).view(1,-1)
            # TODO: classify the query data depending on the its distense with each prototype
            if ans is not None:
                ans = torch.cat((ans, a), 0)
            else:
                ans = a
        episodic_acc = np.array(episodic_acc)
        mean = episodic_acc.mean()
        std = episodic_acc.std()
    
        print(f'-val-\tAccuracy: {mean * 100:.2f} +- { 1.96 * std / (600)**(1/2) * 100:.2f} %\tLoss:{loss:.4f}')
        
        val_acc_his.append(mean * 100)
        val_loss_his.append(loss)
    

In [27]:
def eval(fn):
    import csv

    # read your prediction file
    with open(fn, mode='r') as pred:
        reader = csv.reader(pred)
        next(reader, None)  # skip the headers
        pred_dict = {int(rows[0]): np.array(rows[1:]).astype(int) for rows in reader}

    # read ground truth data
    with open('hw4_data/mini/val_testcase_gt.csv', mode='r') as gt:
        reader = csv.reader(gt)
        next(reader, None)  # skip the headers
        gt_dict = {int(rows[0]): np.array(rows[1:]).astype(int) for rows in reader}

    episodic_acc = []
    for key, value in pred_dict.items():
        episodic_acc.append((gt_dict[key] == value).mean().item())

    episodic_acc = np.array(episodic_acc)
    mean = episodic_acc.mean()
    std = episodic_acc.std()

    val_acc_his.append(mean * 100)
    print('-Eval- Accuracy: {:.2f} +- {:.2f} %'.format(mean * 100, 1.96 * std / (600)**(1/2) * 100))


In [28]:
def plot_result():
    plt.plot(train_acc_his)
    plt.plot(val_acc_his)
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left') 
    plt.show()
    
    plt.plot(train_loss_his)
    plt.plot(val_loss_his)
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left') 
    plt.show()


In [29]:
model = Convnet().to(device)
print(model)
# predict(model)

Convnet(
  (encoder): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (1): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (2): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (3): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), str

In [34]:
train_acc_his, train_loss_his = [], []
val_acc_his, val_loss_his = [], []
# model = torch.load('99_31.1567.pth')
model = Convnet().to(device)
train(model, 50)
plot_result()

100%|██████████| 600/600 [02:35<00:00,  3.86it/s]


Epoch:1	Accuracy: 34.54 +- 0.75 %	Loss:1.4413


100%|██████████| 600/600 [01:43<00:00,  5.77it/s]


-val-	Accuracy: 32.09 +- 0.62 %	Loss:1.6515


100%|██████████| 600/600 [02:29<00:00,  4.01it/s]


Epoch:2	Accuracy: 40.00 +- 0.80 %	Loss:1.3290


100%|██████████| 600/600 [01:36<00:00,  6.21it/s]


-val-	Accuracy: 33.43 +- 0.67 %	Loss:1.7179


100%|██████████| 600/600 [02:27<00:00,  4.08it/s]


Epoch:3	Accuracy: 43.46 +- 0.83 %	Loss:1.3520


100%|██████████| 600/600 [01:28<00:00,  6.81it/s]


-val-	Accuracy: 34.13 +- 0.68 %	Loss:1.6378


100%|██████████| 600/600 [02:24<00:00,  4.14it/s]


Epoch:4	Accuracy: 46.35 +- 0.85 %	Loss:1.3118


100%|██████████| 600/600 [01:18<00:00,  7.67it/s]


-val-	Accuracy: 35.17 +- 0.69 %	Loss:1.6231


100%|██████████| 600/600 [02:24<00:00,  4.15it/s]


Epoch:5	Accuracy: 48.75 +- 0.85 %	Loss:1.2744


100%|██████████| 600/600 [01:15<00:00,  7.95it/s]


-val-	Accuracy: 35.36 +- 0.69 %	Loss:1.6780


100%|██████████| 600/600 [02:23<00:00,  4.18it/s]


Epoch:6	Accuracy: 51.01 +- 0.82 %	Loss:1.2267


100%|██████████| 600/600 [01:08<00:00,  8.71it/s]


-val-	Accuracy: 35.51 +- 0.68 %	Loss:1.6216


100%|██████████| 600/600 [02:23<00:00,  4.17it/s]


Epoch:7	Accuracy: 52.78 +- 0.84 %	Loss:1.2670


100%|██████████| 600/600 [01:04<00:00,  9.25it/s]


-val-	Accuracy: 35.52 +- 0.70 %	Loss:1.6503


100%|██████████| 600/600 [02:25<00:00,  4.14it/s]


Epoch:8	Accuracy: 54.44 +- 0.84 %	Loss:1.2575


100%|██████████| 600/600 [01:10<00:00,  8.51it/s]


-val-	Accuracy: 35.29 +- 0.68 %	Loss:1.6246


100%|██████████| 600/600 [02:27<00:00,  4.06it/s]


Epoch:9	Accuracy: 55.78 +- 0.83 %	Loss:1.2447


100%|██████████| 600/600 [01:11<00:00,  8.34it/s]


-val-	Accuracy: 35.76 +- 0.69 %	Loss:1.6389


 57%|█████▋    | 341/600 [01:21<01:02,  4.16it/s]


KeyboardInterrupt: 

In [19]:
plot_result()

NameError: name 'plot_result' is not defined