In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transfroms
import numpy as np
from torch.utils.data import DataLoader
from collections import OrderedDict
import flwr as fl
import math
import matplotlib.pyplot as plt
import yaml

from models import ginet_finetune
from sklearn.model_selection import train_test_split
from dataset.dataset_test import MolTestDatasetWrapper


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
BATCH_SIZE = 128
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:3') # 해당 조의 GPU 번호로 변경 ex) 1조 : cuda:1
else:
    DEVICE = torch.device('cpu')

print('Using PyTorch version:', torch.__version__, ' Device:', DEVICE)

Using PyTorch version: 1.7.1+cu110  Device: cuda:3


In [27]:
def train(model, epoch, train_loader, optimizer, log_interval, loss_fn):
    model.train()
    for batch_idx, sample in enumerate(train_loader):
        image = sample
        label = sample['y']
        image = image.to(DEVICE)
        label = label.to(DEVICE)
        optimizer.zero_grad()
        output = model(image)
        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}".format(
                                                    epoch, batch_idx * len(image), 
                                                    len(train_loader.dataset), 100. * batch_idx / len(train_loader), 
                                                    loss.item()))
            
''' 학습되는 과정 속에서 검증 데이터에 대한 모델 성능을 확인하는 함수 정의 '''
def evaluate(model, test_loader, loss_fn):
    model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(DEVICE)
            label = label.to(DEVICE)
            output = model(image)
            test_loss += loss_fn(output, label).item()
            prediction = output.max(1, keepdim = True)[1]
            correct += prediction.eq(label.view_as(prediction)).sum().item()
    
    test_loss /= (len(test_loader.dataset) / 64)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

In [28]:
class FlowerClient(fl.client.NumPyClient):
    def __init__(self, model, trainloader, testloader, opt, loss_fn):
        self.model = model
        self.train_loader = trainloader
        self.test_loader = testloader
        self.optimizer = opt
        self.loss_fn = loss_fn

    def get_parameters(self, config):
        return [val.cpu().numpy() for _, val in self.model.state_dict().items()]

    def set_parameters(self, parameters): # pytorch 모델에 파라미터를 적용하는 코드가 복잡하여 함수로 정의
        params_dict = zip(self.model.state_dict().keys(), parameters)
        state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
        self.model.load_state_dict(state_dict, strict=True)

    def fit(self, parameters, config):
        self.set_parameters(parameters) # 위에서 정의한 set_parameters함수를 사용
        train(self.model, 1, self.train_loader, self.optimizer, 200, self.loss_fn)
        return self.get_parameters(config={}), len(self.train_loader.dataset), {}

    def evaluate(self, parameters, config):
        self.set_parameters(parameters)
        loss, accuracy = evaluate(self.model, self.test_loader, self.loss_fn)
        return loss, len(self.test_loader.dataset), {"accuracy": accuracy}


In [29]:
# device = "cuda" if torch.cuda.is_available() else "cpu"
# device = 'cpu'
config = yaml.load(open("config_finetune.yaml", "r"), Loader=yaml.FullLoader)
config['dataset']['task'] = 'regression'
config['dataset']['data_path'] = 'ms_mouse.csv'
#config['dataset']['data_path'] = 'ms_human.csv'

model_fl = ginet_finetune.GINet(config['dataset']['task'], **config["model"]).to(DEVICE)
criterion_fl = nn.CrossEntropyLoss().to(DEVICE)
optimizer_fl = torch.optim.Adam(model_fl.parameters())

In [6]:

csv_file = 't.csv'     # just test file to check whether the code works well or not

# split data into train and validation datasets by using train_test_split
train, test = train_test_split(csv_file, test_size=0.2, random_state=1)

train_loader_fl = DataLoader(train, batch_size=BATCH_SIZE)
test_loader_fl = DataLoader(test, batch_size=BATCH_SIZE)

# flwr_client = FlowerClient(model_fl, train_loader_fl, test_loader_fl, optimizer_fl, criterion_fl)

# fl.client.start_numpy_client(server_address="127.0.0.1:8080", client=flwr_client)

In [13]:
config = yaml.load(open("config_finetune.yaml", "r"), Loader=yaml.FullLoader)
config['dataset']['task'] = 'regression'
config['dataset']['data_path'] = 'ms_mouse.csv'
config['dataset']['target'] = 'MLM'
#config['dataset']['data_path'] = 'ms_human.csv'

model_fl = ginet_finetune.GINet(config['dataset']['task'], **config["model"]).to(DEVICE)
criterion_fl = nn.CrossEntropyLoss().to(DEVICE)
optimizer_fl = torch.optim.Adam(model_fl.parameters())

# %%

# split data into train and validation datasets by using train_test_split

# train_loader_fl = DataLoader(train_list, batch_size=BATCH_SIZE)
# test_loader_fl = DataLoader(test_list, batch_size=BATCH_SIZE)
dataset = MolTestDatasetWrapper(config['batch_size'], **config['dataset'])

In [31]:
train_loader, valid_loader, test_loader = dataset.get_data_loaders()

for i in train_loader:
    print(i['y'])

3498
3498
About to generate scaffolds
Generating scaffold 0/3498
Generating scaffold 1000/3498
Generating scaffold 2000/3498
Generating scaffold 3000/3498
About to sort in scaffold sets




tensor([[1.8860e+00],
        [9.9900e+01],
        [1.9840e+01],
        [3.9200e+00],
        [5.1700e+01],
        [2.6000e-01],
        [1.0000e+00],
        [7.6000e+01],
        [7.6170e+01],
        [9.6000e-01],
        [6.0000e-02],
        [2.4300e+00],
        [2.0684e+01],
        [6.4400e+00],
        [4.9000e-01],
        [8.6752e+01],
        [6.7640e+01],
        [1.6780e+00],
        [1.0050e+01],
        [6.2740e+01],
        [7.5930e+01],
        [7.0784e+01],
        [9.9990e+01],
        [3.4260e+01],
        [2.4680e+01],
        [6.1770e+00],
        [0.0000e+00],
        [9.5840e+01],
        [8.1485e+01],
        [5.5475e+01],
        [2.1226e+01],
        [7.0150e+00]])
tensor([[6.1556e+01],
        [4.3430e+01],
        [1.3400e+01],
        [8.9700e-01],
        [8.5710e+01],
        [2.5980e+00],
        [5.5785e+01],
        [1.9685e+01],
        [9.2350e+01],
        [9.5670e+01],
        [1.6240e+01],
        [8.3972e+01],
        [9.9500e+00],
        [

In [9]:
a = torch.tensor([[59.5024],
        [45.0722],
        [21.6544],
        [28.0247],
        [70.4545],
        [56.0656],
        [60.1492],
        [44.6700],
        [77.5637],
        [15.4316],
        [64.7965],
        [14.1059],
        [38.2778],
        [12.9553],
        [34.8992],
        [46.0185],
        [22.4915],
        [33.7172],
        [21.2361],
        [31.4631],
        [42.1644],
        [30.1330],
        [27.4722],
        [34.1984],
        [33.3891],
        [35.3193],
        [29.2001],
        [24.1560],
        [43.6553],
        [29.9696],
        [49.0181],
        [23.0954]])

# a.max(1, keepdim = True)[1].sum().item()
a[0][0].item()

59.50239944458008