In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

In [27]:
class CFG:
    train_path=f"data/file_name"
    test_path=f"data/file_name"
    model_path=f'model'
    n_epoch=100
    batch_size=128
    lr=0.001
    loss_L1=nn.L1Loss()
    loss_cross=nn.CrossEntropyLoss()
    opt=torch.optim.Adam
    
cfg=CFG()

In [17]:
def get_device(cuda_preference=True):
    print('cuda available:', torch.cuda.is_available(), 
        '; cudnn available:', torch.backends.cudnn.is_available(),
        '; num devices:', torch.cuda.device_count())
    
    use_cuda = False if not cuda_preference else torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    device_name = torch.cuda.get_device_name(device) if use_cuda else 'cpu'
    print('Using device', device_name)
    return device

device=get_device()
num_cpus = os.cpu_count()
print(num_cpus, 'CPUs available')

cuda available: True ; cudnn available: True ; num devices: 1
Using device Quadro RTX 6000
64 CPUs available


In [18]:
from torch.utils.data import DataLoader, TensorDataset, random_split

def get_value(df):
    feature=df['column_name']
    target=df['target']

    return feature.value,target.value

def get_dataloaders(df,cfg.batch_size):
    feature,target=get_value(df)
    x_tensor=torch.Tensor(feature).to(device)
    y_tensor=torch.Tensor(target).to(device)

    full_dataset=TensorDataset(x_tensor,y_tensor)
    train_dataset,valid_dataset=random_split(full_dataset,[0.8,0.2])

    train_dataloader=DataLoader(train_dataset,batch_size=cfg.batch_size,shuffle=True)
    valid_dataloader=DataLoader(valid_dataset,batch_size=2*cfg.batch_size)

    return train_dataloader,valid_dataloader

train_data,valid_data=get_dataloaders(df)

SyntaxError: invalid syntax (2421592839.py, line 9)

In [21]:
def accuracy(outputs, labels):
        _, preds = torch.max(outputs, dim=1)
        return torch.tensor(torch.sum(preds == labels).item() / len(preds))

- validation_stepとvalidation_epoch_endとepoch_endは引数にtaskの種類を入れること

In [29]:
class Basic(nn.Module):
    def training_step(self,batch,loss_function):
        feature,target=batch
        out=self(feature)
        loss=loss_function(out,target)
        return loss
    
    def validatio_step(self,batch,loss_function,task):
        feature,target=batch
        out=self(feature)
        loss=loss_function(out,target)
        if task == 'Regression':
            return {'valid_loss':loss.detach() }
        elif task == 'Classification':
            acc=accuracy(out,target)
            return {'val_loss':loss.detach()  , 'val_acc' : acc}

    def validation_epoch_end(self, outputs,task):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean() # Combine losses
        if task=='Regresison':
            return {'val_loss': epoch_loss.item()}
        elif task=='Classification':
            batch_accs = [x['val_acc'] for x in outputs]
            epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
            return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
    
    def epoch_end(self, epoch, result,task):
        if task =='Regression':
            print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss']))

        elif task=='Classificatioin':
            print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch+1, result['train_loss'], result['val_loss'], result['val_acc']))

- layersでneuralnetworkのレイヤーの数を決めるだけでいい
- NeuralNetworkクラスでBasicを継承している

In [30]:
layers=[64,64,32,32,16,16,8,8]
class NeuralNetwork(Basic):
    def __init__(self):
        super().__init__()
        self.relu_stack=nn.Sequential(
            nn.Linear(len(feature),layers[0]),
            nn.ReLU()
        )

        for i in range(len(layers)-1):
            self.relu_stack.appned(nn.Dropout(0.25))
            self.relu_stack.append(nn.Linear(layers[i],layers[i+1]))
            self.relu_stack.append(nn.ReLU())
        self.relu_stack.append(nn.Linear(layers[-1],1))

    def forward(self,x):
        output=self.relu_stack(x)
        return output
    
    def init_weights(m):
        if isinstance(m, nn.Linear):
            torch.nn.init.kaiming_normal_(m.weight)
            m.bias.data.fill_(0.01)
            if m.out_features == 1:
                torch.nn.init.xavier_normal_(m.weight)

model=NeuralNetwork()
model

NameError: name 'feature' is not defined

In [33]:
@torch.no_grad()
def evaluate(model,valid_data):
    model.val()
    outputs=[model.validation_step(batch) for batch in valid_data]
    return model.validation_epoch_end(outputs)

def fit (model,train_data,valid_data,opt_func=cfg.opt,lr=cfg.lr,epochs=cfg.n_epoch):
    history=[]
    best_accuracy=0.0
    optimizer=opt_func(model.parameters(),lr)
    for epoch in range(epochs):
        #training phase
        model.train()
        train_losses=[]
        for batch in train_data:
            loss=model.training_step(batch)
            train_losses.append(loss)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()#Batchごとに実行しているので勾配を０にしないといけない
        #validation phase
        result=evaluate(model,valid_data)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)

        if history[epoch]['val_acc']>best_accuracy:
            torch.save(model.state_dict(),'cfg.model_path/best_checkpoint.model')
            best_accuracy=history[epoch['val_acc']]
            print('model is saved !')
    return history


In [31]:
model.apply(init_weights)
model.to(device)


NameError: name 'model' is not defined

In [32]:
history=fit(model,train_data,valid_data,opt_func,lr,epochs)

NameError: name 'model' is not defined

In [None]:
def plot_accuracy(history):
    accuracy=[x['val_acc'] for x in history]
    plt.plot(accuracy, '-')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title('Accuracy vs. No. of epochs')

def plot_losses(history):
    train_losses = [x.get('train_loss') for x in history]
    val_losses = [x['val_loss'] for x in history]
    plt.plot(train_losses,color='red')
    plt.plot(val_losses,color='blue')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['Training', 'Validation'])
    plt.title('Loss vs. No. of epochs')

In [None]:
from torch.autograd import Variable
import torch.nn.functional as F

model.load_state_dict(torch.load('cfg.model_path/best_checkpoint.model'))
test_accuracy=0.0
for i, (feature,target) in enumerate(test_dl):
    if torch.cuda.is_available():
        feature=F(feature.cuda())
        target=Variable(target.cuda())

    outputs=model(feature)
    _,prediction=torch.max(outputs.data,1)
    test_accuracy+=int(torch.sum(prediction==target.data))

test_accuracy=test_accuracy/test_count
print("Test accuracy: ", test_accuracy)