<a href="https://colab.research.google.com/github/syous154/Project/blob/main/Dacon_1_project_fin.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings(action='ignore') 
#from efficientnet_pytorch import EfficientNet
# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # device setup
print(device)

cuda:0


In [None]:
#Hyperparameter Setting
# Central table of tunable settings read by the rest of the notebook.
CFG = dict(
    IMG_SIZE=224,        # height and width passed to the resize transform
    EPOCHS=10,           # number of passes made by the main training loop
    LEARNING_RATE=3e-4,  # initial learning rate handed to the optimizer
    BATCH_SIZE=32,       # batch size used by every DataLoader
    SEED=41,             # value passed to seed_everything
)

In [None]:
#Fixed RandomSeed
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and every CUDA device), exports ``PYTHONHASHSEED`` and configures
    cuDNN for reproducible execution.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover multi-GPU runs as well; a no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick possibly different kernels
    # from run to run, which defeats the determinism requested above.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # fix the random seed

In [None]:
# connect google drive
from google.colab import drive
drive.mount('/content/drive')
!unzip -qq '/content/drive/MyDrive/Colab Notebooks/open.zip'
df = pd.read_csv('/content/train.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
replace sample_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
def get_labels(df):
    """Return the label matrix of ``df`` as a NumPy array.

    Every column from the third one onward is treated as a label column; the
    result has one row per row of ``df``.
    """
    label_frame = df.iloc[:, 2:]
    return label_frame.to_numpy()

In [None]:
# Shuffle the rows with a fixed seed so the splits below are reproducible.
# sort_index() first puts the rows back into their read_csv order, which keeps
# this cell idempotent: re-running it yields the same permutation instead of
# shuffling an already shuffled frame again.
df = df.sort_index().sample(frac=1, random_state=CFG['SEED'])
# 80 % of the rows go to training; the remaining 20 % is halved into a
# validation part and a held-out test part.
train, test, train_labels, test_labels = train_test_split(df, get_labels(df),test_size=0.2, random_state=42)
val, test, val_labels, test_labels = train_test_split(test, get_labels(test),test_size=0.5, random_state=42)

In [None]:
#CustomDataset
class CustomDataset(Dataset):
    """Dataset that loads images from disk and optionally pairs them with labels.

    Args:
        img_path_list: Indexable collection of image file paths.
        label_list: Indexable collection of per-image label vectors, or
            ``None`` for unlabeled data.
        transforms: Optional callable invoked as ``transforms(image=image)``;
            the ``'image'`` entry of its result replaces the loaded image.
    """

    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(image, label)`` when labels were supplied, otherwise ``image``.
            ``label`` is a float tensor built from ``label_list[index]``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image at the path.
        """
        img_path = self.img_path_list[index]

        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside a transform.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # OpenCV decodes to BGR channel order. The normalisation constants and
        # the pretrained backbone used by this notebook are RGB-ordered, so
        # convert before any transform runs.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        if self.label_list is not None:
            label = torch.FloatTensor(self.label_list[index])
            return image, label
        else:
            return image

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.img_path_list)

In [None]:
# Keyword arguments shared by both normalisation steps. The mean/std values
# look like the commonly used ImageNet channel statistics — TODO confirm.
_normalize_kwargs = dict(
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
    max_pixel_value=255.0,
    always_apply=False,
    p=1.0,
)
_side = CFG['IMG_SIZE']

# Training pipeline: resize to a square, normalise, convert to a tensor.
train_transform = A.Compose([
    A.Resize(_side, _side),
    A.Normalize(**_normalize_kwargs),
    ToTensorV2(),
])

# Evaluation pipeline: currently the same three steps as the training one.
test_transform = A.Compose([
    A.Resize(_side, _side),
    A.Normalize(**_normalize_kwargs),
    ToTensorV2(),
])

In [None]:
# Training split: batches are drawn in a new random order every epoch.
train_dataset = CustomDataset(
    img_path_list=train['img_path'].values,
    label_list=train_labels,
    transforms=train_transform,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=True,
    num_workers=0,
)

# Validation split: fixed order, evaluation transforms.
val_dataset = CustomDataset(
    img_path_list=val['img_path'].values,
    label_list=val_labels,
    transforms=test_transform,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [None]:
# Sanity check: number of rows in the training label matrix.
len(train_labels)

26395

Model Definition


In [None]:
class BaseModel(nn.Module):
    """Multi-label classifier: ConvNeXt-Large backbone, dropout, linear head.

    The backbone output (which the head expects to be 1000-dimensional) goes
    through dropout with p=0.5 and a linear layer with ``num_classes``
    outputs; a sigmoid then maps every output independently into [0, 1].

    Args:
        num_classes: Number of outputs produced per sample.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Creation order (backbone, dropout, classifier) fixes the order of
        # the entries in the module's state_dict.
        self.backbone = models.convnext_large(pretrained=True)
        self.dropout = nn.Dropout(p=0.5)
        self.classifier = nn.Linear(in_features=1000, out_features=num_classes)

    def forward(self, x):
        """Return per-class sigmoid activations for the batch ``x``."""
        features = self.dropout(self.backbone(x))
        logits = self.classifier(features)
        return torch.sigmoid(logits)

Train

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device, epochs=None):
    """Train ``model`` and leave it holding the weights of its best epoch.

    Each epoch runs one pass over ``train_loader`` with binary cross-entropy
    loss, evaluates on ``val_loader`` via ``validation`` and, when a scheduler
    is given, steps it with the validation accuracy.

    Args:
        model: Network whose forward pass returns probabilities in [0, 1]
            (required by ``nn.BCELoss``).
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Loader handed to ``validation`` after every epoch.
        scheduler: Optional scheduler stepped with the validation accuracy,
            or ``None``.
        device: Device the model and the batches are moved to.
        epochs: Number of epochs to run; defaults to ``CFG['EPOCHS']``.

    Returns:
        ``model`` itself, with the parameters of the epoch that reached the
        highest validation accuracy loaded back into it, or ``None`` when no
        epoch achieved a validation accuracy above 0.
    """
    if epochs is None:
        epochs = CFG['EPOCHS']

    model.to(device)
    criterion = nn.BCELoss().to(device)

    best_val_acc = 0
    best_state = None

    for epoch in range(1, epochs + 1):
        model.train()
        train_loss = []
        for imgs, labels in tqdm(iter(train_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(imgs)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_acc = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val ACC : [{_val_acc:.5f}]')

        if scheduler is not None:
            scheduler.step(_val_acc)

        if best_val_acc < _val_acc:
            best_val_acc = _val_acc
            # Keeping a reference to `model` would not freeze anything: later
            # epochs keep mutating the same object. Snapshot the weights
            # instead (on the CPU, so no second copy occupies accelerator memory).
            best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }

    if best_state is None:
        return None

    # load_state_dict copies into the existing parameter tensors, so the
    # optimizer passed in keeps pointing at valid parameters.
    model.load_state_dict(best_state)
    return model

In [None]:
def validation(model, criterion, val_loader, device):
    """Evaluate ``model`` on ``val_loader``.

    Args:
        model: Network returning per-label probabilities.
        criterion: Loss called as ``criterion(probs, labels)``. It is assumed
            to return the mean over the batch, as the default ``nn.BCELoss``
            created by the training functions in this notebook does.
        val_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(loss, accuracy)``. ``loss`` is the sample-weighted mean of
        the batch losses; ``accuracy`` is the fraction of individual label
        entries predicted correctly at a 0.5 threshold. Both are NaN when the
        loader yields no data.
    """
    model.eval()
    loss_sum = 0.0
    n_samples = 0
    n_correct = 0
    n_entries = 0
    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)

            probs = model(imgs)

            # Weight every batch by its size: a plain mean of per-batch means
            # would give a smaller final batch the same influence as a full one.
            batch_size = labels.shape[0]
            loss_sum += criterion(probs, labels).item() * batch_size
            n_samples += batch_size

            preds = (probs > 0.5).to(labels.dtype)
            n_correct += (preds == labels).sum().item()
            n_entries += labels.numel()

    _val_loss = loss_sum / n_samples if n_samples else float('nan')
    _val_acc = n_correct / n_entries if n_entries else float('nan')

    return _val_loss, _val_acc

Run!!

In [None]:
# Build the network and place it on the selected device.
model = BaseModel()
model.to(device)
model.eval()

optimizer = optim.Adam(params=model.parameters(), lr=CFG["LEARNING_RATE"])
# Halve the learning rate once the monitored value (the validation accuracy
# passed to scheduler.step inside train) stops increasing.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=2,
    threshold_mode='abs',
    min_lr=1e-8,
    verbose=True,
)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.18634] Val Loss : [0.07915] Val ACC : [0.97013]


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.06461] Val Loss : [0.04630] Val ACC : [0.98275]


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.04499] Val Loss : [0.04349] Val ACC : [0.98434]


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.03481] Val Loss : [0.04834] Val ACC : [0.98368]


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.02916] Val Loss : [0.04157] Val ACC : [0.98573]


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.02479] Val Loss : [0.04334] Val ACC : [0.98570]


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.02348] Val Loss : [0.04208] Val ACC : [0.98639]


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.02088] Val Loss : [0.05418] Val ACC : [0.98516]


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.01958] Val Loss : [0.04355] Val ACC : [0.98723]


  0%|          | 0/825 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.01780] Val Loss : [0.04479] Val ACC : [0.98654]


Inference

In [None]:
# Unlabeled competition test set.
# NOTE(review): this rebinds `test`, which previously held the held-out split
# of the training data — confirm nothing downstream still expects that split.
test = pd.read_csv('/content/test.csv')
test_dataset = CustomDataset(
    img_path_list=test['img_path'].values,
    label_list=None,
    transforms=test_transform,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [None]:
def inference(model, test_loader, device):
    """Predict a binary label vector for every sample in ``test_loader``.

    Args:
        model: Network returning per-label probabilities.
        test_loader: Iterable yielding batches of images only (no labels).
        device: Device the model and the batches are moved to.

    Returns:
        A list with one entry per sample; each entry is a list of 0/1 ints
        obtained by thresholding the model output at 0.5.
    """
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            scores = model(batch.float().to(device))
            hard_labels = (scores.cpu().detach().numpy() > 0.5).astype(int)
            predictions.extend(hard_labels.tolist())
    return predictions

In [None]:
# Run the trained `model` over the test-set loader to get 0/1 predictions.
preds = inference(model, test_loader, device)

  0%|          | 0/46 [00:00<?, ?it/s]

In [None]:
#accuracy_score(test_labels, preds)

ValueError: ignored

In [None]:
#list=[]
#for idx in range(len(preds)):
 #   if (preds[idx] == test_labels[idx]).all() :
  #   list.append(1)
   # else :
    #  list.append(0)
#list=np.array(list)
#acc = list.sum() / len(list)
#print(acc)

  0%|          | 0/12 [00:00<?, ?it/s]

TypeError: ignored

Submission


In [None]:
# Load the submission table from Drive; its columns after the first are
# overwritten with predictions in the next cell.
# NOTE(review): this is the same path the notebook later writes to — confirm
# the file starts out as a copy of the competition's sample submission.
submit = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/shootingStar_submission.csv')

In [None]:
# Replace every column after the first with the predicted 0/1 labels, then
# preview the first rows.
submit.iloc[:,1:] = preds
submit.head()

Unnamed: 0.1,Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,TEST_00000,1,1,0,0,0,0,1,0,1,0
1,TEST_00001,1,1,0,0,0,1,0,0,0,0
2,TEST_00002,1,0,0,0,1,1,0,1,0,1
3,TEST_00003,1,0,0,0,1,1,0,1,1,0
4,TEST_00004,0,0,0,0,1,1,0,0,0,0


In [None]:
# Write the filled-in table back to Drive without the DataFrame index column.
submit.to_csv('/content/drive/MyDrive/Colab Notebooks/shootingStar_submission.csv', index=False)

In [None]:
# Re-read the training table into a fresh frame for the second training stage.
df_re = pd.read_csv('/content/train.csv')

In [None]:
# Second-stage split of the freshly read frame: 80 % train / 20 % rest.
# NOTE(review): `model` was already trained on a split of the shuffled `df`,
# and this split of `df_re` is drawn from a differently ordered frame, so the
# `val` rows below have probably been seen during the first training run.
# Validation numbers for the second stage are likely optimistic — confirm.
train, test, train_labels, test_labels = train_test_split(
    df_re,
    get_labels(df_re),
    test_size=0.2,
    random_state=42,
)

# Split the training part again into a large (80 %) and a small (20 %) portion.
train1, train2, train1_labels, train2_labels = train_test_split(
    train,
    get_labels(train),
    test_size=0.2,
    random_state=42,
)

# Halve the remaining 20 % into validation and held-out test parts.
val, test, val_labels, test_labels = train_test_split(
    test,
    get_labels(test),
    test_size=0.5,
    random_state=42,
)

In [None]:
# Full second-stage training split.
train_dataset = CustomDataset(
    img_path_list=train['img_path'].values,
    label_list=train_labels,
    transforms=train_transform,
)
train_loader = DataLoader(
    train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0
)

# Larger portion of the training split.
train1_dataset = CustomDataset(
    img_path_list=train1['img_path'].values,
    label_list=train1_labels,
    transforms=train_transform,
)
train1_loader = DataLoader(
    train1_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0
)

# Smaller portion of the training split.
train2_dataset = CustomDataset(
    img_path_list=train2['img_path'].values,
    label_list=train2_labels,
    transforms=train_transform,
)
train2_loader = DataLoader(
    train2_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0
)

# Validation split: fixed order, evaluation transforms.
val_dataset = CustomDataset(
    img_path_list=val['img_path'].values,
    label_list=val_labels,
    transforms=test_transform,
)
val_loader = DataLoader(
    val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0
)

In [None]:
def train_trans(model, optimizer, train_loader, val_loader, scheduler, device, epochs=5):
    """Continue training ``model`` for a few epochs and keep its best weights.

    Each epoch runs one pass over ``train_loader`` with binary cross-entropy
    loss, evaluates on ``val_loader`` via ``validation`` and, when a scheduler
    is given, steps it with the validation accuracy.

    Args:
        model: Network whose forward pass returns probabilities in [0, 1]
            (required by ``nn.BCELoss``).
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Loader handed to ``validation`` after every epoch.
        scheduler: Optional scheduler stepped with the validation accuracy,
            or ``None``.
        device: Device the model and the batches are moved to.
        epochs: Number of epochs to run (5 by default).

    Returns:
        ``model`` itself, with the parameters of the epoch that reached the
        highest validation accuracy loaded back into it, or ``None`` when no
        epoch achieved a validation accuracy above 0.
    """
    model.to(device)
    criterion = nn.BCELoss().to(device)

    best_val_acc = 0
    best_state = None

    for epoch in range(1, epochs + 1):
        model.train()
        train_loss = []
        for imgs, labels in tqdm(iter(train_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(imgs)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_acc = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val ACC : [{_val_acc:.5f}]')

        if scheduler is not None:
            scheduler.step(_val_acc)

        if best_val_acc < _val_acc:
            best_val_acc = _val_acc
            # A reference to `model` would keep changing with later epochs;
            # snapshot the weights instead (on the CPU, so no second copy
            # occupies accelerator memory).
            best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }

    if best_state is None:
        return None

    # load_state_dict copies into the existing parameter tensors, so the
    # optimizer passed in keeps pointing at valid parameters.
    model.load_state_dict(best_state)
    return model

In [None]:
# Second stage, part one: keep training the existing `model` (same optimizer
# and scheduler objects) on the larger training portion.
infer_model = train_trans(model, optimizer, train1_loader, val_loader, scheduler, device)

  0%|          | 0/660 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.02440] Val Loss : [0.01493] Val ACC : [0.99528]


  0%|          | 0/660 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.01452] Val Loss : [0.01516] Val ACC : [0.99522]


  0%|          | 0/660 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.01555] Val Loss : [0.02629] Val ACC : [0.99172]


  0%|          | 0/660 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.01659] Val Loss : [0.01696] Val ACC : [0.99477]
Epoch 00014: reducing learning rate of group 0 to 1.5000e-04.


  0%|          | 0/660 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.00513] Val Loss : [0.01369] Val ACC : [0.99606]


  0%|          | 0/660 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# Second stage, part two: continue with the smaller training portion.
infer_model = train_trans(model, optimizer, train2_loader, val_loader, scheduler, device)

  0%|          | 0/165 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.01587] Val Loss : [0.01047] Val ACC : [0.99627]


  0%|          | 0/165 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.00611] Val Loss : [0.01304] Val ACC : [0.99615]


  0%|          | 0/165 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.00598] Val Loss : [0.01364] Val ACC : [0.99621]


  0%|          | 0/165 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.00188] Val Loss : [0.01507] Val ACC : [0.99618]
Epoch 00019: reducing learning rate of group 0 to 7.5000e-05.


  0%|          | 0/165 [00:00<?, ?it/s]

  0%|          | 0/104 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.00172] Val Loss : [0.01422] Val ACC : [0.99657]


In [None]:
# Rebuild the loader for the unlabeled competition test set; `test` held a
# split of the training data after the second-stage split above.
test = pd.read_csv('/content/test.csv')
test_dataset = CustomDataset(
    img_path_list=test['img_path'].values,
    label_list=None,
    transforms=test_transform,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [None]:
# NOTE(review): this repeats the `inference` definition from the earlier
# "Inference" section of this notebook; the later definition wins.
def inference(model, test_loader, device):
    """Return thresholded predictions for every sample in ``test_loader``.

    Args:
        model: Network returning per-label probabilities.
        test_loader: Iterable yielding batches of images only (no labels).
        device: Device the model and the batches are moved to.

    Returns:
        A list with one list of 0/1 ints per sample (threshold 0.5).
    """
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for image_batch in tqdm(iter(test_loader)):
            image_batch = image_batch.float().to(device)
            probabilities = model(image_batch).cpu().detach().numpy()
            for row in (probabilities > 0.5).astype(int).tolist():
                predictions.append(row)
    return predictions

In [None]:
# Predictions of the model after the second training stage.
preds_re = inference(model, test_loader, device)

  0%|          | 0/46 [00:00<?, ?it/s]

In [None]:
# Reload the submission table from Drive so its label columns can be refilled.
submit = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/shootingStar_submission.csv')

In [None]:
# Fill the label columns with the predictions of the retrained model.
# `preds` holds the first-stage predictions; the second-stage results
# computed just above are stored in `preds_re`.
submit.iloc[:,1:] = preds_re
submit.head()

Unnamed: 0.1,Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,TEST_00000,1,1,0,0,0,0,1,0,1,0
1,TEST_00001,1,1,0,0,0,1,0,0,0,0
2,TEST_00002,1,0,0,0,1,1,0,1,0,1
3,TEST_00003,1,0,0,0,1,1,0,1,1,0
4,TEST_00004,0,0,0,0,1,1,0,0,0,0


In [None]:
# Write the updated table back to Drive without the DataFrame index column.
submit.to_csv('/content/drive/MyDrive/Colab Notebooks/shootingStar_submission.csv', index=False)