In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
'''
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
'''
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the competition's training metadata (train.csv) into a DataFrame.
train_df = pd.read_csv(filepath_or_buffer='../input/plant-pathology-2021-fgvc8/train.csv')
# Show the first five rows as the cell output.
train_df.head(n=5)

In [None]:
#convert string labels to numeric labels
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the 'labels' column and store the integer ids next to it.
label = LabelEncoder()
train_df['label_id'] = label.fit_transform(train_df['labels'])

# id -> label-name lookup, saved for the submission.
# The encoder assigns each label its position in `classes_`, so enumerating
# `classes_` gives the mapping directly instead of sorting every row pair.
label_dic = dict(enumerate(label.classes_.tolist()))
print(label_dic)
classes = len(label.classes_) #12

# The 'labels' column is intentionally left in train_df: deleting it made this
# cell raise KeyError when re-run, and nothing below reads train_df again.

# Parallel arrays: image file name and its numeric label id.
image_names = np.array(train_df['image'].values)
image_labels = np.array(train_df['label_id'].values)

print(image_names.shape) #18632

In [None]:
import matplotlib.pyplot as plt
from glob import glob
import cv2, torch
import torchvision.transforms as transforms
from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader

# Use the GPU when one is present, otherwise fall back to the CPU.
# is_available has to be *called*: the bare function object is always truthy,
# which selected 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Training-time preprocessing, applied in this order:
# array -> tensor, resize to 224x224, then per-channel normalisation with
# mean 0.5 and std 0.5.
_train_steps = [
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
transform = transforms.Compose(_train_steps)

#custom dataset setup
#dataset and dataloader for train
class dataset(Dataset) :
    """Image dataset that pairs each image file with its numeric label.

    Args:
        image_list: sequence of image file paths.
        image_names: sequence of image file names (no directory part).
        image_labels: sequence of label ids, parallel to `image_names`.
        transform: callable applied to the array returned by cv2.imread;
            must return a tensor.
        device: device that the image and label tensors are moved to.
    """
    def __init__(self, image_list, image_names, image_labels, transform, device) :
        self.image_list = image_list
        self.image_names = image_names
        self.image_labels = image_labels
        self.transform = transform
        # Keep the device on the instance so __getitem__ does not depend on a
        # module-level variable of the same name.
        self.device = device
        # file name -> label id, built once so every lookup is a dict access
        # instead of a scan over all names.
        self._label_by_name = dict(zip(np.asarray(image_names).tolist(),
                                       np.asarray(image_labels).tolist()))
    
    def __len__(self) :
        return len(self.image_list)
    
    def __getitem__(self, index) :
        """Return (image tensor, label tensor) for the path at `index`.

        The label is an int64 tensor of shape (1, 1).

        Raises:
            FileNotFoundError: if cv2 cannot read the image file.
            KeyError: if the file name has no entry in `image_names`.
        """
        path = self.image_list[index]
        # NOTE: cv2.imread yields channels in BGR order; kept as-is so inputs
        # match what previously trained weights were fitted on.
        x = cv2.imread(path)
        if x is None :
            raise FileNotFoundError('could not read image: {}'.format(path))
        x = self.transform(x).to(self.device)
        
        # basename() works for any directory layout, unlike a fixed-offset slice.
        image_name = os.path.basename(path)
        if image_name not in self._label_by_name :
            raise KeyError('no label found for image {!r}'.format(image_name))
        label_id = self._label_by_name[image_name]
        y = torch.tensor([[label_id]], dtype = torch.long).to(self.device)
        
        return x, y


#load train_images
# Collect the path of every training JPEG.
image_list = glob('../input/plant-pathology-2021-fgvc8/train_images/*.jpg')


# Wrap the paths and label arrays in the custom dataset and batch it.
# `train_data` ends up bound to the DataLoader (shuffled, 32 samples per batch).
train_data = DataLoader(
    dataset(image_list, image_names, image_labels, transform, device),
    batch_size=32,
    shuffle=True,
)

In [None]:
#! pip install torchsummaryX

In [None]:
import torch.nn as nn
from torch.optim import Adam
from torchvision.models import resnet34
#from torchsummaryX import summary

class resnet(nn.Module) :
    """ResNet-34 whose final fully connected layer is replaced by a new one.

    Args:
        output: number of output units of the replacement layer
            (one per class).
    """
    def __init__(self, output) :
        super().__init__()
        self.model = resnet34(pretrained=False) #use ResNet
        # Swap the last FC layer for one with `output` units. The input width
        # is read from the layer being replaced rather than hard-coded.
        self.model.fc = nn.Linear(self.model.fc.in_features, output)
    def forward(self, x) :
        """Run `x` through the backbone and return its raw output scores."""
        return self.model(x)

# Build the classifier with one output per class and move it to the device.
model = resnet(classes)
model = model.to(device)
# Cross-entropy loss, also moved to the device.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
# Adam over all model parameters, learning rate 0.01.
optimizer = Adam(params=model.parameters(), lr=0.01)

#summary(model, torch.rand((1, 3, 224, 224)).float().to(device))

In [None]:
#load model that I trained on local
# map_location places the checkpoint tensors on the device selected for this
# session instead of the device they were saved from.
model.load_state_dict(torch.load('../input/modelpt/model.pt', map_location=device))

In [None]:
# Training loop, disabled by wrapping it in a string literal so it is never
# executed. As written it would run up to 10 epochs over `train_data`,
# accumulate the mean loss and the number of correct predictions per epoch,
# stop early once accuracy exceeds 0.98, and finally save the model's
# state_dict to 'model.pt'.
# The Korean note inside says the session kept dying, so training was run
# 10 epochs at a time with the weights saved in between.
# NOTE(review): accuracy is divided by 32 * len(train_data), which assumes
# every batch holds 32 samples - confirm for the last batch.
'''
n_epoch = 10
torch.cuda.empty_cache()

model.train()
print('batchs : ', len(train_data))

for epoch in range(n_epoch) :
    epoch_loss = 0
    epoch_acc = 0
    for i, (x, y) in tqdm(enumerate(train_data)) :
        y = y.reshape(-1)
        
        predict = model(x)
        loss = criterion(predict, y)
        
        epoch_loss += loss / len(train_data)
        correct_prediction = torch.argmax(predict, 1) == y
        correct_prediction = correct_prediction.sum()
        epoch_acc += correct_prediction
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    epoch_acc = epoch_acc / (32 * len(train_data))
    print('Epoch : {}/{},   loss : {:.5f},    acc : {:.5f}'.format(epoch+1, n_epoch, epoch_loss, epoch_acc))
    
    if epoch_acc > 0.98 : break
        
#계속 죽으니까 10번씩 돌리고 저장
#save model to continue learning
torch.save(model.state_dict(), 'model.pt')
'''

In [None]:
# Paths of every test image to predict on.
valid_image_list = glob('../input/plant-pathology-2021-fgvc8/test_images/*.jpg')


#TEN_CROP
# Test-time augmentation: each image is resized and normalised, then split
# into 224x224 crops whose scores are summed before taking the argmax.
valid_transform = transforms.Compose([
      transforms.ToTensor(),
      transforms.Resize(256),
      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
      transforms.TenCrop([224, 224])])

model.eval()
predict_list = []
image_name_list = []
# Inference only: no_grad() stops autograd from recording a graph per crop.
with torch.no_grad() :
    # Iterating the list directly lets tqdm show the total number of images.
    for image in tqdm(valid_image_list) :
        # basename() instead of a fixed-offset slice, so the file name is
        # correct whatever the length of the directory prefix.
        image_name = os.path.basename(image)

        img = cv2.imread(image)
        if img is None :
            raise FileNotFoundError('could not read image: {}'.format(image))

        # The transform yields one tensor per crop; stack them into a single
        # (n_crops, 3, 224, 224) batch and run the model once.
        crops = torch.stack(list(valid_transform(img))).to(device)
        # Sum the per-crop scores class-wise; the largest total wins.
        result_list = model(crops).sum(dim = 0)

        predict_list.append(torch.argmax(result_list).item())
        image_name_list.append(image_name)

predict_list = np.array(predict_list)
image_name_list = np.array(image_name_list)
print(image_name_list)

# Submission frame: file name plus the label string looked up from the
# predicted id via label_dic.
submission_df = pd.DataFrame({'image' : image_name_list})
submission_df['labels'] = pd.Series(predict_list).map(label_dic)
submission_df.head()

In [None]:
# Write the predictions to submission.csv, leaving out the DataFrame index.
submission_df.to_csv(path_or_buf="submission.csv", index=False)