## ✔️ 필요한 모듈 Install

In [None]:
# !pip install torch==1.10.1+cu111 torchvision==0.11.2+cu111 torchaudio==0.10.1 -f https://download.pytorch.org/whl/torch_stable.html
!pip install timm # install pytorch image models
!pip install torchmetrics

## ✔️ 필요한 Library Import

In [None]:
import torch
import os
import pandas as pd
import numpy as np
import random 

import albumentations as A
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold
import timm

import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision.models as models
import torch.nn.functional as F
from torch import nn
import torchmetrics 
from torch.nn.modules.loss import _Loss
from  torch.cuda.amp import autocast, GradScaler

## ✔️ Model Configure
### ➿ 사용 모델 - efficientnet_b3
-----
* **Train batch 사이즈 - 8**
* **validation batch 사이즈 - 32**
* **사용 모델 - efficientnet_b3**
* **Learning rate - 3e-5**
* **Epoch - 20**

In [None]:
class GlobalConstantsConfigure:
    """Container for the settings shared by every cell of this notebook."""

    def __init__(self):
        # Checkpoint-resume bookkeeping.
        self.continue_training = True
        self.last_model = '../input/sorghum-100-cultivar-identification/resnext50d_32x4d_60_last.pt' 
        self.num_epochs_done = 40

        # Reproducibility and cross-validation layout.
        self.seed = 127 # 107
        self.fold = 1
        self.num_folds = 4
        self.num_classes = 100

        # Starting value for the best-validation-loss tracker.
        self.biggest_loss = 999
        self.training_size_rate = 0.8

        # Data location and model identity.
        self.training_dir = '../input/sorghum-id-fgvc-9/train_images'
        self.model_name = 'efficientnet_b3'
        self.model_path = './rexnet_150.pt'

        # Input size, batch sizes and optimisation schedule.
        self.image_size = 512
        self.batch_size = 8
        self.val_batch_size = 32
        self.lr = 3e-5 # 3e-5
        self.num_epochs = 1
        self.steps_per_decay = 5

        # Use the GPU when one is visible, otherwise fall back to the CPU.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.num_workers = 1  # if torch.cuda.is_available() else 4


# Single shared instance read by the rest of the notebook.
gcc = GlobalConstantsConfigure()

In [None]:
def set_seed(seed):
    """Seed Python, NumPy and PyTorch and switch cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to every random number generator below.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Apply the same seed to each generator in turn.
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)

    # Deterministic kernels, no autotuning.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    
set_seed(gcc.seed)

### ✔️ Train DataSet 확인 & Nan 값 제거

In [None]:
# Load the image -> cultivar mapping and print the row count before cleaning.
df_all = pd.read_csv('../input/sorghum-id-fgvc-9/train_cultivar_mapping.csv')
print(len(df_all))

# Drop every row that has a missing value, then print the row count again.
df_all.dropna(inplace=True)
print(len(df_all))

df_all.head()

## ✔️ Train DataSet 전처리 & 모델 입력값 수정

In [None]:
# Distinct cultivar names, in order of first appearance; a name's position in
# this list is later used as its integer class index.
unique_cultivars = list(df_all["cultivar"].unique())

In [None]:
# Build the on-disk path of every training image and store it in the DataFrame.
df_all["file_path"] = df_all["image"].apply(lambda image: '../input/sorghum-id-fgvc-9/train_images/' + image)

# Encode each cultivar name as its position in `unique_cultivars`.
# A dict lookup replaces `list.index`, which rescanned the list for every row.
cultivar_to_index = {cultivar: position for position, cultivar in enumerate(unique_cultivars)}
df_all["cultivar_index"] = df_all["cultivar"].map(cultivar_to_index)

# Record whether the image file is actually present on disk.
df_all["is_exist"] = df_all["file_path"].apply(os.path.exists)

# Keep only the rows whose image file exists.
df_all = df_all[df_all["is_exist"]]

# Preview the cleaned training table.
df_all.head()

## ✔️ Train DataSet & Validation DataSet 분리
-----
* **KFold 방법론 사용**
* **KFold 횟수 - 4**

In [None]:
# Stratified K-fold split; the number of folds comes from gcc.num_folds.
skf = StratifiedKFold(n_splits=gcc.num_folds, shuffle=True, random_state=gcc.seed)

# Materialise every (train, validation) index pair, then separate them into
# two parallel lists indexed by fold number.
fold_pairs = list(skf.split(df_all['image'], df_all["cultivar_index"]))
train_folds = [train_part for train_part, _ in fold_pairs]
val_folds = [valid_part for _, valid_part in fold_pairs]

# Select the fold configured in gcc.fold (positional indices, hence iloc).
df_train = df_all.iloc[train_folds[gcc.fold]]
df_valid = df_all.iloc[val_folds[gcc.fold]]

print(f"train size: {len(df_train)}")
print(f"valid size: {len(df_valid)}")

# Per-cultivar sample counts of both splits.
print(df_train.cultivar.value_counts())
print(df_valid.cultivar.value_counts())

## ✔️ Image 전처리
-----
* **Random Box를 이용하여 이미지를 자름**
* **랜덤으로 추출한 Position을 이용하여 해당 위치부터 Random 박스 이용하여 자름**
* **본래 가지고 있던 Image 사이즈를 원하는 사이즈인 512로 바꾸기 위한 과정**

In [None]:
from pylab import rcParams
rcParams['figure.figsize'] = 20,40

# Sample the random rectangle that cutmix() copies from one image into another.
# Only the box coordinates are produced here; no pixels are touched.
def rand_bbox(size, lam):
    """Return a random box ``(bbx1, bby1, bbx2, bby2)`` clipped to the image.

    Args:
        size: Shape of the image batch. The last axis is treated as width and
            the second-to-last as height, matching how ``cutmix`` slices the
            batch (``[..., bby1:bby2, bbx1:bbx2]``).
        lam: Mixing coefficient; the box covers roughly ``1 - lam`` of the
            image area before clipping.

    Returns:
        Tuple ``(bbx1, bby1, bbx2, bby2)`` with x in ``[0, W]`` and y in ``[0, H]``.
    """
    # The original read W from size[2] and H from size[3]. That is only correct
    # for square images, because the caller slices x on the last axis.
    W = size[-1]
    H = size[-2]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int64(W * cut_rat)
    cut_h = np.int64(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    # Centre the box on (cx, cy) and clip it to the image bounds.
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)
    return bbx1, bby1, bbx2, bby2


# CutMix augmentation applied to a whole batch.
def cutmix(data, target, alpha):
    """Paste a random patch from a shuffled copy of the batch into each image.

    Args:
        data: Image batch; the last two axes are sliced as (y, x).
        target: Labels aligned with ``data`` along the first axis.
        alpha: Parameter of the Beta(alpha, alpha) draw for the mix ratio.

    Returns:
        ``(new_data, (target, shuffled_target, lam))`` where ``lam`` is the
        fraction of each image that still comes from the original sample.
    """
    # Random permutation that pairs every sample with a donor sample.
    indices = torch.randperm(data.size(0))
    shuffled_target = target[indices]

    # The sampled ratio is clamped to [0.3, 0.4] before the box is drawn.
    lam = np.clip(np.random.beta(alpha, alpha),0.3,0.4)
    bbx1, bby1, bbx2, bby2 = rand_bbox(data.size(), lam)

    # Copy the batch and overwrite the box region with the donor's pixels.
    # The donor patch is indexed straight from `data`; the original's separate
    # shuffled copy of the whole batch was never used.
    new_data = data.clone()
    new_data[:, :, bby1:bby2, bbx1:bbx2] = data[indices, :, bby1:bby2, bbx1:bbx2]

    # Recompute lam from the clipped box so it matches the real pixel ratio.
    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (data.size()[-1] * data.size()[-2]))
    targets = (target, shuffled_target, lam)

    return new_data, targets

### ✔️ 불러온 DataSet을 전처리하기 전 사용하는 함수 & 클래스 정의

In [None]:
class SorghumDataset(Dataset):
    """Dataset that reads an image from disk and returns it with its label.

    Args:
        dirs: Indexable collection of image file paths.
        labels: Indexable collection of labels aligned with ``dirs``.
        transformation: Optional callable invoked as
            ``transformation(image=array)`` that returns a mapping with the
            augmented array under ``'image'``.
    """

    def __init__(self, dirs, labels, transformation=None):
        super().__init__()
        self.dirs = dirs
        self.labels = labels
        self.transformation = transformation

    def __len__(self):
        return len(self.dirs)

    def __getitem__(self, index):
        """Return ``(image, label)`` as float32 tensors for ``index``.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        path = self.dirs[index]
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports failure by returning None; fail with the path
            # instead of an obscure error inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transformation:
            image = self.transformation(image=image)['image']

        # Scale pixel values by 1/255 and move channels first (HWC -> CHW).
        image = image / 255.
        image = image.transpose((2, 0, 1))

        image = torch.from_numpy(image).type(torch.float32)
        image = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(image)

        # The label is returned as float32; callers cast it to long for the loss.
        labels = torch.from_numpy(np.array(self.labels[index])).type(torch.float32)

        return image, labels

In [None]:
# Training pipeline: resize to gcc.image_size, then apply each of the random
# augmentations below with probability 0.5.
training_transformation = A.Compose([
    A.Resize(width=gcc.image_size, height=gcc.image_size, p=1.0),
    A.Flip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.HueSaturationValue(p=0.5),
])

# Validation pipeline: resize only, no random augmentation.
validation_transformation = A.Compose([
    A.Resize(width=gcc.image_size, height=gcc.image_size, p=1.0)
])

## ✔️ Train & Validation DataSet 로드
-----
* **Train batch size : 8**
* **Validation batch size : 32**
* **Shuffle : True**

In [None]:
# Wrap the selected fold's file paths and class indices in datasets.
training_set = SorghumDataset(df_train.file_path.values, df_train.cultivar_index.values, training_transformation)
validation_set = SorghumDataset(df_valid.file_path.values, df_valid.cultivar_index.values, validation_transformation)

# Training loader: shuffled, incomplete final batch dropped.
training_dataloader = DataLoader(
    training_set,
    batch_size = gcc.batch_size,
    shuffle = True,
    num_workers = gcc.num_workers,
    pin_memory = True, 
    drop_last = True
)

# NOTE(review): shuffle=True and drop_last=True on the validation loader mean
# the final partial batch is never evaluated. The later label-analysis cell
# assumes exactly 32 items per batch, so change both together if this is relaxed.
validation_dataloader = DataLoader(
    validation_set,
    batch_size = gcc.val_batch_size,
    shuffle = True,
    num_workers = gcc.num_workers,
    pin_memory = True,
    drop_last = True
)

## ✔️ 사용하고자 하는 모델을 원하는 모델로 재정의

In [None]:
class CustomModel(torch.nn.Module):
    """Backbone whose ``classifier`` attribute is replaced by a small MLP head.

    Args:
        model_backbone: Module exposing ``get_classifier()``; its
            ``in_features`` determines the input width of the new head.
    """

    def __init__(self, model_backbone):
        super().__init__()
        self.model = model_backbone
        self.num_in_features = self.model.get_classifier().in_features
        print(self.num_in_features)

        # Head: BatchNorm -> Linear(512) -> Dropout(0.5) -> ReLU -> Linear(num_classes).
        hidden_units = 512
        head_layers = [
            nn.BatchNorm1d(self.num_in_features),
            nn.Linear(self.num_in_features, hidden_units),
            nn.Dropout(0.5),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_units, gcc.num_classes),
        ]
        self.model.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run ``x`` through the backbone, including the replaced head."""
        return self.model(x)

In [None]:
def to_one_hot(labels, num_classes, dtype=torch.float, dim=1):
    """Convert integer class labels into a one-hot tensor along ``dim``.

    Args:
        labels: Tensor of class indices. Trailing singleton axes are appended
            until axis ``dim`` exists.
        num_classes: Size of the one-hot axis.
        dtype: dtype of the returned tensor.
        dim: Axis that receives the one-hot encoding.

    Returns:
        Tensor shaped like ``labels`` except that axis ``dim`` has
        ``num_classes`` entries.
    """
    missing_axes = dim + 1 - labels.ndim
    if missing_axes > 0:
        padded_shape = list(labels.shape) + [1] * missing_axes
        labels = torch.reshape(labels, padded_shape)

    out_shape = list(labels.shape)
    out_shape[dim] = num_classes

    encoded = torch.zeros(size=out_shape, dtype=dtype, device=labels.device)
    # Write 1 at each label's position along `dim`.
    encoded.scatter_(dim=dim, index=labels.long(), value=1)
    return encoded

class PolyLoss(_Loss):
    """Cross-entropy plus a first-order polynomial term ``epsilon * (1 - p_t)``.

    Args:
        softmax: If true, apply softmax to the input before computing ``p_t``.
        ce_weight: Optional per-class weight forwarded to ``nn.CrossEntropyLoss``.
        reduction: ``'mean'`` (default), ``'sum'`` or ``'none'``. The original
            stored this value but always averaged; it is now honoured.
        epsilon: Weight of the ``(1 - p_t)`` term.

    Raises:
        ValueError: If ``reduction`` is not one of the supported values.
    """

    def __init__(self, softmax, ce_weight=None, reduction='mean', epsilon=1.0):
        super().__init__()
        if reduction not in ('mean', 'sum', 'none'):
            raise ValueError(f"Unsupported reduction: {reduction!r}")
        self.softmax = softmax
        self.reduction = reduction
        self.epsilon = epsilon
        # Per-sample CE; the reduction is applied once, after the poly term.
        self.cross_entropy = nn.CrossEntropyLoss(weight=ce_weight, reduction='none')

    def forward(self, input, target):
        """Compute the loss.

        Args:
            input: Logits with the class axis at dim 1.
            target: Either class indices (one axis fewer than ``input`` or a
                singleton class axis) or a one-hot tensor shaped like ``input``.

        Returns:
            A scalar for ``'mean'``/``'sum'``, otherwise the unreduced loss.
        """
        # Index targets without a class axis get one so shapes are comparable.
        if len(input.shape) - len(target.shape) == 1:
            target = target.unsqueeze(1).long()
        n_pred_ch, n_target_ch = input.shape[1], target.shape[1]
        if n_pred_ch != n_target_ch:
            # Index targets: CE on the indices, then one-hot for the p_t term.
            self.ce_loss = self.cross_entropy(input, torch.squeeze(target, dim=1).long())
            target = to_one_hot(target, num_classes=n_pred_ch)
        else:
            # One-hot targets: recover the indices for CE.
            self.ce_loss = self.cross_entropy(input, torch.argmax(target, dim=1))

        if self.softmax:
            input = torch.softmax(input, 1)

        # Predicted probability of the true class.
        pt = (input * target).sum(dim=1)

        poly_loss = self.ce_loss + self.epsilon * (1 - pt)

        if self.reduction == 'mean':
            return torch.mean(poly_loss)
        if self.reduction == 'sum':
            return torch.sum(poly_loss)
        return poly_loss

## ✔️ 학습에 필요한 함수들 선언
-----
* **Loss 함수 : CrossEntropyLoss**
* **Optimizer 함수 : Adam**
* **스케줄러 : CosineAnnealingWarmRestarts**

In [None]:
# Create the pretrained backbone named in gcc.model_name.
backbone = timm.create_model(gcc.model_name,pretrained=True)

# Gradient scaler for mixed-precision training.
scaler = GradScaler()   
# Wrap the backbone so its classifier is replaced by the custom head.
model = CustomModel(backbone)

# Loss and accuracy metric. The metric object is not moved to gcc.device, so
# callers pass it CPU tensors.
# NOTE(review): the Accuracy constructor arguments depend on the installed
# torchmetrics version, which is not pinned here — confirm.
loss_func = torch.nn.CrossEntropyLoss().to(gcc.device)
metrics_acc = torchmetrics.Accuracy(threshold=0.0, num_classes = gcc.num_classes)

# Optimise only the parameters that require gradients.
trainable_parameters = [param for param in model.parameters() if param.requires_grad == True]
optimizer = torch.optim.Adam(trainable_parameters, lr = gcc.lr)

# Cosine annealing with warm restarts: first cycle of 5 steps, doubling after each restart.
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=5, T_mult=2)

model.to(gcc.device)

print('load model done')

## ✔️ Image Data 분석
------
* **Color Image로 구성되어있어서 tensor[0]에 3가지(R,G,B)의 이미지 정보가 들어있음**
* **tensor[1]에는 label이 함께 포함이 됨**

In [None]:
training_set[0]

## ✔️ Model 학습 진행
------
* **학습을 진행하면서 주석처리 부분을 이용하여 사용되는 Data 확인**

In [None]:
import torchvision.transforms as T
from torchvision.transforms.functional import to_pil_image

def training_progress(training_dataloader, loss_func, scheduler):
    """Run one training epoch and return the mean loss and accuracy.

    Relies on the notebook-level ``model``, ``optimizer``, ``scaler``,
    ``metrics_acc``, ``gcc`` and ``cutmix``.

    Args:
        training_dataloader: Iterable yielding ``(image, label)`` batches.
        loss_func: Callable ``loss_func(output, target)`` returning a scalar.
        scheduler: LR scheduler; stepped once per batch.

    Returns:
        ``(mean_training_loss, mean_training_acc)`` averaged over batches.
    """
    model.train()
    training_loss = 0
    training_acc = 0
    cnt = 0
    print('Learning rate: ',scheduler.get_last_lr())
    print(scheduler.state_dict())
    training_loader = tqdm(training_dataloader, desc='Iterating through the training set')
    for image, label in training_loader:
        image = image.to(gcc.device).float()
        label = label.to(gcc.device).long()

        # Uncomment to inspect the batch being fed to the model:
        # plt.figure()
        # plt.subplot(1,2,1)
        # plt.imshow(to_pil_image(image[0]), cmap='gray')
        # plt.title('train')

        # Apply CutMix to roughly a quarter of the batches.
        use_cutmix = np.random.rand() < 0.25
        if use_cutmix:
            image, (label_a, label_b, lam) = cutmix(image, label, 1.)
            lam = float(lam)

        with autocast():
            output = model(image)
            if use_cutmix:
                # Blend the two targets' losses by their pixel share.
                loss = loss_func(output, label_a) * lam + loss_func(output, label_b) * (1. - lam)
            else:
                loss = loss_func(output, label)

        # Accuracy is computed on CPU because metrics_acc is never moved to the device.
        # The original CutMix branch referenced an undefined `metric_acc`,
        # called argmax(1) on 1-D labels and .cpu() on the label tuple.
        preds = output.detach().cpu().argmax(1)
        if use_cutmix:
            acc = (metrics_acc(preds, label_a.cpu().int()) * lam
                   + metrics_acc(preds, label_b.cpu().int()) * (1. - lam))
        else:
            acc = metrics_acc(preds, label.cpu().int())

        training_loss += loss.detach().item()
        training_acc += acc
        cnt += 1

        optimizer.zero_grad()
        # The forward pass runs under autocast, so scale the loss before
        # backward and let the scaler drive the optimizer step.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # NOTE(review): the scheduler is stepped per batch, so a T_0=5 warm
        # restart happens every 5 batches — confirm this is intended.
        scheduler.step()

    mean_training_loss = training_loss / cnt
    mean_training_acc = training_acc / cnt

    return mean_training_loss, mean_training_acc

## ✔️ Model Validation 진행
------
* **정확도가 떨어지는 class를 확인하기 위해 데이터셋의 label과 모델 예측 label을 저장**
* **batch size가 32로 설정이 되어있어, label_list에 32개씩 저장됨**
* **예측값과 정답을 비교하여, 정확도가 떨어지는 class 찾기**

In [None]:
# Alternating entries: predicted labels of a batch, then its true labels.
label_list = []

def validation_progress(validation_dataloader, loss_func):
    """Evaluate the notebook-level ``model`` on the validation set.

    For every batch, the predicted classes and then the true labels are
    appended to the global ``label_list``.

    Args:
        validation_dataloader: Iterable yielding ``(image, label)`` batches.
        loss_func: Callable ``loss_func(output, target)`` returning a scalar.

    Returns:
        ``(mean_validation_loss, mean_validation_acc)`` averaged over batches.
    """
    global label_list
    model.eval()

    loss_total = 0
    acc_total = 0
    batches_seen = 0
    progress = tqdm(validation_dataloader, desc='Iterating through the validation set')
    with torch.no_grad():
        for batches_seen, (image, label) in enumerate(progress, start=1):
            image = image.to(gcc.device)
            label = label.to(gcc.device)

            output = model(image)
            batch_loss = loss_func(output, label.long())

            # Predictions and ground truth on CPU, reused for metric and log.
            predicted = output.cpu().argmax(1)
            expected = label.cpu().int()
            acc_total += metrics_acc(predicted, expected)
            label_list.extend((predicted, expected))

            loss_total += batch_loss.detach().item()

    return loss_total / batches_seen, acc_total / batches_seen

In [None]:
def training_model(model, training_dataloader, validation_dataloader, loss_func, scheduler):
    """Train for ``gcc.num_epochs`` epochs, checkpointing the best and last state.

    The per-epoch helpers operate on the notebook-level ``model`` and
    ``optimizer``; the ``model`` argument is only used when saving checkpoints.

    Returns:
        Four lists: training losses, validation losses, training accuracies
        and validation accuracies, one entry per epoch.
    """
    def snapshot():
        # Everything needed to resume: weights, optimizer and scheduler state.
        return {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict()
        }

    training_losses_history = []
    validation_losses_history = []
    training_acc_history = []
    validation_acc_history = []

    best_loss = gcc.biggest_loss
    last_epoch = gcc.num_epochs - 1
    for epoch in range(gcc.num_epochs):
        training_loss, training_acc = training_progress(training_dataloader, loss_func, scheduler)
        validation_loss, validation_acc = validation_progress(validation_dataloader, loss_func)

        training_losses_history.append(training_loss)
        training_acc_history.append(training_acc)
        validation_losses_history.append(validation_loss)
        validation_acc_history.append(validation_acc)

        # Save whenever the validation loss matches or beats the best so far.
        if validation_loss <= best_loss:
            best_loss = validation_loss
            torch.save(snapshot(), gcc.model_name + '_best.pt')

        # Always keep the final state, tagged with the cumulative epoch count.
        if epoch == last_epoch:
            total_epochs = gcc.num_epochs_done + gcc.num_epochs
            torch.save(snapshot(), gcc.model_name + '_' + str(total_epochs) + '_last.pt')

        loss_part = f'Epoch {epoch + 1}/{gcc.num_epochs} | Training_loss : {training_loss:.3f} | Validation_loss : {validation_loss:.3f}'
        acc_part = f' Training_acc : {training_acc:.3f} | Validation_acc : {validation_acc:.3f}'
        print(loss_part + acc_part)

    return training_losses_history, validation_losses_history, training_acc_history, validation_acc_history


In [None]:
# training_losses_history, validation_losses_history, training_acc_history, validation_acc_history = training_model(model, training_dataloader, validation_dataloader, loss_func, lr_scheduler)

## ✔️ Model 학습 상황 (Loss, Accuracy)
------
* **Train, Validation Loss는 계속해서 떨어지고 있는 양상을 보임**
* **Accuracy도 계속해서 오르는 추세를 보이지만, 20 epoch 가까이 가게 되면 일정 수준을 유지하거나 떨어지기 시작함**

In [None]:
def plot_loss_history(model_name, train_loss_history, val_loss_history, num_epochs):
    """Plot training and validation loss against the epoch index.

    Args:
        model_name: Text used as the figure title.
        train_loss_history: Training loss per epoch.
        val_loss_history: Validation loss per epoch.
        num_epochs: Number of points on the x axis.
    """
    epochs = np.arange(num_epochs)
    plt.figure(figsize=(10, 6))

    curves = (
        (train_loss_history, 'Train Loss'),
        (val_loss_history, 'Validation Loss'),
    )
    for values, legend_label in curves:
        plt.plot(epochs, values, label=legend_label, lw=3)

    plt.title(f"{model_name}", fontsize=20)
    plt.legend(fontsize=12)
    plt.xlabel("Epoch", fontsize=15)
    plt.ylabel("Loss", fontsize=15)

    plt.show()
    
plot_loss_history(gcc.model_name, training_losses_history, validation_losses_history, gcc.num_epochs)

In [None]:
def plot_acc_history(model_name, train_acc_history, val_acc_history, num_epochs):
    """Plot training and validation accuracy against the epoch index.

    Args:
        model_name: Text used as the figure title.
        train_acc_history: Training accuracy per epoch.
        val_acc_history: Validation accuracy per epoch.
        num_epochs: Number of points on the x axis.
    """
    epochs = np.arange(num_epochs)
    plt.figure(figsize=(10, 6))

    curves = (
        (train_acc_history, 'Training Accuracy'),
        (val_acc_history, 'Validation Accuracy'),
    )
    for values, legend_label in curves:
        plt.plot(epochs, values, label=legend_label, lw=3)

    plt.title(f"{model_name}", fontsize=20)
    plt.legend(fontsize=12)
    plt.xlabel("Epoch", fontsize=15)
    plt.ylabel("Accuracy", fontsize=15)

    plt.show()
    
plot_acc_history(gcc.model_name, training_acc_history, validation_acc_history, gcc.num_epochs)

In [None]:
# Restore the best checkpoint written by training_model. map_location remaps
# the saved tensors onto the current device, so a checkpoint saved on a GPU
# also loads on a CPU-only runtime.
checkpoint = torch.load(gcc.model_name + '_best.pt', map_location=gcc.device)
model.load_state_dict(checkpoint['model_state_dict'])

## ✔️ Model Test
------
* **Test DataSet 개수 : 23639개**

In [None]:
# Load the submission template first: `sub` was previously used here before
# being defined anywhere in the notebook.
sub = pd.read_csv('../input/sorghum-id-fgvc-9/sample_submission.csv')

# Turn the bare file names into full test-image paths and add a placeholder
# label so the rows can be served through SorghumDataset.
sub["filename"] = sub["filename"].apply(lambda image: '../input/sorghum-id-fgvc-9/test/' + image)
sub["cultivar"] = 0

sub.head()

In [None]:
# Test dataset: reuses the validation (resize-only) transform; the labels are
# the placeholder zeros assigned to sub['cultivar'].
testing_dataset = SorghumDataset(sub['filename'], sub['cultivar'], validation_transformation)

# shuffle=False keeps the predictions in the same order as the rows of `sub`.
testing_dataloader = DataLoader(testing_dataset, 
                                batch_size=gcc.val_batch_size, 
                                shuffle=False, 
                                num_workers=gcc.num_workers)

In [None]:
# Predicted class indices, one tensor per test batch.
predictions = []

# Put the model in inference mode (no dropout, fixed batch-norm statistics).
# Nothing earlier guarantees it when only the checkpoint-loading cell was run.
model.eval()
with torch.no_grad():
    for image, label in tqdm(testing_dataloader):
        image = image.to(gcc.device)
        outputs = model(image)
        preds = outputs.detach().cpu()
        predictions.append(preds.argmax(1))

In [None]:
# Join the per-batch prediction tensors in one call rather than re-allocating
# the growing result once per batch.
tmp = torch.cat(predictions)

In [None]:
# Map every predicted class index back to its cultivar name.
# predictions = label_encoder.inverse_transform(tmp)
predictions = [unique_cultivars[pred] for pred in tmp]

In [None]:
# Reload the untouched submission template, fill in the predicted cultivar
# names and write the submission file.
sub = pd.read_csv('../input/sorghum-id-fgvc-9/sample_submission.csv')
sub['cultivar'] = predictions
sub.to_csv('submission.csv', index=False)
sub.head()

## ✔️ Model 학습 결과, 정확도가 떨어지는 Class 찾기
------
* **정확도가 떨어지는 class를 찾기 위해 모델 예측값과 정답 비교**
* **label_list = 모델 예측값, 정답**

In [None]:
label_list[:5]

### ❤️ 저장된 값 타입 변경 : Tensor -> DataFrame
------
* **label_list 내부는 Tensor로 저장**

In [None]:
label_list_model = []
label_list_answer = []

# label_list alternates: even positions hold a batch of predictions, odd
# positions hold that batch's true labels. Each tensor is flattened whole, so
# the batch size no longer has to be exactly 32.
for predicted_batch in label_list[0::2]:
    label_list_model.extend(predicted_batch.tolist())
for answer_batch in label_list[1::2]:
    label_list_answer.extend(answer_batch.tolist())

label_pre = pd.DataFrame({'model_pre': label_list_model, 'answer': label_list_answer})

label_pre

In [None]:
# 1 where the prediction equals the true label, 0 otherwise.
matches = label_pre['model_pre'] == label_pre['answer']
is_answer = [1 if hit else 0 for hit in matches]

label_pre['is_answer'] = is_answer

label_pre

In [None]:
# Misclassification count per true class, starting from zero for every class.
prediction = {class_index: 0 for class_index in range(gcc.num_classes)}

# Count over every row of label_pre. The original used a hard-coded row count
# (1731), which fails or silently truncates when the table has a different length.
wrong_answers = label_pre.loc[label_pre['is_answer'] == 0, 'answer']
for class_index, wrong_count in wrong_answers.value_counts().items():
    prediction[int(class_index)] = int(wrong_count)

print(prediction)

In [None]:
# Replace the in-session label_pre with a table loaded from a saved CSV.
# NOTE(review): presumably an export of an earlier run's label_pre — confirm the columns match.
label_pre = pd.read_csv('../input/is-false/label_list.csv')

In [None]:
# Keep only the misclassified rows (is_answer == 0).
wrong_rows = label_pre[label_pre['is_answer'] == 0]

model_pre = wrong_rows['model_pre'].tolist()
answer = wrong_rows['answer'].tolist()
is_answer = wrong_rows['is_answer'].tolist()

is_false = pd.DataFrame({'model_pre': model_pre, 'answer': answer, 'is_answer': is_answer})

is_false

In [None]:
import seaborn as sns

# Number of misclassified samples per true class.
plt.figure(figsize=[24, 6], dpi=200)
sns.countplot(x=is_false['answer'])
plt.xticks(rotation=60)
plt.show()

In [None]:
# df_all["cultivar_index"] = df_all["cultivar"].map(lambda item: unique_cultivars.index(item))
# Print every training row whose class index is 6. df_all has had rows removed,
# so its index labels have gaps; looking up labels 0..len-1 raised KeyError.
# Iterating the filtered rows uses only labels that exist.
for i, row in df_all[df_all["cultivar_index"] == 6].iterrows():
    print(row['image'], row["cultivar"], i)

In [None]:
# Misclassified samples whose true label is 32.
# NOTE(review): the variable is named class31 but the filter value is 32 — confirm which is intended.
rows_for_class = is_false[is_false['answer'] == 32]

model_pre = rows_for_class['model_pre'].tolist()
answer = rows_for_class['answer'].tolist()
is_answer = rows_for_class['is_answer'].tolist()

class31 = pd.DataFrame({'model_pre': model_pre, 'answer': answer, 'is_answer': is_answer})

class31

In [None]:
# Misclassified samples whose true label is 90.
rows_for_class = is_false[is_false['answer'] == 90]

model_pre = rows_for_class['model_pre'].tolist()
answer = rows_for_class['answer'].tolist()
is_answer = rows_for_class['is_answer'].tolist()

class90 = pd.DataFrame({'model_pre': model_pre, 'answer': answer, 'is_answer': is_answer})

class90

In [None]:
# Misclassified samples whose true label is 72.
rows_for_class = is_false[is_false['answer'] == 72]

model_pre = rows_for_class['model_pre'].tolist()
answer = rows_for_class['answer'].tolist()
is_answer = rows_for_class['is_answer'].tolist()

class72 = pd.DataFrame({'model_pre': model_pre, 'answer': answer, 'is_answer': is_answer})

class72

In [None]:
import seaborn as sns

# Which classes the model predicted for misclassified samples of true class 72.
plt.figure(figsize=[24, 6], dpi=200)
sns.countplot(x=class72['model_pre'])
plt.xticks(rotation=60)
plt.show()

In [None]:
import seaborn as sns

# Which classes the model predicted for misclassified samples of true class 90.
plt.figure(figsize=[24, 6], dpi=200)
sns.countplot(x=class90['model_pre'])
plt.xticks(rotation=60)
plt.show()

In [None]:
from PIL import Image

# Show three hand-picked training images side by side.
# NOTE(review): `dir` shadows the Python built-in of the same name.
dir = '../input/sorghum-id-fgvc-9/train_images'
filename1 = '2017-06-28__10-19-29-505.png'
filename2 = '2017-06-01__11-00-44-942.png'
filename3 = '2017-06-01__11-00-13-909.png'

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=[12, 6], dpi=300)

axes[0].imshow(Image.open(os.path.join(dir, filename1)))
axes[1].imshow(Image.open(os.path.join(dir, filename2)))
axes[2].imshow(Image.open(os.path.join(dir, filename3)))

plt.show()

In [None]:
from PIL import Image

# Show three hand-picked training images side by side.
# NOTE(review): `dir` shadows the Python built-in of the same name.
dir = '../input/sorghum-id-fgvc-9/train_images'
filename1 = '2017-06-02__18-04-25-964.png'
filename2 = '2017-06-09__11-56-23-546.png'
filename3 = '2017-06-26__11-57-27-185.png'

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=[12, 6], dpi=300)

axes[0].imshow(Image.open(os.path.join(dir, filename1)))
axes[1].imshow(Image.open(os.path.join(dir, filename2)))
axes[2].imshow(Image.open(os.path.join(dir, filename3)))

plt.show()

In [None]:
from PIL import Image

# Show three hand-picked training images side by side.
# NOTE(review): `dir` shadows the Python built-in of the same name.
dir = '../input/sorghum-id-fgvc-9/train_images'
filename1 = '2017-06-28__11-50-22-395.png'
filename2 = '2017-06-13__11-55-55-043.png'
filename3 = '2017-06-22__13-39-10-652.png'

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=[12, 6], dpi=300)

axes[0].imshow(Image.open(os.path.join(dir, filename1)))
axes[1].imshow(Image.open(os.path.join(dir, filename2)))
axes[2].imshow(Image.open(os.path.join(dir, filename3)))

plt.show()

In [None]:
from PIL import Image

# Show three hand-picked training images side by side.
# NOTE(review): `dir` shadows the Python built-in of the same name; the
# histogram cell that follows reads `dir` and `filename1` as set here.
dir = '../input/sorghum-id-fgvc-9/train_images'
filename1 = '2017-06-18__13-23-50-617.png'
filename2 = '2017-06-21__12-09-32-052.png'
filename3 = '2017-06-14__13-11-56-857.png'

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=[12, 6], dpi=300)

axes[0].imshow(Image.open(os.path.join(dir, filename1)))
axes[1].imshow(Image.open(os.path.join(dir, filename2)))
axes[2].imshow(Image.open(os.path.join(dir, filename3)))

plt.show()

In [None]:
from tensorflow.keras.preprocessing.image import load_img,img_to_array

# Load the image named by `filename1` in colour and in grayscale, then plot a
# per-channel intensity histogram for each.
image  = load_img(os.path.join(dir, filename1))
grayscale = load_img(os.path.join(dir, filename1),color_mode = "grayscale")
gray_array=  img_to_array(grayscale)
image_array = img_to_array(image)

plt.imshow(image)
fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=1,ncols=4, sharey=True, figsize=(24,5))

ax1.hist(image_array[:,:,0].ravel(),256,[0,256],color='red')
plt.ylim(0,20000)
ax2.hist(image_array[:,:,1].ravel(),256,[0,256], color='green')
# Channel 2 is blue; the original plotted channel 1 (green) a second time.
ax3.hist(image_array[:,:,2].ravel(),256,[0,256], color='blue')
ax4.hist(gray_array.ravel(),256,[0,256])
plt.show()

In [None]:
filename2 = '2017-06-28__10-19-29-505.png'

from tensorflow.keras.preprocessing.image import load_img,img_to_array

# Load the image named by `filename2` in colour and in grayscale, then plot a
# per-channel intensity histogram for each.
image  = load_img(os.path.join(dir, filename2))
grayscale = load_img(os.path.join(dir, filename2),color_mode = "grayscale")
gray_array=  img_to_array(grayscale)
image_array = img_to_array(image)

plt.imshow(image)
fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=1,ncols=4, sharey=True, figsize=(24,5))

ax1.hist(image_array[:,:,0].ravel(),256,[0,256],color='red')
plt.ylim(0,20000)
ax2.hist(image_array[:,:,1].ravel(),256,[0,256], color='green')
# Channel 2 is blue; the original plotted channel 1 (green) a second time.
ax3.hist(image_array[:,:,2].ravel(),256,[0,256], color='blue')
ax4.hist(gray_array.ravel(),256,[0,256])
plt.show()