In [None]:
import torch
import cv2
import os
import torch.nn as tnn
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import seaborn as sns

from PIL import Image
from skimage import io, transform
from torchvision.transforms import transforms
from torchvision import utils
from torchvision import datasets
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import MultiLabelBinarizer
from collections import Counter

In [None]:
# Input locations; every path lives under the same dataset directory.
_DATASET_DIR = "../input/plant-pathology-2021-fgvc8/"

IMAGE_PATH = _DATASET_DIR + "train_images/"        # training image folder
TEST_IMG_PATH = _DATASET_DIR + "test_images/"      # test image folder
TRAIN_PATH = _DATASET_DIR + "train.csv"            # CSV read into `train_labels`
SUB_PATH = _DATASET_DIR + "sample_submission.csv"  # sample submission CSV

In [None]:
# Read the CSV at TRAIN_PATH into a DataFrame; the bare name on the last
# line makes the notebook render it as the cell output.
train_labels = pd.read_csv(filepath_or_buffer=TRAIN_PATH)
train_labels

In [None]:
# Distinct raw values of the 'labels' column, in order of first appearance.
train_labels.labels.unique()

In [None]:
# Bar chart: number of rows in `train_labels` for each distinct 'labels' value.
label_counts = train_labels.labels.value_counts()  # computed once, used for both axes

plt.figure(figsize=(18,12))
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
# The Axes is stored as `ax` so the name `labels` is not tied up by a plot object.
ax = sns.barplot(x=label_counts.index, y=label_counts.values)
# Title and axis labels are set after plotting so seaborn's automatic axis
# labels cannot replace them.
plt.title("Phân phối số lượng ảnh trong các nhãn",size= 25)
plt.ylabel("Số lượng ảnh", size=20);
plt.xlabel("Nhãn", size=20);
# Tilt the tick labels so long label combinations stay readable.
for item in ax.get_xticklabels():
    item.set_rotation(45)
plt.savefig('plot.png')

In [None]:
# Turn each space-separated 'labels' string into a list of class names,
# then one-hot encode: one 0/1 column per class found by the binarizer.
split_labels = [text.split() for text in train_labels.labels]
mlb = MultiLabelBinarizer().fit(split_labels)
one_hot = pd.DataFrame(mlb.transform(split_labels), columns = mlb.classes_)

# Put the image file name in the first column, followed by the class columns.
labels = pd.concat([train_labels['image'], one_hot], axis=1)
labels.head()

In [None]:
# Bar chart: how many images carry 1, 2, 3, ... labels.
# value_counts() orders by frequency, so sort by the label count itself and
# derive the x labels from the data instead of hard-coding ['1', '2', '3'];
# this keeps every bar paired with the right count and works for any number
# of distinct label counts.
labels_per_image = labels.iloc[:,1:].sum(axis=1).value_counts().sort_index()
data = [str(n_labels) for n_labels in labels_per_image.index]
value = labels_per_image.values
colors = ['mediumturquoise', 'burlywood','sandybrown']
plt.figure(figsize=(8, 8))
plt.bar(data, value, color = colors)
plt.title('Ảnh có nhiều nhãn',fontsize = 14)
plt.xlabel('Số nhãn',fontsize = 12)
plt.ylabel('Số lượng ảnh',fontsize = 12)
plt.savefig('plot2.png')
plt.show()

In [None]:
# Collect the pixel dimensions of every training image:
# `hs` receives heights and `ws` receives widths, in the order of `img_name`.
img_name = labels.iloc[:,0].tolist()
hs = []
ws = []
for name in img_name:
    # The context manager closes the file handle as soon as the size is read;
    # `size` comes from the header, so the pixel data is not decoded.
    with Image.open(IMAGE_PATH + name) as img:
        # PIL reports size as (width, height), in that order.
        w, h = img.size
    hs.append(h)
    ws.append(w)

In [None]:
# Bar chart: number of images for each distinct value collected in `hs`.
# Sorting puts the sizes in ascending order along the x axis; the dedicated
# names keep the `labels` DataFrame from being overwritten by this cell.
hs_labels, hs_values = zip(*sorted(Counter(hs).items()))

indexes = np.arange(len(hs_labels))
width = 1

plt.bar(indexes, hs_values, width)
# plt.bar centres each bar on its x position, so the ticks go at `indexes`
# (an extra half-width offset would push every label onto a bar edge).
plt.xticks(indexes, hs_labels)
plt.savefig('plot4.png')
plt.show()

In [None]:
# Bar chart: number of images for each distinct value collected in `ws`.
# Sorting puts the sizes in ascending order along the x axis; the dedicated
# names keep the `labels` DataFrame from being overwritten by this cell.
ws_labels, ws_values = zip(*sorted(Counter(ws).items()))

indexes = np.arange(len(ws_labels))
width = 1

plt.bar(indexes, ws_values, width)
# plt.bar centres each bar on its x position, so the ticks go at `indexes`
# (an extra half-width offset would push every label onto a bar edge).
plt.xticks(indexes, ws_labels)
plt.savefig('plot5.png')
plt.show()

In [None]:
# Build the one-hot label table with the already-fitted binarizer:
# image file name first, then one 0/1 column per class in mlb.classes_.
encoded = mlb.transform([text.split() for text in train_labels.labels])
labels = pd.concat(
    [train_labels['image'], pd.DataFrame(encoded, columns = mlb.classes_)],
    axis=1,
)

In [None]:
def visualize_batch(path,image_ids, labels):
    """Show a batch of images in a 3-column grid, each titled with its label.

    Args:
        path: Directory containing the image files.
        image_ids: Iterable of image file names, relative to ``path``.
        labels: Iterable of labels paired positionally with ``image_ids``;
            each one is rendered in the subplot title as ``Class: <label>``.

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the image files.

    Side effects:
        Writes the finished grid to 'plot3.png' in the working directory
        (only when the batch is non-empty) and displays the figure.
    """
    pairs = list(zip(image_ids, labels))
    # Up to nine images keep the 3x3 layout and 16x12 figure; larger batches
    # get extra rows instead of failing on an out-of-range subplot index.
    n_rows = max(3, (len(pairs) + 2) // 3)
    plt.figure(figsize=(16, 4 * n_rows))

    for ind, (image_id, label) in enumerate(pairs):
        plt.subplot(n_rows, 3, ind + 1)
        image_file = os.path.join(path, image_id)
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {image_file}")
        # OpenCV loads pixels as BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        plt.imshow(image)
        plt.title(f"Class: {label}", fontsize=12)
        plt.axis("off")

    # Save once, after the whole grid is drawn, instead of once per image.
    if pairs:
        plt.savefig('plot3.png')
    plt.show()

In [None]:
# Draw nine random rows and show their images titled with the raw label strings.
img_s = train_labels.sample(n=9)
image_ids, labels_s = img_s["image"].values, img_s["labels"].values
visualize_batch(path=IMAGE_PATH, image_ids=image_ids, labels=labels_s)

In [None]:
# For each class column, list the values of the first column of `labels`
# (the image file names) for the rows where that class is set to 1.
first_column = labels.iloc[:, 0]

l_complex = first_column[labels['complex'] == 1].tolist()
frog_eye_leaf_spot = first_column[labels['frog_eye_leaf_spot'] == 1].tolist()
healthy = first_column[labels['healthy'] == 1].tolist()
powdery_mildew = first_column[labels['powdery_mildew'] == 1].tolist()
rust = first_column[labels['rust'] == 1].tolist()
scab = first_column[labels['scab'] == 1].tolist()

In [None]:
# Preview the first images from the `healthy` list in a 3x3 grid.
fig = plt.figure(figsize=(12, 12))
# Slicing (rather than indexing 0..8) shows however many images exist,
# up to nine, instead of raising IndexError on a short list.
for i, image_file in enumerate(healthy[:9]):
    # Copy the pixels into an array inside the context manager so the file
    # handle is closed before moving on to the next image.
    with Image.open(IMAGE_PATH + image_file) as img:
        img_array = np.array(img)
    fig.add_subplot(3, 3, i+1)
    plt.imshow(img_array)

In [None]:
# Preview the first images from the `l_complex` list in a 3x3 grid.
fig = plt.figure(figsize=(12, 12))
# Slicing (rather than indexing 0..8) shows however many images exist,
# up to nine, instead of raising IndexError on a short list.
for i, image_file in enumerate(l_complex[:9]):
    # Copy the pixels into an array inside the context manager so the file
    # handle is closed before moving on to the next image.
    with Image.open(IMAGE_PATH + image_file) as img:
        img_array = np.array(img)
    fig.add_subplot(3, 3, i+1)
    plt.imshow(img_array)

In [None]:
# Preview the first images from the `frog_eye_leaf_spot` list in a 3x3 grid.
fig = plt.figure(figsize=(12, 12))
# Slicing (rather than indexing 0..8) shows however many images exist,
# up to nine, instead of raising IndexError on a short list.
for i, image_file in enumerate(frog_eye_leaf_spot[:9]):
    # Copy the pixels into an array inside the context manager so the file
    # handle is closed before moving on to the next image.
    with Image.open(IMAGE_PATH + image_file) as img:
        img_array = np.array(img)
    fig.add_subplot(3, 3, i+1)
    plt.imshow(img_array)

In [None]:
# Preview the first images from the `scab` list in a 3x3 grid.
fig = plt.figure(figsize=(12, 12))
# Slicing (rather than indexing 0..8) shows however many images exist,
# up to nine, instead of raising IndexError on a short list.
for i, image_file in enumerate(scab[:9]):
    # Copy the pixels into an array inside the context manager so the file
    # handle is closed before moving on to the next image.
    with Image.open(IMAGE_PATH + image_file) as img:
        img_array = np.array(img)
    fig.add_subplot(3, 3, i+1)
    plt.imshow(img_array)

In [None]:
# Preview the first images from the `powdery_mildew` list in a 3x3 grid.
fig = plt.figure(figsize=(12, 12))
# Slicing (rather than indexing 0..8) shows however many images exist,
# up to nine, instead of raising IndexError on a short list.
for i, image_file in enumerate(powdery_mildew[:9]):
    # Copy the pixels into an array inside the context manager so the file
    # handle is closed before moving on to the next image.
    with Image.open(IMAGE_PATH + image_file) as img:
        img_array = np.array(img)
    fig.add_subplot(3, 3, i+1)
    plt.imshow(img_array)

In [None]:
# Preview the first images from the `rust` list in a 3x3 grid.
fig = plt.figure(figsize=(12, 12))
# Slicing (rather than indexing 0..8) shows however many images exist,
# up to nine, instead of raising IndexError on a short list.
for i, image_file in enumerate(rust[:9]):
    # Copy the pixels into an array inside the context manager so the file
    # handle is closed before moving on to the next image.
    with Image.open(IMAGE_PATH + image_file) as img:
        img_array = np.array(img)
    fig.add_subplot(3, 3, i+1)
    plt.imshow(img_array)