<a href="https://colab.research.google.com/github/raffiainuls/Rice-Leaf-Disease-Classification-CNN/blob/main/PrePocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive into the Colab runtime; the dataset folders read below
# live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


PRE-PROCESSING

In [2]:
import pandas as pd
import numpy as np
import os

import cv2

import albumentations as albu
from albumentations import Compose, ShiftScaleRotate, Resize
from albumentations.pytorch import ToTensor


from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from sklearn.metrics import confusion_matrix
import itertools
from sklearn.metrics import classification_report

import shutil

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Image size in pixels and number of colour channels.
# Not referenced in the cells of this section; presumably the input shape
# expected by the downstream model (TODO confirm).
IMAGE_HEIGHT = IMAGE_WIDTH = 224
IMAGE_CHANNELS = 3

In [4]:
# Show the entries of the dataset root folder (one sub-folder per class).
dataset_root = '/content/drive/MyDrive/Colab Notebooks/Pembelajaran Mesin/ML_TugasKelompok/rice_leaf_diseases'
os.listdir(dataset_root)

['Bacterial leaf blight', 'Brown spot', 'Leaf smut']

In [5]:
# Collect the image file names found in each class folder.
#
# os.listdir() returns entries in arbitrary order, so every listing is sorted:
# the seeded validation sampling further down selects rows by position and is
# only reproducible when the order of these lists is stable.
RICE_LEAF_DIR = '/content/drive/MyDrive/Colab Notebooks/Pembelajaran Mesin/ML_TugasKelompok/rice_leaf_diseases'

leaf_smut_list = sorted(os.listdir(os.path.join(RICE_LEAF_DIR, 'Leaf smut')))
brown_spot_list = sorted(os.listdir(os.path.join(RICE_LEAF_DIR, 'Brown spot')))
bacterial_leaf_blight_list = sorted(os.listdir(os.path.join(RICE_LEAF_DIR, 'Bacterial leaf blight')))

# Number of files found per class.
print(len(leaf_smut_list))
print(len(brown_spot_list))
print(len(bacterial_leaf_blight_list))

40
40
40


SPLITTING DATA

In [28]:
# Build one DataFrame per class and hold out a validation sample from each.
# Columns: 'image' is the file name, 'target' is the class label string.

VAL_IMAGES_PER_CLASS = 6   # number of images held out per class for validation
SPLIT_SEED = 101           # fixed seed so the same rows are sampled on every run

df_leaf_smut = pd.DataFrame(leaf_smut_list, columns=['image'])
df_leaf_smut['target'] = 'leaf_smut'

df_brown_spot = pd.DataFrame(brown_spot_list, columns=['image'])
df_brown_spot['target'] = 'brown_spot'

df_bacterial_leaf_blight = pd.DataFrame(bacterial_leaf_blight_list, columns=['image'])
df_bacterial_leaf_blight['target'] = 'bacterial_leaf_blight'


# Create a val set for each class:
# sample VAL_IMAGES_PER_CLASS validation images from each class frame.
df_leaf_smut_val = df_leaf_smut.sample(n=VAL_IMAGES_PER_CLASS, random_state=SPLIT_SEED)
df_brown_spot_val = df_brown_spot.sample(n=VAL_IMAGES_PER_CLASS, random_state=SPLIT_SEED)
df_bacterial_leaf_blight_val = df_bacterial_leaf_blight.sample(n=VAL_IMAGES_PER_CLASS, random_state=SPLIT_SEED)


print("Data Val")
print(len(df_leaf_smut_val))
print(len(df_brown_spot_val))
print(len(df_bacterial_leaf_blight_val))

Data Val
6
6
6


In [29]:
# Create the train set for each class: every image of the class that was not
# sampled into that class's validation set.

def _train_rows(df_class, df_class_val):
    """Return the rows of df_class whose 'image' value does not occur in df_class_val."""
    held_out = list(df_class_val['image'])
    is_held_out = df_class['image'].isin(held_out)
    return df_class[~is_held_out]  # ~ inverts the mask: keep rows that are NOT held out


df_leaf_smut_train = _train_rows(df_leaf_smut, df_leaf_smut_val)
df_brown_spot_train = _train_rows(df_brown_spot, df_brown_spot_val)
df_bacterial_leaf_blight_train = _train_rows(df_bacterial_leaf_blight, df_bacterial_leaf_blight_val)


print("Data Train")
for class_train in (df_leaf_smut_train, df_brown_spot_train, df_bacterial_leaf_blight_train):
    print(len(class_train))

Data Train
34
34
34


In [30]:
# Create df_data (all images), df_train and df_val by stacking the per-class
# frames and renumbering the index from 0.

df_data = pd.concat([df_leaf_smut, df_brown_spot, df_bacterial_leaf_blight], axis=0).reset_index(drop=True)

df_train = \
pd.concat([df_leaf_smut_train, df_brown_spot_train, df_bacterial_leaf_blight_train], axis=0).reset_index(drop=True)

df_val = \
pd.concat([df_leaf_smut_val, df_brown_spot_val, df_bacterial_leaf_blight_val], axis=0).reset_index(drop=True)

# Shuffle the rows so the classes are interleaved instead of stacked in blocks.
# A fixed random_state makes the row order identical on every run; without it
# each execution of this cell produced a different order (and different CSVs).
SHUFFLE_SEED = 101
df_data = shuffle(df_data, random_state=SHUFFLE_SEED)
df_train = shuffle(df_train, random_state=SHUFFLE_SEED)
df_val = shuffle(df_val, random_state=SHUFFLE_SEED)

print(df_data.shape)
print(df_train.shape)
print(df_val.shape)

(120, 2)
(102, 2)
(18, 2)


In [31]:
# Number of images per class in the full dataset.
df_data['target'].value_counts()

leaf_smut                40
bacterial_leaf_blight    40
brown_spot               40
Name: target, dtype: int64

In [32]:
# Number of images per class in the training split.
df_train['target'].value_counts()

leaf_smut                34
bacterial_leaf_blight    34
brown_spot               34
Name: target, dtype: int64

In [33]:
# Number of images per class in the validation split.
df_val['target'].value_counts()

bacterial_leaf_blight    6
brown_spot               6
leaf_smut                6
Name: target, dtype: int64

In [34]:
# Replace the string 'target' column with one indicator column per class.
#
# NOTE: this cell rebinds df_val and df_train to the encoded frames, so it
# cannot be re-run on its own -- the 'target' column no longer exists after the
# first run. Re-run the cells that build df_val / df_train first.

# Stack val on top of train (there is no separate test set here) so both are
# encoded in one call and end up with identical indicator columns.
val_len = len(df_val)
train_len = len(df_train)
df_combined =  pd.concat(objs=[df_val, df_train], axis=0).reset_index(drop=True)

# Create the dummy variables. dtype=int pins the indicators to 0/1 integers;
# the default dtype depends on the pandas version (uint8 in older releases,
# bool in newer ones), which would change what is written to the CSV files.
df_combined = pd.get_dummies(df_combined, columns=['target'], dtype=int)

# Separate the sets again by position: the first val_len rows are the val set,
# the remaining rows are the train set (they keep df_combined's index labels).
df_val = df_combined[:val_len]
df_train = df_combined[val_len:]

# Sanity check: the positional split must give back the original row counts.
assert len(df_val) == val_len
assert len(df_train) == train_len

print(df_train.shape)
print(df_val.shape)

(102, 4)
(18, 4)


In [35]:
# Preview the first rows of the combined (val followed by train) encoded frame.
df_combined.head()

Unnamed: 0,image,target_bacterial_leaf_blight,target_brown_spot,target_leaf_smut
0,DSC_0404.JPG,1,0,0
1,DSC_0394.jpg,0,1,0
2,DSC_0511.jpg,0,0,1
3,DSC_0510.jpg,0,0,1
4,DSC_0398.JPG,1,0,0


In [36]:
# Preview the first training rows; the index starts at val_len because
# df_train is a positional slice of df_combined and keeps its index labels.
df_train.head()

Unnamed: 0,image,target_bacterial_leaf_blight,target_brown_spot,target_leaf_smut
18,DSC_0391.jpg,0,1,0
19,DSC_0309.JPG,0,0,1
20,DSC_0376.JPG,1,0,0
21,DSC_0405.JPG,1,0,0
22,DSC_0310.JPG,0,0,1


In [37]:
# Preview the first validation rows (the leading rows of df_combined).
df_val.head()

Unnamed: 0,image,target_bacterial_leaf_blight,target_brown_spot,target_leaf_smut
0,DSC_0404.JPG,1,0,0
1,DSC_0394.jpg,0,1,0
2,DSC_0511.jpg,0,0,1
3,DSC_0510.jpg,0,0,1
4,DSC_0398.JPG,1,0,0


SAVE DATAFRAMES AS CSV

In [38]:
# Write each frame as a gzip-compressed CSV without the index column.
# The file names are relative, so the files land in the current working directory.
csv_outputs = (
    ('df_combined.csv.gz', df_combined),
    ('df_train.csv.gz', df_train),
    ('df_val.csv.gz', df_val),
)
for csv_name, frame in csv_outputs:
    frame.to_csv(csv_name, compression='gzip', index=False)