# Prerequisites

In [21]:
from google.colab import drive
import os
import pandas as pd
import cv2
import numpy as np

In [2]:
# Mounting Google Drive inside the Colab runtime.
# The dataset paths used further down are relative ('drive/MyDrive/...'), so they are
# resolved against the current working directory.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


# Exploring dataset

## Getting healthy images info

In [3]:
# Defining variables
healthy_imgs_path = 'drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/healthy'
healthy_imgs_paths = []
healthy_category = []
healthy_imgs_dimensions = []
healthy_unreadable_paths = []

# Reading healthy images and saving their information.
# os.listdir returns entries in arbitrary order, so the listing is sorted to keep the
# dataframe (and any seeded sampling done on it later) reproducible between runs.
for filename in sorted(os.listdir(healthy_imgs_path)):
  healthy_img_path = os.path.join(healthy_imgs_path, filename)
  healthy_img = cv2.imread(healthy_img_path)
  # cv2.imread returns None (it does not raise) when a file cannot be decoded,
  # so such entries are skipped instead of failing on `.shape`
  if healthy_img is None:
    healthy_unreadable_paths.append(healthy_img_path)
    continue
  healthy_imgs_paths.append(healthy_img_path)
  healthy_category.append('healthy')
  healthy_imgs_dimensions.append(healthy_img.shape)

# Reporting skipped files so missing images never go unnoticed
if healthy_unreadable_paths:
  print(f'Skipped {len(healthy_unreadable_paths)} unreadable file(s) in {healthy_imgs_path}')

# One row per readable image: path, shape returned by OpenCV and category name
df_healthy = pd.DataFrame({'Filename': healthy_imgs_paths, 'Dimension': healthy_imgs_dimensions, 'Category': healthy_category})
df_healthy

Unnamed: 0,Filename,Dimension,Category
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy
...,...,...,...
1012,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy
1013,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy
1014,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy
1015,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy


In [4]:
# Counting how many healthy images share each shape; a single entry in the result
# means every image has the same dimensions
healthy_dimension_counts = df_healthy['Dimension'].value_counts()
healthy_dimension_counts

(256, 256, 3)    1017
Name: Dimension, dtype: int64

In [5]:
# Persisting the healthy image inventory as CSV (the row index is not written)
healthy_full_csv = 'grape_healthy_full.csv'
df_healthy.to_csv(healthy_full_csv, index=False)

## Getting esca images info

In [6]:
# Defining variables
esca_imgs_path = 'drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/esca'
esca_imgs_paths = []
esca_category = []
esca_imgs_dimensions = []
esca_unreadable_paths = []

# Reading esca images and saving their information.
# os.listdir returns entries in arbitrary order, so the listing is sorted to keep the
# dataframe (and any seeded sampling done on it later) reproducible between runs.
for filename in sorted(os.listdir(esca_imgs_path)):
  esca_img_path = os.path.join(esca_imgs_path, filename)
  esca_img = cv2.imread(esca_img_path)
  # cv2.imread returns None (it does not raise) when a file cannot be decoded,
  # so such entries are skipped instead of failing on `.shape`
  if esca_img is None:
    esca_unreadable_paths.append(esca_img_path)
    continue
  esca_imgs_paths.append(esca_img_path)
  esca_category.append('esca')
  esca_imgs_dimensions.append(esca_img.shape)

# Reporting skipped files so missing images never go unnoticed
if esca_unreadable_paths:
  print(f'Skipped {len(esca_unreadable_paths)} unreadable file(s) in {esca_imgs_path}')

# One row per readable image: path, shape returned by OpenCV and category name
df_esca = pd.DataFrame({'Filename': esca_imgs_paths, 'Dimension': esca_imgs_dimensions, 'Category': esca_category})
df_esca

Unnamed: 0,Filename,Dimension,Category
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca
...,...,...,...
13279,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca
13280,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca
13281,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca
13282,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca


In [7]:
# Counting how many esca images share each shape; a single entry in the result
# means every image has the same dimensions
esca_dimension_counts = df_esca['Dimension'].value_counts()
esca_dimension_counts

(256, 256, 3)    13284
Name: Dimension, dtype: int64

In [8]:
# Persisting the esca image inventory as CSV (the row index is not written)
esca_full_csv = 'grape_esca_full.csv'
df_esca.to_csv(esca_full_csv, index=False)

## Getting black rot images info

In [9]:
# Defining variables
black_rot_imgs_path = 'drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/black_rot'
black_rot_imgs_paths = []
black_rot_category = []
black_rot_imgs_dimensions = []
black_rot_unreadable_paths = []

# Reading black rot images and saving their information.
# os.listdir returns entries in arbitrary order, so the listing is sorted to keep the
# dataframe (and any seeded sampling done on it later) reproducible between runs.
for filename in sorted(os.listdir(black_rot_imgs_path)):
  black_rot_img_path = os.path.join(black_rot_imgs_path, filename)
  black_rot_img = cv2.imread(black_rot_img_path)
  # cv2.imread returns None (it does not raise) when a file cannot be decoded,
  # so such entries are skipped instead of failing on `.shape`
  if black_rot_img is None:
    black_rot_unreadable_paths.append(black_rot_img_path)
    continue
  black_rot_imgs_paths.append(black_rot_img_path)
  black_rot_category.append('black_rot')
  black_rot_imgs_dimensions.append(black_rot_img.shape)

# Reporting skipped files so missing images never go unnoticed
if black_rot_unreadable_paths:
  print(f'Skipped {len(black_rot_unreadable_paths)} unreadable file(s) in {black_rot_imgs_path}')

# One row per readable image: path, shape returned by OpenCV and category name
df_black_rot = pd.DataFrame({'Filename': black_rot_imgs_paths, 'Dimension': black_rot_imgs_dimensions, 'Category': black_rot_category})
df_black_rot

Unnamed: 0,Filename,Dimension,Category
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot
...,...,...,...
11323,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot
11324,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot
11325,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot
11326,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot


In [10]:
# Counting how many black rot images share each shape; a single entry in the result
# means every image has the same dimensions
black_rot_dimension_counts = df_black_rot['Dimension'].value_counts()
black_rot_dimension_counts

(256, 256, 3)    11328
Name: Dimension, dtype: int64

In [11]:
# Persisting the black rot image inventory as CSV (the row index is not written)
black_rot_full_csv = 'grape_black_rot_full.csv'
df_black_rot.to_csv(black_rot_full_csv, index=False)

## Getting isariopsis images info

In [12]:
# Defining variables
isariopsis_imgs_path = 'drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/isariopsis'
isariopsis_imgs_paths = []
isariopsis_category = []
isariopsis_imgs_dimensions = []
isariopsis_unreadable_paths = []

# Reading isariopsis images and saving their information.
# os.listdir returns entries in arbitrary order, so the listing is sorted to keep the
# dataframe (and any seeded sampling done on it later) reproducible between runs.
for filename in sorted(os.listdir(isariopsis_imgs_path)):
  isariopsis_img_path = os.path.join(isariopsis_imgs_path, filename)
  isariopsis_img = cv2.imread(isariopsis_img_path)
  # cv2.imread returns None (it does not raise) when a file cannot be decoded,
  # so such entries are skipped instead of failing on `.shape`
  if isariopsis_img is None:
    isariopsis_unreadable_paths.append(isariopsis_img_path)
    continue
  isariopsis_imgs_paths.append(isariopsis_img_path)
  isariopsis_category.append('isariopsis')
  isariopsis_imgs_dimensions.append(isariopsis_img.shape)

# Reporting skipped files so missing images never go unnoticed
if isariopsis_unreadable_paths:
  print(f'Skipped {len(isariopsis_unreadable_paths)} unreadable file(s) in {isariopsis_imgs_path}')

# One row per readable image: path, shape returned by OpenCV and category name
df_isariopsis = pd.DataFrame({'Filename': isariopsis_imgs_paths, 'Dimension': isariopsis_imgs_dimensions, 'Category': isariopsis_category})
df_isariopsis

Unnamed: 0,Filename,Dimension,Category
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis
...,...,...,...
10327,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis
10328,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis
10329,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis
10330,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis


In [13]:
# Counting how many isariopsis images share each shape; a single entry in the result
# means every image has the same dimensions
isariopsis_dimension_counts = df_isariopsis['Dimension'].value_counts()
isariopsis_dimension_counts

(256, 256, 3)    10332
Name: Dimension, dtype: int64

In [14]:
# Persisting the isariopsis image inventory as CSV (the row index is not written)
isariopsis_full_csv = 'grape_isariopsis_full.csv'
df_isariopsis.to_csv(isariopsis_full_csv, index=False)

## Exploring dataset findings
*   All categories except "healthy" have more than 10k images; "healthy" has only 1,017.
*   All images have a dimension of 256x256 and are color images.

Let's try a model with 1k images per category and see what results we obtain. If the results are poor, we will need to augment the "healthy" category images.


# Load CSVs
Load the saved CSVs to avoid re-reading every image and re-extracting its information.

In [None]:
# Loading the four per-category CSVs back into dataframes, in the order
# healthy, esca, black rot, isariopsis.
# NOTE: no converters are passed, so after this round-trip the 'Dimension' column holds
# the text written by to_csv (e.g. "(256, 256, 3)") rather than a tuple.
full_csv_files = (
  'grape_healthy_full.csv',
  'grape_esca_full.csv',
  'grape_black_rot_full.csv',
  'grape_isariopsis_full.csv',
)
df_healthy, df_esca, df_black_rot, df_isariopsis = map(pd.read_csv, full_csv_files)

# Preparing dataframes for 1k model

In [23]:
# Defining data partition dimensions: 70% train, 20% validation and whatever
# remains (so the three sizes always add up to total_rows) for test
total_rows = 1000
train_fraction, validation_fraction = 0.7, 0.2
train_rows = int(total_rows * train_fraction)
validation_rows = int(total_rows * validation_fraction)
test_rows = total_rows - (train_rows + validation_rows)

## Healthy

In [36]:
# Drawing a seeded random sample of 1k rows from the full healthy dataframe
healthy_sample = df_healthy.sample(n=total_rows, random_state=33)
df_healthy_1k = healthy_sample.reset_index(drop=True)

# Adding the numeric category tag and the partition column. The rows were shuffled by
# the sampling, so the split is assigned by position: train, then validation, then test
partition_labels = np.repeat(['train', 'validation', 'test'], [train_rows, validation_rows, test_rows])
df_healthy_1k['Category_tag'] = 0
df_healthy_1k['Partition'] = partition_labels
df_healthy_1k

Unnamed: 0,Filename,Dimension,Category,Category_tag,Partition
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,train
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,train
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,train
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,train
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,train
...,...,...,...,...,...
995,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,test
996,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,test
997,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,test
998,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,test


In [37]:
# Verifying how many healthy rows ended up in each partition
healthy_partition_counts = df_healthy_1k['Partition'].value_counts()
healthy_partition_counts

train         700
validation    200
test          100
Name: Partition, dtype: int64

## ESCA

In [38]:
# Drawing a seeded random sample of 1k rows from the full esca dataframe
esca_sample = df_esca.sample(n=total_rows, random_state=33)
df_esca_1k = esca_sample.reset_index(drop=True)

# Adding the numeric category tag and the partition column. The rows were shuffled by
# the sampling, so the split is assigned by position: train, then validation, then test
partition_labels = np.repeat(['train', 'validation', 'test'], [train_rows, validation_rows, test_rows])
df_esca_1k['Category_tag'] = 1
df_esca_1k['Partition'] = partition_labels
df_esca_1k

Unnamed: 0,Filename,Dimension,Category,Category_tag,Partition
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca,1,train
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca,1,train
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca,1,train
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca,1,train
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca,1,train
...,...,...,...,...,...
995,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca,1,test
996,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca,1,test
997,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca,1,test
998,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",esca,1,test


In [30]:
# Verifying how many esca rows ended up in each partition
esca_partition_counts = df_esca_1k['Partition'].value_counts()
esca_partition_counts

train         700
validation    200
test          100
Name: Partition, dtype: int64

## Black rot

In [39]:
# Drawing a seeded random sample of 1k rows from the full black rot dataframe
black_rot_sample = df_black_rot.sample(n=total_rows, random_state=33)
df_black_rot_1k = black_rot_sample.reset_index(drop=True)

# Adding the numeric category tag and the partition column. The rows were shuffled by
# the sampling, so the split is assigned by position: train, then validation, then test
partition_labels = np.repeat(['train', 'validation', 'test'], [train_rows, validation_rows, test_rows])
df_black_rot_1k['Category_tag'] = 2
df_black_rot_1k['Partition'] = partition_labels
df_black_rot_1k

Unnamed: 0,Filename,Dimension,Category,Category_tag,Partition
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot,2,train
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot,2,train
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot,2,train
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot,2,train
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot,2,train
...,...,...,...,...,...
995,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot,2,test
996,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot,2,test
997,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot,2,test
998,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",black_rot,2,test


In [32]:
# Verifying how many black rot rows ended up in each partition
black_rot_partition_counts = df_black_rot_1k['Partition'].value_counts()
black_rot_partition_counts

train         700
validation    200
test          100
Name: Partition, dtype: int64

## Isariopsis

In [40]:
# Drawing a seeded random sample of 1k rows from the full isariopsis dataframe
isariopsis_sample = df_isariopsis.sample(n=total_rows, random_state=33)
df_isariopsis_1k = isariopsis_sample.reset_index(drop=True)

# Adding the numeric category tag and the partition column. The rows were shuffled by
# the sampling, so the split is assigned by position: train, then validation, then test
partition_labels = np.repeat(['train', 'validation', 'test'], [train_rows, validation_rows, test_rows])
df_isariopsis_1k['Category_tag'] = 3
df_isariopsis_1k['Partition'] = partition_labels
df_isariopsis_1k

Unnamed: 0,Filename,Dimension,Category,Category_tag,Partition
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,train
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,train
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,train
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,train
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,train
...,...,...,...,...,...
995,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,test
996,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,test
997,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,test
998,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,test


In [42]:
# Verifying how many isariopsis rows ended up in each partition
isariopsis_partition_counts = df_isariopsis_1k['Partition'].value_counts()
isariopsis_partition_counts

train         700
validation    200
test          100
Name: Partition, dtype: int64

## Saving 1k samples CSVs

In [41]:
# Writing each 1k sample dataframe to its own CSV (the row index is not written)
sample_csv_targets = (
  (df_healthy_1k, 'grape_healthy_1k.csv'),
  (df_esca_1k, 'grape_esca_1k.csv'),
  (df_black_rot_1k, 'grape_black_rot_1k.csv'),
  (df_isariopsis_1k, 'grape_isariopsis_1k.csv'),
)
for sample_frame, csv_name in sample_csv_targets:
  sample_frame.to_csv(csv_name, index=False)

# Preparing data for model training

In [48]:
# Creating train dataframe: the 'train' rows of every category stacked in the order
# healthy, esca, black rot, isariopsis, with a fresh 0..n-1 index
df_train = pd.concat(
  [
    category_frame[category_frame['Partition'] == 'train']
    for category_frame in (df_healthy_1k, df_esca_1k, df_black_rot_1k, df_isariopsis_1k)
  ],
  ignore_index=True,
)
df_train

Unnamed: 0,Filename,Dimension,Category,Category_tag,Partition
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,train
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,train
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,train
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,train
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,train
...,...,...,...,...,...
2795,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,train
2796,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,train
2797,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,train
2798,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,train


In [50]:
# Creating validation dataframe: the 'validation' rows of every category stacked in the
# order healthy, esca, black rot, isariopsis, with a fresh 0..n-1 index
df_val = pd.concat(
  [
    category_frame[category_frame['Partition'] == 'validation']
    for category_frame in (df_healthy_1k, df_esca_1k, df_black_rot_1k, df_isariopsis_1k)
  ],
  ignore_index=True,
)
df_val

Unnamed: 0,Filename,Dimension,Category,Category_tag,Partition
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,validation
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,validation
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,validation
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,validation
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,validation
...,...,...,...,...,...
795,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,validation
796,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,validation
797,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,validation
798,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,validation


In [51]:
# Creating test dataframe: the 'test' rows of every category stacked in the order
# healthy, esca, black rot, isariopsis, with a fresh 0..n-1 index
df_test = pd.concat(
  [
    category_frame[category_frame['Partition'] == 'test']
    for category_frame in (df_healthy_1k, df_esca_1k, df_black_rot_1k, df_isariopsis_1k)
  ],
  ignore_index=True,
)
df_test

Unnamed: 0,Filename,Dimension,Category,Category_tag,Partition
0,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,test
1,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,test
2,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,test
3,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,test
4,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",healthy,0,test
...,...,...,...,...,...
395,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,test
396,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,test
397,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,test
398,drive/MyDrive/MBD&CD/MBID14_TFM/DL/Grape_imgs/...,"(256, 256, 3)",isariopsis,3,test
