<a href="https://colab.research.google.com/github/leoisqualified/Computer-Vision-with-YOLO/blob/main/Computer_Vision_with_YOLO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
'''
This notebook is a submission to the Zindi crop detection challenge. The aim of the challenge is to predict the
type of disease affecting a crop using a YOLO (You Only Look Once) model.
'''

'\nThis notebook is a submission to the Zindi crop detection challenge. The aim of the challenge is to predict the\ntype of disease affecting a crop using a YOLO (You Only Look Once) model.\n'

In [None]:
#import required libraries
import requests
from PIL import Image
from zipfile import ZipFile
import os
import shutil
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch

#install YOLOv5
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
%pip install -qr requirements.txt

# Add the yolov5 directory to the Python path
import sys
sys.path.append('/content/yolov5')

#check if GPU is available
print(f'GPU Available: {torch.cuda.is_available()}')

Cloning into 'yolov5'...
remote: Enumerating objects: 16995, done.[K
remote: Counting objects: 100% (190/190), done.[K
remote: Compressing objects: 100% (137/137), done.[K
remote: Total 16995 (delta 101), reused 114 (delta 53), pack-reused 16805 (from 1)[K
Receiving objects: 100% (16995/16995), 15.72 MiB | 13.79 MiB/s, done.
Resolving deltas: 100% (11630/11630), done.
/content/yolov5
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m870.5/870.5 kB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hCreating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help 

In [None]:
'''
We have added YOLO and checked our GPU availability. Next we will see how we can read the images from the drive,
convert the annotated data into YOLO format, train and make predictions.
'''

'\nWe have added YOLO and checked our GPU availability. Next we will see how we can read the images from the drive,\nconvert the annotated data into YOLO format, train and make predictions.\n'

# Set Kaggle API & Download Dataset

In [None]:
# One-time setup (kept commented out): upload the Kaggle API credentials
#from google.colab import files
#files.upload()  # This will prompt you to upload the kaggle.json file

In [None]:
# One-time setup (kept commented out): install the uploaded kaggle.json

# Create a Kaggle directory
#os.makedirs('/root/.kaggle', exist_ok=True)

# Move the kaggle.json file to the appropriate location
#!cp kaggle.json /root/.kaggle/

# Set permissions for the kaggle.json file (owner read/write only)
#!chmod 600 /root/.kaggle/kaggle.json


In [None]:
# Download the dataset archive from Kaggle (one-time; uncomment to run)
#!kaggle datasets download -d ohagwucollinspatrick/ghana-crop-disease

In [None]:
# Mount Google Drive (one-time setup step; uncomment to run)
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
# Upload the dataset: move the downloaded archive into Google Drive (one-time; uncomment to run)
#!mv ghana-crop-disease.zip /content/drive/MyDrive/

In [None]:
'''
The dataset has already been downloaded from Kaggle and uploaded to Google Drive. To fetch it yourself, uncomment and run the cells above.
Otherwise, run the cells below to continue the project.
'''

'\nThe dataset has already been downloaded from Kaggle and uploaded to Google Drive. To fetch it yourself, uncomment and run the cells above.\nOtherwise, run the cells below to continue the project.\n'

In [None]:
# Attach Google Drive at /content/drive so files stored under MyDrive can be read
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


# Data Preprocessing and Validation

In [None]:
# Read the annotation table for the training images from Google Drive
train = pd.read_csv(filepath_or_buffer='/content/drive/MyDrive/zindi_train.csv')

# Preview the first five rows
train.head(n=5)

In [None]:
# Count the distinct labels in the class column (missing values are not counted)
train['class'].nunique(dropna=True)

In [None]:
'''
There are 23 different annotated classes, indicating that 23 different diseases were identified in the images.
The ymin, ymax, xmin, xmax columns give the positions of the bounding boxes around each disease.
We will need to convert the train set to YOLO format. The following preprocessing steps will take place:
1. Perform data validation to ensure features are of the correct data type.
2. Convert the values in the class column to numeric.
3. Scale the values of ymin, ymax, xmin, xmax.
'''

In [None]:
# Check the data: print each column's dtype and non-null count to validate the annotation table
train.info()

In [None]:
'''
The data is very clean and there are no missing values and the data types are correct.
'''

# Create YOLO Format Annotation for Train

In [None]:
# Convert class to numeric.
# The original labels are preserved in 'class_name' and the encoder is always
# fitted on that column. Overwriting 'class' in place and re-running the cell
# would otherwise fit the encoder on the already-encoded integers, so
# le.classes_ would no longer hold the original labels and
# le.inverse_transform could not map ids back to them.
if 'class_name' not in train.columns:
    train['class_name'] = train['class']

le = LabelEncoder()
train['class'] = le.fit_transform(train['class_name'])

# Check the class column
train['class'].unique()

NameError: name 'train' is not defined

In [None]:
# Define the image dimensions used to normalise the bounding boxes.
# NOTE(review): 640x640 is an assumption, not read from the image files;
# confirm it against the real image sizes, otherwise the normalised
# coordinates written below are wrong.
image_width = 640
image_height = 640

# Directory to save YOLO annotations
output_dir = '/content/dataset/labels/train'
os.makedirs(output_dir, exist_ok=True)

# Compute the normalised YOLO geometry (box centre and size) for every
# annotation at once instead of row by row.
yolo_boxes = pd.DataFrame({
    # Label files are named after the image without its file extension
    'image_name': train['Image_ID'].map(lambda image_id: os.path.splitext(image_id)[0]),
    'class_id': train['class'],
    'x_center': ((train['xmax'] + train['xmin']) / 2) / image_width,
    'y_center': ((train['ymax'] + train['ymin']) / 2) / image_height,
    'width': (train['xmax'] - train['xmin']) / image_width,
    'height': (train['ymax'] - train['ymin']) / image_height,
})

# Normalised values outside [0, 1] mean the assumed image size does not match
# the annotations; report it instead of silently writing unusable labels.
geometry = yolo_boxes[['x_center', 'y_center', 'width', 'height']]
out_of_range = int(((geometry < 0) | (geometry > 1)).any(axis=1).sum())
if out_of_range:
    print(f'Warning: {out_of_range} of {len(yolo_boxes)} boxes fall outside [0, 1]; '
          'check image_width/image_height against the real image sizes')

# Write one label file per image. Mode 'w' (not 'a') makes the cell idempotent:
# re-running it rewrites each file instead of appending duplicate boxes.
for image_name, boxes in yolo_boxes.groupby('image_name', sort=False):
    train_yolo_annotation_file = os.path.join(output_dir, f'{image_name}.txt')
    with open(train_yolo_annotation_file, 'w') as f:
        for box in boxes.itertuples(index=False):
            f.write(f"{box.class_id} {box.x_center} {box.y_center} {box.width} {box.height}\n")

# Read the Images from the Dataset

In [None]:
# Archive holding the images, stored on Google Drive
source_path = '/content/drive/MyDrive/ghana-crop-disease.zip'

# Folder that receives the unpacked archive (created if missing)
extraction_path = '/content/dataset/'
os.makedirs(extraction_path, exist_ok=True)

# Unpack every member; the context manager closes the archive afterwards
with ZipFile(file=source_path, mode='r') as zip_ref:
    zip_ref.extractall(path=extraction_path)
    # Report that the extraction call has returned
    print('Done')

Done


In [None]:
'''
After extracting, delete the submission file, test and train from the dataset folder.
These are redundant files.
'''

'\nAfter extracting, delete the submission file, test and train from the dataset folder.\nThese are redundant files.\n'

# Creating and Organizing the Directory for YOLOv5

In [None]:
'''
In the cells above we created the YOLO-format annotations for the train set and extracted the image
files. Now we will create the directories and prepare them for the YOLOv5 model.
'''

'\nIn the cells above we created the YOLO-format annotations for the train set and extracted the image\nfiles. Now we will create the directories and prepare them for the YOLOv5 model.\n'