## **Initial Setup**

In [1]:
# Check GPU type
!nvidia-smi

Thu Feb 27 08:22:48 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   62C    P8             12W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [7]:
# Mount google drive

from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
# Navigate to the project folder

%cd drive/MyDrive/Amini_Cocoa_Contamination_identification

/content/drive/MyDrive/Amini_Cocoa_Contamination_identification


In [53]:
# downloading the datasets using url

!wget -O "./dataset.zip" "https://zindi-private-release.s3.eu-west-2.amazonaws.com/uploads/competition_datafile/file/104895/dataset.zip?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAZF6GMQOOWFPUAZPE%2F20250227%2Feu-west-2%2Fs3%2Faws4_request&X-Amz-Date=20250227T093632Z&X-Amz-Expires=900&X-Amz-SignedHeaders=host&X-Amz-Signature=60b6292b750338819db5079036d6302debdcd6761903e8db4a0d6bc863d537a1"

--2025-02-27 09:38:08--  https://zindi-private-release.s3.eu-west-2.amazonaws.com/uploads/competition_datafile/file/104895/dataset.zip?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAZF6GMQOOWFPUAZPE%2F20250227%2Feu-west-2%2Fs3%2Faws4_request&X-Amz-Date=20250227T093632Z&X-Amz-Expires=900&X-Amz-SignedHeaders=host&X-Amz-Signature=60b6292b750338819db5079036d6302debdcd6761903e8db4a0d6bc863d537a1
Resolving zindi-private-release.s3.eu-west-2.amazonaws.com (zindi-private-release.s3.eu-west-2.amazonaws.com)... 52.95.142.118, 3.5.244.142, 52.95.143.114, ...
Connecting to zindi-private-release.s3.eu-west-2.amazonaws.com (zindi-private-release.s3.eu-west-2.amazonaws.com)|52.95.142.118|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10129066154 (9.4G) [application/zip]
Saving to: ‘./dataset.zip’


2025-02-27 09:48:59 (14.9 MB/s) - ‘./dataset.zip’ saved [10129066154/10129066154]



## File Processing

In [54]:
# importing libraries
import os
from pathlib import Path
import shutil


In [55]:
# Current working directory (Present Working Directory)
PWD = Path(".")
FILE_LIST = os.listdir(PWD)
FILE_LIST

['Train.csv',
 'CoCoa_Disease_Starter_Notebook.ipynb',
 'Test.csv',
 'Amini_Cocoa_Contamination_identification.ipynb',
 'dataset.zip']

In [56]:
## Assigning Path direcotries to DATASETS_DIR
DATASETS_DIR = Path('dataset')
DATASETS_DIR

PosixPath('dataset')

In [57]:
# Image & labels sub directories with in DATASETS_DIR

TRAIN_IMAGES_DIR = Path(os.path.join(DATASETS_DIR, 'images', 'train'))
TRAIN_LABELS_DIR = Path(os.path.join(DATASETS_DIR, 'labels', 'train'))
TEST_IMAGES_DIR = Path(os.path.join(DATASETS_DIR, 'images', 'test'))
VAL_IMAGES_DIR = Path(os.path.join(DATASETS_DIR, 'images', 'val'))
VAL_LABELS_DIR = Path(os.path.join(DATASETS_DIR, 'labels', 'val'))


In [59]:
# Check if DATASETS_DIR exists, if it does delete it and recreate it
DIRS = [TRAIN_IMAGES_DIR, VAL_IMAGES_DIR, TEST_IMAGES_DIR, VAL_LABELS_DIR, DATASETS_DIR]

for DIR in DIRS:
  if DIR.exists():
    shutil.rmtree(DIR)
  DIR.mkdir(parents=True, exist_ok = True)

In [60]:
# Unpack the 'dataset.zip'
shutil.unpack_archive(PWD / 'dataset.zip', DATASETS_DIR)

In [61]:
# A function to count the number of files in each Sub Directories

def count_files(directory):
    total_files = 0
    for root, _, files in os.walk(directory):
        total_files += len(files)
    return total_files


In [78]:
## Count the number of files in TRAIN_IMAGES_DIR
num_train_images = count_files(TRAIN_IMAGES_DIR)

## Count the number of files in TRAIN_LABELS_DIR
num_train_labels = count_files(TRAIN_LABELS_DIR)

print(f'{"-"*35} \n{" "*10} Number of files \n{"-"*35}')
print(f'TRAIN_IMAGES_DIR | TRAIN_LABEL_DIR ')
print(f'{"-"*35}')
print(f'{num_train_images} {" "*11} | {num_train_labels}')


----------------------------------- 
           Number of files 
-----------------------------------
TRAIN_IMAGES_DIR | TRAIN_LABEL_DIR 
-----------------------------------
5529             | 5529


In [82]:
## Get the set of all the stems of the images & labels
train_images_stems = set([str(Path(name).stem) for name in os.listdir(TRAIN_IMAGES_DIR)])
train_labels_stems = set([str(Path(name).stem) for name in os.listdir(TRAIN_LABELS_DIR)])

## Checking the two sets are identical
train_images_stems == train_labels_stems

True

## Data Pre-Processing

In [84]:
# importing libraries

import pandas as pd


In [85]:
# Load train and test files
train = pd.read_csv(PWD / 'Train.csv')
test = pd.read_csv(PWD / 'Test.csv')

In [86]:
train.head()

Unnamed: 0,Image_ID,class,confidence,ymin,xmin,ymax,xmax,class_id,ImagePath
0,ID_nBgcAR.jpg,healthy,1.0,75.0,15.0,162.0,195.0,2,dataset/images/train/ID_nBgcAR.jpg
1,ID_nBgcAR.jpg,healthy,1.0,58.0,1.0,133.0,171.0,2,dataset/images/train/ID_nBgcAR.jpg
2,ID_nBgcAR.jpg,healthy,1.0,42.0,29.0,377.0,349.0,2,dataset/images/train/ID_nBgcAR.jpg
3,ID_Kw2v8A.jpg,healthy,1.0,112.0,124.0,404.0,341.0,2,dataset/images/train/ID_Kw2v8A.jpg
4,ID_Kw2v8A.jpg,healthy,1.0,148.0,259.0,413.0,412.0,2,dataset/images/train/ID_Kw2v8A.jpg


In [87]:
test.head()

Unnamed: 0,Image_ID,class,confidence,ymin,xmin,ymax,xmax,class_id,ImagePath
0,ID_Genxyu.jpg,,,,,,,,dataset/images/test/ID_Genxyu.jpg
1,ID_svY6TG.jpg,,,,,,,,dataset/images/test/ID_svY6TG.jpg
2,ID_d0gpda.jpg,,,,,,,,dataset/images/test/ID_d0gpda.jpg
3,ID_frWmBT.jpg,,,,,,,,dataset/images/test/ID_frWmBT.jpg
4,ID_TaRW6o.jpg,,,,,,,,dataset/images/test/ID_TaRW6o.jpg


Github Pushing

In [152]:
!rm -rf .git

In [153]:
!git config --global user.name "sineshawl"
!git config --global user.email "sinelegese306@gmail.com"

In [101]:
!pwd

/content/drive/MyDrive/Amini_Cocoa_Contamination_identification


In [154]:
!git init

[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/drive/MyDrive/Amini_Cocoa_Contamination_identification/.git/


In [155]:
!git remote add origin "https://github.com/sineshawl/Amini_Cocoa_Contamination_Identification.git"

In [74]:
!git remote -v

origin	https://github.com/sineshawl/Amini_Cocoa_Contamination_Identification.git (fetch)
origin	https://github.com/sineshawl/Amini_Cocoa_Contamination_Identification.git (push)


In [157]:
!git branch -v

In [146]:
!git branch -M Deep-learning

In [134]:
!git pull https://github.com/sineshawl/Amini_Cocoa_Contamination_Identification.git --allow-unrelated-histories

remote: Enumerating objects: 28, done.[K
remote: Counting objects:   3% (1/28)[Kremote: Counting objects:   7% (2/28)[Kremote: Counting objects:  10% (3/28)[Kremote: Counting objects:  14% (4/28)[Kremote: Counting objects:  17% (5/28)[Kremote: Counting objects:  21% (6/28)[Kremote: Counting objects:  25% (7/28)[Kremote: Counting objects:  28% (8/28)[Kremote: Counting objects:  32% (9/28)[Kremote: Counting objects:  35% (10/28)[Kremote: Counting objects:  39% (11/28)[Kremote: Counting objects:  42% (12/28)[Kremote: Counting objects:  46% (13/28)[Kremote: Counting objects:  50% (14/28)[Kremote: Counting objects:  53% (15/28)[Kremote: Counting objects:  57% (16/28)[Kremote: Counting objects:  60% (17/28)[Kremote: Counting objects:  64% (18/28)[Kremote: Counting objects:  67% (19/28)[Kremote: Counting objects:  71% (20/28)[Kremote: Counting objects:  75% (21/28)[Kremote: Counting objects:  78% (22/28)[Kremote: Counting objects:  82% (23/28)[Kr

In [109]:
!rm -rf .git/index.lock
!rm -rf .git/refs/heads/lock

In [147]:
!git add Amini_Cocoa_Contamination_identification.ipynb Train.csv Test.csv

In [136]:
!git status


No commits yet

Changes to be committed:
  (use "git rm --cached <file>..." to unstage)
	[32mnew file:   Amini_Cocoa_Contamination_identification.ipynb[m
	[32mnew file:   Test.csv[m
	[32mnew file:   Train.csv[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31mCoCoa_Disease_Starter_Notebook.ipynb[m
	[31mdataset.zip[m
	[31mdataset/[m



In [144]:
!git commit -m "Initial Setup and File Processing."

[main (root-commit) 121d9dc] Initial Setup and File Processing.
 3 files changed, 11421 insertions(+)
 create mode 100644 Amini_Cocoa_Contamination_identification.ipynb
 create mode 100644 Test.csv
 create mode 100644 Train.csv


In [151]:
!git push --set-upstream origin Deep-learning


fatal: could not read Username for 'https://github.com': No such device or address
