

1.   Importing libraries



In [None]:
import os
import shutil

In [None]:
from sklearn.model_selection import train_test_split



2.   Connecting to the Drive



In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be read through normal file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive




3.   Data preparation



In [None]:
# Dataset root on the mounted Drive, with one sub-folder of contact maps per class
data_dir = '/content/drive/My Drive/contact-maps'
alpha_dir, beta_dir = (
    os.path.join(data_dir, class_folder)
    for class_folder in ('alpha-helices', 'beta-sheets')
)

In [None]:
# Collect the full path of every PNG contact map in each class folder.
# os.listdir returns entries in arbitrary order, so the lists are sorted to give
# the seeded train/validation split the same input order on every run.
alpha_helices = sorted(os.path.join(alpha_dir, f) for f in os.listdir(alpha_dir) if f.endswith('.png'))
beta_sheets = sorted(os.path.join(beta_dir, f) for f in os.listdir(beta_dir) if f.endswith('.png'))

In [None]:
# Hold out 20% of each class for validation, using the same fixed random_state for both classes
split_options = dict(test_size=0.2, random_state=42)
alpha_train, alpha_val = train_test_split(alpha_helices, **split_options)
beta_train, beta_val = train_test_split(beta_sheets, **split_options)

In [None]:
# Local dataset root laid out as <split>/images and <split>/labels
yolo_data_dir = '/content/yolo-contact-maps'
train_dir, val_dir, train_labels_dir, val_labels_dir = (
    os.path.join(yolo_data_dir, relative_path)
    for relative_path in ('train/images', 'val/images', 'train/labels', 'val/labels')
)

In [None]:
# Create the image and label folders for both splits; existing folders are left untouched
for split_folder in (train_dir, val_dir, train_labels_dir, val_labels_dir):
    os.makedirs(split_folder, exist_ok=True)



5.   Data preprocessing



In [None]:
# Function to generate annotations for contact maps
def create_annotation(label_path, class_id, x_center=0.5, y_center=0.5, width=0.7, height=0.4):
    """Write a single-box label file.

    The file at ``label_path`` is created or overwritten with one line holding
    ``class_id x_center y_center width height`` separated by single spaces and
    terminated by a newline.

    Args:
        label_path: Destination path of the label file.
        class_id: Class index written as the first field.
        x_center: Box centre x written as the second field (default 0.5).
        y_center: Box centre y written as the third field (default 0.5).
        width: Box width written as the fourth field (default 0.7).
        height: Box height written as the fifth field (default 0.4).
    """
    with open(label_path, 'w') as label_file:
        box_fields = (class_id, x_center, y_center, width, height)
        label_file.write(" ".join(f"{field}" for field in box_fields) + "\n")

In [None]:
# Function for copying images
def prepare_data(img_files, dst_img_dir, dst_lbl_dir, class_id, width, height):
    """Copy images into a dataset split and write one label file per image.

    Each image is copied to ``dst_img_dir`` under its original file name, and a
    label file with the same stem and a ``.txt`` extension is written to
    ``dst_lbl_dir`` through ``create_annotation``.

    Args:
        img_files: Iterable of source image paths.
        dst_img_dir: Directory that receives the image copies.
        dst_lbl_dir: Directory that receives the label files.
        class_id: Class index written to every label.
        width: Box width forwarded to ``create_annotation``.
        height: Box height forwarded to ``create_annotation``.

    Note:
        Images that share a file name overwrite each other in the destination,
        because only the base name of the source path is kept.
    """
    # Create the destinations up front so the function also works on a fresh runtime.
    os.makedirs(dst_img_dir, exist_ok=True)
    os.makedirs(dst_lbl_dir, exist_ok=True)

    for img_path in img_files:
        img_file = os.path.basename(img_path)
        dst_img_path = os.path.join(dst_img_dir, img_file)
        shutil.copy(img_path, dst_img_path)

        # Generate the annotation. Only the final extension is swapped:
        # str.replace would also rewrite a '.png' that appears inside the name.
        label_file = os.path.splitext(img_file)[0] + '.txt'
        label_path = os.path.join(dst_lbl_dir, label_file)
        create_annotation(label_path, class_id, width=width, height=height)

In [None]:
# One row per (split, class): source images, image folder, label folder, class id, box width, box height.
# Training rows come first, then validation rows.
copy_jobs = [
    (alpha_train, train_dir, train_labels_dir, 0, 0.7, 0.4),
    (beta_train, train_dir, train_labels_dir, 1, 0.6, 0.3),
    (alpha_val, val_dir, val_labels_dir, 0, 0.7, 0.4),
    (beta_val, val_dir, val_labels_dir, 1, 0.6, 0.3),
]

for job_files, job_img_dir, job_lbl_dir, job_class, job_width, job_height in copy_jobs:
    prepare_data(job_files, job_img_dir, job_lbl_dir, class_id=job_class, width=job_width, height=job_height)



6.   Training



In [None]:
# YOLOv5 data file configuration: train/val locations, number of classes (nc) and class names.
# The names are listed in the same order as the class_id values used when the labels
# were written (0 for alpha-helix images, 1 for beta-sheet images).
# NOTE(review): train/val point at the split roots rather than their images/ sub-folders —
# confirm YOLOv5 resolves the images and labels from there.
data_yaml = """
train: /content/yolo-contact-maps/train
val: /content/yolo-contact-maps/val

nc: 2
names: ['alpha-helix', 'beta-sheet']
"""

In [None]:
# Save the dataset configuration as data.yaml inside the dataset root
data_yaml_path = os.path.join(yolo_data_dir, 'data.yaml')
with open(data_yaml_path, 'w') as yaml_file:
    yaml_file.write(data_yaml)

In [None]:
# Clone YOLOv5 repository
!git clone https://github.com/ultralytics/yolov5
%cd yolov5

Cloning into 'yolov5'...
remote: Enumerating objects: 16725, done.[K
remote: Counting objects: 100% (46/46), done.[K
remote: Compressing objects: 100% (39/39), done.[K
remote: Total 16725 (delta 11), reused 32 (delta 7), pack-reused 16679[K
Receiving objects: 100% (16725/16725), 15.31 MiB | 15.22 MiB/s, done.
Resolving deltas: 100% (11490/11490), done.
/content/yolov5


In [None]:
!pip install -r requirements.txt

Collecting gitpython>=3.1.30 (from -r requirements.txt (line 5))
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/207.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting pillow>=10.3.0 (from -r requirements.txt (line 9))
  Downloading pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m61.9 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.32.0 (from -r requirements.txt (line 12))
  Downloading requests-2.32.3-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting ultralyt

In [None]:
# Train the YOLOv5 model with transfer learning, starting from the yolov5s.pt weights
# (image size 640, batch size 16, 50 epochs) on the dataset described by data.yaml.
# NOTE(review): the saved output below reports epochs=100, which does not match --epochs 50,
# so it was produced by a different version of this command — re-run the cell to refresh it.
!python train.py --img 640 --batch 16 --epochs 50 --data /content/yolo-contact-maps/data.yaml --weights yolov5s.pt

2024-07-03 12:12:57.553967: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-03 12:12:57.554021: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-03 12:12:57.555511: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=/content/yolo-contact-maps/data.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=100, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=data/hyps, resume_evolve=None, bucket=, cache=None, image_weights=False, device=, m