---

---

### 0. Project Setup & Obtaining Dataset

In [1]:
# Report whether TensorFlow and PyTorch can each see a GPU
import tensorflow as tf
import torch

# TensorFlow: list of physical GPU devices (an empty list means none were found)
print(f"TensorFlow GPU available: {tf.config.list_physical_devices('GPU')}")

# PyTorch: boolean CUDA availability flag
print(f"PyTorch GPU available: {torch.cuda.is_available()}")

# When CUDA is usable, also show the name of device 0
if torch.cuda.is_available():
    print(f"PyTorch GPU name: {torch.cuda.get_device_name(0)}")

TensorFlow GPU available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
PyTorch GPU available: True
PyTorch GPU name: Tesla T4


In [None]:

# install and update Hugging Face Transformers, Datasets, Accelerate, Evaluate
# Also ensure fsspec and huggingface_hub are up-to-date to resolve common loading issues
!pip install -U transformers datasets accelerate evaluate huggingface_hub fsspec sentencepiece -q

In [None]:
from datasets import load_dataset

# Fetch the ADE-Corpus-V2 classification dataset from the Hugging Face Hub.
# Each sentence carries a label: 0 (not ADE) or 1 (ADE).
dataset = load_dataset(path="SetFit/ade_corpus_v2_classification")

# Confirm the load, then show the dataset summary and its top-level keys
print("\nDataset loaded successfully!", dataset, sep="\n")
print(f"\nKeys in the dataset object: {dataset.keys()}")

In [4]:
# Pull the train and test splits out of the loaded dataset object
train_dataset = dataset['train']
test_dataset = dataset['test']

# First two training examples, one per line
print("\n--- Training Dataset Sample ---", train_dataset[0], train_dataset[1], sep="\n")

# First test example
print("\n--- Test Dataset Sample ---", test_dataset[0], sep="\n")

# Column names, and the feature descriptor of the 'label' column
print(f"\nFeatures (columns) available: {train_dataset.column_names}")
print(f"Label mapping (if available): {train_dataset.features['label']}")


--- Training Dataset Sample ---
{'text': 'On cessation of the injections, the retrocorneal membrane grew rapidly to involve the entire posterior cornea.', 'label': 0, 'label_text': 'Not-Related'}
{'text': 'Median patient age was 52 years.', 'label': 0, 'label_text': 'Not-Related'}

--- Test Dataset Sample ---
{'text': 'The use of somatostatin analog in gastroenteropancreatic tumors other than carcinoid.', 'label': 0, 'label_text': 'Not-Related'}

Features (columns) available: ['text', 'label', 'label_text']
Label mapping (if available): Value(dtype='int64', id=None)


### 1. Exploratory Data Analysis & Initial Preprocessing

