In [12]:
from transformers import SiglipForImageClassification, AutoImageProcessor
import os
import pandas as pd
from PIL import Image
from datasets import Dataset
from transformers import Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch

In [13]:
! apt-get install -y gdown

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
E: Unable to locate package gdown


In [14]:
# download zip from google drive and unzip contents
! gdown --id 1rbISVuHbT_AJPw4wv4ywLym3TLVuGLDH      

Downloading...
From (original): https://drive.google.com/uc?id=1rbISVuHbT_AJPw4wv4ywLym3TLVuGLDH
From (redirected): https://drive.google.com/uc?id=1rbISVuHbT_AJPw4wv4ywLym3TLVuGLDH&confirm=t&uuid=cc14b690-55c4-4f67-883d-d84889fdd616
To: /kaggle/working/frames_and_annotations.zip
100%|██████████████████████████████████████| 1.14G/1.14G [00:28<00:00, 39.8MB/s]


In [15]:
# Extract every member of the downloaded archive into the ./frames directory
import zipfile

with zipfile.ZipFile("./frames_and_annotations.zip", mode="r") as archive:
    archive.extractall(path="frames")


In [16]:
def _to_local_path(relative_path):
    """Turn a Windows-style relative path from the CSV into a path under ./frames."""
    return os.path.join("frames", relative_path.replace("\\", "/"))


# Annotation table: one row per frame with its image path and class name
df = pd.read_csv("./frames/frames_annotations.csv")

# Normalise path separators and prefix the extraction directory
df["image"] = df["image"].map(_to_local_path)

# Encode the two class names as integer ids
df["label"] = df["label"].map({"not_taking_medication": 0, "taking_medication": 1})

# Wrap the frame in a Hugging Face Dataset
dataset = Dataset.from_pandas(df)

In [17]:
# Load the pretrained backbone with a freshly initialised 2-class head.
# Only relabelling the config later leaves the original classifier weights in
# place, which produces a checkpoint whose config (2 labels) disagrees with its
# head and cannot be reloaded (size mismatch on the classifier bias).
# ignore_mismatched_sizes=True lets from_pretrained drop the old head weights
# and create a new one sized by num_labels.
model = SiglipForImageClassification.from_pretrained(
    "prithivMLmods/Human-Action-Recognition",
    num_labels=2,
    id2label={0: "not_taking_medication", 1: "taking_medication"},
    label2id={"not_taking_medication": 0, "taking_medication": 1},
    ignore_mismatched_sizes=True,
)
processor = AutoImageProcessor.from_pretrained("prithivMLmods/Human-Action-Recognition")

In [18]:
def preprocess(example):
    """Convert one annotation row into model-ready inputs.

    Args:
        example: mapping with an "image" file path and an integer "label".

    Returns:
        dict with "pixel_values" (the processor output for this single image,
        batch dimension removed) and "labels" (the row's label, unchanged).
    """
    # Context manager closes the underlying file as soon as the pixels are
    # converted; convert() returns an independent in-memory image.
    with Image.open(example["image"]) as img:
        image = img.convert("RGB")
    inputs = processor(images=image, return_tensors="pt")
    return {
        # Index the batch axis explicitly; a bare squeeze() would also drop
        # any other size-1 dimension.
        "pixel_values": inputs["pixel_values"][0],
        "labels": example["label"],
    }

In [19]:
# Run the image processor over every row; the raw path/label columns are
# replaced by "pixel_values" and "labels".
dataset = dataset.map(preprocess, remove_columns=["image", "label"])

# Fixed seed so the held-out 10% is the same on every run; otherwise frames
# used for training in one session can end up in the test split of the next.
dataset = dataset.train_test_split(test_size=0.1, seed=42)

Map:   0%|          | 0/850 [00:00<?, ? examples/s]

In [10]:
def compute_metrics(eval_pred):
    """Compute binary classification metrics for the Trainer.

    Args:
        eval_pred: a (logits, labels) pair. Logits may be wrapped in a tuple,
            and either element may be a torch.Tensor or an array.

    Returns:
        dict with "accuracy", "precision", "recall" and "f1". Precision, recall
        and f1 use average="binary" and report 0 when undefined.
    """
    logits, labels = eval_pred
    # Some models return a tuple of outputs; the logits are the first entry
    if isinstance(logits, tuple):
        logits = logits[0]
    # Move tensors to host memory as numpy arrays for sklearn
    if isinstance(logits, torch.Tensor):
        logits = logits.detach().cpu().numpy()
    if isinstance(labels, torch.Tensor):
        labels = labels.detach().cpu().numpy()
    preds = np.argmax(logits, axis=1)
    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average="binary", zero_division=0
    )
    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }

In [None]:
# Record the class names on the model config.
# NOTE(review): this only changes config metadata; it does not by itself
# change the shape of the classification head — confirm the head has 2 outputs.
model.config.label2id = dict(not_taking_medication=0, taking_medication=1)
model.config.id2label = dict(enumerate(["not_taking_medication", "taking_medication"]))

training_args = TrainingArguments(
    # where checkpoints go and which Hub repo id is associated with the run
    output_dir="./HAR-medication-finetuned",
    hub_model_id="Adekiii/HAR-medication-finetuned",
    # schedule
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # evaluate and checkpoint once per epoch
    eval_strategy="epoch",
    save_strategy="epoch",
    # logging
    logging_dir="./logs",
    logging_steps=10,
    report_to=["none"],
)

trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
)

In [None]:
# Prefer CUDA when a GPU is visible, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

In [None]:
# Run fine-tuning with the Trainer built from training_args
trainer.train()

In [None]:
# Write the fine-tuned model and the image processor into the same folder
trainer.save_model(output_dir="./HAR-med-finetunedv2")
processor.save_pretrained(save_directory="./HAR-med-finetunedv2")

In [None]:
from huggingface_hub import HfApi

# Never paste the token into the notebook: it would be saved with the file and
# leak when the notebook is shared. Read it from the HF_TOKEN environment
# variable instead; when unset, HfApi falls back to the locally stored login.
access_token = os.environ.get("HF_TOKEN")

api = HfApi(token=access_token)
# Upload the folder written by the save step as a model repository
api.upload_folder(
    folder_path="HAR-med-finetunedv2",
    repo_id="tam6/HAR-medication-finetunedv2", # change with own repo
    repo_type="model",
)

In [20]:
# Fetch the fine-tuned processor and model back from the Hugging Face Hub.
# NOTE(review): the recorded run of this cell fails with a classifier size
# mismatch (checkpoint bias has 15 entries, model built from config expects 2);
# presumably the uploaded checkpoint was trained without resizing the head —
# confirm and re-upload a checkpoint whose head matches its config.
finetuned_repo = "Adekiii/HAR-medication-finetuned"
processor = AutoImageProcessor.from_pretrained(finetuned_repo)
model = SiglipForImageClassification.from_pretrained(finetuned_repo)

RuntimeError: Error(s) in loading state_dict for Linear:
	size mismatch for bias: copying a param with shape torch.Size([15]) from checkpoint, the shape in current model is torch.Size([2]).

In [22]:
# Grab the "test" split and print its summary (column names and row count)
test_data = dataset["test"]
print(test_data)

Dataset({
    features: ['pixel_values', 'labels'],
    num_rows: 85
})


In [None]:
# Print the summary of the full dataset object
print(dataset)

In [None]:
# Evaluate the loaded model on the held-out split, one frame at a time.
predicted_labels = []
true_labels = []

model.eval()  # inference mode: disables dropout and similar training-only behaviour
model_device = next(model.parameters()).device

# The mapped dataset stores pixel values as nested lists; with_format("torch")
# makes each row come back as tensors. The columns are "pixel_values" and
# "labels" (see the printed dataset features).
with torch.no_grad():  # no gradients needed for evaluation
    for example in test_data.with_format("torch"):
        # Add the batch dimension the model expects and match the model's device
        pixel_values = example["pixel_values"].unsqueeze(0).to(model_device)
        outputs = model(pixel_values=pixel_values)
        predicted_labels.append(outputs.logits.argmax(-1).item())
        true_labels.append(int(example["labels"]))

precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels, predicted_labels, average="binary", zero_division=0
)

In [None]:
# Report the F1 score computed by the evaluation cell
print(f"f1 score on test set: {f1}")