In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
from transformers import DistilBertTokenizer, DistilBertModel
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import train_test_split
import os

In [None]:
# Load the training and test tables from CSV files in the current working directory.
train_df = pd.read_csv(filepath_or_buffer="./train.csv")
test_df = pd.read_csv(filepath_or_buffer="./test.csv")

In [None]:
# Image loading and preprocessing
def get_image_paths(folder, pet_id):
    """Return the paths of every entry in ``folder`` whose name starts with ``pet_id``.

    Args:
        folder: Directory to scan (non-recursive).
        pet_id: String prefix that a file name must start with to be selected.

    Returns:
        A list of ``os.path.join(folder, name)`` paths, ordered by file name.
        The list is empty when nothing matches.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. ``FileNotFoundError``
            when ``folder`` does not exist).
    """
    # os.listdir yields entries in arbitrary, platform-dependent order; sorting
    # makes the result reproducible from run to run. The order is lexicographic,
    # so "x-10.jpg" sorts before "x-2.jpg".
    return [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.startswith(pet_id)
    ]

def process_image(image_path, transform):
    """Load an image, convert it to RGB, apply ``transform`` and add a leading axis.

    Args:
        image_path: Path of the image file to open.
        transform: Callable applied to the RGB ``PIL.Image``; its result must
            provide ``.unsqueeze`` (e.g. a ``torch.Tensor``).

    Returns:
        ``transform(image).unsqueeze(0)``, i.e. the transformed image with a new
        dimension inserted at position 0.
    """
    # Image.open is lazy and keeps the file handle open; the context manager
    # releases it deterministically. convert() returns a new in-memory image,
    # so it remains usable after the file is closed.
    with Image.open(image_path) as image_file:
        rgb_image = image_file.convert("RGB")
    return transform(rgb_image).unsqueeze(0)

# Preprocessing pipeline: resize to 224x224, convert to a tensor, then normalize
# each channel with the statistics below (the commonly used ImageNet values).
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]
image_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ]
)

In [None]:
# Text tokenization and preprocessing
TOKENIZER_CHECKPOINT = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)
def process_text(text, max_length=128):
    """Tokenize ``text`` with the module-level ``tokenizer``.

    The tokenizer is asked to pad to ``max_length``, truncate longer input and
    return PyTorch tensors.

    Args:
        text: Input passed straight to the tokenizer.
        max_length: Length to pad/truncate to.

    Returns:
        Tuple ``(input_ids, attention_mask)`` taken from the tokenizer output.
    """
    encoding = tokenizer(
        text,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors="pt",
    )
    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]
    return input_ids, attention_mask

In [None]:
# Image model: pretrained ResNet-50 whose classifier is replaced by a 256-unit linear head.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in favour of
# `weights=...`; kept unchanged so the cell still runs on older versions — confirm
# the installed version before migrating.
resnet = models.resnet50(pretrained=True)
resnet.fc = nn.Linear(resnet.fc.in_features, 256)
# The new head is not trained in the cells visible here, so its outputs are a random
# linear projection of the backbone features. Re-draw its parameters from a local,
# seeded generator so the projection (and every extracted feature vector) is the same
# after a kernel restart. The range U(-1/sqrt(in_features), 1/sqrt(in_features)) is
# the one nn.Linear documents for its default initialization. A local generator is
# used so that no global seed is set as a side effect.
_head_rng = torch.Generator().manual_seed(0)
_head_bound = resnet.fc.in_features ** -0.5
with torch.no_grad():
    resnet.fc.weight.uniform_(-_head_bound, _head_bound, generator=_head_rng)
    resnet.fc.bias.uniform_(-_head_bound, _head_bound, generator=_head_rng)
resnet.eval()

def extract_image_features(image_paths):
    """Average the ``resnet`` outputs over a collection of images.

    Each path is loaded with ``process_image`` using ``image_transform``, passed
    through the module-level ``resnet`` without gradient tracking, and flattened
    to a 1-D NumPy array.

    Args:
        image_paths: Iterable of image file paths; may be empty.

    Returns:
        The element-wise mean of the per-image vectors, or a zero vector of
        length 256 when ``image_paths`` yields nothing.
    """
    per_image = []
    # Inference only: a single no_grad scope covers the whole loop.
    with torch.no_grad():
        for image_path in image_paths:
            image_tensor = process_image(image_path, image_transform)
            per_image.append(resnet(image_tensor).numpy().flatten())

    if not per_image:
        # Use float32 so the fallback matches the dtype of real features (float32
        # under PyTorch's default dtype). A float64 fallback would silently
        # upcast the whole matrix when rows are stacked with np.array.
        return np.zeros(256, dtype=np.float32)
    return np.mean(per_image, axis=0)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /Users/innasnegurova/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 53.7MB/s]


In [None]:
# Text model: pretrained DistilBERT, switched to evaluation mode.
bert_model = DistilBertModel.from_pretrained("distilbert-base-uncased").eval()

def extract_text_features(text):
    """Embed ``text`` with the module-level ``bert_model``.

    The text is tokenized by ``process_text``, run through the model without
    gradient tracking, and the hidden state at sequence position 0 (the first
    token) is returned.

    Args:
        text: Input forwarded to ``process_text``.

    Returns:
        Flattened NumPy array of ``last_hidden_state[:, 0, :]``.
    """
    ids, mask = process_text(text)
    with torch.no_grad():
        hidden_states = bert_model(ids, attention_mask=mask).last_hidden_state
    first_token_state = hidden_states[:, 0, :]
    return first_token_state.numpy().flatten()

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [None]:
# Build the training inputs: one image-feature row and one text-feature row per
# pet, plus the target vector.
TRAIN_IMAGE_DIR = "/mnt/data/images/train"
X_images = np.array(
    [
        extract_image_features(get_image_paths(TRAIN_IMAGE_DIR, pet_id))
        for pet_id in train_df["PetID"]
    ]
)
# Missing descriptions are replaced by the empty string before embedding.
train_descriptions = train_df["Description"].fillna("")
X_texts = np.array([extract_text_features(description) for description in train_descriptions])
y = train_df["AdoptionSpeed"].values