In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cs-480-2024-spring/data/sample_submission.csv
/kaggle/input/cs-480-2024-spring/data/target_name_meta.tsv
/kaggle/input/cs-480-2024-spring/data/train.csv
/kaggle/input/cs-480-2024-spring/data/test.csv
/kaggle/input/cs-480-2024-spring/data/train_images/196411326.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/192107575.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/193949843.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/195104173.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/195545915.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/63674209.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/133057159.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/193023712.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/196002408.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/190304633.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/169455664.jpeg
/kaggle/input/cs-480-2024-spring/data/train_images/194889424.

In [3]:
# ! pip install xformers

In [4]:
import torch
import torchvision.transforms as transforms
from PIL import Image

DATA_FOLDER = "/kaggle/input/cs-480-2024-spring/data/"
dinov2_vitb14 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14')
dinov2_vitb14.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  from .autonotebook import tqdm as notebook_tqdm
Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vits14_pretrain.pth
100%|██████████| 84.2M/84.2M [00:00<00:00, 202MB/s] 


In [90]:
# define dataset and model
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as torch_models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image


class PlantDatasetUnloaded(Dataset):
    def __init__(self, train_df, target_names, img_folder, transform=None):
        self.transform = transform
        self.train_df = train_df
        self.target_names = target_names
        self.img_folder = img_folder
    
    def __len__(self):
        return len(self.train_df)

    def __getitem__(self, idx):
        row = self.train_df.iloc[idx]
        target = self.train_df.loc[idx, self.target_names]
        id_ = self.train_df.loc[idx, "id"]
        ancillary_data = row.drop(self.target_names + ["id"])
        filename = str(id_) + ".jpeg"
        image_path = os.path.join(self.img_folder, filename)
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        
        
        return image, id_, torch.tensor(ancillary_data.values, dtype=torch.float32), torch.tensor(target.values, dtype=torch.float32)

In [91]:
train_df = pd.read_csv(DATA_FOLDER + "train.csv")
train_ancillary_data = train_df.iloc[:, :-6]
train_targets = train_df.iloc[:, -6:]

In [97]:
def get_img_embedding(model, images):  
    return model(images)

In [98]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [99]:
train_images_dir = DATA_FOLDER + "train_images"
test_images_dir = DATA_FOLDER + "test_images"


In [101]:
# Initialize training dataset and dataloader
target_names = ["X4_mean", "X11_mean", "X18_mean", "X26_mean", "X50_mean", "X3112_mean"]
train_dataset = PlantDatasetUnloaded(
    train_df, target_names, train_images_dir, transform=train_transform
)
train_dataloader = DataLoader(train_dataset, batch_size=32)

In [104]:
X_train = []
y_train = []
for idx, data in enumerate(train_dataloader):
    images, image_ids, ancillary_data, targets = data
    embedding = get_img_embedding(dinov2_vitb14, images)
    concat_input = torch.cat((embedding, ancillary_data), dim=1)
    concat_input = concat_input.detach().cpu().numpy()
    X_train.append(concat_input)
    y_train.extend(targets)
    if idx % 10 == 0: 
        print(idx, "/", len(train_dataloader))
X_train = np.vstack(X_train)

0 / 1356
10 / 1356
20 / 1356
30 / 1356
40 / 1356
50 / 1356
60 / 1356
70 / 1356
80 / 1356
90 / 1356
100 / 1356
110 / 1356
120 / 1356
130 / 1356
140 / 1356
150 / 1356
160 / 1356
170 / 1356
180 / 1356
190 / 1356
200 / 1356
210 / 1356
220 / 1356
230 / 1356
240 / 1356
250 / 1356
260 / 1356
270 / 1356
280 / 1356
290 / 1356
300 / 1356
310 / 1356
320 / 1356
330 / 1356
340 / 1356
350 / 1356
360 / 1356
370 / 1356
380 / 1356
390 / 1356
400 / 1356
410 / 1356
420 / 1356
430 / 1356
440 / 1356
450 / 1356
460 / 1356
470 / 1356
480 / 1356
490 / 1356
500 / 1356
510 / 1356
520 / 1356
530 / 1356
540 / 1356
550 / 1356
560 / 1356
570 / 1356
580 / 1356
590 / 1356
600 / 1356
610 / 1356
620 / 1356
630 / 1356
640 / 1356
650 / 1356
660 / 1356
670 / 1356
680 / 1356
690 / 1356
700 / 1356
710 / 1356
720 / 1356
730 / 1356
740 / 1356
750 / 1356
760 / 1356
770 / 1356
780 / 1356
790 / 1356
800 / 1356
810 / 1356
820 / 1356
830 / 1356
840 / 1356
850 / 1356
860 / 1356
870 / 1356
880 / 1356
890 / 1356
900 / 1356
910 / 1356

In [17]:
# val_image_embeddings = []
# for i in range(len(X_val)): 
#     image_id = X_val.iloc[i].id
#     img_embedding = id_to_embedding[image_id]
#     ancillary_data = list(X_val.iloc[i].drop("id"))
#     concat_input = img_embedding + ancillary_data
#     val_image_embeddings.append(concat_input)

In [105]:
! pip install xgboost

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [106]:
import xgboost as xgb
from sklearn.multioutput import MultiOutputRegressor

# Create XGBoost model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror')

# Wrap with MultiOutputRegressor
multi_output_model = MultiOutputRegressor(xgb_model)
multi_output_model.fit(X_train, y_train)

In [107]:
from sklearn.metrics import r2_score
def calculate_r2_score(y_true, y_pred):
    y_true = y_true
    y_pred = y_pred
    return r2_score(y_true, y_pred)

In [108]:
test_ancillary_data = pd.read_csv("/kaggle/input/cs-480-2024-spring/data/test.csv")

In [115]:
class TestDataset(Dataset):
    def __init__(self, test_df, img_folder, transform=None):
        self.transform = transform
        self.test_df = test_df
        self.img_folder = img_folder
    
    def __len__(self):
        return len(self.test_df)

    def __getitem__(self, idx):
        row = self.test_df.iloc[idx]
        id_ = self.test_df.loc[idx, "id"]
        ancillary_data = row.drop("id")
        filename = str(id_) + ".jpeg"
        image_path = os.path.join(self.img_folder, filename)
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        
        
        return image, id_, torch.tensor(ancillary_data.values, dtype=torch.float32)

In [116]:
# Initialize training dataset and dataloader
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_dataset = TestDataset(
    test_ancillary_data, test_images_dir, transform=test_transform
)
test_dataloader = DataLoader(test_dataset, batch_size=32)

In [118]:
# 
X_test = []
test_id_data = []
for idx, data in enumerate(test_dataloader):
    images, image_ids, ancillary_data = data
    embedding = get_img_embedding(dinov2_vitb14, images)
    concat_input = torch.cat((embedding, ancillary_data), dim=1)
    concat_input = concat_input.detach().cpu().numpy()
    X_test.append(concat_input)
    test_id_data.extend(image_ids.numpy())
    if idx % 50 == 0: 
        print(idx, "/", len(test_dataloader))
X_test = np.vstack(X_test)

0 / 200
50 / 200
100 / 200
150 / 200


In [119]:
# Make predictions
y_pred = multi_output_model.predict(X_test)

In [122]:
col_labels = train_df.iloc[:, -6:].columns.values
predictions_df = pd.DataFrame(y_pred, columns=col_labels)
predictions_df.insert(0, 'id', test_id_data)
predictions_df.to_csv('predictions.csv', index=False)