In [3]:
import torch 
import torch.nn as nn
from torch.utils.data import DataLoader
import timm

import numpy as np
import pandas as pd

import gc
import os
from glob import glob 
from tqdm.notebook import tqdm

from sklearn.preprocessing import OneHotEncoder

from src.dataset import DamageDataset, OrdinalBinDamageDataset, BinaryDamageDataset
from src.agents import EmbedingsAgent, InferenceAgent
from src.utils import estimate_maximum_batch_size, vector2prediction

# Networks

## OLD

### stellar-frost-8

In [2]:
# EfficientNet-B0 with an 11-way output layer.
# NOTE(review): pretrained=True fetches ImageNet weights that load_weights()
# presumably overwrites -- confirm whether pretrained=False would be enough.
net = timm.create_model(
    "efficientnet_b0",
    num_classes=11,
    pretrained=True,
)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

# Wrap the network for inference and restore the stellar-frost-8 checkpoint.
agent = InferenceAgent(net, device)
agent.load_weights('exp/stellar-frost-8/BEST.PTH')

cuda
Weights are loaded from exp/stellar-frost-8/BEST.PTH


In [3]:
# Shape handed to the batch-size estimator together with the loaded model.
shape = (3, 512, 512)
batch_size = estimate_maximum_batch_size(agent.model, device, shape)
print('batch_size :', batch_size)

# Test images, the split file listing them, and the per-image metadata.
root, split, metadata = 'data/test', 'data/splits/test.csv', 'data/test_meta.csv'

dataset = DamageDataset(root, split, metadata)
loader = DataLoader(dataset, num_workers=4, batch_size=batch_size)

batch_size : 128


In [4]:
# Empty submission table: index 'ID', single column 'extent'.
sub = pd.DataFrame(columns=['ID', 'extent']).set_index('ID')

In [5]:
# Gather one small frame per batch and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each iteration.
batch_frames = []
for idx, features, _ in tqdm(loader):
    output = agent.predict(features)
    # Softmax is monotonic, so the arg-max of the raw outputs is the same class.
    prediction = torch.argmax(output, dim=1).cpu().numpy()

    df = pd.DataFrame(data={'ID' : idx, 'extent' : prediction}).set_index('ID')
    batch_frames.append(df)

sub = pd.concat([sub, *batch_frames])

  0%|          | 0/68 [00:00<?, ?it/s]

In [6]:
# Scale the predicted class index by 10 before export (class k -> k * 10).
sub['extent'] = sub['extent'] * 10

In [7]:
# `index` is a boolean switch; the header text for the index column is set
# through `index_label`.
sub.to_csv('data/submission.csv', index_label='ID')

In [9]:
# Sanity check: which distinct extent values ended up in the submission.
np.unique(sub['extent'])

array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100], dtype=object)

### polished-music-16 / dainty-frog-17

In [13]:
# EfficientNet-B0 with a 10-way output layer.
# NOTE(review): pretrained=True fetches ImageNet weights that load_weights()
# presumably overwrites -- confirm whether pretrained=False would be enough.
net = timm.create_model(
    "efficientnet_b0",
    num_classes=10,
    pretrained=True,
)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

# Wrap the network for inference and restore the dainty-frog-17 checkpoint.
agent = InferenceAgent(net, device)
agent.load_weights('exp/dainty-frog-17/BEST.PTH')

cuda
Weights are loaded from exp/dainty-frog-17/BEST.PTH


In [14]:
# Shape handed to the batch-size estimator together with the loaded model.
shape = (3, 512, 512)
batch_size = estimate_maximum_batch_size(agent.model, device, shape)
print('batch_size :', batch_size)

# Test images, the split file listing them, and the per-image metadata.
root, split, metadata = 'data/test', 'data/splits/test.csv', 'data/test_meta.csv'

dataset = DamageDataset(root, split, metadata)
loader = DataLoader(dataset, num_workers=4, batch_size=batch_size)

batch_size : 128


In [15]:
# Empty submission table: index 'ID', single column 'extent'.
sub = pd.DataFrame(columns=['ID', 'extent']).set_index('ID')

In [16]:
# Gather one small frame per batch and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each iteration.
batch_frames = []
for idx, features, _ in tqdm(loader):
    output = agent.predict(features)
    # Softmax is monotonic, so the arg-max of the raw outputs is the same class.
    # The +1 shifts the 0-based class index so predictions start at 1.
    prediction = torch.argmax(output, dim=1).cpu().numpy() + 1

    df = pd.DataFrame(data={'ID' : idx, 'extent' : prediction}).set_index('ID')
    batch_frames.append(df)

sub = pd.concat([sub, *batch_frames])

  0%|          | 0/68 [00:00<?, ?it/s]

In [17]:
meta = pd.read_csv(metadata, index_col='ID')

# Rows whose damage is not 'DR', or whose growth stage is outside F/M/V,
# are forced to extent 0 regardless of the model output.
not_dr = meta['damage'] != 'DR'
other_stage = ~meta['growth_stage'].isin(['F', 'M', 'V'])
subset = meta[not_dr | other_stage]

rows_to_zero = sub.index.isin(subset.index)
sub.loc[rows_to_zero] = 0

In [18]:
# Scale the stored prediction by 10 before export (value k -> k * 10).
sub['extent'] = sub['extent'] * 10

In [19]:
# `index` is a boolean switch; the header text for the index column is set
# through `index_label`.
sub.to_csv('data/submission.csv', index_label='ID')

### likely-lake-25

In [2]:
# EfficientNet-B0 with 9 outputs.
# NOTE(review): pretrained=True fetches ImageNet weights that load_weights()
# presumably overwrites -- confirm whether pretrained=False would be enough.
net = timm.create_model(
    "efficientnet_b0",
    num_classes=9,
    pretrained=True,
)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

# Wrap the network for inference and restore the likely-lake-25 checkpoint.
agent = InferenceAgent(net, device)
agent.load_weights('exp/likely-lake-25/BEST.PTH')

cuda
Weights are loaded from exp/likely-lake-25/BEST.PTH


In [3]:
# Shape handed to the batch-size estimator together with the loaded model.
shape = (3, 512, 512)
batch_size = estimate_maximum_batch_size(agent.model, device, shape)
print('batch_size :', batch_size)

# Test images, the split file listing them, and the per-image metadata.
root, split, metadata = 'data/test', 'data/splits/test.csv', 'data/test_meta.csv'

dataset = OrdinalBinDamageDataset(root, split, metadata)
loader = DataLoader(dataset, num_workers=4, batch_size=batch_size)

batch_size : 128


In [4]:
# Empty submission table: index 'ID', single column 'extent'.
sub = pd.DataFrame(columns=['ID', 'extent']).set_index('ID')

In [5]:
# Gather one small frame per batch and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each iteration.
batch_frames = []
for idx, features, _ in tqdm(loader):
    output = torch.sigmoid(agent.predict(features))
    fired = (output > 0.5).to(torch.int64).cpu()

    # Vectorized decode: 1-based position of the highest output above the
    # threshold in each row, or 0 when no output fired.
    ranks = torch.arange(1, fired.shape[1] + 1)
    prediction = (fired * ranks).max(dim=1).values
    # The +1 shifts the decoded value so the smallest possible prediction is 1.
    prediction = prediction + 1

    # Hand pandas a NumPy array rather than a tensor so the column holds plain integers.
    df = pd.DataFrame(data={'ID' : idx, 'extent' : prediction.numpy()}).set_index('ID')
    batch_frames.append(df)

sub = pd.concat([sub, *batch_frames])

  0%|          | 0/68 [00:00<?, ?it/s]

In [6]:
meta = pd.read_csv(metadata, index_col='ID')

# Rows whose damage is not 'DR', or whose growth stage is outside F/M/V,
# are forced to extent 0 regardless of the model output.
not_dr = meta['damage'] != 'DR'
other_stage = ~meta['growth_stage'].isin(['F', 'M', 'V'])
subset = meta[not_dr | other_stage]

rows_to_zero = sub.index.isin(subset.index)
sub.loc[rows_to_zero] = 0

In [7]:
# Scale the stored prediction by 10 before export (value k -> k * 10).
sub['extent'] = sub['extent'] * 10

In [8]:
# `index` is a boolean switch; the header text for the index column is set
# through `index_label`.
sub.to_csv('data/submission.csv', index_label='ID')

### daily-elevator-38

In [2]:
# EfficientNet-B0 with 9 outputs.
# NOTE(review): pretrained=True fetches ImageNet weights that load_weights()
# presumably overwrites -- confirm whether pretrained=False would be enough.
net = timm.create_model(
    "efficientnet_b0",
    num_classes=9,
    pretrained=True,
)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

# Wrap the network for inference and restore the daily-elevator-38 checkpoint.
agent = InferenceAgent(net, device)
agent.load_weights('exp/daily-elevator-38/BEST.PTH')

cuda
Weights are loaded from exp/daily-elevator-38/BEST.PTH


In [3]:
# Shape handed to the batch-size estimator together with the loaded model.
shape = (3, 512, 512)
batch_size = estimate_maximum_batch_size(agent.model, device, shape)
print('batch_size :', batch_size)

# Test images, the split file listing them, and the per-image metadata.
root, split, metadata = 'data/test', 'data/splits/test.csv', 'data/test_meta.csv'

dataset = OrdinalBinDamageDataset(root, split, metadata)
loader = DataLoader(dataset, num_workers=2, batch_size=batch_size)

batch_size : 32


In [4]:
# Empty submission table: index 'ID', single column 'extent'.
sub = pd.DataFrame(columns=['ID', 'extent']).set_index('ID')

In [5]:
# Gather one small frame per batch and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each iteration.
batch_frames = []
for idx, features, _ in tqdm(loader):
    output = torch.sigmoid(agent.predict(features))
    fired = (output > 0.65).to(torch.int64).cpu()

    # Vectorized decode: 1-based position of the highest output above the
    # threshold in each row, or 0 when no output fired.
    ranks = torch.arange(1, fired.shape[1] + 1)
    prediction = (fired * ranks).max(dim=1).values
    # The +1 shifts the decoded value so the smallest possible prediction is 1.
    prediction = prediction + 1

    # Hand pandas a NumPy array rather than a tensor so the column holds plain integers.
    df = pd.DataFrame(data={'ID' : idx, 'extent' : prediction.numpy()}).set_index('ID')
    batch_frames.append(df)

sub = pd.concat([sub, *batch_frames])

  0%|          | 0/271 [00:00<?, ?it/s]

In [6]:
meta = pd.read_csv(metadata, index_col='ID')

# Rows whose damage is not 'DR', or whose growth stage is outside F/M/V,
# are forced to extent 0 regardless of the model output.
not_dr = meta['damage'] != 'DR'
other_stage = ~meta['growth_stage'].isin(['F', 'M', 'V'])
subset = meta[not_dr | other_stage]

rows_to_zero = sub.index.isin(subset.index)
sub.loc[rows_to_zero] = 0

# Scale the stored prediction by 10 before export (value k -> k * 10).
sub['extent'] = sub['extent'] * 10

In [7]:
# `index` is a boolean switch; the header text for the index column is set
# through `index_label`.
sub.to_csv('data/submission.csv', index_label='ID')

### leafy-shadow-36

In [8]:
# EfficientNet-B0 with 9 outputs.
# NOTE(review): pretrained=True fetches ImageNet weights that load_weights()
# presumably overwrites -- confirm whether pretrained=False would be enough.
net = timm.create_model(
    "efficientnet_b0",
    num_classes=9,
    pretrained=True,
)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

# Wrap the network for inference and restore the leafy-shadow-36 checkpoint.
agent = InferenceAgent(net, device)
agent.load_weights('exp/leafy-shadow-36/BEST.PTH')

cuda
Weights are loaded from exp/leafy-shadow-36/BEST.PTH


In [9]:
# Shape handed to the batch-size estimator together with the loaded model.
shape = (3, 512, 512)
batch_size = estimate_maximum_batch_size(agent.model, device, shape)
print('batch_size :', batch_size)

# Test images, the split file listing them, and the per-image metadata.
root, split, metadata = 'data/test', 'data/splits/test.csv', 'data/test_meta.csv'

dataset = OrdinalBinDamageDataset(root, split, metadata)
loader = DataLoader(dataset, num_workers=2, batch_size=batch_size)

batch_size : 32


In [10]:
# Empty submission table: index 'ID', single column 'extent'.
sub = pd.DataFrame(columns=['ID', 'extent']).set_index('ID')

In [11]:
# Gather one small frame per batch and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each iteration.
batch_frames = []
for idx, features, _ in tqdm(loader):
    output = torch.sigmoid(agent.predict(features))
    fired = (output > 0.7).to(torch.int64).cpu()

    # Vectorized decode: 1-based position of the highest output above the
    # threshold in each row, or 0 when no output fired.
    ranks = torch.arange(1, fired.shape[1] + 1)
    prediction = (fired * ranks).max(dim=1).values
    # The +1 shifts the decoded value so the smallest possible prediction is 1.
    prediction = prediction + 1

    # Hand pandas a NumPy array rather than a tensor so the column holds plain integers.
    df = pd.DataFrame(data={'ID' : idx, 'extent' : prediction.numpy()}).set_index('ID')
    batch_frames.append(df)

sub = pd.concat([sub, *batch_frames])

  0%|          | 0/271 [00:00<?, ?it/s]

In [12]:
meta = pd.read_csv(metadata, index_col='ID')

# Rows whose damage is not 'DR', or whose growth stage is outside F/M/V,
# are forced to extent 0 regardless of the model output.
not_dr = meta['damage'] != 'DR'
other_stage = ~meta['growth_stage'].isin(['F', 'M', 'V'])
subset = meta[not_dr | other_stage]

rows_to_zero = sub.index.isin(subset.index)
sub.loc[rows_to_zero] = 0

# Scale the stored prediction by 10 before export (value k -> k * 10).
sub['extent'] = sub['extent'] * 10

In [13]:
# `index` is a boolean switch; the header text for the index column is set
# through `index_label`.
sub.to_csv('data/submission.csv', index_label='ID')

### worthy-lake-40

In [14]:
# EfficientNet-B0 with 9 outputs.
# NOTE(review): pretrained=True fetches ImageNet weights that load_weights()
# presumably overwrites -- confirm whether pretrained=False would be enough.
net = timm.create_model(
    "efficientnet_b0",
    num_classes=9,
    pretrained=True,
)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

# Wrap the network for inference and restore the worthy-lake-40 checkpoint.
agent = InferenceAgent(net, device)
agent.load_weights('exp/worthy-lake-40/BEST.PTH')

cuda
Weights are loaded from exp/worthy-lake-40/BEST.PTH


In [15]:
# Shape handed to the batch-size estimator together with the loaded model.
shape = (3, 512, 512)
batch_size = estimate_maximum_batch_size(agent.model, device, shape)
print('batch_size :', batch_size)

# Test images, the split file listing them, and the per-image metadata.
root, split, metadata = 'data/test', 'data/splits/test.csv', 'data/test_meta.csv'

dataset = OrdinalBinDamageDataset(root, split, metadata)
loader = DataLoader(dataset, num_workers=2, batch_size=batch_size)

batch_size : 32


In [16]:
# Empty submission table: index 'ID', single column 'extent'.
sub = pd.DataFrame(columns=['ID', 'extent']).set_index('ID')

In [17]:
# Gather one small frame per batch and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each iteration.
batch_frames = []
for idx, features, _ in tqdm(loader):
    output = torch.sigmoid(agent.predict(features))
    fired = (output > 0.7).to(torch.int64).cpu()

    # Vectorized decode: 1-based position of the highest output above the
    # threshold in each row, or 0 when no output fired.
    ranks = torch.arange(1, fired.shape[1] + 1)
    prediction = (fired * ranks).max(dim=1).values
    # The +1 shifts the decoded value so the smallest possible prediction is 1.
    prediction = prediction + 1

    # Hand pandas a NumPy array rather than a tensor so the column holds plain integers.
    df = pd.DataFrame(data={'ID' : idx, 'extent' : prediction.numpy()}).set_index('ID')
    batch_frames.append(df)

sub = pd.concat([sub, *batch_frames])

  0%|          | 0/271 [00:00<?, ?it/s]

In [18]:
meta = pd.read_csv(metadata, index_col='ID')

# Rows whose damage is not 'DR', or whose growth stage is outside F/M/V,
# are forced to extent 0 regardless of the model output.
not_dr = meta['damage'] != 'DR'
other_stage = ~meta['growth_stage'].isin(['F', 'M', 'V'])
subset = meta[not_dr | other_stage]

rows_to_zero = sub.index.isin(subset.index)
sub.loc[rows_to_zero] = 0

# Scale the stored prediction by 10 before export (value k -> k * 10).
sub['extent'] = sub['extent'] * 10

In [19]:
# `index` is a boolean switch; the header text for the index column is set
# through `index_label`.
sub.to_csv('data/submission.csv', index_label='ID')

### Split over season (autumn-haze-44, misunderstood-blaze-49, leafy-shadow-36)

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Collect unreachable Python objects first: tensors that are only kept alive by
# reference cycles still own their CUDA blocks, so empty_cache() cannot hand
# that memory back until the collector has run.
gc.collect()
torch.cuda.empty_cache()

37

In [14]:
def _load_agent(weights_path, num_classes):
    """Create an EfficientNet-B0, wrap it in an InferenceAgent and load a checkpoint.

    Args:
        weights_path: path passed to ``InferenceAgent.load_weights``.
        num_classes: number of outputs requested from ``timm.create_model``.

    Returns:
        ``(model, agent)`` -- the created network and the agent wrapping it.
    """
    model = timm.create_model("efficientnet_b0", pretrained=True, num_classes=num_classes)
    wrapped = InferenceAgent(model, device)
    wrapped.load_weights(weights_path)
    return model, wrapped


# LR seasons each have their own 8-output checkpoint.
lr2020_model, lr2020_agent = _load_agent('exp/autumn-haze-44/BEST.PTH', 8)
lr2021_model, lr2021_agent = _load_agent('exp/misunderstood-blaze-49/BEST.PTH', 8)

# Both SR seasons load the same 9-output checkpoint into separate model instances.
sr2020_model, sr2020_agent = _load_agent('exp/leafy-shadow-36/BEST.PTH', 9)
sr2021_model, sr2021_agent = _load_agent('exp/leafy-shadow-36/BEST.PTH', 9)

Weights are loaded from exp/autumn-haze-44/BEST.PTH
Weights are loaded from exp/misunderstood-blaze-49/BEST.PTH
Weights are loaded from exp/leafy-shadow-36/BEST.PTH
Weights are loaded from exp/leafy-shadow-36/BEST.PTH


In [15]:
# Season codes double as lookup keys for both tables below.
seasons = ('LR2020', 'LR2021', 'SR2020', 'SR2021')

# One inference agent per season.
agents = dict(zip(seasons, (lr2020_agent, lr2021_agent, sr2020_agent, sr2021_agent)))

# Per-season cut-off applied to the sigmoid outputs.
thresholds = dict(zip(seasons, (0.6, 0.45, 0.7, 0.7)))

In [16]:
# Test images and their metadata table, indexed by ID.
root = 'data/test'
metadata = pd.read_csv('data/test_meta.csv', index_col='ID')

# Shape handed to the batch-size estimator.
shape = (3, 512, 512)

In [17]:
# Empty result table: index 'ID', single column 'extent'.
prediction_df = pd.DataFrame(columns=['ID', 'extent']).set_index('ID')

In [18]:
# Per-batch frames from every season, concatenated once after the loops
# (pd.concat inside the loop re-copies all accumulated rows each time).
season_frames = []

for key in tqdm(agents.keys()):
    # Collect unreachable objects before emptying the CUDA cache so the
    # blocks they held can actually be released.
    gc.collect()
    torch.cuda.empty_cache()

    season_agent = agents[key]
    batch_size = estimate_maximum_batch_size(season_agent.model, device, shape)

    # Restrict the dataset to the IDs whose metadata season matches this key.
    split = pd.DataFrame({'ID' : metadata[metadata.season == key].index}).set_index('ID')
    dataset = DamageDataset(root, split, metadata)
    loader = DataLoader(dataset, batch_size=batch_size, num_workers=4)

    for idx, features, _ in tqdm(loader):
        output = torch.sigmoid(season_agent.predict(features))
        fired = (output > thresholds[key]).to(torch.int64).cpu()

        # Vectorized decode: 1-based position of the highest output above the
        # threshold in each row, or 0 when no output fired.
        ranks = torch.arange(1, fired.shape[1] + 1)
        prediction = (fired * ranks).max(dim=1).values
        # The +1 shifts the decoded value so the smallest possible prediction is 1.
        prediction = prediction + 1

        # Hand pandas a NumPy array rather than a tensor so the column holds plain integers.
        df = pd.DataFrame(data={'ID' : idx, 'extent' : prediction.numpy()}).set_index('ID')
        season_frames.append(df)

prediction_df = pd.concat([prediction_df, *season_frames])

# Rows whose damage is not 'DR', or whose growth stage is outside F/M/V, get extent 0.
# A membership mask is used instead of .loc[labels]: label lookup raises KeyError
# for any metadata ID that has no prediction row (e.g. a season without an agent).
index = metadata[np.logical_or(metadata.damage != 'DR', np.logical_not(metadata.growth_stage.isin(['F', 'M', 'V'])))].index
prediction_df.loc[prediction_df.index.isin(index), 'extent'] = 0
prediction_df.extent *= 10

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

In [19]:
# `index` is a boolean switch; the header text for the index column is set
# through `index_label`.
prediction_df.to_csv('data/submission.csv', index_label='ID')

## NEW

### ZINDI-2

In [2]:
# EfficientNet-B0 with 10 outputs.
# NOTE(review): pretrained=True fetches ImageNet weights that load_weights()
# presumably overwrites -- confirm whether pretrained=False would be enough.
net = timm.create_model(
    "efficientnet_b0",
    num_classes=10,
    pretrained=True,
)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

# Wrap the network for inference and restore the ZINDI-2 checkpoint.
agent = InferenceAgent(net, device)
agent.load_weights('exp/ZINDI-2/BEST.PTH')

cuda
Weights are loaded from exp/ZINDI-2/BEST.PTH


In [4]:
# Shape handed to the batch-size estimator together with the loaded model.
shape = (3, 512, 512)
batch_size = estimate_maximum_batch_size(agent.model, device, shape)
print('batch_size :', batch_size)

# Test images plus the split and metadata tables, both indexed by ID.
root = 'data/test'
split, metadata = (
    pd.read_csv(csv_path, index_col='ID')
    for csv_path in ('data/splits/test.csv', 'data/test_meta.csv')
)

dataset = OrdinalBinDamageDataset(root, split, metadata)
loader = DataLoader(dataset, num_workers=1, batch_size=batch_size)

batch_size : 32


In [5]:
# Empty submission table: index 'ID', single column 'extent'.
sub = pd.DataFrame(columns=['ID', 'extent']).set_index('ID')

In [7]:
# Gather one small frame per batch and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each iteration.
batch_frames = []
for idx, features, _ in tqdm(loader):
    output = torch.sigmoid(agent.predict(features))
    fired = (output > 0.6).to(torch.int64).cpu()
    prediction = vector2prediction(fired)

    # np.asarray gives pandas a plain array whether the helper returns a CPU
    # tensor, an ndarray or a list -- TODO confirm vector2prediction's return type.
    df = pd.DataFrame(data={'ID' : idx, 'extent' : np.asarray(prediction)}).set_index('ID')
    batch_frames.append(df)

sub = pd.concat([sub, *batch_frames])

# Scale the decoded value by 10 before export (value k -> k * 10).
sub.extent *= 10

  0%|          | 0/271 [00:00<?, ?it/s]

In [8]:
# `index` is a boolean switch; the header text for the index column is set
# through `index_label`.
sub.to_csv('data/submission.csv', index_label='ID')

### ZINDI-6 + ZINDI-7 / 8

In [24]:
# Both agents run on the same device, so it is resolved once.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Stage 1: single-output network restored from the ZINDI-6 checkpoint.
# NOTE(review): pretrained=True fetches ImageNet weights that load_weights()
# presumably overwrites -- confirm whether pretrained=False would be enough.
net = timm.create_model("efficientnet_b0", pretrained=True, num_classes=1)
binary_agent = InferenceAgent(net, device)
binary_agent.load_weights('exp/ZINDI-6/BEST.PTH')

# Stage 2: 9-output network restored from the ZINDI-7 checkpoint.
# `net` is rebound here and ends up referring to this second model.
net = timm.create_model("efficientnet_b0", pretrained=True, num_classes=9)
agent = InferenceAgent(net, device)
agent.load_weights('exp/ZINDI-7/BEST.PTH')

Weights are loaded from exp/ZINDI-6/BEST.PTH
Weights are loaded from exp/ZINDI-7/BEST.PTH


In [69]:
# Shape handed to the batch-size estimator; the estimate is taken on the
# 9-output agent's model.
shape = (3, 512, 512)
batch_size = estimate_maximum_batch_size(agent.model, device, shape)
print('batch_size :', batch_size)

# Test images plus the split and metadata tables, both indexed by ID.
root = 'data/test'
split, metadata = (
    pd.read_csv(csv_path, index_col='ID')
    for csv_path in ('data/splits/test.csv', 'data/test_meta.csv')
)

dataset = BinaryDamageDataset(root, split, metadata)
loader = DataLoader(dataset, num_workers=4, batch_size=batch_size)

batch_size : 128


In [11]:
# Empty table for the binary stage: index 'ID', single column 'class'.
bin_split = pd.DataFrame(columns=['ID', 'class']).set_index('ID')

In [12]:
# Gather one small frame per batch and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each iteration.
batch_frames = []
for idx, features, _ in tqdm(loader):
    output = torch.sigmoid(binary_agent.predict(features))
    fired = (output > 0.3).to(torch.int64).cpu()
    # Drop the size-1 output dimension and hand pandas a NumPy array rather
    # than a tensor so the column holds plain integers.
    prediction = torch.squeeze(fired, dim=1).numpy()

    df = pd.DataFrame(data={'ID' : idx, 'class' : prediction}).set_index('ID')
    batch_frames.append(df)

bin_split = pd.concat([bin_split, *batch_frames])

  0%|          | 0/68 [00:00<?, ?it/s]

In [71]:
# Second stage: keep only the IDs that the binary stage assigned class 1.
positive_split = split.loc[bin_split['class'] == 1]

dataset = OrdinalBinDamageDataset(root, positive_split, metadata)
loader = DataLoader(dataset, num_workers=4, batch_size=batch_size)

In [72]:
# Empty result table: index 'ID', single column 'extent'.
result = pd.DataFrame(columns=['ID', 'extent']).set_index('ID')

In [73]:
# Gather one small frame per batch and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each iteration.
batch_frames = []
for idx, features, _ in tqdm(loader):
    output = torch.sigmoid(agent.predict(features))
    fired = (output > 0.7).to(torch.int64).cpu()
    prediction = vector2prediction(fired)
    # The +1 shifts the decoded value up by one before scaling.
    prediction += 1

    # np.asarray gives pandas a plain array whether the helper returns a CPU
    # tensor, an ndarray or a list -- TODO confirm vector2prediction's return type.
    df = pd.DataFrame(data={'ID' : idx, 'extent' : np.asarray(prediction)}).set_index('ID')
    batch_frames.append(df)

result = pd.concat([result, *batch_frames])

# Scale the decoded value by 10 before export (value k -> k * 10).
result.extent *= 10

  0%|          | 0/20 [00:00<?, ?it/s]

In [74]:
# IDs that the binary stage assigned class 0 never reach the second model;
# they are added to the result with extent 0.
negative_ids = split.loc[bin_split['class'] == 0].index.values
df = pd.DataFrame(data={'ID' : negative_ids, 'extent' : 0}).set_index('ID')
result = pd.concat([result, df])

# `index` is a boolean switch; the header text for the index column is set
# through `index_label`.
result.to_csv('data/submission.csv', index_label='ID')

In [100]:
def get_costs_proportional(n_classes, distance):
    """Build a pairwise cost matrix over class indices.

    Args:
        n_classes: number of classes; indices ``0 .. n_classes - 1`` are used.
        distance: callable taking two tensors and returning their element-wise
            distance; it receives a column of indices and a flat vector of
            indices, so the result broadcasts to a square matrix.

    Returns:
        Whatever ``distance`` returns for the broadcast pair -- for an
        element-wise distance, an ``(n_classes, n_classes)`` float32 tensor.
    """
    class_ids = torch.arange(n_classes, dtype=torch.float32)
    as_column = class_ids[:, None]
    return distance(as_column, class_ids)

In [110]:
def dist(x, y):
    """Absolute difference between two tensors, element-wise."""
    return torch.abs(x - y)


n_classes = 10

get_costs_proportional(n_classes, dist)

tensor([[0., 1., 2., 3., 4., 5., 6., 7., 8., 9.],
        [1., 0., 1., 2., 3., 4., 5., 6., 7., 8.],
        [2., 1., 0., 1., 2., 3., 4., 5., 6., 7.],
        [3., 2., 1., 0., 1., 2., 3., 4., 5., 6.],
        [4., 3., 2., 1., 0., 1., 2., 3., 4., 5.],
        [5., 4., 3., 2., 1., 0., 1., 2., 3., 4.],
        [6., 5., 4., 3., 2., 1., 0., 1., 2., 3.],
        [7., 6., 5., 4., 3., 2., 1., 0., 1., 2.],
        [8., 7., 6., 5., 4., 3., 2., 1., 0., 1.],
        [9., 8., 7., 6., 5., 4., 3., 2., 1., 0.]])

In [113]:
# Scratch check: string form of the `dist` callable.
str(dist)

'<function <lambda> at 0x155398901b40>'

In [116]:
# Scratch check: torch.cat joins two 1-element tensors into one 1-D tensor.
torch.cat([torch.tensor([1]), torch.tensor([0])])

tensor([1, 0])

In [119]:
# Scratch check: torch.tensor accepts a list of 0-d tensors and builds a 1-D tensor.
torch.tensor([torch.tensor(0), torch.tensor(1)])

tensor([0, 1])