In [1]:
# Enable the autoreload extension so edits to imported project modules are
# picked up without restarting the kernel (mode 2 = reload all modules).
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import h5py
import cv2
from scipy.stats import rankdata
from tqdm import tqdm
from pathlib import Path
from uco.data_loader import pivot_df

In [3]:
# Directory holding the averaged prediction .h5 files read below and the
# rank / selection CSVs this notebook writes.
pseudo_dir = Path('../data/pseudo')

In [4]:
def get_df():
    """Return a one-column frame listing the images in ``gibs.csv``.

    Reads ``../data/raw/gibs.csv``, passes it through ``pivot_df``, resets the
    index and keeps only the ``Image`` column.
    NOTE(review): presumably ``pivot_df`` yields one row per image -- confirm
    against ``uco.data_loader``.
    """
    raw = pd.read_csv('../data/raw/gibs.csv')
    pivoted = pivot_df(raw).reset_index()
    return pivoted[['Image']]

In [16]:
# List the top-level entries stored in the averaged segmentation predictions
# (iterating an h5py file yields its member names).
with h5py.File(pseudo_dir / 'avg-seg-predictions.h5', 'r') as seg_file:
    for group_name in seg_file:
        print(group_name)

deeplabv3_resnet101-DeepLabV3
efficientnet-b0-FPN
efficientnet-b0-Unet
efficientnet-b2-FPN
efficientnet-b2-Unet
efficientnet-b5-FPN
efficientnet-b5-Unet
efficientnet-b6-FPN
inceptionresnetv2-Unet
resnext101_32x8d-FPN
resnext101_32x8d-Unet


In [6]:
# Save prediction ranks based on segmentation models.
# For every class channel c, each model's predictions are summed over the last
# two axes (presumably the spatial dims -- confirm), giving one total per image.
# Totals are ranked per model, the ranks are averaged across models ('mean')
# and that average is ranked once more ('rank').
images = get_df()  # loop-invariant: read the image list once, copy per class
with h5py.File(pseudo_dir / 'avg-seg-predictions.h5', 'r') as f:
    # Skip an 'average' entry if present so only individual models are ranked.
    group_names = [k for k in f.keys() if k != 'average']
    for c in tqdm(range(4)):
        ranks_df = images.copy()
        for k in group_names:
            preds = f[k][:, c, :, :]
            # Two-step reduction kept as-is so the floating-point totals (and
            # therefore the ranks) are unchanged.
            totals = preds.sum(axis=1).sum(axis=1)
            ranks_df[k] = rankdata(totals)
        ranks_df = ranks_df.set_index('Image')
        # Vectorized row mean over the per-model rank columns.
        ranks_df['mean'] = ranks_df.mean(axis=1)
        ranks_df['rank'] = rankdata(ranks_df['mean'])
        # 'Image' is the index here, so it is written as the first CSV column.
        ranks_df.to_csv(pseudo_dir / f'pseudo-segmentation-ranks-c{c}.csv')


  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:58<02:55, 58.47s/it][A
 50%|█████     | 2/4 [01:51<01:53, 56.79s/it][A
 75%|███████▌  | 3/4 [02:44<00:55, 55.79s/it][A
100%|██████████| 4/4 [03:37<00:00, 54.48s/it][A


In [17]:
# List the top-level entries stored in the averaged classification predictions
# (iterating an h5py file yields its member names).
with h5py.File(pseudo_dir / 'avg-clas-predictions.h5', 'r') as clas_file:
    for group_name in clas_file:
        print(group_name)

average
efficientnet-b0-EfficientNet
efficientnet-b2-EfficientNet
efficientnet-b4-EfficientNet
resnext50d_32x4d-TIMM
tv_resnext50_32x4d-TIMM


In [8]:
# Save prediction ranks based on classification models.
# Only the 'average' entry is used; column c of it is ranked and written out
# next to the image names, one CSV per class.
with h5py.File(pseudo_dir / 'avg-clas-predictions.h5', 'r') as f:
    k = 'average'
    for c in range(4):
        out_path = pseudo_dir / f'pseudo-classification-ranks-c{c}.csv'
        ranked = get_df().assign(rank=rankdata(f[k][:, c]))
        ranked.to_csv(out_path, index=False)

In [14]:
# Flag pseudo-label candidates: an image is selected for class c only when its
# rank exceeds the cutoff in BOTH the segmentation and the classification
# ranking for that class. The flags are written to 'selected.csv'.
df = get_df()
n_total = df.shape[0]
n_selected = 1500
# rankdata gives rank 1 to the smallest value, so ranks above this cutoff are
# the top `n_selected` positions (tied values share an averaged rank).
cutoff = n_total - n_selected

for c in range(4):
    df_seg = pd.read_csv(pseudo_dir / f'pseudo-segmentation-ranks-c{c}.csv')
    df_clas = pd.read_csv(pseudo_dir / f'pseudo-classification-ranks-c{c}.csv')

    # The flags are combined row by row, so both rank files must list the same
    # images in the same order as `df`. Fail loudly rather than silently
    # pairing ranks that belong to different images (e.g. stale CSVs).
    for source, ranks_df in (('segmentation', df_seg), ('classification', df_clas)):
        if not np.array_equal(ranks_df['Image'].values, df['Image'].values):
            raise ValueError(
                f'{source} ranks for class {c} do not match the image list from get_df()'
            )

    df[str(c)] = (df_seg['rank'] > cutoff) & (df_clas['rank'] > cutoff)
df.to_csv(pseudo_dir / 'selected.csv', index=False)

In [13]:
# Print how many images are flagged for each class column, then preview the
# first rows of the selection table.
class_columns = [str(c) for c in range(4)]
for column in class_columns:
    print(df[column].sum())
df.head(60)

1339
1376
1329
1308


Unnamed: 0,Image,0,1,2,3
0,2002-01-01-domain-a-MODIS-Terra-CorrectedRefle...,True,False,False,False
1,2002-01-01-domain-b-MODIS-Terra-CorrectedRefle...,True,False,False,False
2,2002-01-01-domain-c-MODIS-Terra-CorrectedRefle...,False,True,True,True
3,2002-01-02-domain-a-MODIS-Terra-CorrectedRefle...,True,False,False,False
4,2002-01-02-domain-b-MODIS-Terra-CorrectedRefle...,True,False,True,False
5,2002-01-02-domain-c-MODIS-Terra-CorrectedRefle...,False,True,False,False
6,2002-01-03-domain-a-MODIS-Terra-CorrectedRefle...,True,True,False,False
7,2002-01-03-domain-b-MODIS-Terra-CorrectedRefle...,False,False,False,False
8,2002-01-03-domain-c-MODIS-Terra-CorrectedRefle...,False,True,False,False
9,2002-01-04-domain-a-MODIS-Terra-CorrectedRefle...,True,True,False,False
