In [1]:
import os
import glob

import numpy as np
import pandas as pd

In [2]:
# Collect every model's prediction CSV from the input directories.
# Sorting gives a stable order; later cells pair these files with `weights`
# by position.
matched_files = glob.glob('../input/*/*_prediction*.csv')
pred_paths = sorted(matched_files)
pred_paths

['../input/fastai2-resnet101/fastai2_prediction.csv',
 '../input/vgg16/vgg16_predictions.csv']

In [3]:
# One blending weight per prediction file, in the same order as `pred_paths`
# (the two lists are zipped together by position further down).
# NOTE(review): the values look like per-model scores -- confirm their source.
weights = [
    0.98731,  # pairs with pred_paths[0]
    0.94963,  # pairs with pred_paths[1]
]
weights

[0.98731, 0.94963]

In [4]:
# Load each prediction file and order its rows by image_id with a fresh
# 0..n-1 index, so every frame shares the same row order.
dfs = [
    pd.read_csv(path).sort_values(by='image_id').reset_index(drop=True)
    for path in pred_paths
]
dfs

[        image_id  bacterial_leaf_blight  bacterial_leaf_streak  \
 0     200001.jpg           2.864758e-05           1.162965e-06   
 1     200002.jpg           1.278784e-06           4.071217e-08   
 2     200003.jpg           1.140360e-05           5.341411e-06   
 3     200004.jpg           4.141745e-04           3.266747e-04   
 4     200005.jpg           2.544002e-04           5.678924e-06   
 ...          ...                    ...                    ...   
 3464  203465.jpg           1.433615e-07           1.588797e-06   
 3465  203466.jpg           2.398059e-05           2.581897e-07   
 3466  203467.jpg           8.803228e-07           1.477485e-07   
 3467  203468.jpg           4.722792e-08           9.993249e-01   
 3468  203469.jpg           2.698272e-10           2.878642e-13   
 
       bacterial_panicle_blight         blast    brown_spot    dead_heart  \
 0                 2.744146e-06  7.535722e-07  1.351228e-04  1.414751e-05   
 1                 2.363348e-07  6.78394

In [5]:
# The class score columns are every column of the first prediction frame
# except the image identifier.
first_frame_columns = dfs[0].columns
class_names = first_frame_columns.drop('image_id')
class_names

Index(['bacterial_leaf_blight', 'bacterial_leaf_streak',
       'bacterial_panicle_blight', 'blast', 'brown_spot', 'dead_heart',
       'downy_mildew', 'hispa', 'normal', 'tungro'],
      dtype='object')

In [6]:
# Force every class score into the closed interval [0, 1]; each frame in
# `dfs` is modified in place.
for pred_df in dfs:
    raw_scores = pred_df[class_names]
    pred_df[class_names] = np.clip(raw_scores, 0, 1)

In [7]:
# Start from the competition's sample submission and add one zero-filled
# column per class to accumulate the blended scores.
submit = pd.read_csv('../input/paddy-disease-classification/sample_submission.csv')
submit[class_names] = 0

# sort_values() returns a new frame rather than sorting in place, so the
# result must be assigned back. Without the assignment `submit` keeps the
# file's row order while every frame in `dfs` is sorted by image_id, and the
# index-aligned additions in later cells could attach scores to the wrong image.
submit = submit.sort_values(by='image_id').reset_index(drop=True)
submit

Unnamed: 0,image_id,label,bacterial_leaf_blight,bacterial_leaf_streak,bacterial_panicle_blight,blast,brown_spot,dead_heart,downy_mildew,hispa,normal,tungro
0,200001.jpg,,0,0,0,0,0,0,0,0,0,0
1,200002.jpg,,0,0,0,0,0,0,0,0,0,0
2,200003.jpg,,0,0,0,0,0,0,0,0,0,0
3,200004.jpg,,0,0,0,0,0,0,0,0,0,0
4,200005.jpg,,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
3464,203465.jpg,,0,0,0,0,0,0,0,0,0,0
3465,203466.jpg,,0,0,0,0,0,0,0,0,0,0
3466,203467.jpg,,0,0,0,0,0,0,0,0,0,0
3467,203468.jpg,,0,0,0,0,0,0,0,0,0,0


In [8]:
# weighted average
# zip() stops at the shorter input while the normaliser below sums every
# weight, so a length mismatch would silently skew the blend -- fail loudly.
if len(dfs) != len(weights):
    raise ValueError(
        f'expected one weight per prediction file, got {len(weights)} weights '
        f'for {len(dfs)} files'
    )

# Build the blend in a fresh accumulator and assign it afterwards instead of
# adding into `submit` in place: re-running this cell then reproduces the same
# values rather than stacking (and re-dividing) the previous run's result.
# pandas aligns the frames on their row index when adding.
weighted_sum = 0
for df, weight in zip(dfs, weights):
    weighted_sum = weighted_sum + df[class_names] * weight

# Normalise by the total weight so the result is a weighted mean.
submit[class_names] = weighted_sum / np.sum(weights)

In [9]:
# rank average
# The ranks are taken over the per-model `weights` (not over the predictions),
# then scaled to sum to 1 and used as blending coefficients.
# NOTE(review): this adds onto whatever `submit[class_names]` already holds
# (the weighted average when the cells run in order), and re-running the cell
# adds the blend again -- confirm that stacking is intended.
from scipy.stats import rankdata

ranking_weights = rankdata(weights)
rank_total = np.sum(ranking_weights)
ranking_weights /= rank_total

for pred_df, rank_weight in zip(dfs, ranking_weights):
    rank_contribution = pred_df[class_names] * rank_weight
    submit[class_names] = submit[class_names] + rank_contribution

In [10]:
# Pick, for every row, the class whose blended score is largest.
blended_scores = np.array(submit[class_names])
best_class_idx = np.argmax(blended_scores, axis=1)
submit['label'] = class_names[best_class_idx]

# Only the image id and the predicted label are written to the submission file.
submission_columns = ['image_id', 'label']
submit[submission_columns].to_csv('submission.csv', index=False)