In [1]:
import pandas as pd

In [2]:
from pathlib import Path

In [3]:
from functools import reduce

In [4]:
submissions_path = Path('./submissions')

In [5]:
list(submissions_path.rglob('*.csv'))

[PosixPath('submissions/predictions_989.csv'),
 PosixPath('submissions/predictions_988.csv'),
 PosixPath('submissions/predictions_convnext_base_tta_009689.csv'),
 PosixPath('submissions/predictions_resenet34_tta_031095.csv'),
 PosixPath('submissions/predictions_resnet34.csv'),
 PosixPath('submissions/predictions_resnet152_tta_029743.csv'),
 PosixPath('submissions/predictions_convnext_base_tta_0040558.csv'),
 PosixPath('submissions/predictions_987.csv')]

In [6]:
dfs = [pd.read_csv(f) for f in submissions_path.rglob('*.csv')]

In [7]:
for i, df in enumerate(dfs):
    df.rename(columns = {'cultivar':f'cultivar_{i}'}, inplace = True)

In [8]:
len(dfs)

8

In [9]:
dfs[0].head()

Unnamed: 0,filename,cultivar_0
0,1320211956.png,PI_180348
1,1460752465.png,PI_22913
2,1437601570.png,PI_302252
3,1053653999.png,PI_152733
4,937061344.png,PI_63715


In [10]:
dfs[1].head()

Unnamed: 0,filename,cultivar_1
0,1320211956.png,PI_180348
1,1460752465.png,PI_22913
2,1437601570.png,PI_302252
3,1053653999.png,PI_152733
4,937061344.png,PI_63715


In [11]:
(dfs[0]['filename'] == dfs[1]['filename']).describe()

count     23639
unique        1
top        True
freq      23639
Name: filename, dtype: object

In [12]:
df_merged = reduce(lambda left,right: pd.merge(left,right,on=['filename'], how='outer'),
                   dfs)

In [13]:
df_merged.describe()

Unnamed: 0,filename,cultivar_0,cultivar_1,cultivar_2,cultivar_3,cultivar_4,cultivar_5,cultivar_6,cultivar_7
count,23639,23639,23639,23639,23639,23639,23639,23639,23639
unique,23639,100,100,100,100,100,100,100,100
top,1320211956.png,PI_175919,PI_175919,PI_196586,PI_152965,PI_92270,PI_152965,PI_152965,PI_175919
freq,1,706,749,573,641,609,656,538,679


In [14]:
df_merged.head()

Unnamed: 0,filename,cultivar_0,cultivar_1,cultivar_2,cultivar_3,cultivar_4,cultivar_5,cultivar_6,cultivar_7
0,1320211956.png,PI_180348,PI_180348,PI_180348,PI_180348,PI_180348,PI_180348,PI_180348,PI_180348
1,1460752465.png,PI_22913,PI_22913,PI_22913,PI_22913,PI_22913,PI_22913,PI_22913,PI_22913
2,1437601570.png,PI_302252,PI_302252,PI_302252,PI_302252,PI_302252,PI_302252,PI_302252,PI_302252
3,1053653999.png,PI_152733,PI_152733,PI_152733,PI_152733,PI_152733,PI_152733,PI_152733,PI_152733
4,937061344.png,PI_63715,PI_63715,PI_63715,PI_63715,PI_63715,PI_196583,PI_196583,PI_63715


In [15]:
df_merged.filter(regex="^cultivar_", axis=1)

Unnamed: 0,cultivar_0,cultivar_1,cultivar_2,cultivar_3,cultivar_4,cultivar_5,cultivar_6,cultivar_7
0,PI_180348,PI_180348,PI_180348,PI_180348,PI_180348,PI_180348,PI_180348,PI_180348
1,PI_22913,PI_22913,PI_22913,PI_22913,PI_22913,PI_22913,PI_22913,PI_22913
2,PI_302252,PI_302252,PI_302252,PI_302252,PI_302252,PI_302252,PI_302252,PI_302252
3,PI_152733,PI_152733,PI_152733,PI_152733,PI_152733,PI_152733,PI_152733,PI_152733
4,PI_63715,PI_63715,PI_63715,PI_63715,PI_63715,PI_196583,PI_196583,PI_63715
...,...,...,...,...,...,...,...,...
23634,PI_156330,PI_156330,PI_156330,PI_156330,PI_156330,PI_156330,PI_156330,PI_156330
23635,PI_156463,PI_156463,PI_155760,PI_167093,PI_156463,PI_156463,PI_297155,PI_156463
23636,PI_156487,PI_156487,PI_156487,PI_156487,PI_156487,PI_303658,PI_156487,PI_156487
23637,PI_152923,PI_152923,PI_152923,PI_152923,PI_152923,PI_152923,PI_152923,PI_152923


In [16]:
df_merged.filter(regex="^cultivar_", axis=1).iloc[4].describe()

count            8
unique           2
top       PI_63715
freq             6
Name: 4, dtype: object

In [17]:
df_merged.filter(regex="^cultivar_", axis=1).mode(axis=1)[0]

0        PI_180348
1         PI_22913
2        PI_302252
3        PI_152733
4         PI_63715
           ...    
23634    PI_156330
23635    PI_156463
23636    PI_156487
23637    PI_152923
23638    PI_218112
Name: 0, Length: 23639, dtype: object

In [18]:
df_merged['cultivar'] = df_merged.filter(regex="^cultivar_", axis=1).mode(axis=1)[0]

In [19]:
df_merged[['filename', 'cultivar']]

Unnamed: 0,filename,cultivar
0,1320211956.png,PI_180348
1,1460752465.png,PI_22913
2,1437601570.png,PI_302252
3,1053653999.png,PI_152733
4,937061344.png,PI_63715
...,...,...
23634,1263467501.png,PI_156330
23635,1268280871.png,PI_156463
23636,833806909.png,PI_156487
23637,384418396.png,PI_152923


In [20]:
df_final = df_merged[['filename', 'cultivar']]

In [21]:
df_final.to_csv(submissions_path/'ensembled_04.csv', index=False)