# Ensembling: find the most frequent label for each sample from public notebooks

This notebook builds an automated ensemble from the predictions of the most relevant public notebooks.
Please give your upvotes to those original notebooks.

In [None]:
import os
import numpy as np
import pandas as pd
from scipy import stats
import plotly.express as px

In [None]:
# Name of the label column read from each prediction file and from the
# submission frame.
targetName = 'Cover_Type'
# Folder holding the official competition files.
competitionDir = '/kaggle/input/tabular-playground-series-dec-2021'
# Load the submission template from competitionDir itself so the location is
# defined in one place and does not depend on the current working directory.
submission = pd.read_csv(os.path.join(competitionDir, 'sample_submission.csv'))

# Import any number of public notebooks to update the ensemble prediction 

In [None]:
# Collect the target column from every CSV found under /kaggle/input,
# excluding files that sit directly in the competition folder.
preds = []
for dirname, dirnames, filenames in os.walk('/kaggle/input'):
    # Sort in place so the traversal (and hence the order of preds) is
    # reproducible; the directory listing order is otherwise arbitrary.
    dirnames.sort()
    for filename in sorted(filenames):
        if dirname == competitionDir or '.csv' not in filename:
            continue
        path = os.path.join(dirname, filename)
        df = pd.read_csv(path)
        # Attached notebooks may ship CSVs that are not predictions; skip
        # anything without the target column instead of failing on it.
        if targetName not in df.columns:
            print(f'Skipping {path}: no {targetName!r} column')
            continue
        preds.append(df[targetName])

# Save ensemble prediction to csv

In [None]:
# Majority vote: for each sample keep the label predicted most often across
# the collected prediction files (ties are resolved by scipy.stats.mode).
if not preds:
    raise ValueError('No prediction files were found to ensemble.')
# One row per prediction file; assumes every file has the same number of
# rows in the same order as the sample submission -- TODO confirm.
votes = np.array(preds)
# np.ravel flattens the result whether or not the installed SciPy version
# keeps the reduced axis in the returned mode array.
submission[targetName] = np.ravel(stats.mode(votes, axis=0)[0])
submission.to_csv("submission.csv", index=False)

# Distribution of the predicted classes

In [None]:
# Bar chart of how many samples were assigned to each class, using the
# natural log of the counts (presumably so rare classes stay visible).
class_counts = submission[targetName].value_counts()
target_df = pd.DataFrame(np.log(class_counts)).reset_index()
target_df.columns = [targetName, 'Log count']
# Use targetName for the x axis: the column was just named from it, so a
# hard-coded label would stop matching if targetName ever changed.
fig = px.bar(data_frame=target_df,
             x=targetName,
             y='Log count',
             color='Log count',
             color_continuous_scale='Emrld')
fig.show()