In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import polars as pl

In [26]:
# One Kaggle dataset per model run (batches 006-010); each one ships a
# submission.csv holding that run's predictions.
(
    sub_path_006,
    sub_path_007,
    sub_path_008,
    sub_path_009,
    sub_path_010,
) = (
    '/kaggle/input/leap-pytorch-1m-f64-batch006/submission.csv',
    '/kaggle/input/leap-pytorch-1m-f64-batch007/submission.csv',
    '/kaggle/input/leap-pytorch-1m-f64-batch008/submission.csv',
    '/kaggle/input/leap-pytorch-1m-f64-batch009/submission.csv',
    '/kaggle/input/leap-pytorch-1m-f64-batch010/submission.csv',
)

# Submission files to combine, in batch order
submission_paths = [sub_path_006, sub_path_007, sub_path_008, sub_path_009, sub_path_010]

In [27]:
# Read the sample_id column from the competition's sample submission file
# (not from one of the model submission files). Only that single column is
# requested, so the prediction columns of the CSV are not materialised.
# The polars Series is converted to a pandas Series in the same expression,
# so the name is never bound to two different types.
sample_id_col = pl.read_csv(
    '/kaggle/input/leap-atmospheric-physics-ai-climsim/sample_submission.csv',
    columns=['sample_id'],
    n_threads=1,
)['sample_id'].to_pandas()

In [29]:
# Column names of the first submission file; nrows=0 parses the header
# line only, so no data rows are loaded.
header = list(
    pd.read_csv(submission_paths[0], nrows=0).columns
)

In [4]:
def ensemble_submissions(submission_paths):
    """
    Average the predictions of several submission files element-wise.

    Each file is read with ``pl.read_csv``; its first column (``sample_id``)
    is dropped and the remaining columns are added to a running float64 sum,
    which is divided by the number of files at the end.

    Parameters
    ----------
    submission_paths : list of str
        File paths to submission CSV files. Every file must have the same
        column names in the same order and the same number of rows as the
        first one. Rows are averaged by position; the sample_id values
        themselves are not compared across files.

    Returns
    -------
    np.ndarray
        float64 array of shape (n_rows, n_columns - 1) holding the mean
        prediction across all files.

    Raises
    ------
    ValueError
        If ``submission_paths`` is empty, or if a file's column names or
        prediction shape differ from those of the first file.
    """
    submission_paths = list(submission_paths)
    if not submission_paths:
        # Without this guard the final division would fail on None / 0.
        raise ValueError("submission_paths must contain at least one file")

    reference_columns = None
    ensemble_predictions = None
    for path in submission_paths:
        submission_df = pl.read_csv(path)
        columns = list(submission_df.columns)
        predictions = submission_df[:, 1:].to_numpy()  # Exclude the first column (sample_id)

        # Drop the frame as soon as the array is extracted to free up memory
        del submission_df

        if ensemble_predictions is None:
            reference_columns = columns
            # Accumulate in float64 so the in-place division below is valid
            # even if a file happens to contain integer-only columns.
            ensemble_predictions = predictions.astype(np.float64, copy=False)
            if not ensemble_predictions.flags.writeable:
                # In-place accumulation needs a writable buffer.
                ensemble_predictions = ensemble_predictions.copy()
        else:
            if columns != reference_columns:
                raise ValueError(
                    f"Columns of {path!r} do not match those of {submission_paths[0]!r}"
                )
            if predictions.shape != ensemble_predictions.shape:
                # Reject explicitly: NumPy could otherwise broadcast a
                # smaller array into the sum without any error.
                raise ValueError(
                    f"Predictions in {path!r} have shape {predictions.shape}, "
                    f"expected {ensemble_predictions.shape}"
                )
            ensemble_predictions += predictions

        # Release this file's array before the next file is read
        del predictions

    # Average ensemble predictions
    ensemble_predictions /= len(submission_paths)

    return ensemble_predictions


In [5]:
# Average the predictions of every file listed in submission_paths
ensemble_preds = ensemble_submissions(submission_paths=submission_paths)

In [30]:
# Wrap the averaged array in a DataFrame; header[0] is sample_id, so only
# the remaining names are used as column labels.
ensemble_df = pd.DataFrame(
    data=ensemble_preds,
    columns=header[1:],
)

ensemble_df

Unnamed: 0,ptend_t_0,ptend_t_1,ptend_t_2,ptend_t_3,ptend_t_4,ptend_t_5,ptend_t_6,ptend_t_7,ptend_t_8,ptend_t_9,...,ptend_v_58,ptend_v_59,cam_out_NETSW,cam_out_FLWDS,cam_out_PRECSC,cam_out_PRECC,cam_out_SOLS,cam_out_SOLL,cam_out_SOLSD,cam_out_SOLLD
0,-0.273603,-0.572617,-0.420769,-0.674980,-0.842079,-0.936971,-0.962081,-0.969052,-0.966484,-0.969105,...,-0.076521,0.052478,0.021993,5.208791,0.027036,0.005094,-0.003307,0.005161,0.005335,-0.020650
1,-0.207653,-0.482115,-0.389808,-0.685726,-0.896880,-0.981336,-0.971453,-0.964379,-0.971159,-0.993728,...,-0.060662,0.444818,0.049124,5.164723,0.047321,0.169649,0.008765,0.013382,0.008571,0.005768
2,-0.294149,-1.009206,-0.592350,-0.657142,-0.827764,-0.942111,-0.968362,-0.966161,-0.943443,-0.929282,...,-0.132954,-0.023342,0.001039,5.740270,0.006489,-0.020462,-0.007558,-0.018947,-0.004631,-0.017838
3,-0.344701,-0.846254,-0.594204,-0.659959,-0.833907,-0.977672,-1.017021,-0.996800,-0.945251,-0.929675,...,-0.190573,-0.225404,0.004372,5.785780,0.027768,0.266417,0.025222,0.010669,0.009350,-0.028798
4,-0.242013,-0.274682,-0.378127,-0.745073,-0.959155,-1.031197,-1.006845,-0.987113,-0.976434,-1.008134,...,0.714799,0.123089,0.056226,5.097706,-0.003922,0.153527,-0.002314,-0.010504,0.028519,0.049310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
624995,1.220584,0.713749,1.057947,1.194688,1.350079,1.385391,1.351001,1.311565,1.315815,1.349023,...,0.115226,0.141908,2.194750,5.372446,0.023762,0.052938,2.068975,2.066922,1.593797,1.034804
624996,1.288521,0.736400,1.062245,1.254386,1.316720,1.310882,1.206583,1.135950,1.289046,1.433324,...,-0.066142,0.002354,1.787373,5.744978,0.019158,0.122284,1.265032,1.209005,2.380951,2.165696
624997,1.325629,0.435494,0.938963,1.181860,1.139313,0.994999,0.808248,0.768268,0.897555,0.982030,...,-0.080705,-0.082586,1.280887,5.685720,0.034610,0.043323,0.986527,0.977646,1.564780,1.282789
624998,1.352064,0.508826,0.943052,1.114957,1.237883,1.155951,1.001346,0.935503,0.974412,1.039783,...,0.239071,-0.070281,1.772131,5.427377,0.039169,0.009712,1.762941,1.804130,1.153101,0.621549


In [31]:
# Insert the sample_id column at the beginning of the DataFrame.
# DataFrame.insert aligns a Series on its index, so a length mismatch would
# silently yield NaN ids; check the lengths and insert the values by position.
if len(sample_id_col) != len(ensemble_df):
    raise ValueError(
        f"sample_id has {len(sample_id_col)} rows but the ensemble has {len(ensemble_df)}"
    )

# Guard against re-running the cell: insert() raises if the column already exists.
if 'sample_id' not in ensemble_df.columns:
    ensemble_df.insert(0, 'sample_id', sample_id_col.to_numpy())

ensemble_df

Unnamed: 0,sample_id,ptend_t_0,ptend_t_1,ptend_t_2,ptend_t_3,ptend_t_4,ptend_t_5,ptend_t_6,ptend_t_7,ptend_t_8,...,ptend_v_58,ptend_v_59,cam_out_NETSW,cam_out_FLWDS,cam_out_PRECSC,cam_out_PRECC,cam_out_SOLS,cam_out_SOLL,cam_out_SOLSD,cam_out_SOLLD
0,test_169651,-0.273603,-0.572617,-0.420769,-0.674980,-0.842079,-0.936971,-0.962081,-0.969052,-0.966484,...,-0.076521,0.052478,0.021993,5.208791,0.027036,0.005094,-0.003307,0.005161,0.005335,-0.020650
1,test_524862,-0.207653,-0.482115,-0.389808,-0.685726,-0.896880,-0.981336,-0.971453,-0.964379,-0.971159,...,-0.060662,0.444818,0.049124,5.164723,0.047321,0.169649,0.008765,0.013382,0.008571,0.005768
2,test_634129,-0.294149,-1.009206,-0.592350,-0.657142,-0.827764,-0.942111,-0.968362,-0.966161,-0.943443,...,-0.132954,-0.023342,0.001039,5.740270,0.006489,-0.020462,-0.007558,-0.018947,-0.004631,-0.017838
3,test_403572,-0.344701,-0.846254,-0.594204,-0.659959,-0.833907,-0.977672,-1.017021,-0.996800,-0.945251,...,-0.190573,-0.225404,0.004372,5.785780,0.027768,0.266417,0.025222,0.010669,0.009350,-0.028798
4,test_484578,-0.242013,-0.274682,-0.378127,-0.745073,-0.959155,-1.031197,-1.006845,-0.987113,-0.976434,...,0.714799,0.123089,0.056226,5.097706,-0.003922,0.153527,-0.002314,-0.010504,0.028519,0.049310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
624995,test_578220,1.220584,0.713749,1.057947,1.194688,1.350079,1.385391,1.351001,1.311565,1.315815,...,0.115226,0.141908,2.194750,5.372446,0.023762,0.052938,2.068975,2.066922,1.593797,1.034804
624996,test_395695,1.288521,0.736400,1.062245,1.254386,1.316720,1.310882,1.206583,1.135950,1.289046,...,-0.066142,0.002354,1.787373,5.744978,0.019158,0.122284,1.265032,1.209005,2.380951,2.165696
624997,test_88942,1.325629,0.435494,0.938963,1.181860,1.139313,0.994999,0.808248,0.768268,0.897555,...,-0.080705,-0.082586,1.280887,5.685720,0.034610,0.043323,0.986527,0.977646,1.564780,1.282789
624998,test_79382,1.352064,0.508826,0.943052,1.114957,1.237883,1.155951,1.001346,0.935503,0.974412,...,0.239071,-0.070281,1.772131,5.427377,0.039169,0.009712,1.762941,1.804130,1.153101,0.621549


In [33]:
# Export the final submission: convert the pandas frame to polars and let
# polars write the CSV to the working directory.
pl.from_pandas(ensemble_df).write_csv(
    'ensemble_submission.csv'
)

In [34]:
# Display the DataFrame that was written to ensemble_submission.csv
ensemble_df

Unnamed: 0,sample_id,ptend_t_0,ptend_t_1,ptend_t_2,ptend_t_3,ptend_t_4,ptend_t_5,ptend_t_6,ptend_t_7,ptend_t_8,...,ptend_v_58,ptend_v_59,cam_out_NETSW,cam_out_FLWDS,cam_out_PRECSC,cam_out_PRECC,cam_out_SOLS,cam_out_SOLL,cam_out_SOLSD,cam_out_SOLLD
0,test_169651,-0.273603,-0.572617,-0.420769,-0.674980,-0.842079,-0.936971,-0.962081,-0.969052,-0.966484,...,-0.076521,0.052478,0.021993,5.208791,0.027036,0.005094,-0.003307,0.005161,0.005335,-0.020650
1,test_524862,-0.207653,-0.482115,-0.389808,-0.685726,-0.896880,-0.981336,-0.971453,-0.964379,-0.971159,...,-0.060662,0.444818,0.049124,5.164723,0.047321,0.169649,0.008765,0.013382,0.008571,0.005768
2,test_634129,-0.294149,-1.009206,-0.592350,-0.657142,-0.827764,-0.942111,-0.968362,-0.966161,-0.943443,...,-0.132954,-0.023342,0.001039,5.740270,0.006489,-0.020462,-0.007558,-0.018947,-0.004631,-0.017838
3,test_403572,-0.344701,-0.846254,-0.594204,-0.659959,-0.833907,-0.977672,-1.017021,-0.996800,-0.945251,...,-0.190573,-0.225404,0.004372,5.785780,0.027768,0.266417,0.025222,0.010669,0.009350,-0.028798
4,test_484578,-0.242013,-0.274682,-0.378127,-0.745073,-0.959155,-1.031197,-1.006845,-0.987113,-0.976434,...,0.714799,0.123089,0.056226,5.097706,-0.003922,0.153527,-0.002314,-0.010504,0.028519,0.049310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
624995,test_578220,1.220584,0.713749,1.057947,1.194688,1.350079,1.385391,1.351001,1.311565,1.315815,...,0.115226,0.141908,2.194750,5.372446,0.023762,0.052938,2.068975,2.066922,1.593797,1.034804
624996,test_395695,1.288521,0.736400,1.062245,1.254386,1.316720,1.310882,1.206583,1.135950,1.289046,...,-0.066142,0.002354,1.787373,5.744978,0.019158,0.122284,1.265032,1.209005,2.380951,2.165696
624997,test_88942,1.325629,0.435494,0.938963,1.181860,1.139313,0.994999,0.808248,0.768268,0.897555,...,-0.080705,-0.082586,1.280887,5.685720,0.034610,0.043323,0.986527,0.977646,1.564780,1.282789
624998,test_79382,1.352064,0.508826,0.943052,1.114957,1.237883,1.155951,1.001346,0.935503,0.974412,...,0.239071,-0.070281,1.772131,5.427377,0.039169,0.009712,1.762941,1.804130,1.153101,0.621549
