In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import polars as pl

In [2]:
# Submission files to blend, one per training batch (order is preserved).
submission_paths = [
    '/kaggle/input/leap-pytorch-1m-f64-batch001/submission.csv',
    '/kaggle/input/leap-pytorch-1m-f64-batch002/submission.csv',
    '/kaggle/input/leap-pytorch-1m-f64-batch003/submission.csv',
    '/kaggle/input/leap-pytorch-1m-f64-batch004/submission.csv',
    '/kaggle/input/leap-pytorch-1m-f64-batch005/submission.csv',
]

# Individual handles for each file, kept available under their per-batch names.
(
    sub_path_001,
    sub_path_002,
    sub_path_003,
    sub_path_004,
    sub_path_005,
) = submission_paths

In [3]:
# Read the sample_id column from sample_submission.csv (not from one of the
# ensembled submission files). `columns=` makes polars parse only that column
# instead of loading every prediction column of the file just to discard it.
sample_id_col = pl.read_csv(
    '/kaggle/input/leap-atmospheric-physics-ai-climsim/sample_submission.csv',
    columns=['sample_id'],
    n_threads=1,
)['sample_id']
# Convert to a pandas Series so it can be inserted into the pandas result frame.
sample_id_col = sample_id_col.to_pandas()

In [4]:
# Column names of the first submission file; nrows=0 parses the header row only.
header = list(pd.read_csv(submission_paths[0], nrows=0).columns)

In [5]:
def ensemble_submissions(submission_paths):
    """
    Ensemble predictions from multiple submission files by averaging them.

    Each file is read with polars. The first column is treated as the row
    identifier and every remaining column as a numeric prediction. The
    predictions are summed file by file into a float64 accumulator and then
    divided by the number of files.

    Parameters
    ----------
    submission_paths : list of str
        List of file paths to submission CSV files. Any iterable of paths is
        accepted; it must contain at least one path.

    Returns
    -------
    np.ndarray
        The ensembled predictions as a float64 array of shape
        (n_rows, n_prediction_columns), rows in the order of the first file.

    Raises
    ------
    ValueError
        If no paths are given, or if a file's prediction shape or identifier
        column differs from the first file's (averaging misaligned rows would
        silently produce meaningless values).
    """
    submission_paths = list(submission_paths)
    if not submission_paths:
        raise ValueError("submission_paths must contain at least one file")

    prediction_sum = None
    reference_ids = None
    for path in submission_paths:
        submission_df = pl.read_csv(path)
        row_ids = submission_df[:, 0].to_numpy()  # First column (sample_id)
        predictions = submission_df[:, 1:].to_numpy()  # Everything except sample_id

        if prediction_sum is None:
            # Copy into an owned float64 array: the in-place += and /= below
            # need a writable floating-point accumulator, whatever dtype or
            # buffer `to_numpy()` handed back for the first file.
            prediction_sum = np.array(predictions, dtype=np.float64)
            reference_ids = row_ids
        else:
            if predictions.shape != prediction_sum.shape:
                raise ValueError(
                    f"{path}: prediction shape {predictions.shape} does not match "
                    f"{prediction_sum.shape} from {submission_paths[0]}"
                )
            if not np.array_equal(row_ids, reference_ids):
                raise ValueError(
                    f"{path}: first-column ids differ from {submission_paths[0]}; "
                    "rows must be in the same order to be averaged"
                )
            prediction_sum += predictions

        # Drop per-file objects before loading the next file to limit peak memory
        del submission_df, predictions, row_ids

    # Average ensemble predictions
    prediction_sum /= len(submission_paths)

    return prediction_sum


In [6]:
# Average the predictions of all listed submission files
ensemble_preds = ensemble_submissions(submission_paths=submission_paths)

In [7]:
# Wrap the averaged array in a DataFrame; header[0] is the sample_id column,
# so only the remaining names label the prediction columns.
ensemble_df = pd.DataFrame(data=ensemble_preds, columns=header[1:])

ensemble_df

Unnamed: 0,ptend_t_0,ptend_t_1,ptend_t_2,ptend_t_3,ptend_t_4,ptend_t_5,ptend_t_6,ptend_t_7,ptend_t_8,ptend_t_9,...,ptend_v_58,ptend_v_59,cam_out_NETSW,cam_out_FLWDS,cam_out_PRECSC,cam_out_PRECC,cam_out_SOLS,cam_out_SOLL,cam_out_SOLSD,cam_out_SOLLD
0,-0.207607,-0.456919,-0.408766,-0.559131,-0.749744,-0.882443,-0.920104,-0.923119,-0.935302,-0.932201,...,-0.111695,0.085450,0.009031,5.181016,0.012960,0.004282,0.002685,0.018982,0.013551,-0.016591
1,-0.172122,-0.378285,-0.488326,-0.651977,-0.858462,-0.963757,-0.966721,-0.966961,-0.985602,-0.988206,...,-0.197619,0.402244,0.019022,5.152816,0.053962,0.132203,-0.000677,0.018833,0.012775,0.021072
2,-0.202661,-0.872962,-0.582133,-0.574082,-0.743102,-0.888217,-0.945246,-0.945113,-0.936466,-0.925673,...,-0.175747,-0.039109,-0.024206,5.679336,0.032037,0.031334,0.000412,0.006414,-0.017632,-0.031507
3,-0.237251,-0.693257,-0.610592,-0.583984,-0.818570,-0.964228,-0.977112,-0.952296,-0.951152,-0.946074,...,-0.296166,-0.324240,-0.013464,5.717164,0.088412,0.223871,0.016261,0.018099,-0.000331,-0.026627
4,-0.156448,-0.225488,-0.429392,-0.702776,-0.893046,-0.974232,-0.966059,-0.969786,-0.978677,-0.972550,...,0.403248,0.083554,-0.001426,5.071438,0.092818,0.152306,-0.008454,0.015521,0.013201,0.033666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
624995,1.266020,0.695734,0.973722,1.091054,1.245688,1.246317,1.139073,1.070941,1.083540,1.145589,...,0.063423,0.120819,1.840593,5.453213,0.069374,0.016133,1.726208,1.738164,1.657493,1.207371
624996,1.319401,0.589996,0.938204,1.170623,1.259272,1.195334,1.033243,0.969609,1.098798,1.245984,...,-0.029409,-0.011828,1.417029,5.662967,0.047727,0.062225,1.031641,1.018766,2.267198,2.097136
624997,1.260162,0.361503,0.853741,1.102112,1.043872,0.848883,0.635235,0.617563,0.741779,0.834292,...,-0.129659,-0.133872,1.005441,5.696776,-0.000525,0.012759,0.781760,0.793454,1.458184,1.259734
624998,1.415583,0.371745,0.865894,1.028315,1.120754,1.029779,0.864377,0.815570,0.848167,0.905469,...,0.133311,-0.185103,1.409218,5.348567,0.108954,0.018211,1.366972,1.410080,1.204850,0.793381


In [8]:
# Insert the sample_id column at the beginning of the DataFrame.
# `DataFrame.insert` aligns a Series on the index, so a row-count mismatch would
# silently produce NaN or dropped ids rather than an error; fail loudly instead.
if len(sample_id_col) != len(ensemble_df):
    raise ValueError(
        f"sample_id has {len(sample_id_col)} rows but the ensemble has {len(ensemble_df)}"
    )

# Guard keeps the cell re-runnable: inserting an existing column name raises.
if 'sample_id' not in ensemble_df.columns:
    ensemble_df.insert(0, 'sample_id', sample_id_col)

ensemble_df

Unnamed: 0,sample_id,ptend_t_0,ptend_t_1,ptend_t_2,ptend_t_3,ptend_t_4,ptend_t_5,ptend_t_6,ptend_t_7,ptend_t_8,...,ptend_v_58,ptend_v_59,cam_out_NETSW,cam_out_FLWDS,cam_out_PRECSC,cam_out_PRECC,cam_out_SOLS,cam_out_SOLL,cam_out_SOLSD,cam_out_SOLLD
0,test_169651,-0.207607,-0.456919,-0.408766,-0.559131,-0.749744,-0.882443,-0.920104,-0.923119,-0.935302,...,-0.111695,0.085450,0.009031,5.181016,0.012960,0.004282,0.002685,0.018982,0.013551,-0.016591
1,test_524862,-0.172122,-0.378285,-0.488326,-0.651977,-0.858462,-0.963757,-0.966721,-0.966961,-0.985602,...,-0.197619,0.402244,0.019022,5.152816,0.053962,0.132203,-0.000677,0.018833,0.012775,0.021072
2,test_634129,-0.202661,-0.872962,-0.582133,-0.574082,-0.743102,-0.888217,-0.945246,-0.945113,-0.936466,...,-0.175747,-0.039109,-0.024206,5.679336,0.032037,0.031334,0.000412,0.006414,-0.017632,-0.031507
3,test_403572,-0.237251,-0.693257,-0.610592,-0.583984,-0.818570,-0.964228,-0.977112,-0.952296,-0.951152,...,-0.296166,-0.324240,-0.013464,5.717164,0.088412,0.223871,0.016261,0.018099,-0.000331,-0.026627
4,test_484578,-0.156448,-0.225488,-0.429392,-0.702776,-0.893046,-0.974232,-0.966059,-0.969786,-0.978677,...,0.403248,0.083554,-0.001426,5.071438,0.092818,0.152306,-0.008454,0.015521,0.013201,0.033666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
624995,test_578220,1.266020,0.695734,0.973722,1.091054,1.245688,1.246317,1.139073,1.070941,1.083540,...,0.063423,0.120819,1.840593,5.453213,0.069374,0.016133,1.726208,1.738164,1.657493,1.207371
624996,test_395695,1.319401,0.589996,0.938204,1.170623,1.259272,1.195334,1.033243,0.969609,1.098798,...,-0.029409,-0.011828,1.417029,5.662967,0.047727,0.062225,1.031641,1.018766,2.267198,2.097136
624997,test_88942,1.260162,0.361503,0.853741,1.102112,1.043872,0.848883,0.635235,0.617563,0.741779,...,-0.129659,-0.133872,1.005441,5.696776,-0.000525,0.012759,0.781760,0.793454,1.458184,1.259734
624998,test_79382,1.415583,0.371745,0.865894,1.028315,1.120754,1.029779,0.864377,0.815570,0.848167,...,0.133311,-0.185103,1.409218,5.348567,0.108954,0.018211,1.366972,1.410080,1.204850,0.793381


In [9]:
# Persist the blended submission: convert the pandas frame to polars and let
# polars write the CSV file.
(
    pl.from_pandas(ensemble_df)
    .write_csv('ensemble_submission.csv')
)

In [10]:
# Show the final frame (sample_id plus prediction columns) as the cell output
ensemble_df

Unnamed: 0,sample_id,ptend_t_0,ptend_t_1,ptend_t_2,ptend_t_3,ptend_t_4,ptend_t_5,ptend_t_6,ptend_t_7,ptend_t_8,...,ptend_v_58,ptend_v_59,cam_out_NETSW,cam_out_FLWDS,cam_out_PRECSC,cam_out_PRECC,cam_out_SOLS,cam_out_SOLL,cam_out_SOLSD,cam_out_SOLLD
0,test_169651,-0.207607,-0.456919,-0.408766,-0.559131,-0.749744,-0.882443,-0.920104,-0.923119,-0.935302,...,-0.111695,0.085450,0.009031,5.181016,0.012960,0.004282,0.002685,0.018982,0.013551,-0.016591
1,test_524862,-0.172122,-0.378285,-0.488326,-0.651977,-0.858462,-0.963757,-0.966721,-0.966961,-0.985602,...,-0.197619,0.402244,0.019022,5.152816,0.053962,0.132203,-0.000677,0.018833,0.012775,0.021072
2,test_634129,-0.202661,-0.872962,-0.582133,-0.574082,-0.743102,-0.888217,-0.945246,-0.945113,-0.936466,...,-0.175747,-0.039109,-0.024206,5.679336,0.032037,0.031334,0.000412,0.006414,-0.017632,-0.031507
3,test_403572,-0.237251,-0.693257,-0.610592,-0.583984,-0.818570,-0.964228,-0.977112,-0.952296,-0.951152,...,-0.296166,-0.324240,-0.013464,5.717164,0.088412,0.223871,0.016261,0.018099,-0.000331,-0.026627
4,test_484578,-0.156448,-0.225488,-0.429392,-0.702776,-0.893046,-0.974232,-0.966059,-0.969786,-0.978677,...,0.403248,0.083554,-0.001426,5.071438,0.092818,0.152306,-0.008454,0.015521,0.013201,0.033666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
624995,test_578220,1.266020,0.695734,0.973722,1.091054,1.245688,1.246317,1.139073,1.070941,1.083540,...,0.063423,0.120819,1.840593,5.453213,0.069374,0.016133,1.726208,1.738164,1.657493,1.207371
624996,test_395695,1.319401,0.589996,0.938204,1.170623,1.259272,1.195334,1.033243,0.969609,1.098798,...,-0.029409,-0.011828,1.417029,5.662967,0.047727,0.062225,1.031641,1.018766,2.267198,2.097136
624997,test_88942,1.260162,0.361503,0.853741,1.102112,1.043872,0.848883,0.635235,0.617563,0.741779,...,-0.129659,-0.133872,1.005441,5.696776,-0.000525,0.012759,0.781760,0.793454,1.458184,1.259734
624998,test_79382,1.415583,0.371745,0.865894,1.028315,1.120754,1.029779,0.864377,0.815570,0.848167,...,0.133311,-0.185103,1.409218,5.348567,0.108954,0.018211,1.366972,1.410080,1.204850,0.793381
