In [32]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
import sys

sys.path.append("..")
import pandas as pd 
import numpy as np 
from src.processing import hrrr_data
from src.processing import nysm_data
from src.processing import get_error
from src.processing import normalize
from src.processing import get_flag

In [34]:
def columns_drop(df):
    """
    Remove the bookkeeping and duplicate columns from a merged frame.

    Drops the leftover index columns, the lead-time and land-sea-mask
    columns, the second station column and the short lat/lon columns, then
    renames ``station_x`` to ``station``. The input frame is not modified.

    Args:
        df (pandas DataFrame): Merged frame containing every column listed
            in ``unwanted`` below.

    Returns:
        pandas DataFrame: A new frame without those columns.

    Raises:
        KeyError: If any of the columns to drop is missing from ``df``.
    """
    unwanted = (
        "level_0",
        "index_x",
        "index_y",
        "lead time",
        "lsm",
        "station_y",
        "lat",
        "lon",
    )
    return df.drop(columns=list(unwanted)).rename(
        columns={"station_x": "station"}
    )

In [35]:
def create_data_for_model(
    climate_division="Western Plateau",
    nysm_cats_path="/home/aevans/nwp_bias/src/landtype/data/nysm.csv",
    train_fraction=0.8,
):
    """
    Build LSTM training and testing data for every NYSM station in one
    NY climate division.

    NYSM and HRRR data are loaded, restricted to the valid times present in
    the HRRR data and to the stations of the requested climate division, and
    then processed one station at a time: the two sources are merged on
    ``valid_time``, the t2m error, day-of-year encoding and flag column are
    added by the ``get_error`` / ``normalize`` / ``get_flag`` helpers, the
    remaining columns are normalized, and the rows are split in order into a
    leading training part and a trailing testing part. The per-station splits
    are stacked row-wise; the ``station`` column identifies the source of
    each row.

    Args:
        climate_division (str): Value of ``climate_division_name`` in the
            station metadata table used to select the stations.
        nysm_cats_path (str): Path to the NYSM station metadata CSV.
        train_fraction (float): Fraction of each station's rows (taken from
            the start) that goes to the training set.

    Returns:
        df_train (pandas DataFrame): Training rows of all stations.
        df_test (pandas DataFrame): Testing rows of all stations.
        features (list): Feature names returned by ``normalize.normalize_df``
            for the last station processed.

    Raises:
        ValueError: If no station matches ``climate_division`` or no station
            has any merged rows.
    """
    # load nysm data; chaining instead of inplace=True leaves the frame
    # returned by the loader unmodified
    nysm_df = (
        nysm_data.load_nysm_data()
        .reset_index()
        .rename(columns={"time_1H": "valid_time"})
    )

    # load hrrr data
    hrrr_df = hrrr_data.read_hrrr_data()

    # Keep only the NYSM rows whose valid_time also occurs in the HRRR data.
    mytimes = hrrr_df["valid_time"].tolist()
    nysm_df = nysm_df[nysm_df["valid_time"].isin(mytimes)]

    # Filter data by NY climate division
    nysm_cats_df = pd.read_csv(nysm_cats_path)
    in_division = nysm_cats_df["climate_division_name"] == climate_division
    stations = nysm_cats_df.loc[in_division, "stid"].tolist()
    if not stations:
        raise ValueError(
            f"no stations found for climate division {climate_division!r}"
        )
    nysm_df = nysm_df[nysm_df["station"].isin(stations)]
    hrrr_df = hrrr_df[hrrr_df["station"].isin(stations)]

    # Columns that must not be normalized; they are re-attached after the split.
    cols_to_carry = ["valid_time", "station", "latitude", "longitude", "flag"]

    train_parts = []
    test_parts = []
    features = None

    # merge dataframes so that each row is hrrr + nysm data for the same time step
    # do this for each station individually
    for station in stations:
        nysm_station = nysm_df[nysm_df["station"] == station]
        hrrr_station = hrrr_df[hrrr_df["station"] == station]

        master_df = hrrr_station.merge(nysm_station, on="valid_time")
        master_df = master_df.drop_duplicates(
            subset=["valid_time", "t2m"], keep="first"
        )
        if master_df.empty:
            # Nothing to learn from a station without overlapping rows; it
            # would also make the fraction printed below divide by zero.
            continue
        master_df = columns_drop(master_df)

        # Calculate the error using NWP data.
        master_df = get_error.nwp_error("t2m", master_df)
        # encode for day_of_year
        master_df = normalize.encode(master_df, "day_of_year", 366)
        # get flag for non-consecutive time steps
        master_df = get_flag.get_flag(master_df)

        new_df = master_df.drop(columns=cols_to_carry)
        new_df, features = normalize.normalize_df(new_df)

        # Split this station's rows, in order, into training and testing sets.
        n_train = int(len(new_df) * train_fraction)
        station_train = new_df.iloc[:n_train].copy()
        station_test = new_df.iloc[n_train:].copy()
        print("Test Set Fraction", len(station_test) / len(new_df))

        # Reintegrate the carried columns. Assigning a Series aligns on the
        # index, so each split receives only its own rows (assumes
        # normalize_df keeps master_df's index - TODO confirm).
        for c in cols_to_carry:
            station_train[c] = master_df[c]
            station_test[c] = master_df[c]

        # Keep every station's split instead of overwriting the previous one.
        train_parts.append(station_train)
        test_parts.append(station_test)

    if not train_parts:
        raise ValueError(
            f"no merged HRRR/NYSM rows for climate division {climate_division!r}"
        )

    # Per-station indexes repeat the same labels, so build a fresh one.
    df_train = pd.concat(train_parts, ignore_index=True)
    df_test = pd.concat(test_parts, ignore_index=True)

    return df_train, df_test, features

In [36]:
# Run the whole pipeline; it prints one "Test Set Fraction" line per station processed.
df_train, df_test, features = create_data_for_model()

Test Set Fraction 0.2
Test Set Fraction 0.2000190562397275
Test Set Fraction 0.20001532684496898
Test Set Fraction 0.2
Test Set Fraction 0.20001971123047357
Test Set Fraction 0.2
Test Set Fraction 0.2000195069615469
Test Set Fraction 0.20001455710022564
Test Set Fraction 0.2000193208713713


In [37]:
# Load the NYSM observations, move the index into a regular column, and
# rename "time_1H" to "valid_time" so it matches the HRRR column used for
# merging further down. Chaining (instead of inplace=True) never mutates the
# frame object returned by the loader.
nysm_df = (
    nysm_data.load_nysm_data()
    .reset_index()
    .rename(columns={"time_1H": "valid_time"})
)
nysm_df.head()

Unnamed: 0,index,station,valid_time,lat,lon,elev,tair,ta9m,td,relh,srad,pres,mslp,wspd_sonic,wmax_sonic,wdir_sonic,precip_total,snow_depth
0,1,ADDI,2018-01-01 01:00:00,42.040359,-77.237259,507.614014,-18.23782,-18.122169,-22.090469,71.722794,0.0,964.492004,971.503723,1.456784,3.037127,325.875793,0.0,0.045877
1,2,ADDI,2018-01-01 02:00:00,42.040359,-77.237259,507.614014,-18.368231,-18.33906,-21.647659,75.388893,0.0,964.286804,971.328857,1.484595,2.211236,305.772797,0.0,0.046327
2,3,ADDI,2018-01-01 03:00:00,42.040359,-77.237259,507.614014,-18.8304,-18.33363,-21.994141,76.068916,0.0,964.409973,971.574524,1.247007,1.992487,309.544586,0.0,0.045029
3,4,ADDI,2018-01-01 04:00:00,42.040359,-77.237259,507.614014,-18.518641,-18.328711,-21.890549,74.752434,0.0,964.479187,971.563843,1.821808,3.12947,314.346588,0.0,0.047812
4,5,ADDI,2018-01-01 05:00:00,42.040359,-77.237259,507.614014,-18.635839,-18.578341,-21.847321,75.787628,0.0,964.581726,971.698914,1.780934,2.918397,294.888092,0.0,0.047251


In [38]:
# Load the HRRR data and preview its first rows.
hrrr_df = hrrr_data.read_hrrr_data()
hrrr_df.head()

Unnamed: 0,valid_time,time,station,level_0,index,latitude,longitude,t2m,sh2,d2m,...,asnow,cape,dswrf,dlwrf,gh,u_total,u_dir,lead time,lsm,new_tp
1,2018-01-01 03:00:00,2018-01-01 01:00:00,ADDI,59218879,326784,42.045955,-77.218867,-19.199194,0.00068,-21.993216,...,7e-06,0.0,0.0,146.800003,5215.425781,4.024367,304.386841,2,1.0,0.001
2,2018-01-01 04:00:00,2018-01-01 02:00:00,ADDI,59218880,532440,42.045955,-77.218867,-19.118689,0.00068,-21.91857,...,1e-06,0.0,0.0,147.100006,5210.410645,3.768112,298.900574,2,1.0,0.0
3,2018-01-01 05:00:00,2018-01-01 03:00:00,ADDI,59218881,738096,42.045955,-77.218867,-19.439062,0.00065,-22.570868,...,0.0,0.0,0.0,147.600006,5207.041992,3.118722,286.333313,2,1.0,0.0
4,2018-01-01 06:00:00,2018-01-01 04:00:00,ADDI,59218882,943752,42.045955,-77.218867,-19.760291,0.00063,-22.821158,...,0.0,0.0,0.0,146.899994,5201.477539,3.214206,284.222107,2,1.0,0.0
5,2018-01-01 07:00:00,2018-01-01 05:00:00,ADDI,59218883,1149408,42.045955,-77.218867,-19.860175,0.00062,-22.993704,...,9e-06,0.0,0.0,146.600006,5200.875,3.268753,270.951294,2,1.0,0.001


In [39]:
# Keep only the NYSM rows whose valid_time also occurs in the HRRR data
# (nothing is written to disk in this cell).
mytimes = hrrr_df["valid_time"].tolist()
nysm_df = nysm_df.loc[lambda obs: obs["valid_time"].isin(mytimes)]

In [40]:
# Read the NYSM station metadata table and keep only the rows whose
# climate_division_name is "Western Plateau".
nysm_cats_path = "/home/aevans/nwp_bias/src/landtype/data/nysm.csv"
nysm_cats_df = pd.read_csv(nysm_cats_path).loc[
    lambda cats: cats["climate_division_name"] == "Western Plateau"
]
nysm_cats_df

Unnamed: 0,stid,number,name,lat [degrees],lon [degrees],elevation [m],county,nearest_city,state,distance_from_town [km],direction_from_town [degrees],climate_division,climate_division_name,wfo,commissioned,decommissioned
0,ADDI,107,Addison,42.04036,-77.23726,507.614,Steuben,Addison,NY,6.9,S,1,Western Plateau,BGM,2016-08-10 18:15:00 UTC,
6,BELM,70,Belmont,42.24249,-78.03958,417.576,Allegany,Belmont,NY,2.2,N,1,Western Plateau,BUF,2016-05-16 20:45:00 UTC,
28,COHO,74,Cohocton,42.51178,-77.43762,599.328,Steuben,Cohocton,NY,5.8,E,1,Western Plateau,BGM,2016-05-17 16:30:00 UTC,
34,DELE,67,Delevan,42.418464,-78.4232,622.31,Cattaraugus,Delevan,NY,9.2,SSE,1,Western Plateau,BUF,2016-03-08 21:15:00 UTC,
43,ELMI,73,Elmira,42.11332,-76.83664,332.445,Chemung,Elmira,NY,3.5,NW,1,Western Plateau,BGM,2016-06-30 17:35:00 UTC,
51,GROV,71,Grove,42.48951,-77.94946,609.722,Allegany,Grove,NY,8.9,NW,1,Western Plateau,BUF,2016-07-20 18:40:00 UTC,
55,HART,40,Hartsville,42.211221,-77.689733,729.569,Steuben,Hartsville,NY,4.0,S,1,Western Plateau,BGM,2016-08-09 21:00:00 UTC,
73,OLEA,65,Olean,42.09141,-78.40743,457.23,Cattaraugus,Olean,NY,2.1,ENE,1,Western Plateau,BUF,2016-06-02 23:15:00 UTC,
85,RAND,125,Randolph,42.14928,-78.90096,448.97,Cattaraugus,Randolph,NY,6.3,ESE,1,Western Plateau,BUF,2016-06-02 15:50:00 UTC,


In [41]:
# Station IDs (the "stid" column) of the rows kept in nysm_cats_df.
stations = nysm_cats_df["stid"].tolist()

In [42]:
# Restrict both tables to the rows whose station is in `stations`.
nysm_df, hrrr_df = (
    frame[frame["station"].isin(stations)] for frame in (nysm_df, hrrr_df)
)

In [43]:
# Preview the NYSM rows that remain after the valid_time and station filters.
nysm_df.head()

Unnamed: 0,index,station,valid_time,lat,lon,elev,tair,ta9m,td,relh,srad,pres,mslp,wspd_sonic,wmax_sonic,wdir_sonic,precip_total,snow_depth
1,2,ADDI,2018-01-01 02:00:00,42.040359,-77.237259,507.614014,-18.368231,-18.33906,-21.647659,75.388893,0.0,964.286804,971.328857,1.484595,2.211236,305.772797,0.0,0.046327
2,3,ADDI,2018-01-01 03:00:00,42.040359,-77.237259,507.614014,-18.8304,-18.33363,-21.994141,76.068916,0.0,964.409973,971.574524,1.247007,1.992487,309.544586,0.0,0.045029
3,4,ADDI,2018-01-01 04:00:00,42.040359,-77.237259,507.614014,-18.518641,-18.328711,-21.890549,74.752434,0.0,964.479187,971.563843,1.821808,3.12947,314.346588,0.0,0.047812
4,5,ADDI,2018-01-01 05:00:00,42.040359,-77.237259,507.614014,-18.635839,-18.578341,-21.847321,75.787628,0.0,964.581726,971.698914,1.780934,2.918397,294.888092,0.0,0.047251
5,6,ADDI,2018-01-01 06:00:00,42.040359,-77.237259,507.614014,-18.700809,-18.628189,-22.008728,75.137451,0.0,964.307983,971.436768,1.593597,2.972308,300.071198,0.0,0.045546


In [44]:
# Preview the HRRR rows that remain after the station filter.
hrrr_df.head()

Unnamed: 0,valid_time,time,station,level_0,index,latitude,longitude,t2m,sh2,d2m,...,asnow,cape,dswrf,dlwrf,gh,u_total,u_dir,lead time,lsm,new_tp
1,2018-01-01 03:00:00,2018-01-01 01:00:00,ADDI,59218879,326784,42.045955,-77.218867,-19.199194,0.00068,-21.993216,...,7e-06,0.0,0.0,146.800003,5215.425781,4.024367,304.386841,2,1.0,0.001
2,2018-01-01 04:00:00,2018-01-01 02:00:00,ADDI,59218880,532440,42.045955,-77.218867,-19.118689,0.00068,-21.91857,...,1e-06,0.0,0.0,147.100006,5210.410645,3.768112,298.900574,2,1.0,0.0
3,2018-01-01 05:00:00,2018-01-01 03:00:00,ADDI,59218881,738096,42.045955,-77.218867,-19.439062,0.00065,-22.570868,...,0.0,0.0,0.0,147.600006,5207.041992,3.118722,286.333313,2,1.0,0.0
4,2018-01-01 06:00:00,2018-01-01 04:00:00,ADDI,59218882,943752,42.045955,-77.218867,-19.760291,0.00063,-22.821158,...,0.0,0.0,0.0,146.899994,5201.477539,3.214206,284.222107,2,1.0,0.0
5,2018-01-01 07:00:00,2018-01-01 05:00:00,ADDI,59218883,1149408,42.045955,-77.218867,-19.860175,0.00062,-22.993704,...,9e-06,0.0,0.0,146.600006,5200.875,3.268753,270.951294,2,1.0,0.001


In [45]:
# Join the HRRR and NYSM rows on valid_time, one station at a time.
# NOTE(review): master_df (like nysm_df1 / hrrr_df1) is reassigned on every
# pass, so once the loop finishes it holds only the last station in
# `stations`; the merges for all earlier stations are discarded.
for station in stations:
    nysm_df1 = nysm_df[nysm_df['station']==station]
    hrrr_df1 = hrrr_df[hrrr_df['station']==station]

    # merge() defaults to an inner join: only valid_times present in both
    # frames are kept.
    master_df = hrrr_df1.merge(nysm_df1, on="valid_time")
    # Drop rows that repeat the same (valid_time, t2m) pair, keeping the first.
    master_df = master_df.drop_duplicates(
        subset=["valid_time", "t2m"], keep="first"
    )

In [46]:
# Drop the leftover index/duplicate columns and rename station_x to station.
master_df = columns_drop(master_df)

In [47]:
# Preview the merged frame after the column clean-up.
master_df.head()

Unnamed: 0,valid_time,time,station,latitude,longitude,t2m,sh2,d2m,r2,u10,...,td,relh,srad,pres,mslp,wspd_sonic,wmax_sonic,wdir_sonic,precip_total,snow_depth
0,2018-01-01 02:00:00,2018-01-01 00:00:00,RAND,42.147687,-78.896589,-18.648886,0.00072,-21.375479,75.199997,0.586771,...,-22.885422,83.251266,0.006639,972.996277,980.008057,1.964329,2.306689,27.634211,0.0,0.228177
1,2018-01-01 03:00:00,2018-01-01 01:00:00,RAND,42.147687,-78.896589,-18.761694,0.0007,-21.680716,73.800003,0.258581,...,-21.205078,83.594307,0.03983,973.030396,979.651123,1.320795,1.612775,43.862209,0.0,0.22471
2,2018-01-01 04:00:00,2018-01-01 02:00:00,RAND,42.147687,-78.896589,-17.993689,0.00063,-22.91857,65.5,0.17383,...,-20.371979,84.004471,0.0,973.266418,979.707031,0.296874,0.750597,6.878823,0.0,0.222155
3,2018-01-01 05:00:00,2018-01-01 03:00:00,RAND,42.147687,-78.896589,-17.751562,0.00058,-23.758368,59.700001,0.367857,...,-20.614105,83.912849,0.0,973.322815,979.818481,0.805538,1.376129,84.777046,0.0,0.222909
4,2018-01-01 06:00:00,2018-01-01 04:00:00,RAND,42.147687,-78.896589,-18.510291,0.00057,-24.071158,61.799999,-0.321807,...,-20.956146,83.824417,0.0,973.285889,979.859253,0.512716,0.747293,151.348999,0.0,-999.0


In [48]:
# Calculate the NWP error for t2m; the later preview of master_df shows a
# new target_error column after this step.
master_df = get_error.nwp_error("t2m", master_df)

In [49]:
# Encode day_of_year; the preview that follows shows day_of_year_sin and
# day_of_year_cos columns. 366 is presumably the cycle length - confirm in
# normalize.encode.
master_df = normalize.encode(master_df, 'day_of_year', 366)

In [50]:
# Preview the frame with the error and day-of-year columns added.
master_df.head()

Unnamed: 0,valid_time,station,latitude,longitude,t2m,sh2,d2m,r2,u10,v10,...,pres,mslp,wspd_sonic,wmax_sonic,wdir_sonic,precip_total,snow_depth,target_error,day_of_year_sin,day_of_year_cos
0,2018-01-01 02:00:00,RAND,42.147687,-78.896589,-18.648886,0.00072,-21.375479,75.199997,0.586771,-1.505135,...,972.996277,980.008057,1.964329,2.306689,27.634211,0.0,0.228177,2.143324,0.017166,0.999853
1,2018-01-01 03:00:00,RAND,42.147687,-78.896589,-18.761694,0.0007,-21.680716,73.800003,0.258581,-1.585373,...,973.030396,979.651123,1.320795,1.612775,43.862209,0.0,0.22471,0.366146,0.017166,0.999853
2,2018-01-01 04:00:00,RAND,42.147687,-78.896589,-17.993689,0.00063,-22.91857,65.5,0.17383,-1.633596,...,973.266418,979.707031,0.296874,0.750597,6.878823,0.0,0.222155,0.343061,0.017166,0.999853
3,2018-01-01 05:00:00,RAND,42.147687,-78.896589,-17.751562,0.00058,-23.758368,59.700001,0.367857,-2.002062,...,973.322815,979.818481,0.805538,1.376129,84.777046,0.0,0.222909,0.818918,0.017166,0.999853
4,2018-01-01 06:00:00,RAND,42.147687,-78.896589,-18.510291,0.00057,-24.071158,61.799999,-0.321807,-2.039671,...,973.285889,979.859253,0.512716,0.747293,151.348999,0.0,-999.0,0.39616,0.017166,0.999853


In [51]:
# Add the `flag` column (visible in the next output). It presumably marks
# non-consecutive time steps - confirm in get_flag.get_flag.
master_df = get_flag.get_flag(master_df)

In [52]:
# Display master_df with the new flag column (pandas truncates the middle rows).
master_df

Unnamed: 0,valid_time,station,latitude,longitude,t2m,sh2,d2m,r2,u10,v10,...,mslp,wspd_sonic,wmax_sonic,wdir_sonic,precip_total,snow_depth,target_error,day_of_year_sin,day_of_year_cos,flag
0,2018-01-01 02:00:00,RAND,42.147687,-78.896589,-18.648886,0.00072,-21.375479,75.199997,0.586771,-1.505135,...,980.008057,1.964329,2.306689,27.634211,0.000000,0.228177,2.143324,0.017166,0.999853,True
1,2018-01-01 03:00:00,RAND,42.147687,-78.896589,-18.761694,0.00070,-21.680716,73.800003,0.258581,-1.585373,...,979.651123,1.320795,1.612775,43.862209,0.000000,0.224710,0.366146,0.017166,0.999853,True
2,2018-01-01 04:00:00,RAND,42.147687,-78.896589,-17.993689,0.00063,-22.918570,65.500000,0.173830,-1.633596,...,979.707031,0.296874,0.750597,6.878823,0.000000,0.222155,0.343061,0.017166,0.999853,True
3,2018-01-01 05:00:00,RAND,42.147687,-78.896589,-17.751562,0.00058,-23.758368,59.700001,0.367857,-2.002062,...,979.818481,0.805538,1.376129,84.777046,0.000000,0.222909,0.818918,0.017166,0.999853,True
4,2018-01-01 06:00:00,RAND,42.147687,-78.896589,-18.510291,0.00057,-24.071158,61.799999,-0.321807,-2.039671,...,979.859253,0.512716,0.747293,151.348999,0.000000,-999.000000,0.396160,0.017166,0.999853,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41401,2022-12-31 19:00:00,RAND,42.147687,-78.896589,10.360986,0.00806,9.892847,95.800003,1.636268,3.586514,...,955.739990,1.840831,2.695169,138.419998,0.220001,0.005335,0.118026,-0.017166,0.999853,True
41402,2022-12-31 20:00:00,RAND,42.147687,-78.896589,10.072198,0.00805,9.876611,98.000000,1.396407,3.167988,...,955.471375,1.999802,2.892289,139.574799,0.139999,0.004999,0.032629,-0.017166,0.999853,True
41403,2022-12-31 21:00:00,RAND,42.147687,-78.896589,9.783258,0.00789,9.601648,97.000000,0.734075,1.733395,...,955.417664,2.654339,3.307981,150.948807,0.470001,0.005784,-0.325782,-0.017166,0.999853,True
41404,2022-12-31 22:00:00,RAND,42.147687,-78.896589,9.931055,0.00800,9.770227,97.500000,1.472957,1.702705,...,955.383118,1.516677,2.378469,163.847397,0.250000,0.003574,0.376166,-0.017166,0.999853,True


In [53]:
# Columns removed before normalization and re-attached after the train/test split.
cols_to_carry = ['valid_time', 'station', 'latitude', 'longitude', 'flag']

In [54]:
# Frame to be normalized: master_df without the carried columns.
new_df = master_df.drop(columns=cols_to_carry)

In [55]:
# Normalize new_df; normalize_df also returns the list of feature names.
new_df, features = normalize.normalize_df(new_df)

In [56]:
# Preview the normalized frame.
new_df.head()

Unnamed: 0,t2m,sh2,d2m,r2,u10,v10,tp,mslma,orog,tcc,...,pres,mslp,wspd_sonic,wmax_sonic,wdir_sonic,precip_total,snow_depth,target_error,day_of_year_sin,day_of_year_cos
0,-2.557626,-1.429098,-2.57565,0.1174,-0.047033,-0.910739,-0.183904,2.231703,-0.874702,-0.358101,...,1.349866,2.098271,-0.354461,-0.700216,-1.403964,-0.177397,1.00091,1.226911,0.035113,1.424087
1,-2.568167,-1.434002,-2.606419,0.032617,-0.156187,-0.947073,-0.176446,2.205937,-0.874702,-0.268108,...,1.354961,2.049276,-0.713321,-0.946045,-1.245878,-0.177397,1.000903,0.177932,0.035113,1.424087
2,-2.496404,-1.451166,-2.731196,-0.470025,-0.184375,-0.96891,-0.188378,2.175876,-0.874702,-0.532281,...,1.390207,2.05695,-1.2843,-1.251483,-1.606156,-0.177397,1.000898,0.164307,0.035113,1.424087
3,-2.47378,-1.463425,-2.815849,-0.821268,-0.119843,-1.13576,-0.187632,2.21882,-0.874702,-0.169406,...,1.398628,2.072249,-1.000649,-1.02988,-0.847301,-0.177397,1.000899,0.445181,0.035113,1.424087
4,-2.544676,-1.465877,-2.847379,-0.694094,-0.349222,-1.15279,-0.188378,2.32618,-0.874702,-0.462609,...,1.393114,2.077845,-1.163938,-1.252654,-0.198782,-0.177397,-0.999469,0.195648,0.035113,1.424087


In [57]:
# Feature names returned by normalize_df (target_error is not among them).
features

['asnow',
 'cape',
 'd2m',
 'day_of_year_cos',
 'day_of_year_sin',
 'dlwrf',
 'dswrf',
 'elev',
 'gh',
 'mslma',
 'mslp',
 'new_tp',
 'orog',
 'precip_total',
 'pres',
 'r2',
 'relh',
 'sh2',
 'snow_depth',
 'srad',
 't2m',
 'ta9m',
 'tair',
 'tcc',
 'td',
 'tp',
 'u10',
 'u_dir',
 'u_total',
 'v10',
 'wdir_sonic',
 'wmax_sonic',
 'wspd_sonic']

In [58]:
# Split the data into training and testing sets by row order: the first 80%
# of the rows go to training, the remaining rows to testing.
length = len(new_df)
# NOTE: despite its name, test_len is the number of TRAINING rows, i.e. the
# position at which the frame is cut.
test_len = int(length * 0.8)
# .copy() makes the splits independent frames, so adding columns to them
# later does not touch new_df.
df_train = new_df.iloc[:test_len].copy()
df_test = new_df.iloc[test_len:].copy()
print("Test Set Fraction", len(df_test) / len(new_df))

Test Set Fraction 0.2000193208713713


In [59]:
# Reintegrate the specified columns back into the training and testing DataFrames.
# Assigning a Series aligns on the index, so each split receives only the
# values for its own rows (assumes new_df kept master_df's index through
# normalize_df - TODO confirm).
for c in cols_to_carry:
    df_train[c] = master_df[c]
    df_test[c] = master_df[c]

In [60]:
# Preview the training split with the carried columns re-attached.
df_train.head()

Unnamed: 0,t2m,sh2,d2m,r2,u10,v10,tp,mslma,orog,tcc,...,precip_total,snow_depth,target_error,day_of_year_sin,day_of_year_cos,valid_time,station,latitude,longitude,flag
0,-2.557626,-1.429098,-2.57565,0.1174,-0.047033,-0.910739,-0.183904,2.231703,-0.874702,-0.358101,...,-0.177397,1.00091,1.226911,0.035113,1.424087,2018-01-01 02:00:00,RAND,42.147687,-78.896589,True
1,-2.568167,-1.434002,-2.606419,0.032617,-0.156187,-0.947073,-0.176446,2.205937,-0.874702,-0.268108,...,-0.177397,1.000903,0.177932,0.035113,1.424087,2018-01-01 03:00:00,RAND,42.147687,-78.896589,True
2,-2.496404,-1.451166,-2.731196,-0.470025,-0.184375,-0.96891,-0.188378,2.175876,-0.874702,-0.532281,...,-0.177397,1.000898,0.164307,0.035113,1.424087,2018-01-01 04:00:00,RAND,42.147687,-78.896589,True
3,-2.47378,-1.463425,-2.815849,-0.821268,-0.119843,-1.13576,-0.187632,2.21882,-0.874702,-0.169406,...,-0.177397,1.000899,0.445181,0.035113,1.424087,2018-01-01 05:00:00,RAND,42.147687,-78.896589,True
4,-2.544676,-1.465877,-2.847379,-0.694094,-0.349222,-1.15279,-0.188378,2.32618,-0.874702,-0.462609,...,-0.177397,-0.999469,0.195648,0.035113,1.424087,2018-01-01 06:00:00,RAND,42.147687,-78.896589,True


In [61]:
# Preview the testing split with the carried columns re-attached.
df_test.head()

Unnamed: 0,t2m,sh2,d2m,r2,u10,v10,tp,mslma,orog,tcc,...,precip_total,snow_depth,target_error,day_of_year_sin,day_of_year_cos,valid_time,station,latitude,longitude,flag
33124,-2.417364,-1.434002,-2.604381,-0.233844,-0.669365,-1.215163,-0.189124,1.746436,1.143245,1.168876,...,-0.177397,1.000516,-0.41924,0.37019,1.37755,2022-01-15 14:00:00,RAND,42.147687,-78.896589,True
33125,-2.302355,-1.411935,-2.476764,-0.136949,-0.582175,-1.216105,-0.188378,1.752161,1.143245,1.168876,...,-0.177397,1.000514,-0.013352,0.37019,1.37755,2022-01-15 15:00:00,RAND,42.147687,-78.896589,True
33126,-2.172318,-1.392319,-2.35764,-0.149061,-0.739434,-1.002953,-0.188378,1.578954,1.143245,1.168876,...,-0.177397,1.00051,-0.011845,0.37019,1.37755,2022-01-15 16:00:00,RAND,42.147687,-78.896589,True
33127,-2.012322,-1.404579,-2.420641,-0.887883,-0.660952,-1.02401,-0.189124,1.724964,1.143245,1.168876,...,-0.177397,1.000511,0.413047,0.37019,1.37755,2022-01-15 17:00:00,RAND,42.147687,-78.896589,True
33128,-1.888606,-1.394771,-2.36394,-1.093784,-0.538108,-1.05531,-0.189124,1.670568,1.143245,1.168876,...,-0.177397,1.000515,0.397455,0.37019,1.37755,2022-01-15 18:00:00,RAND,42.147687,-78.896589,True
