## This program calculates the hourly data 

Calculating the hourly data for parameters 'STR', 'SLHF', 'SSHF' from the data downloaded from the MOS archive. 
The data downloaded from the MOS archive using the R program MOS_archive_data_rail.R

In [None]:
!/usr/bin/pip3 install pandas
!/usr/bin/pip3 install numpy
!/usr/bin/pip3 install --upgrade numpy

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv(r'/home/daniel/projects/rails/data/mos_archive_data_for_all_rail_stations.csv', sep = ',')
df

Unnamed: 0,station_id,analysis_time,forecast_time,forecast_period,analysis_date,MSL,T2,D2,U10,V10,...,SRR,STR,SLHF,SSHF,cosmonth,sinmonth,coshour,sinhour,lat,lon
0,10,0,2019-09-07 01:00:00,1,2019-09-07,101743.2,284.1,283.3,2.5,0.4,...,0.0,-141475.5,-60470.5,67784.4,0.0,-1.0,0.97,0.26,62.397832,30.027159
1,10,0,2019-09-07 02:00:00,2,2019-09-07,101818.8,283.6,282.8,2.3,0.3,...,0.0,-398403.5,-93070.7,118210.8,0.0,-1.0,0.87,0.50,62.397832,30.027159
2,10,0,2019-09-07 03:00:00,3,2019-09-07,101899.5,283.4,282.7,2.2,0.8,...,0.0,-669765.6,-121901.7,153663.1,0.0,-1.0,0.71,0.71,62.397832,30.027159
3,10,0,2019-09-07 04:00:00,4,2019-09-07,101967.3,283.5,282.8,2.5,0.7,...,68823.7,-962815.9,-168166.8,196294.5,0.0,-1.0,0.50,0.87,62.397832,30.027159
4,10,0,2019-09-07 05:00:00,5,2019-09-07,102039.0,284.3,282.9,2.2,0.5,...,426920.0,-1284768.3,-313887.0,197465.5,0.0,-1.0,0.26,0.97,62.397832,30.027159
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3705609,110,12,2023-06-12 12:00:00,240,2023-06-02 12:00:00,102111.0,294.4,281.2,2.6,2.8,...,169290816.0,-54111156.0,-50554436.0,-53278352.0,-1.0,0.0,-1.00,0.00,66.141150,24.921643
3705610,110,12,2023-06-12 18:00:00,222,2023-06-03 12:00:00,102022.2,289.2,274.7,1.5,-2.5,...,146564240.0,-47635292.0,-47567784.0,-40401064.0,-1.0,0.0,0.00,-1.00,66.141150,24.921643
3705611,110,12,2023-06-13,228,2023-06-03 12:00:00,102400.0,279.8,273.6,-0.9,-2.2,...,147131696.0,-49586568.0,-48059048.0,-39334908.0,-1.0,0.0,1.00,0.00,66.141150,24.921643
3705612,110,12,2023-06-13 06:00:00,234,2023-06-03 12:00:00,102434.4,286.9,277.2,0.5,0.7,...,151020064.0,-51603696.0,-48890048.0,-40264364.0,-1.0,0.0,0.00,1.00,66.141150,24.921643


In [4]:
param = 'SRR'
h_param = f'hourly_{param}'
df_hourly_param = pd.DataFrame(columns=['lat', 'lon', 'analysis_date', 'forecast_period', h_param])
unique_latlon_pairs = df[['lat', 'lon']].drop_duplicates()
unique_latlon_pairs

Unnamed: 0,lat,lon
0,62.397832,30.027159
336874,61.838334,25.111172
673748,60.907743,27.283698
1010622,63.891666,23.854307
1347496,61.28091,23.750442
1684370,61.044734,26.73692
2021244,62.487371,27.195678
2358118,64.879316,25.503368
2694992,64.55291,27.160815
3031866,60.907849,27.285547


In [72]:
def calculate_hourly_values(df, param):
    h_param = f'hourly_{param}'
    df_hourly_param = pd.DataFrame(columns=['lat', 'lon', 'analysis_date', 'forecast_period', h_param])
    unique_latlon_pairs = df[['lat', 'lon']].drop_duplicates()

    def process_row(row_tuple):
        index, row = row_tuple
        lat, lon = row['lat'], row['lon']
        filtered_df = df[(df['lat'] == lat) & (df['lon'] == lon)]
        analysis_dates = filtered_df['analysis_date'].unique()

        result_rows = []
        for ad in analysis_dates:
            ad_filtered_df = filtered_df[filtered_df['analysis_date'] == ad]
            for period in range(1, 241):
                if period <= 90:
                    increment = 1
                elif period <= 150:
                    increment = 3
                else:
                    increment = 6
                period_filtered_df = ad_filtered_df[ad_filtered_df['forecast_period'] == period]
                param_values = period_filtered_df[param].values

                if len(param_values) > 0:
                    param_value = param_values[0]
                    if period == 1:
                        result_rows.append({'lat': lat, 'lon': lon, 'analysis_date': ad, 'forecast_period': period, h_param: param_value})
                    else:
                        prev_period = period - increment
                        prev_period_filtered_df = ad_filtered_df[ad_filtered_df['forecast_period'] == prev_period]
                        prev_param_values = prev_period_filtered_df[param].values

                        if len(prev_param_values) > 0:
                            prev_param_value = prev_param_values[0]
                            param_difference = param_value - prev_param_value
                            param_value = param_difference / increment
                            result_rows.append({'lat': lat, 'lon': lon, 'analysis_date': ad, 'forecast_period': period, h_param: param_value})
                        else:
                            result_rows.append({'lat': lat, 'lon': lon, 'analysis_date': ad, 'forecast_period': period, h_param: 0.0})
                else:
                    result_rows.append({'lat': lat, 'lon': lon, 'analysis_date': ad, 'forecast_period': period, h_param: 0.0})

        return result_rows

    result_rows = [row for rows in map(process_row, unique_latlon_pairs.iterrows()) for row in rows]
    df_hourly_param = pd.DataFrame(result_rows)

    return df_hourly_param


In [99]:
hourly_parameters = ['STR', 'SLHF', 'SSHF']
for param in hourly_parameters:
    print(param)
    start_time = time.time()
    df_hourly_param = calculate_hourly_values(df, param)
    df = df.merge(df_hourly_param, on=['lat', 'lon', 'analysis_date', 'forecast_period'])
    end_time = time.time()
    print(f'elapsed_time: ', end_time - start_time)

STR
elapsed_time:  3428.512216091156
SLHF
elapsed_time:  3467.3728301525116
SSHF
elapsed_time:  3550.6776678562164


In [100]:
df.columns

Index(['station_id', 'analysis_time', 'forecast_time', 'forecast_period',
       'analysis_date', 'MSL', 'T2', 'D2', 'U10', 'V10', 'LCC', 'MCC', 'SKT',
       'MX2T', 'MN2T', 'T_925', 'T2_ENSMEAN_MA1', 'SRR', 'STR', 'SLHF', 'SSHF',
       'cosmonth', 'sinmonth', 'coshour', 'sinhour', 'lat', 'lon',
       'hourly_SRR', 'hourly_STR', 'hourly_SLHF', 'hourly_SSHF'],
      dtype='object')

In [101]:
df.to_csv('/home/daniel/projects/rails/data/mos_archive_data_hourly_data_for_all_rail_stations.csv', index=False)