In [None]:
"""
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.

Authors:
Roland Baatz roland.baatz @ zalf.de

Maintainers and contact:
Currently maintained by the authors.

This file requires the data set of the COSMOS Europe publication:
https://essd.copernicus.org/articles/14/1125/2022/essd-14-1125-2022.html

Copyright (C) Leibniz Centre for Agricultural Landscape Research (ZALF)
"""

In [None]:
import sys
import os
current_dir = os.getcwd()
lib_dir = os.path.join(current_dir, 'lib')
sys.path.append(lib_dir)

import numpy as np
import CRNS_library as CRNS_lib
import time
import scipy.io as sio
import pandas as pd
import matplotlib.pyplot as plt
import CRNS_optimizer as CRNS_opt
from scipy.optimize import curve_fit


def func(x, p1, p2):
    """Evaluate the two-parameter trigonometric model p1*cos(p2*x) + p2*sin(p1*x).

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the model is evaluated; passed straight to numpy.
    p1, p2 : float
        Model parameters. Each appears both as an amplitude and as a frequency.

    Returns
    -------
    Same shape as ``x`` after numpy broadcasting.

    NOTE(review): the signature matches what ``scipy.optimize.curve_fit`` expects,
    but no visible cell calls this function - confirm whether it is still needed.
    """
    cosine_term = p1 * np.cos(p2 * x)
    sine_term = p2 * np.sin(p1 * x)
    return cosine_term + sine_term
# Progress marker: report the wall-clock time at which this cell finished running.
print('\nBlock completed at time:                       '+ time.strftime("%H:%M:%S on %Y-%m-%d")) 


In [None]:
src_path = './data'


def _read_incoming_series(file_path, value_column):
    """Read one incoming cosmic-ray intensity file into a DateTime-indexed frame.

    The file is read as a headerless, ';'-separated table with two columns:
    a timestamp and one value column.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.
    value_column : str
        Name given to the value column; this is the column name that later
        appears when the frame is joined onto a site data frame.

    Returns
    -------
    pandas.DataFrame
        Single-column frame (``value_column``) indexed by 'DateTime'.
    """
    table = pd.read_csv(
        file_path,
        sep=';',
        names=['DateTime', value_column],
        parse_dates=['DateTime'],
    )
    # read_csv leaves the column as plain strings when it cannot parse it;
    # the explicit conversion makes such a file fail loudly here instead.
    table['DateTime'] = pd.to_datetime(table['DateTime'])
    return table.set_index('DateTime')


# Incoming cosmic-ray intensity references, one frame per file. The value column
# names ('Incoming', 'Oulu', ...) are the ones the per-site analysis refers to.
# NOTE(review): file names suggest hourly UTC series - confirm against the data.
Jung = _read_incoming_series(src_path+'/Scaling/JUNG1_Data_UTC.TXT', 'Incoming')
Oulu = _read_incoming_series(src_path+'/Scaling/OULU_1h_UTC.txt', 'Oulu')
Lmks = _read_incoming_series(src_path+'/Scaling/LMKS.txt', 'Lmks')
Apty = _read_incoming_series(src_path+'/Scaling/APTY_1h_UTC.txt', 'Apty')
Athens = _read_incoming_series(src_path+'/Scaling/AthensR8.53Alt260 m.txt', 'Athens')
Mxco = _read_incoming_series(src_path+'/Scaling/MXCO_1h_UTC8.28Alt2274m.txt', 'Mxco')

# Progress marker: report when this cell finished.
print('\nBlock completed at time:                       '+ time.strftime("%H:%M:%S on %Y-%m-%d"))


In [None]:
## Schäfertal ID 3 removed; and columns renamed; asterisk remarks removed; BUC001 data gaps were removed
# Station metadata is spread over two CSV tables of the COSMOS Europe data set.
fname = src_path + "/Scaling/COSMOS_Europe_Data/Additional_information.csv"
site_info = pd.read_csv(fname)
fname = src_path + "/Scaling/COSMOS_Europe_Data/General_information.csv"
site_general_info = pd.read_csv(fname)

# Per-station attributes as plain lists. The two tables are combined by row
# position below - assumes both list the stations in the same order; TODO confirm.
sites_names = site_info["StationID"].to_list()
CutoffRigidity = site_info["CutoffRigidity"].to_list()
sites_Bulkdensity = site_info["Bulkdensity"].to_list()
sites_Altitude = site_general_info["Altitude"].to_list()
sites_Precip = site_general_info["Meanannualprecipitation"].to_list()
sites_Temperature = site_general_info["Meanairtemperature"].to_list()
sites_lat = site_general_info["Latitude"].to_list()

# Result table: one row per station, addressed later by its positional index.
dfi = pd.DataFrame(
    {
        'StationID': sites_names,
        'CutoffRigidity': CutoffRigidity,
        'sites_Precip': sites_Precip,
        'sites_Temperature': sites_Temperature,
        'Bulkdensity': sites_Bulkdensity,
        'Altitude': sites_Altitude,
    }
)

# Placeholder (NaN) columns for the fitted parameters and their diagnostics;
# they are filled in per station later. The order here is the column order
# of the table and therefore of any CSV written from it.
dfi = dfi.assign(
    **dict.fromkeys(
        [
            'beta', 'beta_sd', 'beta_mae', 'beta_rmse',
            'beta_oulu', 'beta_apty', 'beta_lmks', 'beta_mxco', 'beta_athens',
            'omega', 'omega_sd',
            'omega_oulu', 'omega_apty', 'omega_mxco', 'omega_athens', 'omega_lmks',
            'omega_mae', 'omega_rmse',
            'psi', 'psi_sd',
            'psi_oulu', 'psi_apty', 'psi_athens', 'psi_mxco', 'psi_lmks',
            'psi_mae', 'psi_rmse',
            'mean_P', 'mean_N', 'number_of_days_for_regression',
        ],
        np.nan,
    )
)
print('\nBlock completed at time:                       '+ time.strftime("%H:%M:%S on %Y-%m-%d"))


In [None]:
### Iterate over EU Study sites:
# For every station, fit the parameters (beta, omega, psi) of the hourly detector
# counts against each incoming-radiation reference and store them in dfi.
# The finished table is cached as CSV; when the cache file exists it is loaded
# instead of recomputing.
plog_flag_id=0  # forwarded as plot_flag to CRNS_opt.parameter_estimator

suffix_csv = "rmse_u"  # part of the cache file name AND the default error_metric
csv_filename=f'my_dataframe_slow_{suffix_csv}-.csv'

# Percentage cut from each tail of a reference series before it is normalised.
# Mxco is trimmed harder (0.4 %) than the other references (0.1 %).
tail_trim_percent = {
    "Incoming": 0.1,
    "Apty": 0.1,
    "Oulu": 0.1,
    "Mxco": 0.4,
    "Lmks": 0.1,
    "Athens": 0.1,
}

# One entry per parameter_estimator call, in execution order:
# (reference column in DF, error_metric, suffix of the dfi result columns).
# The last entry must stay the "rmse" run: its result also supplies mean_P,
# number_of_days_for_regression and mean_N further down.
fit_runs = (
    ("Incoming", suffix_csv, ""),
    ("Apty", suffix_csv, "_apty"),
    ("Oulu", suffix_csv, "_oulu"),
    ("Mxco", suffix_csv, "_mxco"),
    ("Lmks", suffix_csv, "_lmks"),
    ("Athens", suffix_csv, "_athens"),
    ("Incoming", "mae", "_mae"),
    ("Incoming", "rmse", "_rmse"),
)


def _fit_site(site_df, incoming_column, site_name, error_metric):
    """Run CRNS_opt.parameter_estimator for one site against one reference column."""
    return CRNS_opt.parameter_estimator(
        site_df['Pressure'],
        site_df["MOD"],
        site_df[incoming_column],
        site_df["Abs_h"],
        "exp",
        plot_flag=plog_flag_id,
        site_name=site_name,
        error_metric=error_metric,
    )


if os.path.exists(csv_filename):
    # Cached results: first CSV column holds the row labels.
    dfi = pd.read_csv(csv_filename, sep=';', index_col=0)
else:
    # Fill the data frame site by site, then store it as CSV.
    for ii, nn in enumerate(sites_names):
        print("ID:",ii, "Reading:",nn)
        # Append-only progress log; written for every site, including skipped ones.
        with open("your_file_slow.txt", "a") as file:
            file.write(f'ID: {ii} Site: {nn}\n')

        if nn in ['BUC001']:
            continue

        print(ii," name: ", nn) #ID, site and fname, same
        DF = CRNS_lib.load_EU_data(src_path+'/Scaling/COSMOS_Europe_Data/',nn)
        DF['Abs_h'] = CRNS_opt.Absolute_conv(DF.RH, DF.TEMP)
        # resample to 1 hour exactly, for merging with 1h inc. radiation
        DF = DF.resample('1H').mean()

        # Guard clause: sites with too few hourly counts keep their NaN row.
        if DF['NeutronCount_Slow_Cum1h'].notna().sum() < 50:
            print("Less than 50 or no values found")
            continue

        print("There are counts for slow_cum1h")
        DF["MOD"] = DF['NeutronCount_Slow_Cum1h']

        # Attach every incoming-radiation reference on the hourly time index.
        for reference in (Jung, Apty, Oulu, Athens, Mxco, Lmks):
            DF = DF.join(reference)
        DF.index.name = 'Date'

        # Per reference column: mask values outside the trimmed percentile
        # range, then scale to a relative intensity (series divided by its mean).
        for col, trim in tail_trim_percent.items():
            lower, upper = np.percentile(DF[col].dropna(), [0 + trim, 100 - trim])
            DF[col] = DF[col].mask((DF[col] < lower) | (DF[col] > upper))
            DF[col] = DF[col]/np.nanmean(DF[col])

        # Layout of my_pars as this cell uses it: [0:3] beta, omega, psi;
        # [3:6] their *_sd values; [6] mean air pressure; [7] number of days
        # used for the regression; [8] mean N.
        for incoming_column, metric, column_suffix in fit_runs:
            my_pars = _fit_site(DF, incoming_column, nn, metric)
            dfi.loc[ii, 'beta' + column_suffix] = my_pars[0]
            dfi.loc[ii, 'omega' + column_suffix] = my_pars[1]
            dfi.loc[ii, 'psi' + column_suffix] = my_pars[2]
            if column_suffix == "":
                # Only the primary fit stores the *_sd values as well.
                dfi.loc[ii, 'beta_sd'] = my_pars[3]
                dfi.loc[ii, 'omega_sd'] = my_pars[4]
                dfi.loc[ii, 'psi_sd'] = my_pars[5]

        # my_pars now holds the result of the final ("rmse") run.
        mean_air_pressure_for_McJD = my_pars[6]
        number_of_days_for_regression = my_pars[7]

        dfi.loc[ii, 'number_of_days_for_regression'] = number_of_days_for_regression
        dfi.loc[ii, 'mean_N'] = my_pars[8]
        # Only the last two of beta_e's seven return values are stored.
        g, x, f_lat, f_bar, F_scale, beta_McJD_eff, beta_McJD = CRNS_lib.beta_e(mean_air_pressure_for_McJD,CutoffRigidity[ii],sites_lat[ii])

        dfi.loc[ii, 'beta_McJD_eff'] = beta_McJD_eff
        dfi.loc[ii, 'beta_McJD'] = beta_McJD
        dfi.loc[ii, 'mean_P'] = mean_air_pressure_for_McJD

    dfi.to_csv(csv_filename, sep=";",index=True)
print("FINISHED THIS BLOCK")