# Rank the stations according to their temporal series

## Description

This program does the following, using the repeat station and IGRF database:
- It reads the file containing the dataframe of the stations that have at least n occupations
- It creates a dataframe with only the last occupation of each station, which is used to generate the list of station names for creating the figures
- For each station, it plots the station values and the respective IGRF13 values of each component. It also adds the calculated RMSE to the figure

In [1]:
# Import modules
import mestrado_module as mm
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pathlib import Path

In [2]:
# Seaborn theme used by the figures
sns.set_style("darkgrid")

# Cut-off value: minimum number of occupations (n) a station must have
n = 12

# Folder where the figure files are saved
output_folder: Path = Path(mm.path_pipeline_06a_temporal_series_n12)

# Input: file (and its folder) with the chosen n repeat stations
n_rs_df_file: Path = Path(mm.output_5a_code_database_n_12)
n_rs_df_folder: Path = Path(mm.path_pipeline_05_rank_n_occupations)

## Read the data

In [3]:
# Load the database of the chosen repeat stations into a dataframe
df = pd.read_csv(n_rs_df_folder.joinpath(n_rs_df_file))
# df.info()  # uncomment to inspect column dtypes and non-null counts
df

Unnamed: 0,Code,Lat_dd,Lon_dd,Alt_m,Time_dy,D_dd,IGRF_D_dd,I_dd,IGRF_I_dd,F_nT,...,RMSE_D_Original_values,RMSE_I_Original_values,RMSE_F_Original_values,RMSE_H_Original_values,RMSE_X_Original_values,RMSE_X_Calculated_values,RMSE_Y_Original_values,RMSE_Y_Calculated_values,RMSE_Z_Original_values,RMSE_Z_Calculated_values
0,AM_MAN,-3.142,-59.992,39.419,1910.546,0.112,-0.467,20.065,20.467,31342.0,...,0.327,0.208,128.662,133.015,4074.033,97.073,171.983,168.497,143.647,141.949
1,AM_MAN,-3.127,-59.983,40.511,1910.567,0.123,-0.483,20.159,20.500,31333.0,...,0.327,0.208,128.662,133.015,4074.033,97.073,171.983,168.497,143.647,141.949
2,AM_MAN,-3.127,-59.983,40.511,1911.239,-0.068,-0.600,20.263,20.583,31336.0,...,0.327,0.208,128.662,133.015,4074.033,97.073,171.983,168.497,143.647,141.949
3,AM_MAN,-3.127,-59.983,40.511,1913.708,-0.587,-1.050,20.579,20.900,31493.0,...,0.327,0.208,128.662,133.015,4074.033,97.073,171.983,168.497,143.647,141.949
4,AM_MAN,-3.127,-59.983,40.511,1914.729,-0.824,-1.250,20.781,21.033,31520.0,...,0.327,0.208,128.662,133.015,4074.033,97.073,171.983,168.497,143.647,141.949
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,SP_MRL,-22.202,-49.950,622.160,1986.828,-16.023,-15.917,-24.038,-24.267,23326.0,...,0.191,0.280,121.438,77.098,8706.213,89.873,49.395,51.041,152.543,149.293
339,SP_MRL,-22.202,-49.950,622.160,1986.838,-16.121,-15.917,-24.055,-24.283,23315.0,...,0.191,0.280,121.438,77.098,8706.213,89.873,49.395,51.041,152.543,149.293
340,SP_MRL,-22.202,-49.950,622.160,1996.203,-17.364,-17.083,-26.620,-26.817,23040.0,...,0.191,0.280,121.438,77.098,8706.213,89.873,49.395,51.041,152.543,149.293
341,SP_MRL,-22.198,-49.928,602.731,2000.692,-17.975,-17.667,-27.700,-28.067,22978.0,...,0.191,0.280,121.438,77.098,8706.213,89.873,49.395,51.041,152.543,149.293


## Create a list with the code name of each station in the file and count how many stations there are

In [4]:
# One row per station: keep only the last occurrence of each code and
# renumber the rows from 0
df_aux = df.drop_duplicates(subset="Code", keep="last").reset_index(drop=True)

# Station codes in this category and how many of them there are
list_stations_n = df["Code"].unique()
stations_counting = len(list_stations_n)

print(list_stations_n)
print(stations_counting)

['AM_MAN' 'BA_CRV' 'BA_SAL' 'CE_FOR' 'GO_CAT' 'GO_GOI' 'MA_SLZ' 'MG_UBR'
 'MS_CGR' 'MS_COR' 'MS_TLG' 'MT_CUI' 'PA_SAN' 'PE_PET' 'PE_REC' 'PI_FLO'
 'PR_CUR' 'PR_FIG' 'RN_NAT' 'RO_PVL' 'RS_PAL' 'RS_RGR' 'SP_MRL']
23


## Plot the temporal series for each station in this n group

In [5]:
# Styling shared by all the figures

# Original values: marker, color, legend label, line style
orig_symbol, orig_color, orig_label, orig_linestyle = (
    "o",
    "blue",
    "Station original value",
    "-",
)

# Calculated values: marker, color, legend label, line style
calc_symbol, calc_color, calc_label, calc_linestyle = (
    "+",
    "red",
    "Station calculated value",
    "--",
)

# IGRF values: marker, color, legend label, line style
igrf_symbol, igrf_color, igrf_label, igrf_linestyle = (
    "d",
    "green",
    "IGRF13",
    "-.",
)

# Font sizes for the axis labels, the title and the tick labels
axis_label_fsize, title_fontsize, tick_size = 14, 16, 14

# Legend placement and options used when saving the figures
legend_loc = "best"
dpi_quality = 300
value_bbox_inches = "tight"

# Color of the optional shaded area between the curves
fill_color = "purple"

In [6]:
# TOTAL FIELD

# For each station code: plot the station and IGRF total field against time
# and save one figure per station (IT TAKES TIME!!)
for station_code in list_stations_n:
    # Rows of this station, used for the temporal series
    df2 = df[df["Code"] == station_code]
    time = df2["Time_dy"]

    # RMSE written in the figure title. df3 is the row of this station in
    # df_aux; the value is read by position (iloc) so the lookup does not
    # depend on the row labels of df_aux matching the station order in
    # list_stations_n.
    df3 = df_aux[df_aux["Code"] == station_code]
    f_rmse = df3["RMSE_F_Original_values"].iloc[0]

    # F field info for plot
    f_orig = df2["F_nT"]
    f_igrf = df2["IGRF_F_nT"]
    fig_f = f"{station_code}_IGRF_Total_Field.png"

    fig1, ax1 = plt.subplots()
    ax1.plot(time, f_orig, marker=orig_symbol, color=orig_color, label=orig_label, linestyle=orig_linestyle)
    ax1.plot(time, f_igrf, marker=igrf_symbol, color=igrf_color, label=igrf_label, linestyle=igrf_linestyle)
    # Optional shading between the two curves (disabled)
#    ax1.fill_between(time, f_orig, f_igrf, color=fill_color, alpha=0.3)
    ax1.set_xlabel("Time (dy)", fontsize=axis_label_fsize)
    ax1.set_ylabel("Total Field (nT)", fontsize=axis_label_fsize)
    ax1.set_title(f"{station_code}, RMSE = {f_rmse}", fontsize=title_fontsize)
    ax1.tick_params(axis="both", labelsize=tick_size)
    ax1.legend(loc=legend_loc)

    # Save through the figure object instead of pyplot's current figure,
    # then close it to release its memory
    fig1.savefig(output_folder / fig_f, dpi=dpi_quality, bbox_inches=value_bbox_inches)
    plt.close(fig1)

In [7]:
# HORIZONTAL FIELD
# For each station code: plot the station and IGRF H field against time and
# save one figure per station (IT TAKES TIME!!)
for station_code in list_stations_n:
    # Rows of this station, used for the temporal series
    df2 = df[df["Code"] == station_code]
    time = df2["Time_dy"]

    # RMSE written in the figure title. df3 is the row of this station in
    # df_aux; the value is read by position (iloc) so the lookup does not
    # depend on the row labels of df_aux matching the station order in
    # list_stations_n.
    df3 = df_aux[df_aux["Code"] == station_code]
    h_rmse = df3["RMSE_H_Original_values"].iloc[0]

    # H field info for plot
    h_orig = df2["H_nT"]
    h_igrf = df2["IGRF_H_nT"]
    fig_h = f"{station_code}_IGRF_H_Field.png"

    fig2, ax2 = plt.subplots()
    ax2.plot(time, h_orig, marker=orig_symbol, color=orig_color, label=orig_label, linestyle=orig_linestyle)
    ax2.plot(time, h_igrf, marker=igrf_symbol, color=igrf_color, label=igrf_label, linestyle=igrf_linestyle)
    # Optional shading between the two curves (disabled)
#    ax2.fill_between(time, h_orig, h_igrf, color=fill_color, alpha=0.3)
    ax2.set_xlabel("Time (dy)", fontsize=axis_label_fsize)
    ax2.set_ylabel("H Field (nT)", fontsize=axis_label_fsize)
    ax2.set_title(f"{station_code}, RMSE = {h_rmse}", fontsize=title_fontsize)
    ax2.tick_params(axis="both", labelsize=tick_size)
    ax2.legend(loc=legend_loc)

    # Save through the figure object instead of pyplot's current figure,
    # then close it to release its memory
    fig2.savefig(output_folder / fig_h, dpi=dpi_quality, bbox_inches=value_bbox_inches)
    plt.close(fig2)

In [8]:
# X FIELD
# For each station code: plot the original, calculated and IGRF X field
# against time and save one figure per station (IT TAKES TIME!!)
for station_code in list_stations_n:
    # Rows of this station, used for the temporal series
    df2 = df[df["Code"] == station_code]
    time = df2["Time_dy"]

    # RMSE values written in the figure title. df3 is the row of this station
    # in df_aux; the values are read by position (iloc) so the lookup does not
    # depend on the row labels of df_aux matching the station order in
    # list_stations_n.
    df3 = df_aux[df_aux["Code"] == station_code]
    orig_x_rmse = df3["RMSE_X_Original_values"].iloc[0]
    calc_x_rmse = df3["RMSE_X_Calculated_values"].iloc[0]

    # X field info for plot
    x_orig = df2["X_nT"]
    x_calc = df2["Calculated_X"]
    x_igrf = df2["IGRF_X_nT"]
    fig_x = f"{station_code}_IGRF_X_Field.png"

    fig3, ax3 = plt.subplots()
    ax3.plot(time, x_orig, marker=orig_symbol, color=orig_color, label=orig_label, linestyle=orig_linestyle)
    ax3.plot(time, x_calc, marker=calc_symbol, color=calc_color, label=calc_label, linestyle=calc_linestyle)
    ax3.plot(time, x_igrf, marker=igrf_symbol, color=igrf_color, label=igrf_label, linestyle=igrf_linestyle)
    # Optional shading between the original and IGRF curves (disabled)
#    ax3.fill_between(time, x_orig, x_igrf, color=fill_color, alpha=0.3)
    ax3.set_xlabel("Time (dy)", fontsize=axis_label_fsize)
    ax3.set_ylabel("X Field (nT)", fontsize=axis_label_fsize)
    ax3.set_title(f"{station_code}, RMSE Orig = {orig_x_rmse}, RMSE Calc = {calc_x_rmse}", fontsize=title_fontsize)
    ax3.tick_params(axis="both", labelsize=tick_size)
    ax3.legend(loc=legend_loc)

    # Save through the figure object instead of pyplot's current figure,
    # then close it to release its memory
    fig3.savefig(output_folder / fig_x, dpi=dpi_quality, bbox_inches=value_bbox_inches)
    plt.close(fig3)

In [9]:
# Y FIELD
# For each station code: plot the original, calculated and IGRF Y field
# against time and save one figure per station (IT TAKES TIME!!)
for station_code in list_stations_n:
    # Rows of this station, used for the temporal series
    df2 = df[df["Code"] == station_code]
    time = df2["Time_dy"]

    # RMSE values written in the figure title. df3 is the row of this station
    # in df_aux; the values are read by position (iloc) so the lookup does not
    # depend on the row labels of df_aux matching the station order in
    # list_stations_n.
    df3 = df_aux[df_aux["Code"] == station_code]
    orig_y_rmse = df3["RMSE_Y_Original_values"].iloc[0]
    calc_y_rmse = df3["RMSE_Y_Calculated_values"].iloc[0]

    # Y field info for plot
    y_orig = df2["Y_nT"]
    y_calc = df2["Calculated_Y"]
    y_igrf = df2["IGRF_Y_nT"]
    fig_y = f"{station_code}_IGRF_Y_Field.png"

    fig4, ax4 = plt.subplots()
    ax4.plot(time, y_orig, marker=orig_symbol, color=orig_color, label=orig_label, linestyle=orig_linestyle)
    ax4.plot(time, y_calc, marker=calc_symbol, color=calc_color, label=calc_label, linestyle=calc_linestyle)
    ax4.plot(time, y_igrf, marker=igrf_symbol, color=igrf_color, label=igrf_label, linestyle=igrf_linestyle)
    # Optional shading between the original and IGRF curves (disabled)
#    ax4.fill_between(time, y_orig, y_igrf, color=fill_color, alpha=0.3)
    ax4.set_xlabel("Time (dy)", fontsize=axis_label_fsize)
    ax4.set_ylabel("Y Field (nT)", fontsize=axis_label_fsize)
    ax4.set_title(f"{station_code},  RMSE Orig = {orig_y_rmse}, RMSE Calc = {calc_y_rmse}", fontsize=title_fontsize)
    ax4.tick_params(axis="both", labelsize=tick_size)
    ax4.legend(loc=legend_loc)

    # Save through the figure object instead of pyplot's current figure,
    # then close it to release its memory
    fig4.savefig(output_folder / fig_y, dpi=dpi_quality, bbox_inches=value_bbox_inches)
    plt.close(fig4)

In [10]:
# Z FIELD
# For each station code: plot the original, calculated and IGRF Z field
# against time and save one figure per station (IT TAKES TIME!!)
for station_code in list_stations_n:
    # Rows of this station, used for the temporal series
    df2 = df[df["Code"] == station_code]
    time = df2["Time_dy"]

    # RMSE values written in the figure title. df3 is the row of this station
    # in df_aux; the values are read by position (iloc) so the lookup does not
    # depend on the row labels of df_aux matching the station order in
    # list_stations_n.
    df3 = df_aux[df_aux["Code"] == station_code]
    orig_z_rmse = df3["RMSE_Z_Original_values"].iloc[0]
    calc_z_rmse = df3["RMSE_Z_Calculated_values"].iloc[0]

    # Z field info for plot
    z_orig = df2["Z_nT"]
    z_calc = df2["Calculated_Z"]
    z_igrf = df2["IGRF_Z_nT"]
    fig_z = f"{station_code}_IGRF_Z_Field.png"

    fig5, ax5 = plt.subplots()
    ax5.plot(time, z_orig, marker=orig_symbol, color=orig_color, label=orig_label, linestyle=orig_linestyle)
    ax5.plot(time, z_calc, marker=calc_symbol, color=calc_color, label=calc_label, linestyle=calc_linestyle)
    ax5.plot(time, z_igrf, marker=igrf_symbol, color=igrf_color, label=igrf_label, linestyle=igrf_linestyle)
    # Optional shading between the original and IGRF curves (disabled)
#    ax5.fill_between(time, z_orig, z_igrf, color=fill_color, alpha=0.3)
    ax5.set_xlabel("Time (dy)", fontsize=axis_label_fsize)
    ax5.set_ylabel("Z Field (nT)", fontsize=axis_label_fsize)
    ax5.set_title(f"{station_code},  RMSE Orig = {orig_z_rmse}, RMSE Calc = {calc_z_rmse}", fontsize=title_fontsize)
    ax5.tick_params(axis="both", labelsize=tick_size)
    ax5.legend(loc=legend_loc)

    # Save through the figure object instead of pyplot's current figure,
    # then close it to release its memory
    fig5.savefig(output_folder / fig_z, dpi=dpi_quality, bbox_inches=value_bbox_inches)
    plt.close(fig5)

In [11]:
# INCLINATION FIELD
# For each station code: plot the station and IGRF inclination against time
# and save one figure per station (IT TAKES TIME!!)
for station_code in list_stations_n:
    # Rows of this station, used for the temporal series
    df2 = df[df["Code"] == station_code]
    time = df2["Time_dy"]

    # RMSE written in the figure title. df3 is the row of this station in
    # df_aux; the value is read by position (iloc) so the lookup does not
    # depend on the row labels of df_aux matching the station order in
    # list_stations_n.
    df3 = df_aux[df_aux["Code"] == station_code]
    inc_rmse = df3["RMSE_I_Original_values"].iloc[0]

    # Inclination info for plot
    i_orig = df2["I_dd"]
    i_igrf = df2["IGRF_I_dd"]
    fig_inc = f"{station_code}_IGRF_Inclination.png"

    fig6, ax6 = plt.subplots()
    ax6.plot(time, i_orig, marker=orig_symbol, color=orig_color, label=orig_label, linestyle=orig_linestyle)
    ax6.plot(time, i_igrf, marker=igrf_symbol, color=igrf_color, label=igrf_label, linestyle=igrf_linestyle)
    # Optional shading between the two curves (disabled)
#    ax6.fill_between(time, i_orig, i_igrf, color = fill_color, alpha=0.3)
    ax6.set_xlabel("Time (dy)", fontsize=axis_label_fsize)
    ax6.set_ylabel("Inclination (dd)", fontsize=axis_label_fsize)
    ax6.set_title(f"{station_code}, RMSE = {inc_rmse}", fontsize=title_fontsize)
    ax6.tick_params(axis="both", labelsize=tick_size)
    ax6.legend(loc=legend_loc)

    # Save through the figure object instead of pyplot's current figure,
    # then close it to release its memory
    fig6.savefig(output_folder / fig_inc, dpi=dpi_quality, bbox_inches=value_bbox_inches)
    plt.close(fig6)

In [12]:
# DECLINATION FIELD
# For each station code: plot the station and IGRF declination against time
# and save one figure per station (IT TAKES TIME!!)
for station_code in list_stations_n:
    # Rows of this station, used for the temporal series
    df2 = df[df["Code"] == station_code]
    time = df2["Time_dy"]

    # RMSE written in the figure title. df3 is the row of this station in
    # df_aux; the value is read by position (iloc) so the lookup does not
    # depend on the row labels of df_aux matching the station order in
    # list_stations_n.
    df3 = df_aux[df_aux["Code"] == station_code]
    dec_rmse = df3["RMSE_D_Original_values"].iloc[0]

    # Declination info for plot
    d_orig = df2["D_dd"]
    d_igrf = df2["IGRF_D_dd"]
    fig_dec = f"{station_code}_IGRF_Declination.png"

    fig7, ax7 = plt.subplots()
    ax7.plot(time, d_orig, marker=orig_symbol, color=orig_color, label=orig_label, linestyle=orig_linestyle)
    ax7.plot(time, d_igrf, marker=igrf_symbol, color=igrf_color, label=igrf_label, linestyle=igrf_linestyle)
    # Optional shading between the two curves (disabled)
#    ax7.fill_between(time, d_orig, d_igrf, color = fill_color, alpha = 0.3)
    ax7.set_xlabel("Time (dy)", fontsize=axis_label_fsize)
    ax7.set_ylabel("Declination (dd)", fontsize=axis_label_fsize)
    ax7.set_title(f"{station_code}, RMSE = {dec_rmse}", fontsize=title_fontsize)
    ax7.tick_params(axis="both", labelsize=tick_size)
    ax7.legend(loc=legend_loc)

    # Save through the figure object instead of pyplot's current figure,
    # then close it to release its memory
    fig7.savefig(output_folder / fig_dec, dpi=dpi_quality, bbox_inches=value_bbox_inches)
    plt.close(fig7)