# Repeat station database creation

## Description

Purpose: this program creates the repeat station database. In order to create the database, this program does the following:
- It inserts the altitude values obtained from the ICGEM website 
- It calculates the X, Y and Z components from F, I and D (the observed Y and Z columns have missing values) using the following equations: 
 1. $X = F \cos I \cos D$
 2. $Y = F \cos I \sin D$
 3. $Z = F \sin I$
 
- It calculates the distance between a station and the VSS and TTB observatories to see which is closer to the station. Then, it adds this info to the database.
- It calculates how many times each station has been occupied over time and associates that value with the station code in the dataframe

ATTENTION: IT IS UNCERTAIN IF THE REPEAT STATION DATA FROM 1980 TO 2019 WENT THROUGH THE NECESSARY PROCESSING STEPS TO REMOVE THE EXTERNAL EFFECTS FROM THE MAGNETIC FIELD IN ORDER TO USE IT TO STUDY SECULAR VARIATION.

In [1]:
# Import modules
import pandas as pd
import numpy as np
import mestrado_module as mm
import matplotlib.pyplot as plt
from pathlib import Path

In [2]:
# Pipeline folders (locations are defined in mestrado_module), in pipeline order
input_folder: Path = Path(mm.path_pipeline_01_data_processing)
altitude_folder: Path = Path(mm.path_pipeline_02_icgem_file)
output_folder: Path = Path(mm.path_pipeline_03_rs_database_creation)

# Input files: processed station data and the ICGEM output with the altitudes
input_file: Path = Path(mm.output_1b_code_processed_data)
icgem_file: Path = Path("EIGEN-6C4_output_manual.csv")

# Output files: occupation list and repeat station database
ocp_list: Path = Path(mm.output_3_code_ocp_list)
output_file: Path = Path(mm.output_3_code_rs_database)

## Read data

In [3]:
# Read the processed station data from the input folder and display it
rs_data_path = input_folder / input_file
df = pd.read_csv(rs_data_path)
df

Unnamed: 0,Code,Lat_dd,Lon_dd,Time_dy,D_dd,I_dd,H_nT,F_nT,X_nT,Y_nT,Z_nT,State,Region,RS_name
0,AC_CZS,-7.637,-72.670,1958.529,2.683,11.281,29097.0,29671.0,29065.0,1362.0,5804.0,AC,N,CRUZEIRO DO SUL
1,AC_CZS,-7.637,-72.670,1965.848,1.824,11.277,28662.0,29227.0,28648.0,912.0,5715.0,AC,N,CRUZEIRO DO SUL
2,AC_CZS,-7.620,-72.670,1978.640,-0.035,11.026,27835.0,28359.0,27835.0,-17.0,5423.0,AC,N,CRUZEIRO DO SUL
3,AC_CZS,-7.599,-72.770,1986.279,-1.343,10.685,27402.0,27886.0,27394.0,-642.0,5170.0,AC,N,CRUZEIRO DO SUL
4,AC_CZS,-7.599,-72.770,1989.503,-1.847,10.468,27222.0,27683.0,27208.0,-877.0,5029.0,AC,N,CRUZEIRO DO SUL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077,TO_PNL,-10.727,-48.408,1985.119,-18.142,-7.084,24938.0,25130.0,23698.0,-7765.0,-3099.0,TO,N,PORTO NACIONAL
1078,TO_PNL,-10.727,-48.408,1986.670,-18.325,-7.556,24865.0,25083.0,23604.0,-7817.0,-3298.0,TO,N,PORTO NACIONAL
1079,TO_PNL,-10.727,-48.408,1995.817,-19.315,-10.562,24358.0,24778.0,22987.0,-8056.0,-4541.0,TO,N,PORTO NACIONAL
1080,TO_PNL,-10.721,-48.401,2003.702,-20.117,-13.008,23896.0,24526.0,22438.0,-8218.0,-5520.0,TO,N,PORTO NACIONAL


In [4]:
# Summarize the loaded frame: column names, dtypes and non-null counts.
# A column whose non-null count is below the number of entries has missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1082 entries, 0 to 1081
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Code     1082 non-null   object 
 1   Lat_dd   1082 non-null   float64
 2   Lon_dd   1082 non-null   float64
 3   Time_dy  1082 non-null   float64
 4   D_dd     1082 non-null   float64
 5   I_dd     1082 non-null   float64
 6   H_nT     1082 non-null   float64
 7   F_nT     1082 non-null   float64
 8   X_nT     1082 non-null   float64
 9   Y_nT     1000 non-null   float64
 10  Z_nT     999 non-null    float64
 11  State    1082 non-null   object 
 12  Region   1082 non-null   object 
 13  RS_name  1082 non-null   object 
dtypes: float64(10), object(4)
memory usage: 118.5+ KB


## Insert altitude values into the database

In [5]:
# Read the ICGEM output file that holds the altitude values and display it
icgem_path = altitude_folder / icgem_file
df_alt = pd.read_csv(icgem_path)
df_alt

Unnamed: 0,index,lon,lat,h_over_ell_meter,h_topo_over_ell_meter,h_topo_over_geoid_meter
0,1,-72.670,-7.637,0.0,206.342584,182.4640
1,2,-72.670,-7.637,0.0,206.342584,182.4640
2,3,-72.670,-7.620,0.0,219.479090,195.6000
3,4,-72.770,-7.599,0.0,220.109149,196.5080
4,5,-72.770,-7.599,0.0,220.109149,196.5080
...,...,...,...,...,...,...
1077,1078,-48.408,-10.727,0.0,224.275592,240.2992
1078,1079,-48.408,-10.727,0.0,224.275592,240.2992
1079,1080,-48.408,-10.727,0.0,224.275592,240.2992
1080,1081,-48.401,-10.721,0.0,240.805661,256.8352


In [6]:
# Altitude stored in the database: the "h_topo_over_geoid_meter" column of the
# ICGEM file (the file also offers "h_topo_over_ell_meter" as an alternative).
alt = df_alt["h_topo_over_geoid_meter"]

# The altitudes are attached by row position, so the ICGEM table must describe
# exactly the same records, in the same order, as the station data.
if len(df_alt) != len(df):
    raise ValueError(
        f"ICGEM file has {len(df_alt)} rows but the station data has {len(df)} rows"
    )

# Coordinates are compared with a 0.001 degree tolerance
coords_match = np.isclose(
    df_alt["lat"].to_numpy(), df["Lat_dd"].to_numpy(), rtol=0, atol=1e-3
) & np.isclose(
    df_alt["lon"].to_numpy(), df["Lon_dd"].to_numpy(), rtol=0, atol=1e-3
)
if not coords_match.all():
    raise ValueError(
        f"{int((~coords_match).sum())} ICGEM rows do not match the station "
        "coordinates; the altitudes would be assigned to the wrong records"
    )

# Work on a copy: `df` keeps the data exactly as loaded and this cell can be
# re-run (inserting into `df` itself fails the second time because the
# "Alt_m" column already exists).
df_alt_inserted = df.copy()

# Place the altitude right after the coordinates (Code, Lat_dd, Lon_dd, Alt_m, ...)
df_alt_inserted.insert(3, "Alt_m", alt.to_numpy())

# Visualize the new dataframe with the altitude values added
df_alt_inserted

Unnamed: 0,Code,Lat_dd,Lon_dd,Alt_m,Time_dy,D_dd,I_dd,H_nT,F_nT,X_nT,Y_nT,Z_nT,State,Region,RS_name
0,AC_CZS,-7.637,-72.670,182.4640,1958.529,2.683,11.281,29097.0,29671.0,29065.0,1362.0,5804.0,AC,N,CRUZEIRO DO SUL
1,AC_CZS,-7.637,-72.670,182.4640,1965.848,1.824,11.277,28662.0,29227.0,28648.0,912.0,5715.0,AC,N,CRUZEIRO DO SUL
2,AC_CZS,-7.620,-72.670,195.6000,1978.640,-0.035,11.026,27835.0,28359.0,27835.0,-17.0,5423.0,AC,N,CRUZEIRO DO SUL
3,AC_CZS,-7.599,-72.770,196.5080,1986.279,-1.343,10.685,27402.0,27886.0,27394.0,-642.0,5170.0,AC,N,CRUZEIRO DO SUL
4,AC_CZS,-7.599,-72.770,196.5080,1989.503,-1.847,10.468,27222.0,27683.0,27208.0,-877.0,5029.0,AC,N,CRUZEIRO DO SUL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077,TO_PNL,-10.727,-48.408,240.2992,1985.119,-18.142,-7.084,24938.0,25130.0,23698.0,-7765.0,-3099.0,TO,N,PORTO NACIONAL
1078,TO_PNL,-10.727,-48.408,240.2992,1986.670,-18.325,-7.556,24865.0,25083.0,23604.0,-7817.0,-3298.0,TO,N,PORTO NACIONAL
1079,TO_PNL,-10.727,-48.408,240.2992,1995.817,-19.315,-10.562,24358.0,24778.0,22987.0,-8056.0,-4541.0,TO,N,PORTO NACIONAL
1080,TO_PNL,-10.721,-48.401,256.8352,2003.702,-20.117,-13.008,23896.0,24526.0,22438.0,-8218.0,-5520.0,TO,N,PORTO NACIONAL


## X component calculation

In [7]:
# Frame to work on (same object as `df_alt_inserted`, which already has the altitude)
df_aux01 = df_alt_inserted

# Inputs of the equation X = F cos I cos D
dx = df_aux01["D_dd"]  # D, decimal degrees
ix = df_aux01["I_dd"]  # I, decimal degrees
fx = df_aux01["F_nT"]  # F, nT

# Convert the angles from decimal degrees to radians (NumPy functions operate
# on the whole Series at once, so no element-wise `apply` is needed)
ix_rad = np.radians(ix)
dx_rad = np.radians(dx)

# cos I and cos D
cos_ix_rad = np.cos(ix_rad)
cos_dx_rad = np.cos(dx_rad)

# Solve the equation: X = F cos I cos D
calc_x = fx * cos_ix_rad * cos_dx_rad
print(calc_x)

0       29065.847181
1       28648.198288
2       27835.501733
3       27394.967537
4       27208.115808
            ...     
1077    23698.435287
1078    23604.245135
1079    22987.174124
1080    22438.749907
1081    25156.679550
Length: 1082, dtype: float64


## Y component calculation

In [8]:
# Inputs of the equation Y = F cos I sin D
dy = df_aux01["D_dd"]
iy = df_aux01["I_dd"]
fy = df_aux01["F_nT"]

# Angles: decimal degrees -> radians
iy_rad = np.radians(iy)
dy_rad = np.radians(dy)

# cos I and sin D
cos_iy_rad = np.cos(iy_rad)
sin_dy_rad = np.sin(dy_rad)

# Y = F cos I sin D
calc_y = fy * cos_iy_rad * sin_dy_rad
print(calc_y)

0       1362.067489
1        912.318041
2        -17.003743
3       -642.249416
4       -877.391056
           ...     
1077   -7765.074945
1078   -7817.785140
1079   -8056.750570
1080   -8218.966515
1081   -7283.938967
Length: 1082, dtype: float64


## Z component calculation

In [9]:
# Inputs of the equation Z = F sin I
iz = df_aux01["I_dd"]
fz = df_aux01["F_nT"]

# Angle: decimal degrees -> radians
iz_rad = np.radians(iz)

# sin I
sin_iz_rad = np.sin(iz_rad)

# Z = F sin I
calc_z = fz * sin_iz_rad
print(calc_z)

0       5804.269188
1       5715.412465
2       5423.784210
3       5170.325458
4       5029.623060
           ...     
1077   -3099.141181
1078   -3298.292909
1079   -4541.792602
1080   -5520.486212
1081      42.510442
Length: 1082, dtype: float64


## Comparison between calculated and observed values for the X, Y and Z components

In [10]:
# Build a new frame that holds the calculated components next to the observed
# ones. `assign` returns a new DataFrame, so `df_aux01` (and the frames it
# aliases) are not modified by this cell.
df_comparison_calculated_obs = df_aux01.assign(
    Calculated_X=calc_x,
    Calculated_Y=calc_y,
    Calculated_Z=calc_z,
)
df_comparison_calculated_obs

Unnamed: 0,Code,Lat_dd,Lon_dd,Alt_m,Time_dy,D_dd,I_dd,H_nT,F_nT,X_nT,Y_nT,Z_nT,State,Region,RS_name,Calculated_X,Calculated_Y,Calculated_Z
0,AC_CZS,-7.637,-72.670,182.4640,1958.529,2.683,11.281,29097.0,29671.0,29065.0,1362.0,5804.0,AC,N,CRUZEIRO DO SUL,29065.847181,1362.067489,5804.269188
1,AC_CZS,-7.637,-72.670,182.4640,1965.848,1.824,11.277,28662.0,29227.0,28648.0,912.0,5715.0,AC,N,CRUZEIRO DO SUL,28648.198288,912.318041,5715.412465
2,AC_CZS,-7.620,-72.670,195.6000,1978.640,-0.035,11.026,27835.0,28359.0,27835.0,-17.0,5423.0,AC,N,CRUZEIRO DO SUL,27835.501733,-17.003743,5423.784210
3,AC_CZS,-7.599,-72.770,196.5080,1986.279,-1.343,10.685,27402.0,27886.0,27394.0,-642.0,5170.0,AC,N,CRUZEIRO DO SUL,27394.967537,-642.249416,5170.325458
4,AC_CZS,-7.599,-72.770,196.5080,1989.503,-1.847,10.468,27222.0,27683.0,27208.0,-877.0,5029.0,AC,N,CRUZEIRO DO SUL,27208.115808,-877.391056,5029.623060
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077,TO_PNL,-10.727,-48.408,240.2992,1985.119,-18.142,-7.084,24938.0,25130.0,23698.0,-7765.0,-3099.0,TO,N,PORTO NACIONAL,23698.435287,-7765.074945,-3099.141181
1078,TO_PNL,-10.727,-48.408,240.2992,1986.670,-18.325,-7.556,24865.0,25083.0,23604.0,-7817.0,-3298.0,TO,N,PORTO NACIONAL,23604.245135,-7817.785140,-3298.292909
1079,TO_PNL,-10.727,-48.408,240.2992,1995.817,-19.315,-10.562,24358.0,24778.0,22987.0,-8056.0,-4541.0,TO,N,PORTO NACIONAL,22987.174124,-8056.750570,-4541.792602
1080,TO_PNL,-10.721,-48.401,256.8352,2003.702,-20.117,-13.008,23896.0,24526.0,22438.0,-8218.0,-5520.0,TO,N,PORTO NACIONAL,22438.749907,-8218.966515,-5520.486212


In [11]:
# RMSE between the calculated and the observed value of each component
obs_x = df_comparison_calculated_obs["X_nT"]
obs_y = df_comparison_calculated_obs["Y_nT"]
obs_z = df_comparison_calculated_obs["Z_nT"]

# Y_nT and Z_nT have missing observations, so each component is compared only
# on the rows where an observed value exists. This keeps the result independent
# of how mm.rmse treats NaN.
has_x = obs_x.notna()
has_y = obs_y.notna()
has_z = obs_z.notna()

rmse_x = mm.rmse(calc_x[has_x], obs_x[has_x])
rmse_y = mm.rmse(calc_y[has_y], obs_y[has_y])
rmse_z = mm.rmse(calc_z[has_z], obs_z[has_z])

# Show the three values; without this the cell produces no output
pd.Series({"X_nT": rmse_x, "Y_nT": rmse_y, "Z_nT": rmse_z}, name="RMSE")

## Calculate distances between a station and an Observatory using the Haversine formula

In [12]:
# Frame to work on (same object as the comparison frame)
df_dist_calc = df_comparison_calculated_obs

# Observatory coordinates in degrees, as stored in mestrado_module
TTB_lat, TTB_lon = mm.TTB_lat, mm.TTB_lon
VSS_lat, VSS_lon = mm.VSS_lat, mm.VSS_lon

# Station coordinates
station_lon = df_dist_calc["Lon_dd"]
station_lat = df_dist_calc["Lat_dd"]

# Distance from every station record to each observatory
TTB_distances = mm.haversine_array(station_lon, station_lat, TTB_lon, TTB_lat)
VSS_distances = mm.haversine_array(station_lon, station_lat, VSS_lon, VSS_lat)

# Store both distances as new columns
df_dist = df_dist_calc
df_dist["TTB_distances_km"] = TTB_distances
df_dist["VSS_distances_km"] = VSS_distances

df_dist

Unnamed: 0,Code,Lat_dd,Lon_dd,Alt_m,Time_dy,D_dd,I_dd,H_nT,F_nT,X_nT,Y_nT,Z_nT,State,Region,RS_name,Calculated_X,Calculated_Y,Calculated_Z,TTB_distances_km,VSS_distances_km
0,AC_CZS,-7.637,-72.670,182.4640,1958.529,2.683,11.281,29097.0,29671.0,29065.0,1362.0,5804.0,AC,N,CRUZEIRO DO SUL,29065.847181,1362.067489,5804.269188,2770.493823,3511.151310
1,AC_CZS,-7.637,-72.670,182.4640,1965.848,1.824,11.277,28662.0,29227.0,28648.0,912.0,5715.0,AC,N,CRUZEIRO DO SUL,28648.198288,912.318041,5715.412465,2770.493823,3511.151310
2,AC_CZS,-7.620,-72.670,195.6000,1978.640,-0.035,11.026,27835.0,28359.0,27835.0,-17.0,5423.0,AC,N,CRUZEIRO DO SUL,27835.501733,-17.003743,5423.784210,2770.043946,3512.127230
3,AC_CZS,-7.599,-72.770,196.5080,1986.279,-1.343,10.685,27402.0,27886.0,27394.0,-642.0,5170.0,AC,N,CRUZEIRO DO SUL,27394.967537,-642.249416,5170.325458,2780.199210,3522.772456
4,AC_CZS,-7.599,-72.770,196.5080,1989.503,-1.847,10.468,27222.0,27683.0,27208.0,-877.0,5029.0,AC,N,CRUZEIRO DO SUL,27208.115808,-877.391056,5029.623060,2780.199210,3522.772456
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077,TO_PNL,-10.727,-48.408,240.2992,1985.119,-18.142,-7.084,24938.0,25130.0,23698.0,-7765.0,-3099.0,TO,N,PORTO NACIONAL,23698.435287,-7765.074945,-3099.141181,1058.861616,1393.115514
1078,TO_PNL,-10.727,-48.408,240.2992,1986.670,-18.325,-7.556,24865.0,25083.0,23604.0,-7817.0,-3298.0,TO,N,PORTO NACIONAL,23604.245135,-7817.785140,-3298.292909,1058.861616,1393.115514
1079,TO_PNL,-10.727,-48.408,240.2992,1995.817,-19.315,-10.562,24358.0,24778.0,22987.0,-8056.0,-4541.0,TO,N,PORTO NACIONAL,22987.174124,-8056.750570,-4541.792602,1058.861616,1393.115514
1080,TO_PNL,-10.721,-48.401,256.8352,2003.702,-20.117,-13.008,23896.0,24526.0,22438.0,-8218.0,-5520.0,TO,N,PORTO NACIONAL,22438.749907,-8218.966515,-5520.486212,1058.203245,1393.469547


## Determine which observatory is closer to each station

In [13]:
# Distances from each record to the two observatories
ttb_km = df_dist["TTB_distances_km"]
vss_km = df_dist["VSS_distances_km"]

# Label every record with the nearer observatory; rows where neither distance
# is strictly smaller get "Tie"
df_dist["Closest_OBS"] = np.where(
    ttb_km < vss_km,
    "TTB",
    np.where(ttb_km > vss_km, "VSS", "Tie"),
)

df_dist

Unnamed: 0,Code,Lat_dd,Lon_dd,Alt_m,Time_dy,D_dd,I_dd,H_nT,F_nT,X_nT,...,Z_nT,State,Region,RS_name,Calculated_X,Calculated_Y,Calculated_Z,TTB_distances_km,VSS_distances_km,Closest_OBS
0,AC_CZS,-7.637,-72.670,182.4640,1958.529,2.683,11.281,29097.0,29671.0,29065.0,...,5804.0,AC,N,CRUZEIRO DO SUL,29065.847181,1362.067489,5804.269188,2770.493823,3511.151310,TTB
1,AC_CZS,-7.637,-72.670,182.4640,1965.848,1.824,11.277,28662.0,29227.0,28648.0,...,5715.0,AC,N,CRUZEIRO DO SUL,28648.198288,912.318041,5715.412465,2770.493823,3511.151310,TTB
2,AC_CZS,-7.620,-72.670,195.6000,1978.640,-0.035,11.026,27835.0,28359.0,27835.0,...,5423.0,AC,N,CRUZEIRO DO SUL,27835.501733,-17.003743,5423.784210,2770.043946,3512.127230,TTB
3,AC_CZS,-7.599,-72.770,196.5080,1986.279,-1.343,10.685,27402.0,27886.0,27394.0,...,5170.0,AC,N,CRUZEIRO DO SUL,27394.967537,-642.249416,5170.325458,2780.199210,3522.772456,TTB
4,AC_CZS,-7.599,-72.770,196.5080,1989.503,-1.847,10.468,27222.0,27683.0,27208.0,...,5029.0,AC,N,CRUZEIRO DO SUL,27208.115808,-877.391056,5029.623060,2780.199210,3522.772456,TTB
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077,TO_PNL,-10.727,-48.408,240.2992,1985.119,-18.142,-7.084,24938.0,25130.0,23698.0,...,-3099.0,TO,N,PORTO NACIONAL,23698.435287,-7765.074945,-3099.141181,1058.861616,1393.115514,TTB
1078,TO_PNL,-10.727,-48.408,240.2992,1986.670,-18.325,-7.556,24865.0,25083.0,23604.0,...,-3298.0,TO,N,PORTO NACIONAL,23604.245135,-7817.785140,-3298.292909,1058.861616,1393.115514,TTB
1079,TO_PNL,-10.727,-48.408,240.2992,1995.817,-19.315,-10.562,24358.0,24778.0,22987.0,...,-4541.0,TO,N,PORTO NACIONAL,22987.174124,-8056.750570,-4541.792602,1058.861616,1393.115514,TTB
1080,TO_PNL,-10.721,-48.401,256.8352,2003.702,-20.117,-13.008,23896.0,24526.0,22438.0,...,-5520.0,TO,N,PORTO NACIONAL,22438.749907,-8218.966515,-5520.486212,1058.203245,1393.469547,TTB


## Count the number of occupations for each station

In [14]:
# Dataframe to work on
df_count = df_dist

# Number of rows (occupations) per station code; the result is indexed by code
freq_series = df_count.groupby("Code").size()

# Take the codes and the counts from the SAME Series so that position i of one
# array always refers to position i of the other. (groupby sorts the codes,
# whereas `Code.unique()` keeps the order of appearance, so mixing the two
# pairs codes with the wrong counts whenever the frame is not sorted by code.)
rs_names_array = freq_series.index.to_numpy()
rs_freq_array = freq_series.to_numpy()

# Every row must be counted exactly once
assert rs_freq_array.sum() == len(df_count), "some rows were not counted"

# One row per station: code and number of occupations
rs_freq_info = pd.DataFrame(
    {"RS_code": rs_names_array, "N_occupations": rs_freq_array}
)
rs_freq_info

<class 'pandas.core.series.Series'>
<class 'numpy.ndarray'> 218
<class 'numpy.ndarray'> 218


Unnamed: 0,RS_code,N_occupations
0,AC_CZS,6
1,AC_EPC,2
2,AC_EPC_B,1
3,AC_RBC,10
4,AL_MCO,9
...,...,...
213,SP_SJC,8
214,SP_SPO,8
215,SP_TAU,5
216,TO_PNL,9


In [15]:
# Write the per-station occupation counts to the output folder as CSV.
# index=False leaves the row index out; float_format only affects float columns.
rs_freq_info.to_csv(output_folder / ocp_list, index=False, float_format="%.3f")

## Add a column to the dataframe with the number of occupations for each station, based on the count done before

In [16]:
# `df_ocp` is another name for `df_count` (plain assignment, no copy is made),
# so the column below is added to that same frame.
# "N_occupations" starts at 0 as a placeholder; the real per-station value is
# written over it afterwards.
df_ocp = df_count
df_ocp["N_occupations"] = 0 

In [17]:
# Give every row the number of rows that share its station code. A single
# grouped transform replaces the Python loop over stations and does not depend
# on separately built name/frequency arrays being in the same order.
df_ocp["N_occupations"] = df_ocp.groupby("Code")["Code"].transform("size")

df_ocp

Unnamed: 0,Code,Lat_dd,Lon_dd,Alt_m,Time_dy,D_dd,I_dd,H_nT,F_nT,X_nT,...,State,Region,RS_name,Calculated_X,Calculated_Y,Calculated_Z,TTB_distances_km,VSS_distances_km,Closest_OBS,N_occupations
0,AC_CZS,-7.637,-72.670,182.4640,1958.529,2.683,11.281,29097.0,29671.0,29065.0,...,AC,N,CRUZEIRO DO SUL,29065.847181,1362.067489,5804.269188,2770.493823,3511.151310,TTB,6
1,AC_CZS,-7.637,-72.670,182.4640,1965.848,1.824,11.277,28662.0,29227.0,28648.0,...,AC,N,CRUZEIRO DO SUL,28648.198288,912.318041,5715.412465,2770.493823,3511.151310,TTB,6
2,AC_CZS,-7.620,-72.670,195.6000,1978.640,-0.035,11.026,27835.0,28359.0,27835.0,...,AC,N,CRUZEIRO DO SUL,27835.501733,-17.003743,5423.784210,2770.043946,3512.127230,TTB,6
3,AC_CZS,-7.599,-72.770,196.5080,1986.279,-1.343,10.685,27402.0,27886.0,27394.0,...,AC,N,CRUZEIRO DO SUL,27394.967537,-642.249416,5170.325458,2780.199210,3522.772456,TTB,6
4,AC_CZS,-7.599,-72.770,196.5080,1989.503,-1.847,10.468,27222.0,27683.0,27208.0,...,AC,N,CRUZEIRO DO SUL,27208.115808,-877.391056,5029.623060,2780.199210,3522.772456,TTB,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077,TO_PNL,-10.727,-48.408,240.2992,1985.119,-18.142,-7.084,24938.0,25130.0,23698.0,...,TO,N,PORTO NACIONAL,23698.435287,-7765.074945,-3099.141181,1058.861616,1393.115514,TTB,9
1078,TO_PNL,-10.727,-48.408,240.2992,1986.670,-18.325,-7.556,24865.0,25083.0,23604.0,...,TO,N,PORTO NACIONAL,23604.245135,-7817.785140,-3298.292909,1058.861616,1393.115514,TTB,9
1079,TO_PNL,-10.727,-48.408,240.2992,1995.817,-19.315,-10.562,24358.0,24778.0,22987.0,...,TO,N,PORTO NACIONAL,22987.174124,-8056.750570,-4541.792602,1058.861616,1393.115514,TTB,9
1080,TO_PNL,-10.721,-48.401,256.8352,2003.702,-20.117,-13.008,23896.0,24526.0,22438.0,...,TO,N,PORTO NACIONAL,22438.749907,-8218.966515,-5520.486212,1058.203245,1393.469547,TTB,9


In [18]:
## Save the repeat station database

In [19]:
# Column layout of the main database; the header order follows the IGRF
# software calculator
FINAL_COLUMN_ORDER = [
    "Code", "Lat_dd", "Lon_dd", "Alt_m", "Time_dy",
    "D_dd", "I_dd", "H_nT", "X_nT", "Y_nT", "Z_nT", "F_nT",
    "State", "Region", "RS_name",
    "Calculated_X", "Calculated_Y", "Calculated_Z",
    "TTB_distances_km", "VSS_distances_km", "Closest_OBS",
    "N_occupations",
]

# Main database: the working frame with its columns in the order above
df_final = df_ocp[FINAL_COLUMN_ORDER]
df_final

Unnamed: 0,Code,Lat_dd,Lon_dd,Alt_m,Time_dy,D_dd,I_dd,H_nT,X_nT,Y_nT,...,State,Region,RS_name,Calculated_X,Calculated_Y,Calculated_Z,TTB_distances_km,VSS_distances_km,Closest_OBS,N_occupations
0,AC_CZS,-7.637,-72.670,182.4640,1958.529,2.683,11.281,29097.0,29065.0,1362.0,...,AC,N,CRUZEIRO DO SUL,29065.847181,1362.067489,5804.269188,2770.493823,3511.151310,TTB,6
1,AC_CZS,-7.637,-72.670,182.4640,1965.848,1.824,11.277,28662.0,28648.0,912.0,...,AC,N,CRUZEIRO DO SUL,28648.198288,912.318041,5715.412465,2770.493823,3511.151310,TTB,6
2,AC_CZS,-7.620,-72.670,195.6000,1978.640,-0.035,11.026,27835.0,27835.0,-17.0,...,AC,N,CRUZEIRO DO SUL,27835.501733,-17.003743,5423.784210,2770.043946,3512.127230,TTB,6
3,AC_CZS,-7.599,-72.770,196.5080,1986.279,-1.343,10.685,27402.0,27394.0,-642.0,...,AC,N,CRUZEIRO DO SUL,27394.967537,-642.249416,5170.325458,2780.199210,3522.772456,TTB,6
4,AC_CZS,-7.599,-72.770,196.5080,1989.503,-1.847,10.468,27222.0,27208.0,-877.0,...,AC,N,CRUZEIRO DO SUL,27208.115808,-877.391056,5029.623060,2780.199210,3522.772456,TTB,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077,TO_PNL,-10.727,-48.408,240.2992,1985.119,-18.142,-7.084,24938.0,23698.0,-7765.0,...,TO,N,PORTO NACIONAL,23698.435287,-7765.074945,-3099.141181,1058.861616,1393.115514,TTB,9
1078,TO_PNL,-10.727,-48.408,240.2992,1986.670,-18.325,-7.556,24865.0,23604.0,-7817.0,...,TO,N,PORTO NACIONAL,23604.245135,-7817.785140,-3298.292909,1058.861616,1393.115514,TTB,9
1079,TO_PNL,-10.727,-48.408,240.2992,1995.817,-19.315,-10.562,24358.0,22987.0,-8056.0,...,TO,N,PORTO NACIONAL,22987.174124,-8056.750570,-4541.792602,1058.861616,1393.115514,TTB,9
1080,TO_PNL,-10.721,-48.401,256.8352,2003.702,-20.117,-13.008,23896.0,22438.0,-8218.0,...,TO,N,PORTO NACIONAL,22438.749907,-8218.966515,-5520.486212,1058.203245,1393.469547,TTB,9


In [20]:
# Write the main database to the output folder as CSV: the row index is left
# out and float columns are written with three decimal places.
df_final.to_csv(output_folder / output_file, index=False, float_format="%.3f")