# Satellite Drag Database V2

Create a new satellite drag database for investigating drag.

### Differences from the first version
- Do not interpolate the data
    - Use Pandas merge_asof() to merge the datasets by matching time with a set tolerance
- Add higher resolution FISM2 data set
    - 5 minute cadence 
    - Stan Bands
- Create two data sets; Grace A and Grace B
- Add JB2008 solar indices

In [1]:
import os, sys
import pandas as pd

# Display the value of every top-level expression in a cell,
# not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the DataIO directory to the module search path (presumably where the
# data_io module imported in the next cell lives -- verify on a new machine).
# NOTE(review): os.path.dirname() only strips the trailing separator here
# when run on Windows, where '\\' is a path separator.
file_path = 'D:\\GitHub\\DataIO\\'
sys.path.append(os.path.dirname(file_path))

In [2]:
# import data input/output module
import data_io as dio
# satellite identifier passed to dio.load_gr_norm()
sat = 'CHAMP'
# Controls how the solar indices are merged into the database:
#   False -> only rows within the merge tolerance of a solar-index
#            time stamp receive values
#   True  -> every row receives the nearest solar-index record (no
#            tolerance) and the output file name gets an "_SI_int" suffix
int_si = False

In [3]:
# Load the density data for the satellite selected by `sat`. The loader
# returns the data frame plus a second object (gr_m) whose "sat" entry is
# used later to build the output file name.
gr_d, gr_m = dio.load_gr_norm(sat=sat, sdate='2002-01-01', nd=13)

# Year/DOY/Sec are dropped; DateTime is the time column used from here on.
gr_d = gr_d.drop(['Year', 'DOY', 'Sec'], axis=1)

# Time span of the satellite data, used to trim the other data sets.
d_min, d_max = gr_d['DateTime'].min(), gr_d['DateTime'].max()

In [4]:
# Load the OMNI data (res='5m') and keep only the columns that go into
# the database.
om_d, om_m = dio.load_omni(res='5m', sdate='2002-01-01', nd=13)

om_d = om_d.loc[:, [
    'DateTime', 'B', 'Bx_GSEGSM', 'By_GSE', 'Bz_GSE',
    'By_GSM', 'Bz_GSM', 'Vsw', 'Vx_GSE',
    'Vy_GSE', 'Vz_GSE', 'Prho', 'Tp', 'dynP', 'Esw', 'AE', 'AL', 'AU',
    'SYM_D index', 'SYM_H index', 'ASY_D index', 'ASY_H index', 'PC index',
    'Goes Proton flux (>10 MeV)', 'Goes Proton flux (>30 MeV)',
    'Goes Proton flux (>60 MeV)',
]]

# Trim to the satellite time span padded by 5 minutes on either side;
# between() includes both end points, like the >= / <= comparison pair.
om_d = om_d[om_d['DateTime'].between(d_min - pd.DateOffset(minutes=5),
                                     d_max + pd.DateOffset(minutes=5))]

In [5]:
# Load the solar indices for the satellite time span, padded by one day
# on each side, then discard the columns that are not stored.
si_d, si_m = dio.load_solar_JBindices(
    sdate=d_min - pd.DateOffset(days=1),
    edate=d_max + pd.DateOffset(days=1),
)
si_d = si_d.drop(['JulianDay', 'Ssrc'], axis=1)

In [6]:
# Load the FISM2 data and trim it to the satellite time span, padded by
# 5 minutes on either side (both end points inclusive).
fi_d, fi_m = dio.load_fism2()
fi_d = fi_d[fi_d['DateTime'].between(d_min - pd.DateOffset(minutes=5),
                                     d_max + pd.DateOffset(minutes=5))]

## Combine the DataFrames together

Use a time delta (merge tolerance) of 2.5 minutes, half the 5 minute cadence of the OMNI and FISM2 data sets.

When combining the grace data use a time delta of 50 seconds (the largest difference in the grace cadence)


In [7]:
# merge tolerance used for the OMNI/FISM2 and solar-index merges
tol = pd.Timedelta(minutes=2.5)

In [8]:
# Build the base database: every OMNI row with the nearest FISM2 record
# (within `tol`) attached.

# Index both frames by time while keeping the time column. The FISM2 time
# column is renamed first so it stays distinguishable after the merge.
fi_d = fi_d.rename(columns={'DateTime': 'DateTime_fism2'}).set_index('DateTime_fism2', drop=False)
om_d = om_d.set_index('DateTime', drop=False)

database = pd.merge_asof(
    om_d, fi_d,
    left_index=True, right_index=True,
    direction='nearest', tolerance=tol,
).rename(columns={'DateTime': 'DateTime_omni'})

# shapes before/after the merge
om_d.shape
database.shape

(912385, 26)

(912385, 50)

In [9]:
# Attach the solar indices to the database.
si_d = si_d.rename(columns={'DateTime': 'DateTime_si'}).set_index('DateTime_si', drop=False)

# int_si True : no tolerance, so every row receives the nearest solar-index
#               record (values are repeated, not interpolated)
# int_si False: only rows within `tol` of a solar-index time stamp are filled
database = pd.merge_asof(
    database, si_d,
    left_index=True, right_index=True,
    direction='nearest',
    tolerance=None if int_si else tol,
)

# shapes before/after the merge
om_d.shape
database.shape

(912385, 26)

(912385, 59)

In [10]:
# Attach the satellite density data to the database.
gr_d = gr_d.rename(columns={'DateTime': 'DateTime_gr'}).set_index('DateTime_gr', drop=False)

# NOTE: this rebinds `tol` to 50 seconds; re-running an earlier merge cell
# afterwards would pick up this value.
tol = pd.Timedelta(seconds=50)

database = pd.merge_asof(
    database, gr_d,
    left_index=True, right_index=True,
    direction='nearest', tolerance=tol,
)

# shapes before/after the merge
om_d.shape
database.shape

(912385, 26)

(912385, 76)

In [11]:
# Read the storm epochs: one storm per row, three time stamps per storm.
#   t_st  - storm start
#   t_dst - boundary used below between storm phase 1 and storm phase 2
#   t_en  - storm end
storm_txt = 'D:\\GitHub\\SatDrag\\data\\storms_drag_epochs_no_overlap.txt'

# sep=r'\s+' is the supported replacement for the deprecated
# delim_whitespace flag (same parsing). infer_datetime_format is dropped:
# it is deprecated and has no effect in pandas >= 2.0.
storm_time = pd.read_csv(storm_txt, header=None, skiprows=1,
                         sep=r'\s+', names=['t_st', 't_dst', 't_en'],
                         parse_dates=[0, 1, 2])

# first and last storm start times
storm_time['t_st'].min()
storm_time['t_st'].max()

Timestamp('2002-01-23 03:00:00')

Timestamp('2012-11-21 17:00:00')

## Add storm times to the dataframe

In [12]:
# list the column names of the merged database
database.columns

Index(['DateTime_omni', 'B', 'Bx_GSEGSM', 'By_GSE', 'Bz_GSE', 'By_GSM',
       'Bz_GSM', 'Vsw', 'Vx_GSE', 'Vy_GSE', 'Vz_GSE', 'Prho', 'Tp', 'dynP',
       'Esw', 'AE', 'AL', 'AU', 'SYM_D index', 'SYM_H index', 'ASY_D index',
       'ASY_H index', 'PC index', 'Goes Proton flux (>10 MeV)',
       'Goes Proton flux (>30 MeV)', 'Goes Proton flux (>60 MeV)',
       'DateTime_fism2', '225_00', '600_01', '1300_02', '2500_03', '5100_04',
       '11250_05', '18950_06', '25700_07', '30500_08', '43000_09', '59500_10',
       '72400_11', '72400_12', '85550_13', '85550_14', '85550_15', '94400_16',
       '94400_17', '94400_18', '98100_19', '100700_20', '103850_21',
       '113000_22', 'DateTime_si', 'F10', 'F81', 'S10', 'S81c', 'M10', 'M81c',
       'Y10', 'Y81c', 'DateTime_gr', 'CenterLat', 'SatLat', 'SatLon',
       'SatHeight', 'SatLT', 'SatDipoleLat', 'SatMagLon', 'SatMagLT',
       'SatDensity', '400kmDensity', '410kmDensity', 'NRLMSISe00atSat',
       'DenUncertainty', 'NumPts', 'NuminBinThru

In [13]:
# Flag storm intervals in the satellite drag database.
#   storm       :  1 inside a storm (t_st <= time < t_en), -1 otherwise
#   storm phase :  1 for t_st <= time < t_dst,
#                  2 for t_dst <= time < t_en, -1 otherwise

# Move the time index into a regular 'DateTime' column and initialise
# both flags to -1.
database = database.reset_index().assign(**{'storm': -1, 'storm phase': -1})

for storm in storm_time.itertuples(index=False):
    when = database['DateTime']
    in_storm = (when >= storm.t_st) & (when < storm.t_en)
    in_phase_1 = (when >= storm.t_st) & (when < storm.t_dst)
    in_phase_2 = (when >= storm.t_dst) & (when < storm.t_en)

    database.loc[in_storm, 'storm'] = 1
    database.loc[in_phase_1, 'storm phase'] = 1
    database.loc[in_phase_2, 'storm phase'] = 2

## Save data

In [14]:
# Output file name: the satellite name comes from the loader metadata, and
# the "_SI_int" suffix marks a database built with int_si = True.
fn = (
    f'D:\\data\\SatDensities\\satdrag_database_grace_{gr_m["sat"]}_SI_int.hdf5'
    if int_si
    else f'D:\\data\\SatDensities\\satdrag_database_grace_{gr_m["sat"]}.hdf5'
)

# mode='w' overwrites any existing file of the same name
database.to_hdf(fn, key='database', mode='w', format='fixed', complevel=9)

In [15]:
# show the path the database was written to
fn

'D:\\data\\SatDensities\\satdrag_database_grace_CHAMP.hdf5'

In [16]:
# Spot-check the solar-index merge on three 5-row windows. DateTime_si is
# NaT (and F10 is NaN) on rows that received no solar-index record.
database.loc[142:146,['DateTime','B','F10','DateTime_si']]
database.loc[286:290,['DateTime','B','F10','DateTime_si']]
database.loc[430:434,['DateTime','B','F10','DateTime_si']]

Unnamed: 0,DateTime,B,F10,DateTime_si
142,2002-01-01 11:50:00,7.04,,NaT
143,2002-01-01 11:55:00,7.03,,NaT
144,2002-01-01 12:00:00,7.04,232.2,2002-01-01 12:00:00
145,2002-01-01 12:05:00,7.03,,NaT
146,2002-01-01 12:10:00,7.03,,NaT


Unnamed: 0,DateTime,B,F10,DateTime_si
286,2002-01-01 23:50:00,7.94,,NaT
287,2002-01-01 23:55:00,7.94,,NaT
288,2002-01-02 00:00:00,8.01,,NaT
289,2002-01-02 00:05:00,8.02,,NaT
290,2002-01-02 00:10:00,8.04,,NaT


Unnamed: 0,DateTime,B,F10,DateTime_si
430,2002-01-02 11:50:00,4.83,,NaT
431,2002-01-02 11:55:00,5.68,,NaT
432,2002-01-02 12:00:00,6.05,231.1,2002-01-02 12:00:00
433,2002-01-02 12:05:00,4.92,,NaT
434,2002-01-02 12:10:00,6.04,,NaT
