# Drag Database V3

Satellite drag database to develop a new machine learning algorithm that incorporates altitude.

## Datasets

All at 5 minute cadence

- Grace B
    - add geomagnetic coordinates
- Omni
- FISM2
- MSIS profiles

In [37]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import os, sys
import pandas as pd

# for converting to
# geomagnetic coord
import aacgmv2

# add the directory containing the data_io module
# to the import path and import it
file_path = 'D:\\GitHub\\DataIO\\'
sys.path.append(os.path.dirname(file_path))
import data_io as dio

In [3]:
# Start and end dates of the data to read in
sdate = '2002-01-01'
edate = '2016-01-01'

# Number of years covered by the date range: the elapsed time is divided
# by a 365.2-day year and 1 is added before truncating to an integer.
date_span = pd.Timestamp(edate) - pd.Timestamp(sdate)
ldate = int(date_span.total_seconds() / (365.2 * 86400) + 1)

In [4]:
# Load the grace data (sat='gb') for the requested date range.
# NOTE(review): gr_u and gr_m are presumably units/metadata returned
# alongside the data frame — confirm against data_io.
gr_d, gr_u, gr_m = dio.toleos_den.load_toleos(
    sat='gb',
    sdate=sdate,
    edate=edate,
)

# First and last timestamps present in the grace data
grace_times = gr_d['DateTime']
d_min, d_max = grace_times.min(), grace_times.max()

In [5]:
# Load the OMNI data at the '5m' resolution
om_d, om_m = dio.load_omni(res='5m', sdate=sdate, nd=ldate)

# Keep only the rows that fall within the grace time span,
# padded by 5 minutes on either side
omni_pad = pd.DateOffset(minutes=5)
omni_times = om_d['DateTime']
omni_in_span = (omni_times >= d_min - omni_pad) & (omni_times <= d_max + omni_pad)
om_d = om_d[omni_in_span]

In [6]:
# Read the FISM2 data
fi_d, fi_m = dio.load_fism2()

# Truncate to the grace time span, padded by 5 minutes on either side
fism_pad = pd.DateOffset(minutes=5)
fism_times = fi_d['DateTime']
fism_in_span = (fism_times >= d_min - fism_pad) & (fism_times <= d_max + fism_pad)
fi_d = fi_d[fism_in_span]

## Combine the DataFrames together

Use a matching tolerance of 2.5 minutes, half the 5-minute cadence of the OMNI and FISM2 data sets.

When combining the grace data, use a tolerance of 50 seconds (the largest difference in the grace cadence).


In [7]:
# Matching tolerance used when merging the OMNI and FISM2 data: 2.5 minutes
tol = pd.Timedelta(minutes=2.5)

In [8]:
# Create the database of FISM2 and OMNI data.

# Keep the FISM2 timestamp as a column under its own name so it can be
# told apart from the OMNI timestamp after the merge, and index both
# frames by time so they can be merged index-on-index.
# set_index(..., drop=False) keeps the column and returns a new frame
# instead of mutating the existing one in place.
fi_d = fi_d.rename(columns={'DateTime': 'DateTime_fism2'})
fi_d = fi_d.set_index('DateTime_fism2', drop=False)

om_d = om_d.set_index('DateTime', drop=False)

# For every OMNI row take the nearest FISM2 row in time, provided it lies
# within the tolerance `tol`; OMNI rows with no match get missing values.
database = pd.merge_asof(
    left=om_d,
    right=fi_d,
    left_index=True,
    right_index=True,
    direction='nearest',
    tolerance=tol,
)
database = database.rename(columns={'DateTime': 'DateTime_omni'})

# merge_asof behaves like a left join, so every OMNI row must still be
# present. (A bare `om_d.shape` on a non-final line is never displayed.)
assert database.shape[0] == om_d.shape[0]
database.shape

(1445752, 75)

In [9]:
# Add the grace data.

# Keep the grace timestamp as a column under its own name and index the
# grace frame by time for the index-on-index merge below.
gr_d = gr_d.rename(columns={'DateTime': 'DateTime_gr'})
gr_d = gr_d.set_index('DateTime_gr', drop=False)

# Dedicated name for the 50 second grace tolerance. Re-assigning `tol`
# here would overwrite the 2.5 minute OMNI/FISM2 tolerance, so re-running
# the OMNI/FISM2 merge cell afterwards would silently use 50 seconds.
tol_grace = pd.Timedelta('50 second')

# For every database row take the nearest grace row in time, provided it
# lies within tol_grace; rows with no match get missing grace values.
database = pd.merge_asof(
    left=database,
    right=gr_d,
    left_index=True,
    right_index=True,
    direction='nearest',
    tolerance=tol_grace,
)

# merge_asof behaves like a left join, so the row count must still match
# the OMNI data. (A bare `om_d.shape` on a non-final line is never displayed.)
assert database.shape[0] == om_d.shape[0]
database.shape

(1445752, 88)

In [10]:
# get storm times

# Read in the storm epochs. The first line of the file is skipped and the
# three whitespace-separated columns are parsed as datetimes:
#   t_st  - start of the storm interval
#   t_dst - boundary between storm phase 1 and storm phase 2
#           (NOTE(review): presumably the time of minimum Dst — confirm)
#   t_en  - end of the storm interval
storm_txt = 'D:\\GitHub\\SatDrag\\data\\storms_drag_epochs_no_overlap.txt'
storm_time = pd.read_csv(
    storm_txt,
    header=None,
    skiprows=1,
    # sep=r'\s+' replaces the deprecated delim_whitespace option;
    # the deprecated infer_datetime_format option is no longer passed.
    sep=r'\s+',
    names=['t_st', 't_dst', 't_en'],
    parse_dates=[0, 1, 2],
)

# Latest storm start time (only the last expression of a cell is displayed)
storm_time['t_st'].max()

  storm_time = pd.read_csv(storm_txt, header=None, skiprows=1,
  storm_time = pd.read_csv(storm_txt, header=None, skiprows=1,


Timestamp('2017-12-02 01:00:00')

In [11]:
# show the first rows of the storm time table
storm_time.head()

Unnamed: 0,t_st,t_dst,t_en
0,2002-01-23 03:00:00,2002-01-25 20:00:00,2002-01-27 22:00:00
1,2002-01-30 18:00:00,2002-02-02 09:00:00,2002-02-02 20:00:00
2,2002-02-02 20:00:00,2002-02-05 20:00:00,2002-02-10 12:00:00
3,2002-02-10 12:00:00,2002-02-13 00:00:00,2002-02-15 02:00:00
4,2002-02-23 13:00:00,2002-02-26 07:00:00,2002-02-26 16:00:00


## Add storm times to the dataframe

In [12]:
# list the column names of the combined database
database.columns

Index(['DateTime_omni', 'Year', 'DOY', 'Hour', 'Minute', 'IMF_id', 'SW_id',
       'IMF_pt', 'SW_pt', 'Per_int', 'Timeshift', 'RMS_Timeshift',
       'RMS_PhaseFrontNormal', 'Time_btwn_observations', 'B', 'Bx_GSEGSM',
       'By_GSE', 'Bz_GSE', 'By_GSM', 'Bz_GSM', 'RMS_SD_B',
       'RMS_SD_field_vector', 'Vsw', 'Vx_GSE', 'Vy_GSE', 'Vz_GSE', 'Prho',
       'Tp', 'dynP', 'Esw', 'Beta', 'AlfvenMach', 'X(s/c), GSE', 'Y(s/c), GSE',
       'Z(s/c), GSE', 'BSN location, Xgse', 'BSN location, Ygse',
       'BSN location, Zgse', 'AE', 'AL', 'AU', 'SYM_D index', 'SYM_H index',
       'ASY_D index', 'ASY_H index', 'PC index', 'Na_Np Ratio',
       'MagnetosonicMach', 'Goes Proton flux (>10 MeV)',
       'Goes Proton flux (>30 MeV)', 'Goes Proton flux (>60 MeV)',
       'DateTime_fism2', '225_00', '600_01', '1300_02', '2500_03', '5100_04',
       '11250_05', '18950_06', '25700_07', '30500_08', '43000_09', '59500_10',
       '72400_11', '72400_12', '85550_13', '85550_14', '85550_15', '94400_16',
 

In [14]:
# loop through the storms and set
# flags in the satellite drag database
# for the different times

# Expose the time index as a 'DateTime' column. Guarded so that re-running
# this cell does not reset the index a second time, which would add a
# stray 'index' column to the database.
if 'DateTime' not in database.columns:
    database = database.reset_index()

# -1 marks rows that fall outside every storm interval
database['storm'] = -1
database['storm phase'] = -1

# Look the time column up once rather than on every comparison in the loop
db_times = database['DateTime']

for _, storm_row in storm_time.iterrows():
    t_st, t_dst, t_en = storm_row['t_st'], storm_row['t_dst'], storm_row['t_en']

    # Half-open intervals [start, end): a time equal to one storm's end
    # and the next storm's start is assigned to the later storm only.
    in_storm = (db_times >= t_st) & (db_times < t_en)
    in_phase_1 = (db_times >= t_st) & (db_times < t_dst)
    in_phase_2 = (db_times >= t_dst) & (db_times < t_en)

    # storm: 1 anywhere inside the storm interval
    # storm phase: 1 from t_st up to t_dst, 2 from t_dst up to t_en
    # NOTE(review): presumably main and recovery phase — confirm
    database.loc[in_storm, 'storm'] = 1
    database.loc[in_phase_1, 'storm phase'] = 1
    database.loc[in_phase_2, 'storm phase'] = 2

In [23]:
# list the column names of the database again
database.columns

Index(['DateTime', 'DateTime_omni', 'Year', 'DOY', 'Hour', 'Minute', 'IMF_id',
       'SW_id', 'IMF_pt', 'SW_pt', 'Per_int', 'Timeshift', 'RMS_Timeshift',
       'RMS_PhaseFrontNormal', 'Time_btwn_observations', 'B', 'Bx_GSEGSM',
       'By_GSE', 'Bz_GSE', 'By_GSM', 'Bz_GSM', 'RMS_SD_B',
       'RMS_SD_field_vector', 'Vsw', 'Vx_GSE', 'Vy_GSE', 'Vz_GSE', 'Prho',
       'Tp', 'dynP', 'Esw', 'Beta', 'AlfvenMach', 'X(s/c), GSE', 'Y(s/c), GSE',
       'Z(s/c), GSE', 'BSN location, Xgse', 'BSN location, Ygse',
       'BSN location, Zgse', 'AE', 'AL', 'AU', 'SYM_D index', 'SYM_H index',
       'ASY_D index', 'ASY_H index', 'PC index', 'Na_Np Ratio',
       'MagnetosonicMach', 'Goes Proton flux (>10 MeV)',
       'Goes Proton flux (>30 MeV)', 'Goes Proton flux (>60 MeV)',
       'DateTime_fism2', '225_00', '600_01', '1300_02', '2500_03', '5100_04',
       '11250_05', '18950_06', '25700_07', '30500_08', '43000_09', '59500_10',
       '72400_11', '72400_12', '85550_13', '85550_14', '85550_15', '

In [36]:
# Keep only the rows that have a grace timestamp, i.e. drop every row
# where DateTime_gr is missing
has_grace = database['DateTime_gr'].notna()
database = database.loc[has_grace]
print(database.shape)

(1251435, 91)


## Save data

In [37]:
# Output file for the combined database. A plain string literal is used:
# the path has no placeholders, so the f-string prefix was unnecessary.
fn = 'D:\\data\\SatDensities\\satdrag_database_grace_b_v3.hdf5'

# Write the database under the key 'database' in the 'fixed' format with
# compression level 9; mode='w' overwrites any existing file at this path.
database.to_hdf(fn, key='database', mode='w', format='fixed', complevel=9)

In [38]:
# show the first rows of the database that was saved
database.head()

Unnamed: 0,DateTime,DateTime_omni,Year,DOY,Hour,Minute,IMF_id,SW_id,IMF_pt,SW_pt,...,lat,lst,arglat,dens_x,dens_mean,flag_0,flag_1,DateTime_gr,storm,storm phase
1,2002-04-04 00:50:00,2002-04-04 00:50:00,2002,94,0,50,71.0,71.0,5.0,5.0,...,-58.533,10.508,301.602,1.344974e-12,1.242133e-12,0.0,0.0,2002-04-04 00:50:00,1,2
2,2002-04-04 00:55:00,2002-04-04 00:55:00,2002,94,0,55,71.0,71.0,5.0,4.0,...,-39.572,10.56,320.585,1.46832e-12,1.239372e-12,0.0,0.0,2002-04-04 00:55:00,1,2
3,2002-04-04 01:00:00,2002-04-04 01:00:00,2002,94,1,0,71.0,71.0,5.0,5.0,...,-20.544,10.589,339.559,1.643068e-12,1.23661e-12,0.0,0.0,2002-04-04 01:00:00,1,2
4,2002-04-04 01:05:00,2002-04-04 01:05:00,2002,94,1,5,71.0,71.0,5.0,5.0,...,-1.469,10.611,358.529,1.700921e-12,1.234232e-12,0.0,0.0,2002-04-04 01:05:00,1,2
5,2002-04-04 01:10:00,2002-04-04 01:10:00,2002,94,1,10,71.0,71.0,5.0,5.0,...,17.613,10.633,17.503,1.745759e-12,1.231855e-12,0.0,0.0,2002-04-04 01:10:00,1,2
