# Correlations of the v3 database

Correlation matrices for the v3 database

v3 uses GRACE data with altitude (i.e., it hasn't been normalized to 400 km)

## Purpose

- Identify the key features for new models 
- Remove features with high collinearity

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib qt

# Show the result of every top-level expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [8]:
# Name of the density column that every feature group is correlated against.
dens_col = 'dens_x'

# First feature group; the density column is placed last so that it is part of
# any correlation matrix built from this list.
# NOTE(review): the name suggests FISM irradiance bands -- confirm.
fism_columns = [
    '225_00', '600_01', '1300_02', '2500_03', '5100_04', '11250_05',
    '18950_06', '25700_07', '30500_08', '43000_09', '59500_10', '72400_11',
    '72400_12', '85550_13', '85550_14', '85550_15', '94400_16', '94400_17',
    '94400_18', '98100_19', '100700_20', '103850_21', '113000_22',
] + [dens_col]

# Second feature group, again with the density column placed last.
# NOTE(review): the name suggests OMNI solar-wind / index data -- confirm.
omni_columns = [
    'B', 'Bx_GSEGSM', 'By_GSE', 'Bz_GSE', 'By_GSM', 'Bz_GSM',
    'Vsw', 'Vx_GSE', 'Vy_GSE', 'Vz_GSE',
    'Prho', 'Tp', 'dynP', 'Esw', 'Beta', 'AlfvenMach',
    'X(s/c), GSE', 'Y(s/c), GSE', 'Z(s/c), GSE',
    'BSN location, Xgse', 'BSN location, Ygse', 'BSN location, Zgse',
    'AE', 'AL', 'AU',
    'SYM_D index', 'SYM_H index', 'ASY_D index', 'ASY_H index',
    'PC index', 'Na_Np Ratio', 'MagnetosonicMach',
    'Goes Proton flux (>10 MeV)', 'Goes Proton flux (>30 MeV)',
    'Goes Proton flux (>60 MeV)',
] + [dens_col]

In [5]:
# Raw string: in a normal literal '\d', '\S' and '\s' are invalid escape
# sequences, which Python reports as a warning (SyntaxWarning on 3.12+).
# The resulting path is unchanged.
fn = r'D:\data\SatDensities\satdrag_database_grace_B_v3.hdf5'
# Load the database into a DataFrame.
df = pd.read_hdf(fn)

In [6]:
# Boolean row masks that split the database into storm/quiet times and storm phases.
# The flags are matched with open tolerance bands rather than equality,
# presumably because they are stored as floats -- TODO confirm.

# Storm times: 'storm' strictly between 0.95 and 1.05.
st_t = df['storm'].gt(0.95) & df['storm'].lt(1.05)
# Quiet times: 'storm' strictly negative.
qt_t = df['storm'].lt(0)
# Main phase: 'storm phase' strictly between 0.95 and 1.05.
mn_p = df['storm phase'].gt(0.95) & df['storm phase'].lt(1.05)
# Recovery phase: 'storm phase' strictly between 1.95 and 2.05.
rc_p = df['storm phase'].gt(1.95) & df['storm phase'].lt(2.05)

In [7]:
# Sanity check: number of rows inside/outside the quiet-time and storm-time masks.
qt_t.value_counts()
st_t.value_counts()

storm
False    628368
True     623067
Name: count, dtype: int64

storm
True     628368
False    623067
Name: count, dtype: int64

### Generate correlation matrices

In [None]:
def cor_matrix(data, columns, dens_col,
               qt_t, st_t, mn_p, rc_p):
    """Correlate the selected columns with the density column, per subset.

    For the full table and for each of the four row subsets, rows with a
    missing value in any of ``columns`` are dropped, the correlation matrix
    of ``columns`` is computed, and its ``dens_col`` column is kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing every entry of ``columns``.
    columns : list of str
        Columns to correlate; must include ``dens_col``.
    dens_col : str
        Column whose correlations with the other columns are reported.
    qt_t, st_t, mn_p, rc_p : boolean masks
        Row selectors for the 'Quiet', 'Storm', 'Main' and 'Recovery'
        subsets respectively.

    Returns
    -------
    pandas.DataFrame
        One row per correlated column and one column per subset, in the
        order 'All', 'Quiet', 'Storm', 'Main', 'Recovery'.

    Raises
    ------
    KeyError
        If ``dens_col`` is not one of ``columns``.
    """
    if dens_col not in columns:
        raise KeyError(f"dens_col {dens_col!r} must be included in columns")

    # Select the columns once, so that each subset filters a narrow frame
    # instead of copying every column of the table before discarding most.
    selected = data[columns]

    # None stands for "no row filter" (the whole table).
    subsets = {
        'All': None,
        'Quiet': qt_t,
        'Storm': st_t,
        'Main': mn_p,
        'Recovery': rc_p,
    }

    cor_plot = pd.DataFrame()
    for label, mask in subsets.items():
        rows = selected if mask is None else selected[mask]
        cor_plot[label] = rows.dropna().corr()[dens_col]

    return cor_plot

    