# Coorelations of the database 

Correlation matrices for the data set 


In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib qt

#print all output in a cell 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
columns_solar = ['F10', 'F81','S10', 'S81c', 'M10', 'M81c', 'Y10', 'Y81c','400kmDensity']  

columns_fism = ['225_00', '600_01', '1300_02', '2500_03', '5100_04',
            '11250_05', '18950_06', '25700_07', '30500_08', '43000_09', '59500_10',
            '72400_11', '72400_12', '85550_13', '85550_14', '85550_15', '94400_16',
            '94400_17', '94400_18', '98100_19', '100700_20', '103850_21',
            '113000_22','400kmDensity']

columns_swgeo = ['Bx_GSEGSM','By_GSM', 'Bz_GSM', 
           'Vsw', 'Vx_GSE', 'Vy_GSE', 'Vz_GSE', 'Prho', 'Tp',
           'dynP', 'Esw', 'AE', 'AL', 'AU', 'SYM_H index',
           'storm', 'storm phase','400kmDensity']  

col = columns_solar

In [3]:
fn = 'D:\\data\\SatDensities\\satdrag_database_grace_B_SI_int.hdf5'
df = pd.read_hdf(fn)

In [4]:
df.columns

Index(['DateTime', 'DateTime_omni', 'B', 'Bx_GSEGSM', 'By_GSE', 'Bz_GSE',
       'By_GSM', 'Bz_GSM', 'Vsw', 'Vx_GSE', 'Vy_GSE', 'Vz_GSE', 'Prho', 'Tp',
       'dynP', 'Esw', 'AE', 'AL', 'AU', 'SYM_D index', 'SYM_H index',
       'ASY_D index', 'ASY_H index', 'PC index', 'Goes Proton flux (>10 MeV)',
       'Goes Proton flux (>30 MeV)', 'Goes Proton flux (>60 MeV)',
       'DateTime_fism2', '225_00', '600_01', '1300_02', '2500_03', '5100_04',
       '11250_05', '18950_06', '25700_07', '30500_08', '43000_09', '59500_10',
       '72400_11', '72400_12', '85550_13', '85550_14', '85550_15', '94400_16',
       '94400_17', '94400_18', '98100_19', '100700_20', '103850_21',
       '113000_22', 'DateTime_si', 'F10', 'F81', 'S10', 'S81c', 'M10', 'M81c',
       'Y10', 'Y81c', 'DateTime_gr', 'CenterLat', 'SatLat', 'SatLon',
       'SatHeight', 'SatLT', 'SatDipoleLat', 'SatMagLon', 'SatMagLT',
       'SatDensity', '400kmDensity', '410kmDensity', 'NRLMSISe00atSat',
       'DenUncertainty', 'NumPts', '

In [5]:
# storm/quiet data 
st_t = (df['storm']>0.95) & (df['storm']<1.05) # storm times
qt_t = (df['storm']<0) # quiet times
mn_p = (df['storm phase']>0.95) & (df['storm phase']<1.05) # main phase
rc_p = (df['storm phase']>1.95) & (df['storm phase']<2.05) # recovery phase


In [6]:
qt_t.value_counts()
st_t.value_counts()

False    570564
True     472573
Name: storm, dtype: int64

True     570564
False    472573
Name: storm, dtype: int64

In [7]:
cor_plot = pd.DataFrame()
cor_mat = df[col].dropna().corr()
cor_plot['All'] = df[col].dropna().corr()['400kmDensity']
cor_plot['Quiet'] = df[qt_t][col].dropna().corr()['400kmDensity']
cor_plot['Storm'] = df[st_t][col].dropna().corr()['400kmDensity']
cor_plot['Main'] = df[mn_p][col].dropna().corr()['400kmDensity']
cor_plot['Recovery'] = df[rc_p][col].dropna().corr()['400kmDensity']
try:
    cor_plot['Balanced']= pd.concat([df[st_t][col].dropna().sample(100000),df[qt_t][col].dropna().sample(100000)]).corr()['400kmDensity']
except:
    cor_plot['Balanced']= pd.concat([df[st_t][col].dropna().sample(10000),df[qt_t][col].dropna().sample(10000)]).corr()['400kmDensity']

In [12]:
cor_plot = pd.DataFrame()
cor_mat = df[col].fillna(method='ffill').corr()
cor_plot['All'] = df[col].fillna(method='ffill').corr()['400kmDensity']
cor_plot['Quiet'] = df[qt_t][col].fillna(method='ffill').corr()['400kmDensity']
cor_plot['Storm'] = df[st_t][col].fillna(method='ffill').corr()['400kmDensity']
cor_plot['Main'] = df[mn_p][col].fillna(method='ffill').corr()['400kmDensity']
cor_plot['Recovery'] = df[rc_p][col].fillna(method='ffill').corr()['400kmDensity']
try:
    cor_plot['Balanced']= pd.concat([df[st_t][col].fillna(method='ffill').sample(100000),df[qt_t][col].fillna(method='ffill').sample(100000)]).corr()['400kmDensity']
except:
    cor_plot['Balanced']= pd.concat([df[st_t][col].fillna(method='ffill').sample(10000),df[qt_t][col].fillna(method='ffill').sample(10000)]).corr()['400kmDensity']

In [28]:
plt.figure(figsize=(3, 3))
sns.heatmap(cor_plot[0:-1].abs(),annot=True, fmt='.2f', cbar_kws={'label':'Abs Correlation - abs(r)'})
plt.yticks(rotation=30) 
plt.show()

<Figure size 300x300 with 0 Axes>

<Axes: >

(array([0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]),
 [Text(0, 0.5, 'F10'),
  Text(0, 1.5, 'F81'),
  Text(0, 2.5, 'S10'),
  Text(0, 3.5, 'S81c'),
  Text(0, 4.5, 'M10'),
  Text(0, 5.5, 'M81c'),
  Text(0, 6.5, 'Y10'),
  Text(0, 7.5, 'Y81c')])

In [20]:
col.append('DateTime')


In [24]:
df[col].head()


Unnamed: 0,F10,F81,S10,S81c,M10,M81c,Y10,Y81c,400kmDensity,DateTime,DateTime.1
0,208.5,174.6,172.0,166.9,197.0,168.8,179.6,169.7,4.991472e-12,2002-08-01 00:00:00,2002-08-01 00:00:00
1,192.6,175.6,166.9,167.3,182.8,169.2,175.7,169.9,4.958553e-12,2002-08-01 00:05:00,2002-08-01 00:05:00
2,192.6,175.6,166.9,167.3,182.8,169.2,175.7,169.9,4.815389e-12,2002-08-01 00:10:00,2002-08-01 00:10:00
3,192.6,175.6,166.9,167.3,182.8,169.2,175.7,169.9,4.967782e-12,2002-08-01 00:15:00,2002-08-01 00:15:00
4,192.6,175.6,166.9,167.3,182.8,169.2,175.7,169.9,4.557778e-12,2002-08-01 00:20:00,2002-08-01 00:20:00
