# Data Exploration & Preparation

This notebook explores the in-situ data for every STEREO-A and STEREO-B ICME in the HELCATS ICME list.

In [1]:
%matplotlib widget

from collections import defaultdict
import datetime as dt
import json

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.units as munits
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

# Share a single concise date converter across every date-like type that
# matplotlib may encounter on an axis.
converter = mdates.ConciseDateConverter()
for date_type in (np.datetime64, dt.date, dt.datetime):
    munits.registry[date_type] = converter

### Parse the full HELCATS ICME list and extract all of the STEREO-A and STEREO-B ICMEs

In [2]:
# Load the ICME catalogue: the JSON stores the table rows under 'data' and the
# matching column names under 'columns'.
with open('ICME_WP4_V10.json', 'r') as fobj:
    json_data = json.load(fobj)

df = pd.DataFrame(json_data['data'], columns=json_data['columns'])

# Match the spacecraft name literally (regex=False) and treat a missing
# SC_INSITU entry as "no match" (na=False) so the boolean mask never holds NaN.
is_sta = df['SC_INSITU'].str.contains('STEREO-A', regex=False, na=False)
is_stb = df['SC_INSITU'].str.contains('STEREO-B', regex=False, na=False)

# Take explicit copies so that re-indexing (and any later column assignment)
# acts on independent frames rather than on slices of `df`.
sta_icme_df = df.loc[is_sta].copy()
stb_icme_df = df.loc[is_stb].copy()

# Index each subset by ICME start time; ICME_START_TIME also stays as a column.
sta_icme_df.index = pd.DatetimeIndex(sta_icme_df['ICME_START_TIME'])
stb_icme_df.index = pd.DatetimeIndex(stb_icme_df['ICME_START_TIME'])

In [3]:
# Read the whole text file into memory as a list of lines (newlines kept).
with open('./data/sta_l2_magplasma.txt', 'r') as fobj:
    lines = list(fobj)

In [4]:
# The first line of the file is the whitespace-separated header row.
header_line = lines[0]
colnames = header_line.split()

In [5]:
# Show the column names parsed from the header row.
colnames

['EPOCH',
 'BTOTAL',
 'R_RTN',
 'NP',
 'SPEED',
 'TEMPERATURE',
 'THERMAL_SPEED',
 'VP_RTN',
 'BETA',
 'TOTAL_PRESSURE',
 'MAGNETIC_PRESSURE',
 'DYNANMIC_PRESSURE',
 'BX(RTN)',
 'BY(RTN)',
 'BZ(RTN)']

In [6]:
# The second line holds one unit per column. The EPOCH unit
# ("dd-mm-yyyy hh:mm:ss.ms") contains a space, so its two tokens are re-joined
# to keep `units` aligned with `colnames`.
tmp = lines[1].split()
units = [' '.join(tmp[:2]), *tmp[2:]]

In [7]:
# List every column next to its unit, one pair per line.
for name, unit_label in zip(colnames, units):
    print(f'{name} {unit_label}')

EPOCH dd-mm-yyyy hh:mm:ss.ms
BTOTAL nT
R_RTN AU
NP 1/cm3
SPEED km/s
TEMPERATURE deg_K
THERMAL_SPEED km/s
VP_RTN km/sec
BETA Na
TOTAL_PRESSURE pPa
MAGNETIC_PRESSURE pPa
DYNANMIC_PRESSURE nPa
BX(RTN) nT
BY(RTN) nT
BZ(RTN) nT


In [10]:
# Parse every data line (everything after the header and units rows) into
# [epoch_string, float, float, ...].
data = []
for line in lines[2:]:
    fields = line.split()
    if not fields:
        # Skip blank lines (e.g. a trailing newline at the end of the file);
        # they would otherwise become a junk row with an empty EPOCH.
        continue
    # The timestamp is split across two tokens (date and time); re-join them
    # and convert the remaining tokens to floats.
    data.append([' '.join(fields[:2]), *map(float, fields[2:])])

In [11]:
# One row per parsed data line, labelled with the header's column names.
sta_data_df = pd.DataFrame(
    data=data,
    columns=colnames,
)

In [12]:
# Preview the first rows. NOTE(review): the recurring -1e+30 entries look like
# a missing-data fill value rather than real measurements - confirm against the
# data source before using these columns.
sta_data_df.head()

Unnamed: 0,EPOCH,BTOTAL,R_RTN,NP,SPEED,TEMPERATURE,THERMAL_SPEED,VP_RTN,BETA,TOTAL_PRESSURE,MAGNETIC_PRESSURE,DYNANMIC_PRESSURE,BX(RTN),BY(RTN),BZ(RTN)
0,01-01-2007 00:00:00.000,3.18024,0.975637,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,3.98894,-1e+30,1.2635,-2.76573,0.845519
1,01-01-2007 00:01:00.000,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,3.50714,-1e+30,-1e+30,-1e+30,-1e+30
2,01-01-2007 00:00:00.000,2.89912,0.975637,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,3.3164,-1e+30,0.939458,-2.61098,0.763034
3,01-01-2007 00:01:00.000,3.06653,0.975637,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,3.71351,-1e+30,0.623079,-2.85861,0.760157
4,01-01-2007 00:02:00.000,3.2202,0.975637,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,-1e+30,4.10237,-1e+30,0.0839765,-3.06051,0.899706


In [13]:
# EPOCH is written day-first ("dd-mm-yyyy hh:mm:ss.ms" according to the units
# row), so parse it with an explicit format. Default inference would read
# ambiguous dates such as 01-02-2007 month-first and misplace those samples.
sta_data_df.index = pd.DatetimeIndex(
    pd.to_datetime(sta_data_df['EPOCH'], format='%d-%m-%Y %H:%M:%S.%f')
)

In [17]:
# Value the file uses in place of a measurement (the -1e+30 entries in the table).
FILL_VALUE = -1e30

# Keep only rows with a real BTOTAL sample, and only the BTOTAL column: the
# other columns are not plotted, still contain fill values, and the string
# EPOCH column cannot be averaged (newer pandas versions may raise on it
# instead of silently dropping it).
btotal_valid = sta_data_df.loc[sta_data_df['BTOTAL'].gt(FILL_VALUE), ['BTOTAL']]

# Time-based rolling windows need a sorted index; smooth with a centred
# 20-day mean.
btotal_smooth = btotal_valid.sort_index().rolling('20D', center=True).mean()

ax = btotal_smooth.plot(y='BTOTAL')
ax.set_ylabel('BTOTAL [nT], 20-day centred mean')
ax

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='EPOCH'>