# Proposal

This notebook is used to generate the figures shown in the proposal outline.

In [1]:
%matplotlib widget
import datetime as dt
import json

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.units as munits
import numpy as np
import pandas as pd
from pyts.image import RecurrencePlot

from sklearn.preprocessing import StandardScaler

converter = mdates.ConciseDateConverter()
munits.registry[np.datetime64] = converter
munits.registry[dt.date] = converter
munits.registry[dt.datetime] = converter

In [5]:
with open('ICME_WP4_V10.json', 'r') as fobj:
    json_data = json.load(fobj)
    
df = pd.DataFrame(json_data['data'], columns=json_data['columns'])

In [6]:
df.head()

Unnamed: 0,ICMECAT_ID,SC_INSITU,ICME_START_TIME,MO_START_TIME,MO_END_TIME,ICME_END_TIME,MO_BMAX,MO_BMEAN,MO_BSTD,MO_BZMEAN,...,MO_SPEED,MO_SPEED_STD,SHEATH_DENSITY,SHEATH_DENSITY_STD,MO_DENSITY,MO_DENSITY_STD,SHEATH_TEMPERATURE,SHEATH_TEMPERATURE_STD,MO_TEMPERATURE,MO_TEMPERATURE_STD
0,ICME_Wind_NASA_20070114_01,Wind,2007-01-14T11:31Z,2007-01-14T11:44Z,2007-01-15T07:45Z,2007-01-15T07:45Z,14.6,11.4,2.5,1.5,...,362.1,15.4,1.1,0.1,5.2,4.2,18881.1,3702.5,15654.3,9624.8
1,ICME_STEREO_A_JIAN_20070114_01,STEREO-A,2007-01-14T12:11Z,2007-01-14T14:39Z,2007-01-15T07:34Z,9999-99-99T99:99Z,14.9,12.4,1.8,1.0,...,,,,,,,,,,
2,ICME_STEREO_B_JIAN_20070114_01,STEREO-B,2007-01-14T12:24Z,2007-01-14T13:59Z,2007-01-15T07:37Z,9999-99-99T99:99Z,14.7,12.0,2.2,1.2,...,,,,,,,,,,
3,ICME_Wind_NASA_20070115_01,Wind,2007-01-15T20:49Z,2007-01-15T20:49Z,2007-01-16T04:45Z,2007-01-16T06:14Z,10.4,8.0,1.1,0.5,...,531.1,28.6,,,4.1,1.4,,,97330.3,37677.8
4,ICME_Wind_NASA_20070202_01,Wind,2007-02-02T04:04Z,2007-02-02T04:04Z,2007-02-02T14:35Z,2007-02-02T14:35Z,4.1,3.5,0.3,-0.7,...,502.0,15.2,,,2.6,0.3,,,56223.5,19545.4


In [7]:
stereo_a_df = df[df['SC_INSITU'].str.contains('STEREO-A')]
stereo_b_df = df[df['SC_INSITU'].str.contains('STEREO-B')]

In [8]:
stereo_a_icme_times = stereo_a_df.loc[:,['ICME_START_TIME','ICME_END_TIME']]
stereo_b_icme_times = stereo_b_df.loc[:,['ICME_START_TIME','ICME_END_TIME']]

In [40]:
stereo_a_icme_times['ICME_START_TIME'].min(), stereo_a_icme_times['ICME_START_TIME'].max()

('2007-01-14T12:11Z', '2014-08-11T09:03Z')

In [9]:
stereo_a_icme_times.head(10)

Unnamed: 0,ICME_START_TIME,ICME_END_TIME
1,2007-01-14T12:11Z,9999-99-99T99:99Z
13,2007-05-22T14:00Z,9999-99-99T99:99Z
24,2007-08-25T20:30Z,9999-99-99T99:99Z
31,2007-11-19T22:00Z,9999-99-99T99:99Z
39,2008-03-21T06:40Z,9999-99-99T99:99Z
42,2008-05-11T06:31Z,9999-99-99T99:99Z
46,2008-07-05T00:48Z,9999-99-99T99:99Z
50,2008-09-04T05:25Z,9999-99-99T99:99Z
54,2008-10-31T12:10Z,9999-99-99T99:99Z
55,2008-11-28T21:49Z,9999-99-99T99:99Z


In [10]:
stereo_b_icme_times

Unnamed: 0,ICME_START_TIME,ICME_END_TIME
2,2007-01-14T12:24Z,9999-99-99T99:99Z
12,2007-05-22T04:20Z,9999-99-99T99:99Z
28,2007-10-23T10:35Z,9999-99-99T99:99Z
32,2007-11-19T22:50Z,9999-99-99T99:99Z
35,2007-12-30T02:00Z,9999-99-99T99:99Z
...,...,...
628,2014-07-01T12:51Z,9999-99-99T99:99Z
634,2014-08-11T06:20Z,9999-99-99T99:99Z
638,2014-08-31T17:45Z,9999-99-99T99:99Z
641,2014-09-03T07:45Z,9999-99-99T99:99Z


In [11]:
df = pd.read_csv('/Users/ndmiles/Downloads/sta_mag_rtn_20070501T000000_20070531T000000_1m/sta_mag_rtn_20070501T000000_20070531T000000_1m.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [10]:
df.describe()

Unnamed: 0,BXSC A,BYSC A,BZSC A,BTSC A,BR A,BT A,BN A,BT A.1,X HAE A,Y HAE A,...,X CARR A,Y CARR A,Z CARR A,X RTN A,Y RTN A,Z RTN A,R A,Cone Ang,Clock Ang,Mag Pr
count,43178.0,43178.0,43178.0,43178.0,43178.0,43178.0,43178.0,43178.0,43178.0,43178.0,...,43178.0,43178.0,43178.0,43178.0,43178.0,43178.0,43178.0,43178.0,43178.0,43178.0
mean,0.048433,-0.29697,0.477378,4.702498,6.484784e+30,6.484784e+30,6.484784e+30,6.484784e+30,6.484784e+30,6.484784e+30,...,6.484784e+30,6.484784e+30,6.484784e+30,6.484784e+30,6.484784e+30,6.484784e+30,6.484784e+30,6.484784e+30,6.484784e+30,12.085564
std,3.351739,2.696572,3.311672,2.874207,2.545727e+32,2.545727e+32,2.545727e+32,2.545727e+32,2.545727e+32,2.545727e+32,...,2.545727e+32,2.545727e+32,2.545727e+32,2.545727e+32,2.545727e+32,2.545727e+32,2.545727e+32,2.545727e+32,2.545727e+32,17.826518
min,-15.862429,-17.48638,-16.66185,0.552341,-13.10139,-17.50967,-18.0778,0.5543972,-0.6860832,-0.9300005,...,-0.9595569,-0.9595431,-0.0628584,0.9586833,0.0,0.0,0.9586833,1.4432,-199.8586,0.121388
25%,-1.950538,-1.54462,-1.318519,2.86667,-2.194853,-1.294554,-1.418367,2.873092,-0.5871405,-0.8898689,...,-0.624405,-0.7505797,-0.04779595,0.9593207,0.0,0.0,0.9593207,57.43338,-69.99277,3.269754
50%,0.393088,-0.258655,0.894601,3.72132,-0.3909465,0.9659299,-0.1832184,3.732408,-0.4769516,-0.8329786,...,0.1449608,-0.1427851,-0.03180355,0.9600161,-0.0,0.0,0.9600161,96.58732,59.26415,5.510027
75%,2.195597,1.022887,2.561585,5.428199,1.953129,2.61739,1.062887,5.449364,-0.3580508,-0.760267,...,0.6700316,0.6273228,-0.01525048,0.9607598,0.0,0.0,0.9607598,125.4607,108.7137,11.723877
max,13.103516,17.408211,13.952129,20.147492,1e+34,1e+34,1e+34,1e+34,1e+34,1e+34,...,1e+34,1e+34,1e+34,1e+34,1e+34,1e+34,1e+34,1e+34,1e+34,161.510849


In [12]:
filtered_df = df[(df['BR A'].lt(1e31)) & (df['Np'].astype(float).lt(1e31))]

In [13]:
filtered_df.index = pd.DatetimeIndex(filtered_df['Time'])

In [14]:
import numpy as np

In [15]:
filtered_df['B_mag'] = np.sqrt(
    filtered_df['BR A']**2 + filtered_df['BT A']**2 + filtered_df['BN A']**2
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [16]:
filtered_df.plot(y=['BR A', 'BT A', 'BN A', 'B_mag'])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<AxesSubplot:xlabel='Time'>

In [30]:
scaled_Br = StandardScaler().fit_transform(filtered_df['BT A'].values.reshape(-1,1)).flatten()

In [31]:
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(filtered_df.index, scaled_Br)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7fb1bac4d5d0>]

In [26]:
scaled_Br

array([[-0.66916913],
       [-0.64522373],
       [-0.6625548 ],
       ...,
       [ 0.07389261],
       [ 0.05465961],
       [ 0.11437966]])

In [17]:
delta = dt.timedelta(days=5)
icme_cut = slice(
    dt.datetime(2007,5,25) - delta, 
    dt.datetime(2007,5,25) + delta
)
sir_cut = slice(
    dt.datetime(2007, 5, 8) - delta,
    dt.datetime(2007, 5, 8) + delta
)
columns_to_plot = ['BR A', 'BT A', 'BN A', 'B_mag', 'Np', 'Vp', 'Tp']

In [18]:
for col in columns_to_plot:
    filtered_df[col] = filtered_df[col].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [34]:
def make_recurrence_plots(filtered_df, cut, cols):
    fig, ax = plt.subplots(nrows=len(cols), ncols=2, sharex='col', figsize=(6,10))
    rp = RecurrencePlot(threshold='point', percentage=20)
    resampled_df = filtered_df.resample('5min').mean()
    resampled_df = resampled_df.dropna()
    for i,col in enumerate(cols):
        data = resampled_df[cut]
        data_scaled = StandardScaler().fit_transform(data[col].values.reshape(-1,1)).flatten()

        ax[i, 0].plot(data[col].index, data_scaled, lw=0.7)
        ax[i, 0].set_ylabel(col)
        ax[i, 0].yaxis.set_major_locator(plt.MaxNLocator(5))
        data_rp = rp.fit_transform(data_scaled.reshape(1,-1))
        ax[i, 1].imshow(data_rp[0], cmap='binary', origin='lower', aspect='equal')
        ax[i, 1].tick_params(axis='both', which='both', bottom=False, left=False,labelbottom=False, labelleft=False)
#         if col == 'Tp':
#             ax[i, 0].set_yscale('log')

    
    ax[0,0].set_title('Time Series')
    ax[0,1].set_title('Image Representation')
        
    return fig, ax

In [35]:
icme_fig, icme_ax = make_recurrence_plots(filtered_df, icme_cut, columns_to_plot)
icme_fig.suptitle(t='STEREO-A ICME', x=0.5, y=0.95)
# icme_fig.savefig('icme_example.jpg',format='jpg', dpi=250, bbox_inches='tight')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0.95, 'STEREO-A ICME')

In [77]:
sir_fig, sir_ax = make_recurrence_plots(filtered_df, sir_cut, columns_to_plot)
sir_fig.suptitle(t='STEREO-A SIR', x=0.5, y=0.95)
sir_fig.savefig('example_sir.jpg', format='jpg', dpi=250, bbox_inches='tight')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …