In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
from collections import Counter
import math
import datetime as datetime
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import statistics
import glob
import ipyplot
import numpy as np
import random 


In [None]:
# Load the combined data table from CSV.
filename = 'data/fulldata_20160101_to_20191231.csv'
data = pd.read_csv(filename)

# Convert TIME (interpreted as seconds since the Unix epoch) to naive UTC
# timestamps in a single vectorized call.  This replaces a per-row
# datetime.datetime.utcfromtimestamp, which is deprecated since Python 3.12.
# NOTE: despite its name, 'datetime_str' holds datetime values, not strings;
# the name is kept because later cells reference it.
data['datetime_str'] = pd.to_datetime(data['TIME'], unit='s')

In [None]:
# Columns carried over from the raw table into the working frame.
selected_columns = [
    'GSM_X', 'GSM_Y', 'GSM_Z', 'BETA', 'DIST', 'BX_GSM', 'BY_GSM', 'BZ_GSM',
    'STORM_PHASE', 'O_V', 'O_VPAR', 'O_VPERP', 'O_N', 'O_P', 'H_N',
    'SW_P_PARA', 'SW_V_PARA', 'KP', 'DST', 'datetime_str', 'IMF_BZ',
]
df = pd.DataFrame(data.loc[:, selected_columns])

# Calendar year (as a yearly Period) plus two boolean activity flags.
df['year'] = df['datetime_str'].dt.to_period('Y')
df['storm'] = df['STORM_PHASE'].gt(0)
df['kp_gt_2'] = df['KP'].gt(2)

# Human-readable labels for the numeric STORM_PHASE codes.
storm_phase_names = {
    0: 'nonstorm',
    1: 'prestorm',
    2: 'main phase',
    3: 'fast recovery',
    4: 'long recovery',
}
df['storm_phase'] = pd.Categorical(df['STORM_PHASE']).rename_categories(storm_phase_names)

# Region label from BETA: < 0.05 -> 'lobe', [0.05, 1) -> 'bl', >= 1 -> 'ps'.
# Rows where BETA is NaN match none of the masks and keep a missing region.
beta = df['BETA']
df.loc[beta < 0.05, 'region'] = 'lobe'
df.loc[(beta >= 0.05) & (beta < 1), 'region'] = 'bl'
df.loc[beta >= 1, 'region'] = 'ps'

# True for samples strictly before 2019-04-16 or strictly after 2019-08-17.
window_start = pd.Timestamp('2019-4-16')
window_end = pd.Timestamp('2019-8-17')
df['compression_mode'] = (df['datetime_str'] < window_start) | (df['datetime_str'] > window_end)

# Assign every sample to the 4-hour window of the day that contains it, using
# vectorized .dt accessors instead of per-row Python callables.
timestamps = df['datetime_str']

# Hour of day at which the window starts (0, 4, ..., 20) and ends (4, ..., 24).
df['start_time'] = ((timestamps.dt.hour // 4) * 4).astype('int64')
df['end_time'] = df['start_time'] + 4

# Midnight of the sample's day plus the window offsets; an end_time of 24
# rolls over to midnight of the following day.
day_start = timestamps.dt.normalize()
df['start_time_dt'] = day_start + pd.to_timedelta(df['start_time'], unit='h')
df['end_time_dt'] = day_start + pd.to_timedelta(df['end_time'], unit='h')

# Path of the plot image that corresponds to the sample's 4-hour window.
df['o_beam_filepath'] = (
    'idl_plots/plots_with_subtraction/obeam_day/'
    + df['start_time_dt'].dt.strftime('%Y')
    + '/o_beam'
    + df['start_time_dt'].dt.strftime('%Y%m%d_%H%M%S')
    + '_to_'
    + df['end_time_dt'].dt.strftime('%Y%m%d_%H%M%S')
    + '_plasma_condition_short.png'
)

# Distance bin from DIST: [7, 9) -> 'near', >= 9 -> 'tail'.
# Rows with DIST below 7 (or NaN) are left without a dist_region.
radial_distance = df['DIST']
df.loc[(radial_distance >= 7) & (radial_distance < 9), 'dist_region'] = 'near'
df.loc[radial_distance >= 9, 'dist_region'] = 'tail'

# Hemisphere label: where GSM_X > -1 it follows the sign of GSM_Z, where
# GSM_X < -1 it follows the sign of BX_GSM.  Rows sitting exactly on a
# threshold (GSM_X == -1, or a zero in the deciding column) stay unlabeled.
x_above_cut = df['GSM_X'] > -1
x_below_cut = df['GSM_X'] < -1

south_mask = (x_above_cut & (df['GSM_Z'] < 0)) | (x_below_cut & (df['BX_GSM'] < 0))
df.loc[south_mask, 'hemi'] = 'south'

north_mask = (x_above_cut & (df['GSM_Z'] > 0)) | (x_below_cut & (df['BX_GSM'] > 0))
df.loc[north_mask, 'hemi'] = 'north'

# Default: no beam flagged.
df.loc[:, 'flag'] = 0

# One entry per hemisphere: (hemi label, raw flag column, value written to
# 'flag', mapping of output column -> raw source column).
# 'south' rows read the *_PARA columns, 'north' rows read the *_ANTI columns.
beam_sources = (
    ('south', 'FLAG_PARA', 1, {
        'flux': 'FLUX_PARA',
        'energy': 'EN_PARA',
        'eflux': 'EFLUX_PARA',
        'imfBy': 'IMF_BY_PARA',
        'imfBz': 'IMF_BZ_PARA',
    }),
    ('north', 'FLAG_ANTI', -1, {
        'flux': 'FLUX_ANTI',
        'energy': 'EN_ANTI',
        'eflux': 'EFLUX_ANTI',
        'imfBy': 'IMF_BY_ANTI',
        'imfBz': 'IMF_BZ_ANTI',
    }),
)

for hemisphere, flag_column, flag_value, column_map in beam_sources:
    # The mask mixes df and data; both share the same row index, so the
    # comparison and the assignments below align row by row.
    selected = (df['hemi'] == hemisphere) & (data[flag_column] == 1)
    df.loc[selected, 'flag'] = flag_value
    for target_column, source_column in column_map.items():
        df.loc[selected, target_column] = data[source_column]

# Energy rounded to the nearest whole number (kept as float; NaN where no beam).
df['energy_int'] = df['energy'].round()

# Put the samples in chronological order.
df = df.sort_values(by='datetime_str')

In [None]:
# Position histograms for low-energy beam samples: positive eflux,
# GSM_X < -1, compression mode, year 2017, region 'bl', energy_int < 40.
#
# The year is taken from the timestamp because df['year'] stores pandas
# Period values, and comparing those to the integer 2017 never matches
# (which would leave every histogram empty).
index = (
    (df['eflux'] > 0)
    & (df['GSM_X'] < -1)
    & (df['compression_mode'])
    & (df['datetime_str'].dt.year == 2017)
    & (df['region'] == 'bl')
    & (df['energy_int'] < 40)
)

# One histogram per column, laid out row-major on a 2x2 grid.
panel_columns = ("GSM_X", "GSM_Y", "GSM_Z", "DIST")
fig = make_subplots(rows=2, cols=2, subplot_titles=panel_columns)

for position, column in enumerate(panel_columns):
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(
        go.Histogram(x=df.loc[index, column]),
        row=grid_row + 1,
        col=grid_col + 1,
    )

fig.update_layout(showlegend=False, title='low energy beam')

fig.show()

In [None]:
# Draw a reproducible stratified sample of rows: up to n_samples per
# (year, region, distance bin) combination, then collect the plot files
# those rows point to.
random.seed(42)
n_samples = 5
index_list = []

# Conditions shared by every stratum, evaluated once.
common_mask = (df['eflux'] > 0) & (df['GSM_X'] < -1) & (df['compression_mode'])

# Year varies slowest and distance bin fastest; the order matters because it
# fixes the sequence of random.sample calls under the seed above.
strata = [
    (iyear, iregion, idistant)
    for iyear in ['2016', '2017', '2018', '2019', '2020']
    for iregion in ['lobe', 'bl', 'ps']
    for idistant in ['near', 'tail']
]

for iyear, iregion, idistant in strata:
    stratum_mask = (
        common_mask
        & (df['year'] == iyear)
        & (df['region'] == iregion)
        & (df['dist_region'] == idistant)
    )
    candidates = df.loc[stratum_mask, :].index

    # Strata holding at most n_samples rows are taken whole.
    if len(candidates) > n_samples:
        index_list.extend(random.sample(list(candidates), n_samples))
    else:
        index_list.extend(candidates)

# Plot file of every sampled row, then the de-duplicated (sorted) set of files.
to_plot_list = df.loc[index_list, 'o_beam_filepath'].tolist()
unique_to_plot_list = np.unique(to_plot_list)

pd.DataFrame(unique_to_plot_list).to_csv('output/test_plot_list.csv')

print(len(unique_to_plot_list))

In [None]:
# Rows with GSM_X < -1 in compression mode; this is both the pool the test
# set is drawn from and the denominator of the coverage ratio printed below.
base_mask = (df['GSM_X'] < -1) & (df['compression_mode'])

# Keep every such row whose plot file is in the sampled plot list.
# Series.isin is a vectorized membership test (no per-row Python lambda).
index_test = base_mask & df['o_beam_filepath'].isin(unique_to_plot_list)

# Independent copy that keeps the original row labels.
# NOTE(review): if a fresh 0..n-1 index was intended, use reset_index instead.
dataset_test = df.loc[index_test, :].copy()
print(dataset_test.shape)

# Columns written to the test-set CSV.  The distance column is named 'DIST'
# in df; asking .loc for a non-existent 'dist' label raises KeyError.
export_columns = [
    'datetime_str', 'flag', 'flux', 'DIST', 'GSM_X', 'GSM_Y', 'GSM_Z',
    'BETA', 'o_beam_filepath', 'hemi', 'BX_GSM',
]
dataset_test.loc[:, export_columns].to_csv('output/test_dataset.csv')

# Fraction of the candidate rows that ended up in the test set.
print(dataset_test.shape[0] / int(base_mask.sum()))

In [None]:
# Keep only the first 30 plot paths for display.
# NOTE: this rebinds unique_to_plot_list, so re-running the cell that builds
# index_test after this one would filter against only these 30 paths.
unique_to_plot_list = list(unique_to_plot_list[:30])

# Matching paths of the un-subtracted plots: same file names under the
# 'plots_original' directory instead of 'plots_with_subtraction'.
unique_to_plot_list_ori = [
    subtracted_path.replace('plots_with_subtraction', 'plots_original')
    for subtracted_path in unique_to_plot_list
]


In [None]:
# Show each original plot next to its subtracted counterpart, one pair per call.
for position, subtracted_path in enumerate(unique_to_plot_list):
    image_pair = [unique_to_plot_list_ori[position], subtracted_path]
    ipyplot.plot_images(image_pair, img_width=400)
