In [1]:
import sys 
import os 
sys.path.insert(1, '..')
sys.path.insert(2, '../modules/')

import pickle
import numpy as np
import pandas as pd


from sunpy.net import hek
from sunpy.time import parse_time

from ruffus import *
import re
from datetime import timedelta

import dataconfig
from modules import convert_datetime

In [2]:
WORKING_DIR = dataconfig.DATA_DIR_HEK_FLARES

tw = lambda x: os.path.join(WORKING_DIR, x)

# create initial time pairs for running through the hek AR query by month
time_array = []

x = pd.date_range(start='4/01/2010', end='11/30/2020', freq = 'M')

# x starts with the end date of april such that for every starting point we need to add an extra day such that we begin in the same month as the end month. 
for previous, current in zip(x, x[1:]):
    j = ([previous + timedelta(days = 1), current ])
    my_dict = {'name': f'{previous.year}_{previous.month}_flare_hek',
    			'start': j[0], 
    			'end':j[1]}
    time_array.append(my_dict)

In [3]:
time_array[0]

{'name': '2010_4_flare_hek',
 'start': Timestamp('2010-05-01 00:00:00', freq='M'),
 'end': Timestamp('2010-05-31 00:00:00', freq='M')}

In [4]:
start_file = time_array[0]
start_file


{'name': '2010_4_flare_hek',
 'start': Timestamp('2010-05-01 00:00:00', freq='M'),
 'end': Timestamp('2010-05-31 00:00:00', freq='M')}

In [5]:

client = hek.HEKClient()

event_type = 'FL'

tstart = start_file['start']

tend = start_file['end']

print(tstart, tend)



AR_results = client.search(hek.attrs.Time(tstart, tend),
							hek.attrs.EventType(event_type))
result = AR_results.to_pandas()


2010-05-01 00:00:00 2010-05-31 00:00:00


In [6]:
for column in result.columns:
    print(column)

gs_thumburl
comment_count
hpc_bbox
frm_humanflag
hgc_coord
event_coordsys
obs_levelnum
hpc_coord
event_npixels
gs_imageurl
ar_polarity
frm_paramset
hrc_coord
event_starttime
ar_mtwilsoncls
event_type
intensmin
fl_fluence
obs_meanwavel
frm_url
skel_chaincode
bound_chaincode
noposition
fl_fluenceunit
active
intensmax
frm_versionnumber
fl_peaktempunit
fl_halphaclass
area_uncert
obs_dataprepurl
hpc_geom
hgc_bbox
intensmedian
chaincodetype
obs_channelid
event_clippedspatial
ar_noaaclass
SOL_standard
event_avg_rating
eventtype
intensunit
hpc_boundcc
event_mapurl
frm_contact
ar_penumbracls
intensmean
bound_ccstartc1
frm_name
area_atdiskcenter
frm_identifier
obs_observatory
event_description
boundbox_c2ur
obs_firstprocessingdate
boundbox_c2ll
frm_institute
hrc_bbox
refs_orig
ar_mcintoshcls
event_maskurl
bound_ccstartc2
gs_movieurl
event_score
skel_startc2
skel_startc1
fl_efoldtime
event_expires
fl_efoldtimeunit
hrc_boundcc
event_probability
intensvar
frm_daterun
event_coordunit
hpc_y
hpc_x
sea

In [7]:

goes_results = AR_results

goes_event_list = []

for r in goes_results:
	try:
		hgc_poly_string = r['hgc_bbox']
		sep_coord_from_string_hgc = re.split('[(-)]+', hgc_poly_string)
		sep_coord_from_string_hgc = re.split('[(-)]+', hgc_poly_string)
		only_coord_string_list_hgc = sep_coord_from_string_hgc[1].split(',')
		this_coord_array_hgc = []
		for coord_pair_string_hgc in only_coord_string_list_hgc:
			numerical_coord_pairs_hgc = [np.float(coord_pair_string_hgc.split(' ')[0]), np.float(coord_pair_string_hgc.split(' ')[1])]
			this_coord_array_hgc.append(numerical_coord_pairs_hgc)
	except:
		this_coord_array_hgc = [0]

	try:
		hpc_poly_string = r['hpc_bbox']
		sep_coord_from_string_hpc = re.split('[(-)]+', hpc_poly_string)
		sep_coord_from_string_hpc = re.split('[(-)]+', hpc_poly_string)
		only_coord_string_list_hpc = sep_coord_from_string_hpc[1].split(',')
		this_coord_array_hpc = []
		for coord_pair_string_hpc in only_coord_string_list_hpc:
			numerical_coord_pairs_hpc = [np.float(coord_pair_string_hpc.split(' ')[0]), np.float(coord_pair_string_hpc.split(' ')[1])]
			this_coord_array_hpc.append(numerical_coord_pairs_hpc)
	except:
		this_coord_array_hpc = [0]


	try:
		hgs_poly_string = r['hgs_bbox']
		sep_coord_from_string_hgs = re.split('[(-)]+', hgs_poly_string)
		sep_coord_from_string_hgs = re.split('[(-)]+', hgs_poly_string)
		only_coord_string_list_hgs = sep_coord_from_string_hgs[1].split(',')
		this_coord_array_hgs = []
		for coord_pair_string_hgs in only_coord_string_list_hgs:
			numerical_coord_pairs_hgs = [np.float(coord_pair_string_hgs.split(' ')[0]), np.float(coord_pair_string_hgs.split(' ')[1])]
			this_coord_array_hgs.append(numerical_coord_pairs_hgs)
	except:
		this_coord_array_hgs = [0]
		
	if r['noposition'] == 'true':
		flare_loc_bool = 1
	else:
		flare_loc_bool = 0

	goes_class = str(r['fl_goescls'])

	try:
		# will fail if not a number
		goes_number = np.float(goes_class[1:])
	except:
		goes_number = np.nan

	goes_event = {
		'event_date': parse_time(r['event_starttime']).strftime(
			'%Y-%m-%d'),
		'start_time': convert_datetime.astropytime_to_pythondatetime(parse_time(r['event_starttime'])),
		'peak_time': convert_datetime.astropytime_to_pythondatetime(parse_time(r['event_peaktime'])),
		'end_time': convert_datetime.astropytime_to_pythondatetime(parse_time(r['event_endtime'])),
		'goes_class': goes_class,
		'goes_letter': goes_class[:1],
		'goes_number': goes_number,
		'AR_num': r['ar_noaanum'],
		'hgs_x': r['hgs_x'],
		'hgs_y': r['hgs_y'],
		'hgs_bbox_poly': this_coord_array_hgs,
		'hgc_x': r['hgc_x'],
		'hgc_y': r['hgc_y'],
		'hgc_bbox_poly': this_coord_array_hgc,
		'hpc_x': r['hpc_x'],
		'hpc_y': r['hpc_y'],
		'hpc_bbox_poly': this_coord_array_hpc,
		'event_type': r['event_type'],
		'telescope_used': r['obs_observatory'],
		'id_institute': r['frm_institute'],
		'id_team': r['frm_identifier'],
		'search_instrument': r['search_instrument'],
		'search_channel': r['search_channelid'],
		'noposition': flare_loc_bool 
		}
	goes_event_list.append(goes_event)

df = pd.DataFrame(goes_event_list)




In [8]:
df

Unnamed: 0,event_date,start_time,peak_time,end_time,goes_class,goes_letter,goes_number,AR_num,hgs_x,hgs_y,...,hpc_x,hpc_y,hpc_bbox_poly,event_type,telescope_used,id_institute,id_team,search_instrument,search_channel,noposition
0,2010-05-01,2010-05-01 01:24:17+00:00,2010-05-01 01:36:40+00:00,2010-05-01 03:07:25+00:00,,,,,-87.711603,25.437367,...,-873.0000,417.0000,"[[-945.0, 351.0], [-801.0, 351.0], [-801.0, 48...",FL,SDO,LMSAL,Annotator,AIA,171,0
1,2010-05-01,2010-05-01 01:34:00+00:00,2010-05-01 01:39:00+00:00,2010-05-01 01:43:00+00:00,C5.7,C,5.7,0.0,-81.000000,24.000000,...,-859.9380,396.5484,"[[16.69878, 86.4576], [16.69878, 86.4576], [16...",FL,SDO,LMSAL,SolarSoft,AIA,131171193211304,0
2,2010-05-01,2010-05-01 01:34:00+00:00,2010-05-01 01:39:00+00:00,2010-05-01 01:43:00+00:00,C5.7,C,5.7,11067.0,-73.000000,23.000000,...,-838.8540,390.0636,"[[-844.122, 381.936], [-839.688, 383.0358], [-...",FL,GOES,"U.S. Dept. of Commerce, NOAA, Space Weather Pr...",SWPC,GOES,XRA,0
3,2010-05-01,2010-05-01 05:23:00+00:00,2010-05-01 05:27:00+00:00,2010-05-01 05:31:00+00:00,B1.6,B,1.6,11064.0,0.000000,0.000000,...,0.0000,69.5046,"[[8.3e-05, -949.644], [-8.3e-05, -949.644], [0...",FL,GOES,"U.S. Dept. of Commerce, NOAA, Space Weather Pr...",SWPC,GOES,XRA,1
4,2010-05-01,2010-05-01 05:23:00+00:00,2010-05-01 05:27:00+00:00,2010-05-01 05:31:00+00:00,B1.6,B,1.6,0.0,-75.000000,22.000000,...,-853.9020,372.8568,"[[16.69812, 86.1888], [16.69812, 86.1888], [16...",FL,SDO,LMSAL,SolarSoft,AIA,131171193211304,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,2010-05-24,2010-05-24 13:05:00+00:00,2010-05-24 14:46:00+00:00,2010-05-24 15:50:00+00:00,B1.1,B,1.1,0.0,0.000000,0.000000,...,0.0000,26.2827,"[[8.3e-05, -946.722], [-8.3e-05, -946.722], [0...",FL,GOES,"U.S. Dept. of Commerce, NOAA, Space Weather Pr...",SWPC,GOES,XRA,1
133,2010-05-25,2010-05-25 15:46:00+00:00,2010-05-25 15:51:00+00:00,2010-05-25 15:51:00+00:00,B6.5,B,6.5,0.0,33.000000,-15.000000,...,500.3148,-226.5516,"[[16.60914, 40.70628], [16.60914, 40.70628], [...",FL,SDO,LMSAL,SolarSoft,AIA,131171193211304,0
134,2010-05-25,2010-05-25 15:46:00+00:00,2010-05-25 15:51:00+00:00,2010-05-25 15:55:00+00:00,B6.5,B,6.5,11072.0,32.000000,-16.000000,...,484.1100,-242.2518,"[[476.124, -250.158], [489.591, -250.3614], [4...",FL,GOES,"U.S. Dept. of Commerce, NOAA, Space Weather Pr...",SWPC,GOES,XRA,0
135,2010-05-27,2010-05-27 23:00:00+00:00,2010-05-27 23:15:00+00:00,2010-05-27 23:25:00+00:00,B1.4,B,1.4,0.0,-60.000000,13.000000,...,-800.9460,222.9756,"[[16.60254, 36.14178], [16.60254, 36.14178], [...",FL,SDO,LMSAL,SolarSoft,AIA,131171193211304,0
