In [1]:
import pytz
import os
from datetime import datetime, timedelta
import pandas as pd
from pandas.tseries.offsets import MonthBegin
import urllib2
import cPickle
from sunpy.time import TimeRange
from sunpy.instr import goes
from sunpy.net import hek
# Single HEK client instance shared by the query_hek* helper functions.
client = hek.HEKClient()

In [11]:
# UTC month-start timestamps; consecutive entries are used as the
# (start, end) bounds of each monthly query and to name the monthly CSV files.
date_values = pd.date_range(
    start=pd.Timestamp('2015-03-01', tz='utc'),
    end=pd.Timestamp('2015-04-01', tz='utc'),
    freq='MS'
)

In [10]:
# Folder (relative to the working directory) that holds the per-month CSV
# files written and read by the cells of this notebook; created if missing.
directory = 'AR-FL_2/'
if not os.path.exists(directory):
    os.makedirs(directory)

In [13]:
def _call_with_retry(request, max_retries=None, retry_delay=1.0):
    """Call ``request()`` until it succeeds, retrying on urllib2 errors.

    Parameters
    ----------
    request : callable
        Zero-argument callable performing the remote call.
    max_retries : int or None
        Number of retries allowed after the first failed attempt.
        ``None`` (the default) retries indefinitely.
    retry_delay : float
        Seconds to wait between attempts, so that a failing server is not
        hit in a tight loop.

    Returns
    -------
    Whatever ``request()`` returns on its first successful call.

    Raises
    ------
    urllib2.HTTPError, urllib2.URLError
        Re-raised once ``max_retries`` retries have been used up.
    """
    import time  # local import: only this helper needs it

    failures = 0
    while True:
        try:
            return request()
        except (urllib2.HTTPError, urllib2.URLError):
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            time.sleep(retry_delay)


def get_goes_data(time_range, max_retries=None, retry_delay=1.0):
    """Return ``goes.get_goes_event_list(time_range)``, retrying on network errors.

    ``max_retries`` and ``retry_delay`` are forwarded to ``_call_with_retry``;
    with the defaults the call is retried until it succeeds.
    """
    return _call_with_retry(lambda: goes.get_goes_event_list(time_range),
                            max_retries, retry_delay)


def query_hek_goes(tstart, tend, max_retries=None, retry_delay=1.0):
    """Query HEK for 'FL' events between ``tstart`` and ``tend`` whose
    observatory is 'GOES', retrying on network errors.

    ``max_retries`` and ``retry_delay`` are forwarded to ``_call_with_retry``;
    with the defaults the query is retried until it succeeds.
    Returns the result of ``client.query``.
    """
    def request():
        return client.query(hek.attrs.Time(tstart, tend),
                            hek.attrs.EventType('FL'),
                            hek.attrs.OBS.Observatory == 'GOES')

    return _call_with_retry(request, max_retries, retry_delay)


def query_hek(tstart, tend, event_type, max_retries=None, retry_delay=1.0):
    """Query HEK for events of ``event_type`` between ``tstart`` and ``tend``,
    retrying on network errors.

    ``max_retries`` and ``retry_delay`` are forwarded to ``_call_with_retry``;
    with the defaults the query is retried until it succeeds.
    Returns the result of ``client.query``.
    """
    def request():
        return client.query(hek.attrs.Time(tstart, tend),
                            hek.attrs.EventType(event_type))

    return _call_with_retry(request, max_retries, retry_delay)

# Group all data from GOES

In [None]:
%%time
# Columns kept from each HEK flare query result. Every name appears exactly
# once, so the saved CSV has no duplicated columns.
goes_fl_columns = [
    'ar_noaanum',
    'event_starttime',
    'event_peaktime',
    'event_endtime',
    'fl_goescls',
    'ar_mcintoshcls',
    'obs_instrument',
    'obs_lastprocessingdate',
    'obs_levelnum',
    'obs_meanwavel',
    'obs_observatory',
    'obs_title',
    'obs_wavelunit',
    'event_coord1',
    'event_coord2',
    'boundbox_c1ll',
    'boundbox_c1ur',
    'hrc_bbox',
    'hrc_boundcc',
    'hrc_coord',
    'hgc_bbox',
    'hgc_boundcc',
    'hgc_coord',
    'hgc_x',
    'hgc_y',
    'hgs_bbox',
    'hgs_boundcc',
    'hgs_coord',
    'hgs_x',
    'hgs_y',
    'hpc_bbox',
    'hpc_boundcc',
    'hpc_coord',
    'hpc_geom',
    'hpc_radius',
    'hpc_x',
    'hpc_y',
    'hrc_a',
    'hrc_r',
]

# For each pair of consecutive month starts: query the flare events, keep the
# selected columns, add a 'flux' column and write one CSV per month.
for d in xrange(len(date_values) - 1):
    month_label = str(date_values[d].to_period('M'))

    # Get FL data from GOES
    goes_data_query = query_hek_goes(date_values[d], date_values[d+1])
    goes_data_query = pd.DataFrame.from_dict(goes_data_query)

    # .copy() makes the selection an independent frame, so adding a column
    # below does not write into a slice of the query result.
    goes_data = goes_data_query[goes_fl_columns].copy()

    # The flare class is stored in 'fl_goescls'; the selected columns contain
    # no 'goes_class' column.
    goes_data['flux'] = goes_data['fl_goescls'].apply(goes.flareclass_to_flux)

    goes_data.to_csv('{0}GOES-FL-{1}.csv'.format(directory, month_label))
    print("Done FL, {0}".format(month_label))

In [4]:
# Read every monthly GOES flare CSV and stack them into one frame.
# The frames are collected in a list and concatenated once, instead of
# re-copying the accumulated frame on every iteration.
goes_monthly_frames = []
for d in xrange(len(date_values) - 1):
    month_label = str(date_values[d].to_period('M'))
    # Get FL data from GOES (read_csv with index_col=0, parse_dates=True is
    # the replacement for the deprecated DataFrame.from_csv defaults).
    goes_data = pd.read_csv('{0}GOES-FL-{1}.csv'.format(directory, month_label),
                            index_col=0, parse_dates=True)
    goes_monthly_frames.append(goes_data)
    print("Done FL, {0}".format(month_label))

# pd.concat raises on an empty list, so fall back to an empty frame when
# there is no month to load.
if goes_monthly_frames:
    goes_data_all = pd.concat(goes_monthly_frames, ignore_index=True)
else:
    goes_data_all = pd.DataFrame()
    

Done FL, 2011-03
Done FL, 2011-04
Done FL, 2011-05
Done FL, 2011-06
Done FL, 2011-07
Done FL, 2011-08
Done FL, 2011-09
Done FL, 2011-10
Done FL, 2011-11
Done FL, 2011-12


In [6]:
# Preview the first three rows of the combined GOES table.
goes_data_all.head(3)

Unnamed: 0,end_time,event_date,goes_class,goes_location,noaa_active_region,peak_time,start_time,flux
0,2011-03-01 00:27:00,2011-02-28,C1.6,"(0, 0)",0,2011-03-01 00:05:00,2011-02-28 23:11:00,1.6e-06 W / m2
1,2011-03-01 04:40:00,2011-03-01,C6.0,"(-32, 24)",11164,2011-03-01 04:13:00,2011-03-01 02:30:00,6e-06 W / m2
2,2011-03-01 09:34:00,2011-03-01,C2.9,"(0, 0)",11164,2011-03-01 09:18:00,2011-03-01 08:52:00,2.9e-06 W / m2


In [5]:
# Create the 'flux' column as empty strings, then fill it row by row with the
# flux computed from each row's GOES class.
goes_data_all['flux'] = ''
for row_label, flare_class in zip(goes_data_all.index, goes_data_all['goes_class']):
    flare_flux = goes.flareclass_to_flux(flare_class)
    goes_data_all.set_value(row_label, 'flux', flare_flux)

In [30]:
# Distinct GOES class labels present in the combined table.
set(goes_data_all['goes_class'])

{'B1.1',
 'B1.2',
 'B1.3',
 'B1.5',
 'B1.6',
 'B1.7',
 'B1.8',
 'B1.9',
 'B2.0',
 'B2.1',
 'B2.2',
 'B2.3',
 'B2.4',
 'B2.5',
 'B2.6',
 'B2.7',
 'B2.8',
 'B2.9',
 'B3.0',
 'B3.1',
 'B3.2',
 'B3.3',
 'B3.4',
 'B3.5',
 'B3.6',
 'B3.7',
 'B3.8',
 'B3.9',
 'B4.0',
 'B4.1',
 'B4.2',
 'B4.3',
 'B4.4',
 'B4.5',
 'B4.6',
 'B4.7',
 'B4.8',
 'B4.9',
 'B5.0',
 'B5.1',
 'B5.2',
 'B5.3',
 'B5.4',
 'B5.5',
 'B5.6',
 'B5.7',
 'B5.8',
 'B5.9',
 'B6.0',
 'B6.1',
 'B6.2',
 'B6.3',
 'B6.4',
 'B6.5',
 'B6.6',
 'B6.7',
 'B6.8',
 'B6.9',
 'B7.0',
 'B7.1',
 'B7.2',
 'B7.3',
 'B7.4',
 'B7.5',
 'B7.6',
 'B7.7',
 'B7.8',
 'B7.9',
 'B8.0',
 'B8.1',
 'B8.2',
 'B8.3',
 'B8.4',
 'B8.5',
 'B8.6',
 'B8.7',
 'B8.8',
 'B8.9',
 'B9.0',
 'B9.1',
 'B9.2',
 'B9.3',
 'B9.4',
 'B9.5',
 'B9.6',
 'B9.7',
 'B9.8',
 'B9.9',
 'C1.0',
 'C1.1',
 'C1.2',
 'C1.3',
 'C1.4',
 'C1.5',
 'C1.6',
 'C1.7',
 'C1.8',
 'C1.9',
 'C2.0',
 'C2.1',
 'C2.2',
 'C2.3',
 'C2.4',
 'C2.5',
 'C2.6',
 'C2.7',
 'C2.8',
 'C2.9',
 'C3.0',
 'C3.1',
 'C3.2',
 

In [28]:
# Compute the flux for every row from its GOES class label.
# NOTE: the recorded run of this cell failed with an AttributeError because
# the imported `goes` module had no `flareclass_to_flux` attribute.
goes_data_all['flux'] = goes_data_all['goes_class'].apply(
    lambda flare_class: goes.flareclass_to_flux(flare_class)
)

AttributeError: 'module' object has no attribute 'flareclass_to_flux'

In [16]:
# Unit suffix removed from the textual flux value before converting to float.
flux_unit = ' W / m2'
# NOTE(review): `test_val` is not assigned in any cell visible in this
# notebook, so this cell relies on leftover kernel state. It is also not
# re-runnable: after the first run `test_val` is a float, which has no
# `.replace` method. Presumably it started as one 'flux' string - confirm.
test_val = float(test_val.replace(flux_unit, ''))
test_val

1.2e-06

In [20]:
# Check that the parsed value behaves as a number (scale it by ten).
test_val * 10

1.2e-05

In [8]:
# goes_data_all.groupby(['noaa_active_region'])[['flux']].median()

# Group data from SDO

In [None]:
# Read every monthly SDO active-region CSV and stack them into one frame.
# The frames are collected in a list and concatenated once, instead of
# re-copying the accumulated frame on every iteration.
AR_monthly_frames = []
for d in xrange(len(date_values) - 1):
    month_label = str(date_values[d].to_period('M'))
    # Get AR data from SDO (read_csv with index_col=0, parse_dates=True is
    # the replacement for the deprecated DataFrame.from_csv defaults).
    AR_data = pd.read_csv('{0}SDO-AR-{1}.csv'.format(directory, month_label),
                          index_col=0, parse_dates=True)
    AR_monthly_frames.append(AR_data)
    # The progress message names the data set actually loaded here (AR).
    print("Done AR, {0}".format(month_label))

# pd.concat raises on an empty list, so fall back to an empty frame when
# there is no month to load.
if AR_monthly_frames:
    AR_data_all = pd.concat(AR_monthly_frames, ignore_index=True)
else:
    AR_data_all = pd.DataFrame()
    