In [2]:
import numpy as np
import pandas as pd
from sunpy.net import attrs as a
from sunpy.net import Fido
from sunpy.time import parse_time

import os
import datetime

In [3]:
FLARE_CSV_DIR = './event_records/all_flare_events_year'
EVENTS_BY_DATE_DIR = './event_records/all_flare_events_date'
EVENTS_BY_YEAR_DIR = './event_records/events_by_year'
NEW_EVENTS_BY_DATE_DIR = './event_records/new_events_by_date'
NEW_EVENTS_BY_CLASS_DIR = './event_records/new_events_by_class'

In [4]:
def query_goes_year(year):
    print(year)
    t_start = datetime.datetime(year=year, month=1, day=1)
    t_end = datetime.datetime(year=year+1, month=1, day=1)
    results = Fido.search(
        a.Time(t_start, t_end),
        a.hek.EventType("FL"),
    )
    if not results.all_colnames: # no columns / no results
        return None

    event_table = results['hek'][
        'event_starttime'
        , 'event_endtime'
        , 'fl_goescls'
        , 'hpc_coord'
        , 'hpc_bbox'
        , 'hrc_coord'
        , 'hrc_bbox'
        , 'hgc_coord'
        , 'hgc_bbox'
        , 'event_coordsys'
        , 'hgs_coord'
        , 'hgs_bbox'
        , 'event_peaktime'
        , 'active'
        , 'ar_noaaclass'
        , 'ar_noaanum'
        , 'boundbox_c1ur'
        , 'boundbox_c2ur'
        , 'boundbox_c1ll'
        , 'boundbox_c2ll'
        , 'hpc_y'
        , 'hpc_x'
        , 'hgs_y'
        , 'hgs_x'
        , 'hpc_radius'
        , 'event_c2error'
    ]
    
    return event_table

In [5]:
def goes_query_to_df(query):
    event_df = query.to_pandas()

    if len(event_df) == 0:
        return None

    return event_df

In [6]:
def goes_flares_to_csv(start_year, end_year):
    if not os.path.exists(FLARE_CSV_DIR):
        os.makedirs(FLARE_CSV_DIR)
    
    for year in range(start_year, end_year+1):
        flares_table = query_goes_year(year)
        flares_df = goes_query_to_df(flares_table)
        flares_csv_path = os.path.join(FLARE_CSV_DIR, f'{str(year)}.csv') 
        flares_df.to_csv(flares_csv_path)

In [7]:
def CreateEventByDateCSV(flares_by_year_dir, save_dir):
    year_dict = {}
    
    for f in os.scandir(flares_by_year_dir):
        if f.is_file():
            year_df = pd.read_csv(f)
            
            for index, row in year_df.iterrows():
                dt = parse_time(row['event_starttime'])
                year, month = dt.datetime.year, dt.datetime.month
            
                if year in year_dict:
                    if month in year_dict[year]:
                        month_df = pd.DataFrame([row])
                        year_dict[year][month] = pd.concat([year_dict[year][month], month_df])
                    else:
                        month_df = pd.DataFrame([row])
                        year_dict[year][month] = month_df
                else:
                    month_df = pd.DataFrame([row])
                    year_dict[year] = {month: month_df}
    
    # iterate over years
    for k, v in year_dict.items():
        year_folder_dir = os.path.join(save_dir, str(k))

        if not os.path.exists(year_folder_dir):
            os.makedirs(year_folder_dir)
        
        # iterate over months
        for k1, v1 in v.items():
            event_path_csv_dir = os.path.join(save_dir, f'{str(k)}/{str(k1)}.csv')
            if os.path.exists(event_path_csv_dir):
                v1.to_csv(event_path_csv_dir, mode='a', index=False)
            else:
                v1.to_csv(event_path_csv_dir, index=False)

In [8]:
def CreateEventByClassCSV(flares_by_year_dir, save_dir):
    class_dict = {}
    
    for f in os.scandir(flares_by_year_dir):
        if f.is_file():
            year_df = pd.read_csv(f)
            
            for index, row in year_df.iterrows():
                e_class = row['fl_goescls']
                e_class = list(e_class)[0]
                
                if e_class in class_dict:
                    class_df = pd.DataFrame([row])
                    class_dict[e_class] = pd.concat([class_dict[e_class], class_df])
                
                else:
                    class_df = pd.DataFrame([row])
                    class_dict[e_class] = class_df
            
                
    
    # iterate over years
    for k, v in class_dict.items():
        class_csv_dir = os.path.join(save_dir, f'{str(k)}.csv')

        if os.path.exists(class_csv_dir):
            v.to_csv(class_csv_dir, mode='a', index=False)
        else:
            v.to_csv(class_csv_dir)

In [9]:
x_csv = pd.read_csv('./event_records/new_events_by_class/X.csv')

In [22]:
def RemoveDuplicateDataEntries(files_dir):
    for subdir, dirs, files in os.walk(files_dir):
        for f in files:
            filepath = os.path.join(subdir, f)
            print(filepath)
            df = pd.read_csv(filepath)
            df.drop_duplicates(subset=['event_starttime', 'fl_goescls'], inplace=True)
            df.to_csv(filepath)

In [23]:
RemoveDuplicateDataEntries('./event_records/new_events_by_class')

./event_records/new_events_by_class/C.csv
./event_records/new_events_by_class/B.csv
./event_records/new_events_by_class/X.csv
./event_records/new_events_by_class/N.csv
./event_records/new_events_by_class/M.csv
./event_records/new_events_by_class/A.csv
./event_records/new_events_by_class/.ipynb_checkpoints/X-checkpoint.csv


In [9]:
# goes_flares_to_csv(2010, 2018)

In [10]:
# CreateEventByDateCSV(EVENTS_BY_YEAR_DIR, NEW_EVENTS_BY_DATE_DIR)

In [11]:
# CreateEventByClassCSV(EVENTS_BY_YEAR_DIR, NEW_EVENTS_BY_CLASS_DIR)