In [1]:
import os
from pandas import *
import numpy as np
from astropy import time as time
import astropy.units as u
import csv

# Read CSV file

In [2]:
# Directory that holds the integrated flare catalogue (relative to the notebook).
folder = os.path.join(".", "integrated_flare_data")
catalogue_path = os.path.join(folder, 'goes_flares_integrated.csv')

data = read_csv(catalogue_path)

# Flare identification, verification flags and timing: one numpy array per
# catalogue column, each bound to a variable of the same name as its column.
(flare_id, goes_class, noaa_active_region, hinode_ar, primary_verified,
 secondary_verified, start_time, end_time, peak_time) = (
    np.array(data[column_name])
    for column_name in ("flare_id", "goes_class", "noaa_active_region",
                        "hinode_ar", "primary_verified", "secondary_verified",
                        "start_time", "end_time", "peak_time")
)

# Flare location: the provider tag plus the coordinates from each provider.
# The plain "x_hpc"/"y_hpc" columns are stored under the goes_* names.
(fl_loc_src, goes_x_hpc, goes_y_hpc, ssw_x_hpc, ssw_y_hpc,
 hinode_x_hpc, hinode_y_hpc) = (
    np.array(data[column_name])
    for column_name in ("fl_loc_src", "x_hpc", "y_hpc", "ssw_x_hpc",
                        "ssw_y_hpc", "hinode_x_hpc", "hinode_y_hpc")
)

# Select flare location from provider

In [3]:
# Merge the per-provider coordinates into a single (x_hpc, y_hpc) pair per
# flare, taking each flare's position from the provider named in fl_loc_src.
#
# The arrays start out as NaN rather than zero: a flare whose fl_loc_src is
# none of 'GOES', 'SSW' or 'XRT' is never assigned below, and a 0.0
# placeholder would be indistinguishable from a genuine coordinate of 0 and
# would slip through the NaN-based location filter applied further down.
x_hpc = np.full(len(goes_x_hpc), np.nan)
y_hpc = np.full(len(goes_y_hpc), np.nan)

# Positions of the flares located by each provider.
idx_goes = np.where(fl_loc_src == 'GOES')
idx_ssw  = np.where(fl_loc_src == 'SSW')
idx_xrt  = np.where(fl_loc_src == 'XRT')

# Copy the coordinates provider by provider (GOES, then SSW, then XRT).
for idx_src, x_src, y_src in ((idx_goes, goes_x_hpc, goes_y_hpc),
                              (idx_ssw, ssw_x_hpc, ssw_y_hpc),
                              (idx_xrt, hinode_x_hpc, hinode_y_hpc)):
    x_hpc[idx_src] = x_src[idx_src]
    y_hpc[idx_src] = y_src[idx_src]

# Select flares with a start time after mid-May 2010
(before that date, the SDO spacecraft location is not available)

In [4]:
# Reference epoch: only flares that start strictly after it are kept.
ref_time = time.Time('2010-05-15 00:00:00')

# One flag per flare; every start time is parsed on its own.
cond_mid_may_2010 = [time.Time(flare_start) > ref_time for flare_start in start_time]

idx = np.where(np.array(cond_mid_may_2010))

# Apply the same selection to every per-flare array.
(flare_id, goes_class, noaa_active_region, hinode_ar, primary_verified,
 secondary_verified, x_hpc, y_hpc, start_time, end_time, peak_time) = [
    column[idx]
    for column in (flare_id, goes_class, noaa_active_region, hinode_ar,
                   primary_verified, secondary_verified, x_hpc, y_hpc,
                   start_time, end_time, peak_time)
]

# Select flares for which the NOAA and Hinode active region (AR) numbers agree

In [5]:
# Stage 1: keep the flares for which both AR numbers are present (not NaN).
cond_noaa   = ~np.isnan(noaa_active_region)
cond_hinode = ~np.isnan(hinode_ar)
idx = np.where(cond_noaa & cond_hinode)

(flare_id, goes_class, noaa_active_region, hinode_ar, primary_verified,
 secondary_verified, x_hpc, y_hpc, start_time, end_time, peak_time) = [
    column[idx]
    for column in (flare_id, goes_class, noaa_active_region, hinode_ar,
                   primary_verified, secondary_verified, x_hpc, y_hpc,
                   start_time, end_time, peak_time)
]

# Stage 2: keep the flares for which the two AR numbers are equal.
# hinode_ar itself is not filtered at this stage.
idx = np.where(noaa_active_region == hinode_ar)

(flare_id, goes_class, noaa_active_region, primary_verified,
 secondary_verified, x_hpc, y_hpc, start_time, end_time, peak_time) = [
    column[idx]
    for column in (flare_id, goes_class, noaa_active_region,
                   primary_verified, secondary_verified, x_hpc, y_hpc,
                   start_time, end_time, peak_time)
]

# Select only primary or secondary verified events

In [6]:
# A flare is kept when at least one of its two verification flags is set.
verified_events = primary_verified | secondary_verified
idx = np.where(verified_events)

(flare_id, goes_class, noaa_active_region, primary_verified,
 secondary_verified, x_hpc, y_hpc, start_time, end_time, peak_time) = [
    column[idx]
    for column in (flare_id, goes_class, noaa_active_region,
                   primary_verified, secondary_verified, x_hpc, y_hpc,
                   start_time, end_time, peak_time)
]

# Select flares for which x_hpc and y_hpc are not NaN

In [7]:
# A flare is kept only when both of its coordinates are defined (not NaN).
cond_x_hpc = ~np.isnan(x_hpc)
cond_y_hpc = ~np.isnan(y_hpc)
idx = np.where(cond_x_hpc & cond_y_hpc)

(flare_id, goes_class, noaa_active_region, primary_verified,
 secondary_verified, x_hpc, y_hpc, start_time, end_time, peak_time) = [
    column[idx]
    for column in (flare_id, goes_class, noaa_active_region,
                   primary_verified, secondary_verified, x_hpc, y_hpc,
                   start_time, end_time, peak_time)
]

# Keep only the flares that start more than 30 min after the end of the previous flare

In [8]:
# Minimum gap required between the end of one flare and the start of the
# next flare in the list for the later flare to be kept.
min_separation = 30 * u.min

# Pair every flare (from the second one on) with the flare listed just before
# it. The comparison is always against the immediately preceding row of the
# current list, whether or not that row is itself kept.
# NOTE(review): this presumes the list is in chronological order - confirm.
start_time_check = start_time[1:]
end_time_check   = end_time[:-1]

# The first flare has no predecessor and is always kept. When the list is
# empty there is no first flare, so the mask has to be empty as well: seeding
# it with an unconditional True would select position 0 of zero-length arrays
# and raise an IndexError.
time_diff = [True] if len(start_time) > 0 else []
time_diff.extend(
    time.Time(this_start) - time.Time(previous_end) > min_separation
    for this_start, previous_end in zip(start_time_check, end_time_check)
)

# dtype=bool keeps the (possibly empty) mask a proper boolean array.
idx = np.where(np.array(time_diff, dtype=bool))

# Apply the same selection to every per-flare array.
(flare_id, goes_class, noaa_active_region, primary_verified,
 secondary_verified, x_hpc, y_hpc, start_time, end_time, peak_time) = [
    column[idx]
    for column in (flare_id, goes_class, noaa_active_region,
                   primary_verified, secondary_verified, x_hpc, y_hpc,
                   start_time, end_time, peak_time)
]

# Write csv

In [9]:
# Write the selected flares to a series of CSV files, flare_list_1.csv,
# flare_list_2.csv, ..., inside `folder`. Every file starts with the header
# row and holds at most `events_per_list` flares.

# Header of the CSV file
header_csv = ['flare_id', 'goes_class', 'noaa_active_region', 'primary_verified',
              'secondary_verified', 'x_hpc', 'y_hpc', 'start_time', 'end_time', 'peak_time']

# Maximum number of flares stored in a single output file.
events_per_list = 100

count_events      = 0   # number of flares written so far
count_flare_lists = 1   # 1-based number of the file being written

while count_events < len(flare_id):

    csv_filename = os.path.join(folder, "flare_list_" + str(count_flare_lists) + ".csv")

    # Index one past the last flare that goes into this file.
    list_end = min(count_events + events_per_list, len(flare_id))

    # Open each file once and write the header and all of its rows in one go,
    # instead of reopening the file in append mode for every single row.
    # The `with` block closes the file, so no explicit close() is needed.
    with open(csv_filename, 'w', encoding='UTF8', newline='') as file_csv:
        writer = csv.writer(file_csv)
        writer.writerow(header_csv)

        for i_event in range(count_events, list_end):
            writer.writerow([flare_id[i_event],
                             goes_class[i_event],
                             # the AR number is written as an integer
                             int(noaa_active_region[i_event]),
                             primary_verified[i_event],
                             secondary_verified[i_event],
                             x_hpc[i_event],
                             y_hpc[i_event],
                             start_time[i_event],
                             end_time[i_event],
                             peak_time[i_event]])

    count_events = list_end
    count_flare_lists += 1
