In [19]:
import math
import numpy as np
import pandas as pd
import requests
from config import api_key
import time
#from datetime import datetime

import json

import flr_tools
import cme_tools
import matplotlib.pyplot as plt
import scipy.stats as stats
from datetime import datetime as dt

### Flare and CME data exploration.

Using the [NASA APIs](https://api.nasa.gov/), we pulled data from the DONKI Flare (FLR) and DONKI CME databases.  

# Flare Data Exploration
## Example of 1 list item in json response for flare query.
## Selected the following items from the json:
1. **['flrID']** - Identification
2. **['classType']** In order to find power of flare.
3. **['beginTime']** For time correlation with other events.
4. **['peakTime']** For time correlation with other events.
5. **['endTime']** For time correlation with other events.
6. **['linkedEvents']** To look for correlation with events from other databases.

# CME Data Exploration
## Example of CME json response list item.
## Selected the following items from the json:
1. **['activityID']** Identification.
2. **['startTime']** For time correlation with other events.
3. **['cmeAnalyses'][0]['speed']** To determine speed / energy of the emission.
4. **['cmeAnalyses'][0]['type']** To determine speed / energy of the emission.
5. **['linkedEvents']** To look for correlation with events from other databases.
#### Reasoning:
It was decided that these items could be used to explore relationships with the flare and sunspot data.

## Linked Events
{'activityID': '2010-04-03T09:54:00-**CME**-001'}

{'activityID': '2010-04-03T09:04:00-**FLR**-001'}

In [None]:
# Date window (YYYY-MM-DD strings) interpolated into the DONKI query URLs built in later cells.
start_date = "2010-01-01"
end_date = "2019-10-20"

In [8]:
# Fetch the DONKI solar-flare (FLR) records for the start_date/end_date window and
# display the first record so the JSON layout of one flare can be inspected.
query_flr_url = f"https://api.nasa.gov/DONKI/FLR?startDate={start_date}&endDate={end_date}&api_key={api_key}"
flare_json = requests.get(query_flr_url).json()
flare_json[0]

{'flrID': '2010-04-03T09:04:00-FLR-001',
 'instruments': [{'id': 19, 'displayName': 'GOES14: SEM/XRS 1.0-8.0'}],
 'beginTime': '2010-04-03T09:04Z',
 'peakTime': '2010-04-03T09:54Z',
 'endTime': '2010-04-03T10:58Z',
 'classType': 'B7.4',
 'sourceLocation': 'S25W03',
 'activeRegionNum': 11059,
 'linkedEvents': [{'activityID': '2010-04-03T09:54:00-CME-001'}]}

In [15]:
# Fetch the DONKI coronal-mass-ejection (CME) records for the same window and
# display the first record so the JSON layout of one CME can be inspected.
query_cme_url = f"https://api.nasa.gov/DONKI/CME?startDate={start_date}&endDate={end_date}&api_key={api_key}"
cme_json = requests.get(query_cme_url).json()
cme_json[0]

{'activityID': '2010-04-03T09:54:00-CME-001',
 'startTime': '2010-04-03T09:54Z',
 'sourceLocation': 'S20E05',
 'activeRegionNum': None,
 'instruments': [{'id': 5, 'displayName': 'STEREO A: SECCHI/COR2'},
  {'id': 1, 'displayName': 'SOHO: LASCO/C2'},
  {'id': 2, 'displayName': 'SOHO: LASCO/C3'},
  {'id': 7, 'displayName': 'STEREO B: SECCHI/COR2'}],
 'cmeAnalyses': [{'time21_5': '2010-04-03T17:16Z',
   'latitude': 7.0,
   'longitude': 8.0,
   'halfAngle': 26.0,
   'speed': 620.0,
   'type': 'C',
   'isMostAccurate': True,
   'note': '',
   'levelOfData': 0,
   'enlilList': [{'modelCompletionTime': '2010-04-05T00:00Z',
     'au': 2.0,
     'estimatedShockArrivalTime': '2010-04-05T10:37Z',
     'estimatedDuration': 4.2,
     'rmin_re': 4.5,
     'kp_18': None,
     'kp_90': 3,
     'kp_135': 5,
     'kp_180': 6,
     'isEarthGB': False,
     'impactList': [{'isGlancingBlow': False,
       'location': 'STEREO A',
       'arrivalTime': '2010-04-05T19:00Z'},
      {'isGlancingBlow': True,
   

## Flare query and conversion to a DataFrame in Pandas/Jupyter.

In [18]:
# Reformat date / time from nasa format to be slightly more readable.
def convert_date_time(nasa_zulu="2017-01-21T07:26Z"):
    """Convert a DONKI timestamp such as ``2017-01-21T07:26Z`` to ``01/21/2017, 07:26:00``.

    Args:
        nasa_zulu: Timestamp string in the ``%Y-%m-%dT%H:%MZ`` layout.

    Returns:
        The same instant formatted as ``%m/%d/%Y, %H:%M:%S``. Seconds are always
        ``00`` because the input layout carries no seconds field.

    Raises:
        ValueError: If ``nasa_zulu`` does not match the expected layout.
        TypeError: If ``nasa_zulu`` is not a string (e.g. ``None``).
    """
    # The notebook imports the class as `dt` (`from datetime import datetime as dt`);
    # the bare name `datetime` is not bound, so it must not be used here.
    reformat_time = dt.strptime(nasa_zulu, '%Y-%m-%dT%H:%MZ')
    return reformat_time.strftime("%m/%d/%Y, %H:%M:%S")
# Best-effort timestamp reformatting used by get_flare_all_df.
def _flare_time_or_placeholder(event, key):
    """Return ``event[key]`` reformatted by convert_date_time, or a placeholder string on failure."""
    try:
        return convert_date_time(event[key])
    except Exception:
        # Deliberately best-effort: a missing or malformed timestamp must not abort
        # the whole parse. `Exception` (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        return "Invalid format / no data."


# Parses response from api json response string.  Includes start / end time, when available.
def get_flare_all_df(api_json, nasa_time=False):
    """Build a DataFrame with one row per flare event in a DONKI FLR JSON response.

    Args:
        api_json: Iterable of flare event dicts. Each must provide 'flrID' and
            'classType'; 'beginTime', 'peakTime' and 'endTime' are required when
            ``nasa_time`` is true; 'linkedEvents' is optional.
        nasa_time: When true, the three timestamps are copied through unchanged.
            Otherwise each is reformatted with convert_date_time, and any value
            that cannot be reformatted becomes "Invalid format / no data.".

    Returns:
        pandas.DataFrame with columns flare_id, start_time(zulu), peak_time(zulu),
        end_time(zulu), class_type, power(w/m^2), linked_events.

    Note:
        Relies on convert_date_time and flare_power, which are defined in other
        cells of this notebook and must have been run first.
    """
    records = []
    for event in api_json:
        linked = event.get('linkedEvents')
        # A missing, null or empty list all mean "nothing linked". Always store a
        # non-empty list so consumers that test linked_events[0] never hit IndexError.
        if isinstance(linked, list) and linked:
            all_linked = list(linked)
        else:
            all_linked = ["None Found."]

        if nasa_time:
            start_time = event['beginTime']
            peak_time = event['peakTime']
            end_time = event['endTime']
        else:
            start_time = _flare_time_or_placeholder(event, 'beginTime')
            peak_time = _flare_time_or_placeholder(event, 'peakTime')
            end_time = _flare_time_or_placeholder(event, 'endTime')

        records.append({
            "flare_id": event['flrID'],
            "start_time(zulu)": start_time,
            "peak_time(zulu)": peak_time,
            "end_time(zulu)": end_time,
            "class_type": event['classType'],
            "power(w/m^2)": flare_power(event['classType']),
            "linked_events": all_linked,
        })
    return pd.DataFrame(records)


# Query flare activity by start/end date.  Includes linked events and start / end time.  Returns a DataFrame.
def flare_query_all(start_date="2019-01-01", end_date="2019-10-13", nasa_time=False):
    """Download DONKI flare (FLR) events for a date range and return them as a DataFrame.

    Args:
        start_date: First day of the query window, ``YYYY-MM-DD``.
        end_date: Last day of the query window, ``YYYY-MM-DD``.
        nasa_time: Passed through to get_flare_all_df to choose the timestamp format.

    Returns:
        The DataFrame produced by get_flare_all_df for the downloaded events.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status (for example
            when the key is rejected or rate limited).
        requests.Timeout: If the server does not answer within the timeout.
    """
    response = requests.get(
        "https://api.nasa.gov/DONKI/FLR",
        # Let requests build and encode the query string instead of hand-formatting it.
        params={"startDate": start_date, "endDate": end_date, "api_key": api_key},
        timeout=60,
    )
    # Fail here with a clear HTTP error instead of handing an error payload to the parser.
    response.raise_for_status()
    return get_flare_all_df(response.json(), nasa_time)
# Run the flare query for the full window, keeping NASA's raw timestamp strings
# (third positional argument nasa_time=True), and preview the first three rows.
# NOTE(review): get_flare_all_df calls flare_power, which is defined in a later cell
# of this notebook; that cell must already have been executed for this call to work.
start_dt_query = "2010-01-01"
end_dt_query = "2019-10-20"
flrdf = flare_query_all(start_dt_query, end_dt_query,True)
flrdf.head(3)

Unnamed: 0,flare_id,start_time(zulu),peak_time(zulu),end_time(zulu),class_type,power(w/m^2),linked_events
0,2010-04-03T09:04:00-FLR-001,2010-04-03T09:04Z,2010-04-03T09:54Z,2010-04-03T10:58Z,B7.4,7.4e-07,[{'activityID': '2010-04-03T09:54:00-CME-001'}]
1,2010-06-12T00:30:00-FLR-001,2010-06-12T00:30Z,2010-06-12T00:57Z,2010-06-12T01:02Z,M2.0,2e-05,[None Found.]
2,2010-08-07T17:55:00-FLR-001,2010-08-07T17:55Z,2010-08-07T18:24Z,2010-08-07T18:47Z,M1.0,1e-05,[None Found.]


### Flare power conversion from flare classification to watts / square meter value.

In [2]:
# convert flare classification to watts/square meter (m^2)
def flare_power(flr_class = "C3.5"):
    """Convert a flare class label such as ``"C3.5"`` to a flux value in W/m^2.

    The leading letter selects a power-of-ten floor and the number that follows
    is multiplied by it, e.g. ``"C4.0"`` -> ``4.0 * 10**-6``.

    Args:
        flr_class: Class letter (A, B, C, M, X or Z) followed by a number.
            Surrounding whitespace and lower-case letters are accepted.

    Returns:
        float: The number part multiplied by the floor of the class.

    Raises:
        ValueError: If ``flr_class`` is not a string, is empty, starts with an
            unknown letter, or has a non-numeric remainder.
    """
    # Lower bound of each class in W/m^2. The upper bounds kept by the previous
    # table were never read, so only the floors remain.
    # NOTE(review): "Z" is carried over from the original table — confirm it is needed.
    class_floor = {
        "A": 10**-8,
        "B": 10**-7,
        "C": 10**-6,
        "M": 10**-5,
        "X": 10**-4,
        "Z": 10**-3,
    }
    try:
        label = flr_class.strip().upper()
        pwr_scale = label[0]
        pwr_range = float(label[1:])
        floor = class_floor[pwr_scale]
    except (AttributeError, IndexError, KeyError, TypeError, ValueError) as exc:
        # Previously a message was printed and execution continued into an
        # unbound-variable error; raise one clear error at the point of failure.
        raise ValueError(
            f"Error.  Check Syntax: cannot parse flare class {flr_class!r} "
            "(expected a letter A/B/C/M/X/Z followed by a number, e.g. 'C3.5')."
        ) from exc
    return pwr_range * floor

In [4]:
# Spot-check the conversion: a class "C4.0" flare is displayed as 4e-06 W/m^2.
pwr_ = flare_power("C4.0")
pwr_

4e-06

In [None]:
# Parses response from api json response string.
def get_flare_dframe(api_json, nasa_time=False):
    """Build a compact DataFrame (id, peak time, class, power) from a DONKI FLR JSON response.

    Args:
        api_json: Iterable of flare event dicts providing 'flrID' and 'classType';
            'peakTime' is required when ``nasa_time`` is true.
        nasa_time: When true, 'peakTime' is copied through unchanged. Otherwise it
            is reformatted with convert_date_time, falling back to
            "Invalid format / no data." when that fails.

    Returns:
        pandas.DataFrame with columns flare_id, peak_time(zulu), class_type,
        power(w/m^2).

    Note:
        Relies on convert_date_time and flare_power, which are defined in other
        cells of this notebook.
    """
    records = []
    for event in api_json:
        if nasa_time:
            peak_time = event['peakTime']
        else:
            try:
                peak_time = convert_date_time(event['peakTime'])
            except Exception:
                # Best-effort by design; `Exception` rather than a bare except so
                # KeyboardInterrupt / SystemExit are not swallowed.
                peak_time = "Invalid format / no data."
        records.append({
            "flare_id": event['flrID'],
            "peak_time(zulu)": peak_time,
            "class_type": event['classType'],
            "power(w/m^2)": flare_power(event['classType']),
        })
    return pd.DataFrame(records)


In [None]:
# Best-effort timestamp reformatting used by get_flare_all_df.
def _flare_time_or_placeholder(event, key):
    """Return ``event[key]`` reformatted by convert_date_time, or a placeholder string on failure."""
    try:
        return convert_date_time(event[key])
    except Exception:
        # Deliberately best-effort: a missing or malformed timestamp must not abort
        # the whole parse. `Exception` (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        return "Invalid format / no data."


# Parses response from api json response string.  Includes start / end time, when available.
def get_flare_all_df(api_json, nasa_time=False):
    """Build a DataFrame with one row per flare event in a DONKI FLR JSON response.

    Args:
        api_json: Iterable of flare event dicts. Each must provide 'flrID' and
            'classType'; 'beginTime', 'peakTime' and 'endTime' are required when
            ``nasa_time`` is true; 'linkedEvents' is optional.
        nasa_time: When true, the three timestamps are copied through unchanged.
            Otherwise each is reformatted with convert_date_time, and any value
            that cannot be reformatted becomes "Invalid format / no data.".

    Returns:
        pandas.DataFrame with columns flare_id, start_time(zulu), peak_time(zulu),
        end_time(zulu), class_type, power(w/m^2), linked_events.

    Note:
        Relies on convert_date_time and flare_power, which are defined in other
        cells of this notebook and must have been run first.
    """
    records = []
    for event in api_json:
        linked = event.get('linkedEvents')
        # A missing, null or empty list all mean "nothing linked". Always store a
        # non-empty list so consumers that test linked_events[0] never hit IndexError.
        if isinstance(linked, list) and linked:
            all_linked = list(linked)
        else:
            all_linked = ["None Found."]

        if nasa_time:
            start_time = event['beginTime']
            peak_time = event['peakTime']
            end_time = event['endTime']
        else:
            start_time = _flare_time_or_placeholder(event, 'beginTime')
            peak_time = _flare_time_or_placeholder(event, 'peakTime')
            end_time = _flare_time_or_placeholder(event, 'endTime')

        records.append({
            "flare_id": event['flrID'],
            "start_time(zulu)": start_time,
            "peak_time(zulu)": peak_time,
            "end_time(zulu)": end_time,
            "class_type": event['classType'],
            "power(w/m^2)": flare_power(event['classType']),
            "linked_events": all_linked,
        })
    return pd.DataFrame(records)

In [None]:
# Query flare activity by start/end date.  Returns a compact DataFrame.
def flare_query_small(start_date="2019-01-01", end_date="2019-10-13", nasa_time=False):
    """Download DONKI flare (FLR) events for a date range and return the compact DataFrame.

    Args:
        start_date: First day of the query window, ``YYYY-MM-DD``.
        end_date: Last day of the query window, ``YYYY-MM-DD``.
        nasa_time: Passed through to get_flare_dframe to choose the timestamp format.

    Returns:
        The DataFrame produced by get_flare_dframe for the downloaded events.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    response = requests.get(
        "https://api.nasa.gov/DONKI/FLR",
        # Let requests build and encode the query string instead of hand-formatting it.
        params={"startDate": start_date, "endDate": end_date, "api_key": api_key},
        timeout=60,
    )
    # Fail here with a clear HTTP error instead of handing an error payload to the parser.
    response.raise_for_status()
    return get_flare_dframe(response.json(), nasa_time)

In [None]:
# Works, mars portion no longer needed.
def cme_compare(start_date="2019-01-01", end_date="2019-10-13"):
    """Join flares to the CMEs named in their linked events for a date range.

    Downloads flare and CME records, then emits one row for every
    (flare, linked event, CME) triple whose linked 'activityID' equals the CME's
    'activity_id'.

    Args:
        start_date: First day of the query window, ``YYYY-MM-DD``.
        end_date: Last day of the query window, ``YYYY-MM-DD``.

    Returns:
        pandas.DataFrame combining the CME fields (activity_id, speed, data_level,
        lat, long, type, half_angle, is_most_accurate, time21_5, note) with the
        flare fields (flare_id, start/peak/end time, class_type, power(w/m^2)) and
        the matched linked_events id. Empty when nothing matches.

    Raises:
        Exception: Whatever the download or parsing step raised is re-raised after
            the short diagnostic message is printed.

    Note:
        NOTE(review): get_flare_all_dict and cme_analysis_dict are not defined in the
        visible part of this notebook — confirm they exist before relying on this.
    """
    query_cme_url =f"https://kauai.ccmc.gsfc.nasa.gov/DONKI/WS/get/CME?startDate={start_date}&endDate={end_date}"
    query_flr_url = f"https://api.nasa.gov/DONKI/FLR?startDate={start_date}&endDate={end_date}&api_key={api_key}"
    try:
        flare_dict = get_flare_all_dict(requests.get(query_flr_url).json())
    except Exception:
        # Previously execution continued and died later on an unbound `flare_dict`;
        # stop here so the real cause is the error that surfaces.
        print("Flare query error.")
        raise
    try:
        cme_dict = cme_analysis_dict(requests.get(query_cme_url).json())
    except Exception:
        print("cme query error.")
        raise

    flare_cme_dict = []
    for flare in flare_dict:
        linked = flare['linked_events']
        # Skip flares with nothing linked: either an empty list or the
        # "None Found." sentinel in first position.
        if not linked or linked[0] == "None Found.":
            continue
        # Loop nesting (CME outer, linked event inner) is kept so row order is unchanged.
        for cme in cme_dict:
            for event in linked:
                if event['activityID'] != cme['activity_id']:
                    continue
                flare_cme_dict.append({
                    "activity_id": cme['activity_id'],
                    "speed": cme['speed'],
                    "data_level": cme['data_level'],
                    "lat": cme['lat'],
                    "long": cme['long'],
                    "type": cme['type'],
                    "half_angle": cme['half_angle'],
                    "is_most_accurate": cme['is_most_accurate'],
                    "time21_5": cme['time21_5'],
                    "note": cme['note'],
                    "flare_id": flare['flare_id'],
                    "start_time(zulu)": flare['start_time(zulu)'],
                    "peak_time(zulu)": flare['peak_time(zulu)'],
                    "end_time(zulu)": flare['end_time(zulu)'],
                    "class_type": flare['class_type'],
                    "power(w/m^2)": flare['power(w/m^2)'],
                    "linked_events": event['activityID'],
                })
    return pd.DataFrame(flare_cme_dict)

In [None]:
# Reformat date / time from nasa format to be slightly more readable.
def convert_date_time(nasa_zulu="2017-01-21T07:26Z"):
    """Convert a DONKI timestamp such as ``2017-01-21T07:26Z`` to ``01/21/2017, 07:26:00``.

    Args:
        nasa_zulu: Timestamp string in the ``%Y-%m-%dT%H:%MZ`` layout.

    Returns:
        The same instant formatted as ``%m/%d/%Y, %H:%M:%S``. Seconds are always
        ``00`` because the input layout carries no seconds field.

    Raises:
        ValueError: If ``nasa_zulu`` does not match the expected layout.
        TypeError: If ``nasa_zulu`` is not a string (e.g. ``None``).
    """
    # The notebook imports the class as `dt` (`from datetime import datetime as dt`);
    # the bare name `datetime` is not bound, so it must not be used here.
    reformat_time = dt.strptime(nasa_zulu, '%Y-%m-%dT%H:%MZ')
    return reformat_time.strftime("%m/%d/%Y, %H:%M:%S")

In [None]:
# Takes in a dataframe (ID column renamed to 'id') and event type, inputs should be opposite
# CME df with event_type = 'FLR' or Flare df with event_type = 'CME'
# Returns list of linked event ids, can be used to cross reference the two dataframes
def linked_events(df, event_type='FLR'):
    """Collect ``[row id, linked activity id]`` pairs for linked events of one type.

    Args:
        df: DataFrame with an 'id' column and a 'linked_events' column. Each
            'linked_events' cell is expected to be a list of dicts carrying an
            'activityID' string; any other cell (None, NaN, a placeholder string)
            is treated as "no linked events".
        event_type: Substring to look for in the linked activity id, e.g. 'FLR'
            or 'CME'.

    Returns:
        list[list]: One ``[df id, linked activityID]`` pair per matching linked
        event, in row order and then in linked-event order.

    Raises:
        KeyError: If ``df`` lacks the 'id' or 'linked_events' column.
    """
    ids = []
    for row_id, linked in zip(df['id'], df['linked_events']):
        if not isinstance(linked, (list, tuple)):
            # None / NaN / placeholder string: nothing linked to this row.
            continue
        for each in linked:
            # Skip malformed entries individually (e.g. the "None Found." sentinel)
            # instead of abandoning the remaining entries of the row.
            if not isinstance(each, dict):
                continue
            activity_id = each.get('activityID')
            if isinstance(activity_id, str) and event_type in activity_id:
                ids.append([row_id, activity_id])
    return ids

In [11]:
# Fetch the DONKI CME records for the start_date/end_date window and display the
# first record as a reference for the fields parsed by the CME helpers.
query_cme_url = f"https://api.nasa.gov/DONKI/CME?startDate={start_date}&endDate={end_date}&api_key={api_key}"
cme_json = requests.get(query_cme_url).json()
cme_json[0]

{'activityID': '2010-04-03T09:54:00-CME-001',
 'startTime': '2010-04-03T09:54Z',
 'sourceLocation': 'S20E05',
 'activeRegionNum': None,
 'instruments': [{'id': 5, 'displayName': 'STEREO A: SECCHI/COR2'},
  {'id': 1, 'displayName': 'SOHO: LASCO/C2'},
  {'id': 2, 'displayName': 'SOHO: LASCO/C3'},
  {'id': 7, 'displayName': 'STEREO B: SECCHI/COR2'}],
 'cmeAnalyses': [{'time21_5': '2010-04-03T17:16Z',
   'latitude': 7.0,
   'longitude': 8.0,
   'halfAngle': 26.0,
   'speed': 620.0,
   'type': 'C',
   'isMostAccurate': True,
   'note': '',
   'levelOfData': 0,
   'enlilList': [{'modelCompletionTime': '2010-04-05T00:00Z',
     'au': 2.0,
     'estimatedShockArrivalTime': '2010-04-05T10:37Z',
     'estimatedDuration': 4.2,
     'rmin_re': 4.5,
     'kp_18': None,
     'kp_90': 3,
     'kp_135': 5,
     'kp_180': 6,
     'isEarthGB': False,
     'impactList': [{'isGlancingBlow': False,
       'location': 'STEREO A',
       'arrivalTime': '2010-04-05T19:00Z'},
      {'isGlancingBlow': True,
   

In [14]:
def convert_date_time(nasa_zulu="2017-01-21T07:26Z"):
    """Convert a DONKI timestamp such as ``2017-01-21T07:26Z`` to ``01/21/2017, 07:26:00``.

    Args:
        nasa_zulu: Timestamp string in the ``%Y-%m-%dT%H:%MZ`` layout.

    Returns:
        The same instant formatted as ``%m/%d/%Y, %H:%M:%S``. Seconds are always
        ``00`` because the input layout carries no seconds field.

    Raises:
        ValueError: If ``nasa_zulu`` does not match the expected layout.
        TypeError: If ``nasa_zulu`` is not a string (e.g. ``None``).
    """
    # The notebook imports the class as `dt` (`from datetime import datetime as dt`);
    # the bare name `datetime` is not bound, so it must not be used here.
    reformat_time = dt.strptime(nasa_zulu, '%Y-%m-%dT%H:%MZ')
    return reformat_time.strftime("%m/%d/%Y, %H:%M:%S")
# ************************************************************************
# Takes API JSON for CMEs and returns dataframe with data
def get_cme_all_df(api_json, nasa_time=False):
    """Build a DataFrame with one row per CME event in a DONKI CME JSON response.

    Args:
        api_json: Iterable of CME event dicts. Each must provide 'activityID' and
            'startTime'; 'linkedEvents' and 'cmeAnalyses' are optional.
        nasa_time: When true, 'startTime' is copied through unchanged. Otherwise it
            is reformatted with convert_date_time (defined in another cell).

    Returns:
        pandas.DataFrame with columns cme_id, speed, type, linked_events and
        'start time'. speed/type come from the first entry of 'cmeAnalyses' and
        are 'NA' when unavailable. linked_events is the raw 'linkedEvents' value
        (which may be None), or 'None Found' when the key is absent.

    Raises:
        KeyError: If an event lacks 'activityID' or 'startTime'.
        ValueError: If ``nasa_time`` is false and a 'startTime' cannot be parsed.
    """
    columns = ["cme_id", "speed", "type", "linked_events", "start time"]
    records = []
    for event in api_json:
        try:
            first_analysis = event["cmeAnalyses"][0]
        except (KeyError, IndexError, TypeError):
            # Key absent, empty list, or null: no analysis available.
            first_analysis = {}

        start_time_nasa = event["startTime"]
        records.append({
            "cme_id": event["activityID"],
            # Resolve speed and type independently. Appending to two parallel lists
            # inside one try block could add both a value and 'NA' for the same event,
            # leaving the lists with unequal lengths.
            "speed": first_analysis.get("speed", "NA"),
            "type": first_analysis.get("type", "NA"),
            "linked_events": event.get("linkedEvents", "None Found"),
            # Only reformat when the readable form is actually requested.
            "start time": start_time_nasa if nasa_time else convert_date_time(start_time_nasa),
        })
    # Explicit columns keep the frame's layout stable even for an empty response.
    return pd.DataFrame(records, columns=columns)

# Query cme activity by start/end date.  Includes linked events and start time.  Returns a DataFrame.
def cme_query_all(start_date="2019-01-01", end_date="2019-10-13", nasa_time=False):
    """Download DONKI CME events for a date range and return them as a DataFrame.

    Args:
        start_date: First day of the query window, ``YYYY-MM-DD``.
        end_date: Last day of the query window, ``YYYY-MM-DD``.
        nasa_time: Passed through to get_cme_all_df to choose the timestamp format.

    Returns:
        The DataFrame produced by get_cme_all_df for the downloaded events.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    response = requests.get(
        "https://api.nasa.gov/DONKI/CME",
        # Let requests build and encode the query string instead of hand-formatting it.
        params={"startDate": start_date, "endDate": end_date, "api_key": api_key},
        timeout=60,
    )
    # Fail here with a clear HTTP error instead of handing an error payload to the parser.
    response.raise_for_status()
    return get_cme_all_df(response.json(), nasa_time)
# Run the CME query with its defaults (2019-01-01 to 2019-10-13, reformatted
# timestamps) and preview the first three rows.
cmedf = cme_query_all()
cmedf.head(3)

Unnamed: 0,cme_id,speed,type,linked_events,start time
0,2019-01-02T23:12:00-CME-001,430.0,S,,"01/02/2019, 23:12:00"
1,2019-01-06T20:24:00-CME-001,295.0,S,[{'activityID': '2019-01-11T15:00:00-IPS-001'}],"01/06/2019, 20:24:00"
2,2019-01-21T02:00:00-CME-001,292.0,S,,"01/21/2019, 02:00:00"


In [20]:
# Load every flare in the start_date/end_date window through the flr_tools module.
# The flag is passed as the third positional argument; with False the preview below
# shows reformatted "MM/DD/YYYY, HH:MM:SS" timestamps.
# NOTE(review): presumably flr_tools.flare_query_all mirrors the notebook's
# flare_query_all(start_date, end_date, nasa_time) — confirm against the module.
dont_use_nasa_time_format = False
flare_alldf = flr_tools.flare_query_all(start_date, end_date, dont_use_nasa_time_format)
flare_alldf.head()

Unnamed: 0,flare_id,start_time(zulu),peak_time(zulu),end_time(zulu),class_type,power(w/m^2),linked_events
0,2010-04-03T09:04:00-FLR-001,"04/03/2010, 09:04:00","04/03/2010, 09:54:00","04/03/2010, 10:58:00",B7.4,7.4e-07,[{'activityID': '2010-04-03T09:54:00-CME-001'}]
1,2010-06-12T00:30:00-FLR-001,"06/12/2010, 00:30:00","06/12/2010, 00:57:00","06/12/2010, 01:02:00",M2.0,2e-05,[None Found.]
2,2010-08-07T17:55:00-FLR-001,"08/07/2010, 17:55:00","08/07/2010, 18:24:00","08/07/2010, 18:47:00",M1.0,1e-05,[None Found.]
3,2010-08-14T09:38:00-FLR-001,"08/14/2010, 09:38:00","08/14/2010, 10:05:00","08/14/2010, 10:31:00",C4.4,4.4e-06,[{'activityID': '2010-08-14T12:30:00-SEP-001'}]
4,2010-08-18T04:45:00-FLR-001,"08/18/2010, 04:45:00","08/18/2010, 05:48:00","08/18/2010, 06:51:00",C4.5,4.5e-06,[{'activityID': '2010-08-18T06:00:00-CME-001'}]


In [21]:
# Load every CME in the start_date/end_date window through the cme_tools module.
# The flag is passed as the third positional argument; with False the preview below
# shows reformatted "MM/DD/YYYY, HH:MM:SS" timestamps.
# NOTE(review): presumably cme_tools.cme_query_all mirrors the notebook's
# cme_query_all(start_date, end_date, nasa_time) — confirm against the module.
dont_use_nasa_time_format = False
cme_alldf = cme_tools.cme_query_all(start_date, end_date, dont_use_nasa_time_format)
cme_alldf.head()

Unnamed: 0,cme_id,speed,type,linked_events,start time
0,2010-04-03T09:54:00-CME-001,620,C,[{'activityID': '2010-04-03T09:04:00-FLR-001'}...,"04/03/2010, 09:54:00"
1,2010-06-13T07:32:00-CME-001,500,C,,"06/13/2010, 07:32:00"
2,2010-06-20T03:18:00-CME-001,570,C,[{'activityID': '2010-06-23T08:00:00-IPS-001'}],"06/20/2010, 03:18:00"
3,2010-07-03T01:30:00-CME-001,625,C,,"07/03/2010, 01:30:00"
4,2010-07-08T22:30:00-CME-001,290,S,,"07/08/2010, 22:30:00"


In [22]:
# Cross-reference the two catalogues in both directions. Each helper call needs the
# frame's identifier column to be named 'id'.
cme_ids = cme_tools.linked_events(flare_alldf.rename(columns={'flare_id': 'id'}), 'CME')
flr_ids = cme_tools.linked_events(cme_alldf.rename(columns={'cme_id': 'id'}), 'FLR')

# One row per (flare, linked CME) pair found on the flare side.
both = pd.DataFrame(cme_ids, columns=['flare_id', 'cme_id'])

# Attach the CME attributes to each pair and prefix them so they cannot be confused
# with the flare columns added next.
new_df = (
    both
    .merge(
        cme_alldf[["cme_id", "speed", "type", "start time"]],
        on="cme_id",
        how='left',
    )
    .rename(columns={
        "speed": "cme speed",
        "type": "cme type",
        "start time": "cme start time",
    })
)

# Attach the flare attributes and give them readable, flare-prefixed names.
full_df = (
    new_df
    .merge(
        flare_alldf[[
            "flare_id",
            "start_time(zulu)",
            "peak_time(zulu)",
            "end_time(zulu)",
            "class_type",
            "power(w/m^2)",
        ]],
        on="flare_id",
        how="left",
    )
    .rename(columns={
        "start_time(zulu)": "flare start time",
        "peak_time(zulu)": "flare peak time",
        "end_time(zulu)": "flare end time",
        "class_type": "flare type",
        "power(w/m^2)": "flare power(w/m^2)",
    })
)
full_df.head()

Unnamed: 0,flare_id,cme_id,cme speed,cme type,cme start time,flare start time,flare peak time,flare end time,flare type,flare power(w/m^2)
0,2010-04-03T09:04:00-FLR-001,2010-04-03T09:54:00-CME-001,620,C,"04/03/2010, 09:54:00","04/03/2010, 09:04:00","04/03/2010, 09:54:00","04/03/2010, 10:58:00",B7.4,7.4e-07
1,2010-08-18T04:45:00-FLR-001,2010-08-18T06:00:00-CME-001,1091,O,"08/18/2010, 06:00:00","08/18/2010, 04:45:00","08/18/2010, 05:48:00","08/18/2010, 06:51:00",C4.5,4.5e-06
2,2011-02-15T01:44:00-FLR-001,2011-02-15T02:25:00-CME-001,920,C,"02/15/2011, 02:25:00","02/15/2011, 01:44:00","02/15/2011, 01:56:00","02/15/2011, 02:06:00",X2.2,0.00022
3,2011-02-24T07:23:00-FLR-001,2011-02-24T08:00:00-CME-001,900,C,"02/24/2011, 08:00:00","02/24/2011, 07:23:00","02/24/2011, 07:35:00","02/24/2011, 07:42:00",M3.5,3.5e-05
4,2011-03-07T13:44:00-FLR-001,2011-03-07T14:40:00-CME-001,710,C,"03/07/2011, 14:40:00","03/07/2011, 13:44:00","03/07/2011, 14:30:00","03/07/2011, 15:08:00",M2.0,2e-05
