In [1]:
# All imports for the notebook live at the top of this first cell so that a
# fresh "Restart Kernel -> Run All" resolves every dependency up front.

# Standard library
import json
import os
from urllib.parse import urlencode

# Third-party
import pandas as pd
import requests
from dotenv import load_dotenv

# Set environment variables from the .env in the local environment
load_dotenv()

# Retrieve API key and store as Python variable
api_key = os.getenv("NASA_API_KEY")

# os.getenv returns None when the variable is unset. Fail here with a clear
# message rather than sending a request whose key is the literal text "None".
if not api_key:
    raise RuntimeError("NASA_API_KEY is not set; add it to your .env file")

# NASA DONKI endpoint for Coronal Mass Ejections (CMEs)
base_url = "https://api.nasa.gov/DONKI/CME"

# Date range to search for CMEs (start and end are the same single day here)
start_date = "2024-05-05"
end_date   = "2024-05-05"

# Approximate latitude and longitude for North America.
# NOTE(review): the DONKI CME endpoint appears to accept only
# startDate / endDate / api_key, so these coordinates are not sent with the
# request below -- confirm whether a later cell needs them.
latitude   = 37.0902
longitude  = -95.7129

# Build query URL (urlencode escapes the values, including the API key).
# Do not display query_url in cell output: it embeds the secret key.
query_params = {
    "startDate": start_date,
    "endDate": end_date,
    "api_key": api_key,
}
query_url = f"{base_url}?{urlencode(query_params)}"

# Execute "GET" request with query_url; the timeout keeps a stalled
# connection from hanging the kernel indefinitely.
response = requests.get(query_url, timeout=30)

# Show the HTTP status as the cell output: 200 means the key was accepted.
response.status_code

In [2]:
# Format data as JSON


# Use json.dumps with argument indent=4 to format data


[
    {
        "activityID": "2024-05-05T02:09:00-CME-001",
        "catalog": "M2M_CATALOG",
        "startTime": "2024-05-05T02:09Z",
        "sourceLocation": "N26W17",
        "activeRegionNum": 13663,
        "link": "https://webtools.ccmc.gsfc.nasa.gov/DONKI/view/CME/30472/-1",
        "note": "[TRUE START TIME 2024-05-05T01:12Z IN LASCO C2 - RETAINING COR2A/T02:09Z START TIME AS NOTIFICATION SENT PRIOR TO UPDATE]. Narrow northern CME with filamentary structures which is similar to some previous CMEs from AR 3663. This CME is very likely associated with the M9.0 and M8.4 flares from AR 3663, peaking at 2024-05-04T23:48Z and 2024-05-05T01:27Z respectively, however no clear additional lower coronal signatures have been found in EUV imagery. This CME appears as two separately emerging fronts (T01:12Z and T02:12Z) which combine early in SOHO LASCO C2 imagery--which has now backfilled as of 2024-05-07 that prevented full real-time analysis for flare association.",
        "submission

In [3]:
# Convert the CME JSON data to a Pandas DataFrame


Unnamed: 0,activityID,catalog,startTime,sourceLocation,activeRegionNum,link,note,submissionTime,instruments,cmeAnalyses,linkedEvents
0,2024-05-05T02:09:00-CME-001,M2M_CATALOG,2024-05-05T02:09Z,N26W17,13663.0,https://webtools.ccmc.gsfc.nasa.gov/DONKI/view...,[TRUE START TIME 2024-05-05T01:12Z IN LASCO C2...,2024-05-07T16:31Z,"[{'displayName': 'STEREO A: SECCHI/COR2'}, {'d...","[{'isMostAccurate': True, 'time21_5': '2024-05...",[{'activityID': '2024-05-04T23:28:00-FLR-001'}...
1,2024-05-05T03:38:00-CME-001,M2M_CATALOG,2024-05-05T03:38Z,N24W21,13663.0,https://webtools.ccmc.gsfc.nasa.gov/DONKI/view...,A wider more eastern and seemingly slower CME/...,2024-05-06T11:37Z,[{'displayName': 'STEREO A: SECCHI/COR2'}],"[{'isMostAccurate': True, 'time21_5': '2024-05...",[{'activityID': '2024-05-05T03:01:00-FLR-001'}]
2,2024-05-05T06:38:00-CME-001,M2M_CATALOG,2024-05-05T06:38Z,N26W20,13663.0,https://webtools.ccmc.gsfc.nasa.gov/DONKI/view...,Narrow minor CME/outflow going directly North....,2024-05-06T11:29Z,[{'displayName': 'STEREO A: SECCHI/COR2'}],"[{'isMostAccurate': True, 'time21_5': '2024-05...",[{'activityID': '2024-05-05T05:47:00-FLR-001'}]
3,2024-05-05T08:09:00-CME-001,M2M_CATALOG,2024-05-05T08:09Z,,,https://webtools.ccmc.gsfc.nasa.gov/DONKI/view...,Faint CME to the northeast closely following t...,2024-05-06T11:32Z,[{'displayName': 'STEREO A: SECCHI/COR2'}],"[{'isMostAccurate': True, 'time21_5': '2024-05...",[{'activityID': '2024-05-05T08:07:00-FLR-001'}]
4,2024-05-05T13:24:00-CME-001,M2M_CATALOG,2024-05-05T13:24Z,N26W23,13663.0,https://webtools.ccmc.gsfc.nasa.gov/DONKI/view...,This CME is visible to the north in SOHO LASCO...,2024-05-07T19:26Z,"[{'displayName': 'SOHO: LASCO/C2'}, {'displayN...","[{'isMostAccurate': True, 'time21_5': '2024-05...",[{'activityID': '2024-05-05T11:41:00-FLR-001'}]


In [4]:
# Keep only the columns: activityID, startTime, linkedEvents


Unnamed: 0,activityID,startTime,linkedEvents
0,2024-05-05T02:09:00-CME-001,2024-05-05T02:09Z,[{'activityID': '2024-05-04T23:28:00-FLR-001'}...
1,2024-05-05T03:38:00-CME-001,2024-05-05T03:38Z,[{'activityID': '2024-05-05T03:01:00-FLR-001'}]
2,2024-05-05T06:38:00-CME-001,2024-05-05T06:38Z,[{'activityID': '2024-05-05T05:47:00-FLR-001'}]
3,2024-05-05T08:09:00-CME-001,2024-05-05T08:09Z,[{'activityID': '2024-05-05T08:07:00-FLR-001'}]
4,2024-05-05T13:24:00-CME-001,2024-05-05T13:24Z,[{'activityID': '2024-05-05T11:41:00-FLR-001'}]


## 'for loop' and 'explode()'

Because linkedEvents sometimes contains multiple events per row, we want to spread these out into individual rows. We will illustrate two approaches for doing so:

1) a 'for loop' as illustrated before
2) the explode() function


In [5]:
# Initialize an empty list to store the expanded rows

# Iterate over each index in the DataFrame


# Create a new DataFrame from the expanded rows

# Use head() to show the dataframe


missing activity
missing activity
missing activity


Unnamed: 0,activityID,startTime,linkedEvents
0,2024-05-05T02:09:00-CME-001,2024-05-05T02:09Z,{'activityID': '2024-05-04T23:28:00-FLR-001'}
1,2024-05-05T02:09:00-CME-001,2024-05-05T02:09Z,{'activityID': '2024-05-05T01:15:00-FLR-001'}
2,2024-05-05T03:38:00-CME-001,2024-05-05T03:38Z,{'activityID': '2024-05-05T03:01:00-FLR-001'}
3,2024-05-05T06:38:00-CME-001,2024-05-05T06:38Z,{'activityID': '2024-05-05T05:47:00-FLR-001'}
4,2024-05-05T08:09:00-CME-001,2024-05-05T08:09Z,{'activityID': '2024-05-05T08:07:00-FLR-001'}


In [6]:
# Use the explode() function to expand the rows and drop missing observations

# Use head() to show the dataframe


Unnamed: 0,activityID,startTime,linkedEvents
0,2024-05-05T02:09:00-CME-001,2024-05-05T02:09Z,{'activityID': '2024-05-04T23:28:00-FLR-001'}
1,2024-05-05T02:09:00-CME-001,2024-05-05T02:09Z,{'activityID': '2024-05-05T01:15:00-FLR-001'}
2,2024-05-05T03:38:00-CME-001,2024-05-05T03:38Z,{'activityID': '2024-05-05T03:01:00-FLR-001'}
3,2024-05-05T06:38:00-CME-001,2024-05-05T06:38Z,{'activityID': '2024-05-05T05:47:00-FLR-001'}
4,2024-05-05T08:09:00-CME-001,2024-05-05T08:09Z,{'activityID': '2024-05-05T08:07:00-FLR-001'}
