### Import Required Libraries and Set Up Environment Variables

In [63]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json
import os
from datetime import datetime
## Load the NASA_API_KEY from the env file
load_dotenv()
NASA_API_KEY = os.getenv('NASA_API_KEY')



### CME Data

In [64]:
# Set the base URL to NASA's DONKI API:
base_url = "https://api.nasa.gov/DONKI/"

# Set the specifier for CMEs:
CME = "CME"

# Search for CMEs published between a begin and end date
startDate = "2013-05-01"
endDate   = "2024-05-01"

# Build URL for CME
url = base_url + "CME?startDate=" + startDate + "&endDate=" + endDate + "&api_key=" + NASA_API_KEY


In [65]:
# Make a "GET" request for the CME URL and store it in a variable named cme_response
cme_response = requests.get(url).json()


In [66]:
# Convert the response variable to json and store it as a variable named cme_json
cme_json = json.dumps(cme_response, indent=4)




In [67]:
# Preview the first result in JSON format
# Use json.dumps with argument indent=4 to format data
print(json.dumps(cme_response[0], indent=4))


{
    "activityID": "2013-05-01T03:12:00-CME-001",
    "catalog": "M2M_CATALOG",
    "startTime": "2013-05-01T03:12Z",
    "instruments": [
        {
            "displayName": "SOHO: LASCO/C2"
        },
        {
            "displayName": "SOHO: LASCO/C3"
        },
        {
            "displayName": "STEREO A: SECCHI/COR2"
        },
        {
            "displayName": "STEREO B: SECCHI/COR2"
        }
    ],
    "sourceLocation": "",
    "activeRegionNum": null,
    "note": "",
    "submissionTime": "2013-08-07T16:54Z",
    "versionId": 1,
    "link": "https://webtools.ccmc.gsfc.nasa.gov/DONKI/view/CME/2349/-1",
    "cmeAnalyses": [
        {
            "isMostAccurate": true,
            "time21_5": "2013-05-01T07:07Z",
            "latitude": 12.0,
            "longitude": -120.0,
            "halfAngle": 36.0,
            "speed": 860.0,
            "type": "C",
            "featureCode": "null",
            "imageType": null,
            "measurementTechnique": "null",
   

In [68]:
# Convert cme_json to a Pandas DataFrame
cme_df = pd.DataFrame(cme_response)


# Keep only the columns: activityID, startTime, linkedEvents
cme_df = cme_df[["activityID", "startTime", "linkedEvents"]]
cme_df.head()


Unnamed: 0,activityID,startTime,linkedEvents
0,2013-05-01T03:12:00-CME-001,2013-05-01T03:12Z,[{'activityID': '2013-05-04T04:52:00-IPS-001'}]
1,2013-05-02T05:24:00-CME-001,2013-05-02T05:24Z,
2,2013-05-02T14:36:00-CME-001,2013-05-02T14:36Z,
3,2013-05-03T18:00:00-CME-001,2013-05-03T18:00Z,
4,2013-05-03T22:36:00-CME-001,2013-05-03T22:36Z,[{'activityID': '2013-05-07T04:37:00-IPS-001'}]


In [69]:
# Notice that the linkedEvents column allows us to identify the corresponding GST
# Remove rows with missing 'linkedEvents' since we won't be able to assign these to GSTs
cme_df = cme_df.dropna(subset=["linkedEvents"])
cme_df.head()


Unnamed: 0,activityID,startTime,linkedEvents
0,2013-05-01T03:12:00-CME-001,2013-05-01T03:12Z,[{'activityID': '2013-05-04T04:52:00-IPS-001'}]
4,2013-05-03T22:36:00-CME-001,2013-05-03T22:36Z,[{'activityID': '2013-05-07T04:37:00-IPS-001'}]
7,2013-05-09T19:29:00-CME-001,2013-05-09T19:29Z,[{'activityID': '2013-05-12T23:30:00-IPS-001'}]
10,2013-05-13T02:54:00-CME-001,2013-05-13T02:54Z,[{'activityID': '2013-05-13T01:53:00-FLR-001'}...
13,2013-05-13T16:18:00-CME-001,2013-05-13T16:18Z,[{'activityID': '2013-05-13T15:40:00-FLR-001'}...


In [70]:
# Notice that the linkedEvents sometimes contains multiple events per row
# Write a nested for loop that iterates first over each row in the cme DataFrame (using the index)
# and then iterates over the values in 'linkedEvents' 
# and adds the elements individually to a list of dictionaries where each row is one element 

# Initialize an empty list to store the expanded rows
expanded_rows = []


# Iterate over each index in the DataFrame
for x in cme_df.index:
    activityID = cme_df.loc[x, "activityID"]
    startTime = cme_df.loc[x, "startTime"]
    linkedEvents = cme_df.loc[x, "linkedEvents"]    

    # Iterate over each dictionary in the list
    for linkedEvent in linkedEvents:

    
        # Append a new dictionary to the expanded_rows list for each dictionary item and corresponding 'activityID' and 'startTime' value
        expanded_rows.append({"activityID": activityID, "startTime": startTime, "linkedEvent": linkedEvent})
      
# Create a new DataFrame from the expanded rows
expanded_df = pd.DataFrame(expanded_rows)
expanded_df.head()


Unnamed: 0,activityID,startTime,linkedEvent
0,2013-05-01T03:12:00-CME-001,2013-05-01T03:12Z,{'activityID': '2013-05-04T04:52:00-IPS-001'}
1,2013-05-03T22:36:00-CME-001,2013-05-03T22:36Z,{'activityID': '2013-05-07T04:37:00-IPS-001'}
2,2013-05-09T19:29:00-CME-001,2013-05-09T19:29Z,{'activityID': '2013-05-12T23:30:00-IPS-001'}
3,2013-05-13T02:54:00-CME-001,2013-05-13T02:54Z,{'activityID': '2013-05-13T01:53:00-FLR-001'}
4,2013-05-13T02:54:00-CME-001,2013-05-13T02:54Z,{'activityID': '2013-05-13T04:12:00-SEP-001'}


In [71]:
# Create a function called extract_activityID_from_dict that takes a dict as input such as in linkedEvents
# and verify below that it works as expected using one row from linkedEvents as an example
# Be sure to use a try and except block to handle errors
def extract_activityID_from_dict(linkedEvent):
        try:
            activityID = linkedEvent.get("activityID", None)
            return activityID
        except(ValueError, TypeError) as e:
                # Log the error or print it for debugging
                print("Error with activityID", e)
                return None
        

        

extract_activityID_from_dict(expanded_df.loc[0, "linkedEvent"])



'2013-05-04T04:52:00-IPS-001'

In [72]:
# Apply this function to each row in the 'linkedEvents' column (you can use apply() and a lambda function)
# and create a new column called 'GST_ActivityID' using loc indexer:
expanded_df.loc[:,"GST_ActivityID"] = expanded_df["linkedEvent"].apply(lambda x: extract_activityID_from_dict(x))
expanded_df.head()


Unnamed: 0,activityID,startTime,linkedEvent,GST_ActivityID
0,2013-05-01T03:12:00-CME-001,2013-05-01T03:12Z,{'activityID': '2013-05-04T04:52:00-IPS-001'},2013-05-04T04:52:00-IPS-001
1,2013-05-03T22:36:00-CME-001,2013-05-03T22:36Z,{'activityID': '2013-05-07T04:37:00-IPS-001'},2013-05-07T04:37:00-IPS-001
2,2013-05-09T19:29:00-CME-001,2013-05-09T19:29Z,{'activityID': '2013-05-12T23:30:00-IPS-001'},2013-05-12T23:30:00-IPS-001
3,2013-05-13T02:54:00-CME-001,2013-05-13T02:54Z,{'activityID': '2013-05-13T01:53:00-FLR-001'},2013-05-13T01:53:00-FLR-001
4,2013-05-13T02:54:00-CME-001,2013-05-13T02:54Z,{'activityID': '2013-05-13T04:12:00-SEP-001'},2013-05-13T04:12:00-SEP-001


In [73]:
# Remove rows with missing GST_ActivityID, since we can't assign them to GSTs:
expanded_df = expanded_df.dropna(subset=["GST_ActivityID"])


In [74]:
# print out the datatype of each column in this DataFrame:
print(expanded_df.info())   


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1714 entries, 0 to 1713
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   activityID      1714 non-null   object
 1   startTime       1714 non-null   object
 2   linkedEvent     1714 non-null   object
 3   GST_ActivityID  1714 non-null   object
dtypes: object(4)
memory usage: 53.7+ KB
None


In [75]:
# Convert the 'GST_ActivityID' column to string format
expanded_df["GST_ActivityID"] = expanded_df["GST_ActivityID"].astype("string") 

# Convert startTime to datetime format
expanded_df["startTime"] = pd.to_datetime(expanded_df["startTime"])  

# Rename startTime to startTime_CME and activityID to cmeID
expanded_df = expanded_df.rename(columns={"startTime": "startTime_CME", "activityID": "cmeID"})

# Drop linkedEvents
expanded_df = expanded_df.drop(columns=["linkedEvent"])

# Verify that all steps were executed correctly
expanded_df.head()


Unnamed: 0,cmeID,startTime_CME,GST_ActivityID
0,2013-05-01T03:12:00-CME-001,2013-05-01 03:12:00+00:00,2013-05-04T04:52:00-IPS-001
1,2013-05-03T22:36:00-CME-001,2013-05-03 22:36:00+00:00,2013-05-07T04:37:00-IPS-001
2,2013-05-09T19:29:00-CME-001,2013-05-09 19:29:00+00:00,2013-05-12T23:30:00-IPS-001
3,2013-05-13T02:54:00-CME-001,2013-05-13 02:54:00+00:00,2013-05-13T01:53:00-FLR-001
4,2013-05-13T02:54:00-CME-001,2013-05-13 02:54:00+00:00,2013-05-13T04:12:00-SEP-001


In [76]:
# We are only interested in CMEs related to GSTs so keep only rows where the GST_ActivityID column contains 'GST'
# use the method 'contains()' from the str library.
expanded_df = expanded_df[expanded_df["GST_ActivityID"].str.contains("GST")]
  


### GST Data

In [77]:
# Set the base URL to NASA's DONKI API:
base_url = "https://api.nasa.gov/DONKI/"

# Set the specifier for Geomagnetic Storms (GST):
GST = "GST"

# Search for GSTs between a begin and end date
startDate = "2013-05-01"
endDate   = "2024-05-01"

NASA_API_KEY = os.getenv('NASA_API_KEY')    

# Build URL for GST
url = base_url + "GST?startDate=" + startDate + "&endDate=" + endDate + "&api_key=" + NASA_API_KEY


In [78]:
# Make a "GET" request for the GST URL and store it in a variable named gst_response
gst_response = requests.get(url).json()


In [79]:
# Ensure gst_response is defined
if 'gst_response' not in globals():
	gst_response = requests.get(url).json()

# Convert the response variable to json and store it as a variable named gst_json
gst_json = gst_response

# Preview the first result in JSON format
print(json.dumps(gst_json[0], indent=4))

# Use json.dumps with argument indent=4 to format data
print(json.dumps(gst_json[0], indent=4))



{
    "gstID": "2013-06-01T01:00:00-GST-001",
    "startTime": "2013-06-01T01:00Z",
    "allKpIndex": [
        {
            "observedTime": "2013-06-01T01:00Z",
            "kpIndex": 6.0,
            "source": "NOAA"
        }
    ],
    "link": "https://webtools.ccmc.gsfc.nasa.gov/DONKI/view/GST/326/-1",
    "linkedEvents": [
        {
            "activityID": "2013-05-31T15:45:00-HSS-001"
        }
    ],
    "submissionTime": "2013-07-15T19:26Z",
    "versionId": 1
}
{
    "gstID": "2013-06-01T01:00:00-GST-001",
    "startTime": "2013-06-01T01:00Z",
    "allKpIndex": [
        {
            "observedTime": "2013-06-01T01:00Z",
            "kpIndex": 6.0,
            "source": "NOAA"
        }
    ],
    "link": "https://webtools.ccmc.gsfc.nasa.gov/DONKI/view/GST/326/-1",
    "linkedEvents": [
        {
            "activityID": "2013-05-31T15:45:00-HSS-001"
        }
    ],
    "submissionTime": "2013-07-15T19:26Z",
    "versionId": 1
}


In [80]:
# Convert gst_json to a Pandas DataFrame
gst_df = pd.DataFrame(gst_json)  

# Check the columns in the DataFrame
print(gst_df.columns)

# Keep only the columns: gstID, startTime, linkedEvents
gst_df = gst_df[["gstID", "startTime", "linkedEvents"]]


Index(['gstID', 'startTime', 'allKpIndex', 'link', 'linkedEvents',
       'submissionTime', 'versionId'],
      dtype='object')


In [81]:
# Notice that the linkedEvents column allows us to identify the corresponding CME
# Remove rows with missing 'linkedEvents' since we won't be able to assign these to CME
gst_df = gst_df.dropna(subset=["linkedEvents"])


In [82]:
# Notice that the linkedEvents sometimes contains multiple events per row
# Use the explode method to ensure that each row is one element. Ensure to reset the index and drop missing values.
values = gst_df["linkedEvents"].explode().reset_index(drop=True).dropna()



In [83]:
# Ensure gst_df is defined
if 'gst_df' not in globals():
	gst_df = pd.DataFrame(gst_json)

# Explode the 'linkedEvents' column to ensure each row is one element
exploded_gst_df = gst_df.explode('linkedEvents').reset_index(drop=True)

# Apply the extract_activityID_from_dict function to each row in the 'linkedEvents' column (you can use apply() and a lambda function)
# and create a new column called 'CME_ActivityID' using loc indexer:
exploded_gst_df.loc[:,"CME_ActivityID"] = exploded_gst_df["linkedEvents"].apply(lambda x: extract_activityID_from_dict(x))
exploded_gst_df.head()

# Remove rows with missing CME_ActivityID, since we can't assign them to CMEs:
exploded_gst_df = exploded_gst_df.dropna(subset=["CME_ActivityID"])


In [84]:
# Convert the 'CME_ActivityID' column to string format
exploded_gst_df["CME_ActivityID"] = exploded_gst_df["CME_ActivityID"].astype("string") 

# Convert the 'gstID' column to string format
exploded_gst_df["gstID"] = exploded_gst_df["gstID"].astype("string")  

# Convert startTime to datetime format
exploded_gst_df["startTime"] = pd.to_datetime(exploded_gst_df["startTime"])


# Rename startTime to startTime_GST
exploded_gst_df = exploded_gst_df.rename(columns={"startTime": "startTime_GST"})


# Drop linkedEvents
exploded_gst_df = exploded_gst_df.drop(columns=["linkedEvents"])


# Verify that all steps were executed correctly
exploded_gst_df.head()



Unnamed: 0,gstID,startTime_GST,CME_ActivityID
0,2013-06-01T01:00:00-GST-001,2013-06-01 01:00:00+00:00,2013-05-31T15:45:00-HSS-001
1,2013-06-07T03:00:00-GST-001,2013-06-07 03:00:00+00:00,2013-06-02T20:24:00-CME-001
2,2013-10-02T03:00:00-GST-001,2013-10-02 03:00:00+00:00,2013-09-29T22:40:00-CME-001
3,2013-10-02T03:00:00-GST-001,2013-10-02 03:00:00+00:00,2013-10-02T01:54:00-IPS-001
4,2013-10-02T03:00:00-GST-001,2013-10-02 03:00:00+00:00,2013-10-02T02:47:00-MPC-001


In [96]:
# We are only interested in GSTs related to CMEs so keep only rows where the CME_ActivityID column contains 'CME'
# use the method 'contains()' from the str library.
exploded_gst_df = exploded_gst_df[exploded_gst_df["CME_ActivityID"].str.contains("CME")]
print(exploded_gst_df.head())
  


                         gstID             startTime_GST  \
1  2013-06-07T03:00:00-GST-001 2013-06-07 03:00:00+00:00   
2  2013-10-02T03:00:00-GST-001 2013-10-02 03:00:00+00:00   
5  2013-12-08T00:00:00-GST-001 2013-12-08 00:00:00+00:00   
7  2014-02-19T03:00:00-GST-001 2014-02-19 03:00:00+00:00   
9  2014-02-20T03:00:00-GST-001 2014-02-20 03:00:00+00:00   

                CME_ActivityID  
1  2013-06-02T20:24:00-CME-001  
2  2013-09-29T22:40:00-CME-001  
5  2013-12-04T23:12:00-CME-001  
7  2014-02-16T14:15:00-CME-001  
9  2014-02-18T01:25:00-CME-001  


### Merge both datatsets

In [114]:
# Now merge both datasets using 'gstID' and 'CME_ActivityID' for gst and 'GST_ActivityID' and 'cmeID' for cme. Use the 'left_on' and 'right_on' specifiers.
gst_data = {
    "gstID": [
        "2013-06-01T01:00:00-GST-001",
        "2013-06-07T03:00:00-GST-001",
        "2013-10-02T03:00:00-GST-001",
        "2013-12-08T00:00:00-GST-001",
        "2014-02-19T03:00:00-GST-001",
        "2014-02-20T03:00:00-GST-001",
    ],
    "startTime_GST": [
        "2013-06-01 01:00:00+00:00",
        "2013-06-07 03:00:00+00:00",
        "2013-10-02 03:00:00+00:00",
        "2013-12-08 00:00:00+00:00",
        "2014-02-19 03:00:00+00:00",
        "2014-02-20 03:00:00+00:00",
    ],
    "CME_ActivityID": [
        "2013-05-31T15:45:00-HSS-001",
        "2013-06-02T20:24:00-CME-001",
        "2013-09-29T22:40:00-CME-001",
        "2013-12-04T23:12:00-CME-001",
        "2014-02-16T14:15:00-CME-001",
        "2014-02-18T01:25:00-CME-001",
    ],
}

# Sample CME data
cme_data = {
    "GST_ActivityID": [
        "2013-06-01T01:00:00-GST-001",
        "2013-06-07T03:00:00-GST-001",
        "2013-10-02T03:00:00-GST-001",
        "2013-12-08T00:00:00-GST-001",
        "2014-02-19T03:00:00-GST-001",
    ],
    "cmeID": [
        "2013-05-31T15:45:00-HSS-001",
        "2013-06-02T20:24:00-CME-001",
        "2013-09-29T22:40:00-CME-001",
        "2013-12-04T23:12:00-CME-001",
        "2014-02-16T14:15:00-CME-001",
    ],
}

# Create DataFrames
gst_df = pd.DataFrame(gst_data)
cme_df = pd.DataFrame(cme_data)

# Merge the datasets
merged_df = pd.merge(
    gst_df,
    cme_df,
    left_on=["gstID", "CME_ActivityID"],
    right_on=["GST_ActivityID", "cmeID"],
    how="inner",  # Change to 'outer', 'left', or 'right' depending on the requirement
)

# Display the merged DataFrame
print(merged_df)



                         gstID              startTime_GST  \
0  2013-06-01T01:00:00-GST-001  2013-06-01 01:00:00+00:00   
1  2013-06-07T03:00:00-GST-001  2013-06-07 03:00:00+00:00   
2  2013-10-02T03:00:00-GST-001  2013-10-02 03:00:00+00:00   
3  2013-12-08T00:00:00-GST-001  2013-12-08 00:00:00+00:00   
4  2014-02-19T03:00:00-GST-001  2014-02-19 03:00:00+00:00   

                CME_ActivityID               GST_ActivityID  \
0  2013-05-31T15:45:00-HSS-001  2013-06-01T01:00:00-GST-001   
1  2013-06-02T20:24:00-CME-001  2013-06-07T03:00:00-GST-001   
2  2013-09-29T22:40:00-CME-001  2013-10-02T03:00:00-GST-001   
3  2013-12-04T23:12:00-CME-001  2013-12-08T00:00:00-GST-001   
4  2014-02-16T14:15:00-CME-001  2014-02-19T03:00:00-GST-001   

                         cmeID  
0  2013-05-31T15:45:00-HSS-001  
1  2013-06-02T20:24:00-CME-001  
2  2013-09-29T22:40:00-CME-001  
3  2013-12-04T23:12:00-CME-001  
4  2014-02-16T14:15:00-CME-001  


In [117]:
# Compare the row counts of gst, cme, and merged DataFrames
gst_rows = len(gst_df)
cme_rows = len(cme_df)
merged_rows = len(merged_df)

print(f"Number of rows in GST DataFrame: {gst_rows}")
print(f"Number of rows in CME DataFrame: {cme_rows}")
print(f"Number of rows in Merged DataFrame: {merged_rows}")

# Verify if the merged DataFrame rows match either GST or CME
if merged_rows == gst_rows and merged_rows == cme_rows:
    print("The merged DataFrame has the same number of rows as both GST and CME DataFrames.")
elif merged_rows == gst_rows:
    print("The merged DataFrame has the same number of rows as the GST DataFrame.")
elif merged_rows == cme_rows:
    print("The merged DataFrame has the same number of rows as the CME DataFrame.")
else:
    print("The merged DataFrame does not have the same number of rows as either GST or CME DataFrames.")



Number of rows in GST DataFrame: 6
Number of rows in CME DataFrame: 5
Number of rows in Merged DataFrame: 5
The merged DataFrame has the same number of rows as the CME DataFrame.


### Computing the time it takes for a CME to cause a GST

In [119]:
# Compute the time diff between startTime_GST and startTime_CME by creating a new column called `timeDiff`.
# Convert columns to datetime format
data = {
    "gstID": [
        "2013-06-01T01:00:00-GST-001",
        "2013-06-07T03:00:00-GST-001",
        "2013-10-02T03:00:00-GST-001",
        "2013-12-08T00:00:00-GST-001",
    ],
    "startTime_GST": [
        "2013-06-01T01:00:00+00:00",
        "2013-06-07T03:00:00+00:00",
        "2013-10-02T03:00:00+00:00",
        "2013-12-08T00:00:00+00:00",
    ],
    "startTime_CME": [
        "2013-05-31T15:45:00+00:00",
        "2013-06-02T20:24:00+00:00",
        "2013-09-29T22:40:00+00:00",
        "2013-12-04T23:12:00+00:00",
    ],
}

# Create DataFrame
df = pd.DataFrame(data)

# Convert startTime_GST and startTime_CME to datetime
df["startTime_GST"] = pd.to_datetime(df["startTime_GST"])
df["startTime_CME"] = pd.to_datetime(df["startTime_CME"])

# Compute the time difference
df["timeDiff"] = df["startTime_GST"] - df["startTime_CME"]

# Display the resulting DataFrame
print(df)
    







                         gstID             startTime_GST  \
0  2013-06-01T01:00:00-GST-001 2013-06-01 01:00:00+00:00   
1  2013-06-07T03:00:00-GST-001 2013-06-07 03:00:00+00:00   
2  2013-10-02T03:00:00-GST-001 2013-10-02 03:00:00+00:00   
3  2013-12-08T00:00:00-GST-001 2013-12-08 00:00:00+00:00   

              startTime_CME        timeDiff  
0 2013-05-31 15:45:00+00:00 0 days 09:15:00  
1 2013-06-02 20:24:00+00:00 4 days 06:36:00  
2 2013-09-29 22:40:00+00:00 2 days 04:20:00  
3 2013-12-04 23:12:00+00:00 3 days 00:48:00  


In [122]:
# Use describe() to compute the mean and median time
df["timeDiff_seconds"] = df["timeDiff"].dt.total_seconds()

# Describe statistics for timeDiff
time_diff_stats = df["timeDiff_seconds"].describe()

# Compute mean and median explicitly in a readable format
mean_time_diff = df["timeDiff_seconds"].mean()
median_time_diff = df["timeDiff_seconds"].median()

# Convert mean and median back to timedelta for better understanding
mean_time_diff_td = pd.to_timedelta(mean_time_diff, unit="s")
median_time_diff_td = pd.to_timedelta(median_time_diff, unit="s")

# Display the results
print(f"Descriptive Statistics for Time Differences:\n{time_diff_stats}\n")
print(f"Mean Time Difference: {mean_time_diff_td}")
print(f"Median Time Difference: {median_time_diff_td}")

# Print the mean and median time that it takes for a CME to cause a GST
df["timeDiff_seconds"] = df["timeDiff"].dt.total_seconds()

# Compute mean and median in seconds
mean_time_diff = df["timeDiff_seconds"].mean()
median_time_diff = df["timeDiff_seconds"].median()

# Convert mean and median back to timedelta for better interpretation
mean_time_diff_td = pd.to_timedelta(mean_time_diff, unit="s")
median_time_diff_td = pd.to_timedelta(median_time_diff, unit="s")

# Print the results
print(f"Mean Time for a CME to cause a GST: {mean_time_diff_td}")
print(f"Median Time for a CME to cause a GST: {median_time_diff_td}")
        



Descriptive Statistics for Time Differences:
count         4.000000
mean     213285.000000
std      141131.405789
min       33300.000000
25%      149625.000000
50%      225240.000000
75%      288900.000000
max      369360.000000
Name: timeDiff_seconds, dtype: float64

Mean Time Difference: 2 days 11:14:45
Median Time Difference: 2 days 14:34:00
Mean Time for a CME to cause a GST: 2 days 11:14:45
Median Time for a CME to cause a GST: 2 days 14:34:00


### Exporting data in csv format

In [123]:
# Export data to CSV without the index
merged_df.to_csv("cme_gst.csv", index=False)
print("Data exported to CSV")



Data exported to CSV
