In [6]:
import calendar
import csv
from io import StringIO

import requests
from bs4 import BeautifulSoup

# OMNIWeb CGI endpoint (NASA Space Physics Data Facility) that receives the form
spdf = "https://omniweb.gsfc.nasa.gov/cgi/nx1.cgi"

# Fields posted to the endpoint. "vars" holds the numeric codes of the requested
# variables -- presumably the seven solar-wind columns written to the CSV;
# verify against the OMNIWeb form for this spacecraft.
form_data = dict(
    activity="retrieve",
    res="hourly",
    start_date="20190101",
    end_date="20190627",
    vars=["06", "07", "10", "11", "18", "22", "23"],
    spacecraft="dscovr_hr_merge",
)

# Month/year of the most recent successful geomagnetic lookup (0 = none yet)
mo = yr = 0

# Geomagnetic field values from that lookup; they are written to every CSV row
# until a row from a different month or year triggers a new lookup.
intensity = decline = incline = north = east = vertical = horizontal = 0

# Inclusive (first_day, last_day) day-of-year span of each month, January
# through December, for a 365-day (non-leap) year.
_MONTH_LENGTHS = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
month_ranges = [
    (sum(_MONTH_LENGTHS[:index]) + 1, sum(_MONTH_LENGTHS[:index + 1]))
    for index in range(12)
]

def get_month_and_day(day_of_year, year=None):
    """Convert a 1-based day-of-year into a ``(month, day_of_month)`` pair.

    Month lengths are computed here rather than read from the module-level
    ``month_ranges`` table, because that table only describes a 365-day year.

    Args:
        day_of_year: Day number within the year, where 1 is January 1.
        year: Optional calendar year. When given, leap years are honoured
            (day 60 becomes February 29 and day 366 becomes December 31).
            When omitted, a 365-day year is assumed.

    Returns:
        ``(month, day_of_month)``, both 1-based, or ``(-1, -1)`` when
        ``day_of_year`` does not fall inside the year.
    """
    # 2019 is simply a known non-leap year used when the caller gives no year
    reference_year = 2019 if year is None else year

    start_day = 1
    for month in range(1, 13):
        days_in_month = calendar.monthrange(reference_year, month)[1]
        end_day = start_day + days_in_month - 1
        if start_day <= day_of_year <= end_day:
            return month, day_of_year - start_day + 1
        start_day = end_day + 1

    return -1, -1

# Download the hourly listing from OMNIWeb, add geomagnetic field values from
# the BGS IGRF web service to every row, and save the result as a CSV file.

# Seconds to wait on each HTTP request, so an unresponsive server raises an
# error instead of hanging the notebook indefinitely.
REQUEST_TIMEOUT = 120

# Send an HTTP POST request with the form data
response = requests.post(spdf, data=form_data, timeout=REQUEST_TIMEOUT)

# Check if the request was successful
if response.status_code == 200:
    print("Form submitted successfully.")

    # The data listing is read from the first <pre> element of the page
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('pre')

    if table is None:
        # Without a <pre> block there is no listing to parse, so report it
        # instead of failing with an AttributeError further down.
        print("No <pre> data block found in the response; nothing was saved.")
    else:
        output_path = f"./Dataset-{form_data['start_date'][:4]}.csv"

        # Rows are collected in memory and written to disk in one go at the end
        csv_buffer = StringIO()
        csv_header = "year,month,day,hour,bx_gsm,by_gsm,bz_gsm,bt,speed,density,temp,intensity,declination,inclination,north,east,vertical,horizontal\n"
        csv_buffer.write(csv_header)

        # Skip the header (first 11 lines) and footer (last 2 lines) of the listing
        lines = table.get_text().split('\n')[11:-2]
        for line in lines:
            if not line.strip():
                continue
            columns = line.split()
            if len(columns) < 11:
                continue

            try:
                year = int(columns[0])
                doy = int(columns[1])

                # get_month_and_day(doy) assumes a 365-day year. In leap years
                # day 60 is February 29 and every later day sits one position
                # further on, so shift it back before the lookup.
                if calendar.isleap(year) and doy >= 60:
                    month, day = (2, 29) if doy == 60 else get_month_and_day(doy - 1)
                else:
                    month, day = get_month_and_day(doy)
                if month == -1 or day == -1:
                    continue

                hour = int(columns[2])
                # NOTE(review): columns[3] is not used -- confirm the column
                # layout against the OMNIWeb listing for this spacecraft.
                bt = columns[4]
                bx_gsm = columns[5]
                by_gsm = columns[6]
                bz_gsm = columns[7]
                speed = columns[8]
                density = columns[9]
                temp = columns[10]

                # Geomagnetic values are looked up once per (year, month), using
                # the date of the first row seen in that month.
                if month != mo or year != yr:
                    # IGRF revision 13 at a fixed latitude/longitude, altitude 0
                    bgs = f"https://geomag.bgs.ac.uk/web_service/GMModels/igrf/13/?latitude=86.5&longitude=164.04&altitude=0&date={year}-{month:02d}-{day:02d}&format=json"

                    # Kept in its own name so the OMNIWeb `response` above is
                    # not overwritten by this lookup.
                    bgs_response = requests.get(bgs, timeout=REQUEST_TIMEOUT)

                    if bgs_response.status_code == 200:
                        data = bgs_response.json()
                        field_values = data["geomagnetic-field-model-result"]["field-value"]

                        intensity = field_values["total-intensity"]["value"]
                        decline = field_values["declination"]["value"]
                        incline = field_values["inclination"]["value"]
                        north = field_values["north-intensity"]["value"]
                        east = field_values["east-intensity"]["value"]
                        vertical = field_values["vertical-intensity"]["value"]
                        horizontal = field_values["horizontal-intensity"]["value"]

                        mo, yr = month, year
                    else:
                        # mo/yr stay unchanged, so the previously fetched values
                        # (initially 0) are written for this row and the lookup
                        # is attempted again on the next row.
                        print(f"Failed to fetch data from the endpoint. Status code: {bgs_response.status_code}")

                # Build the CSV row and append it to the buffer
                csv_row = f"{year},{month},{day},{hour},{bx_gsm},{by_gsm},{bz_gsm},{bt},{speed},{density},{temp},{intensity},{decline},{incline},{north},{east},{vertical},{horizontal}\n"
                csv_buffer.write(csv_row)
            except ValueError:
                # Raised by int() on a non-numeric field, among others
                print(f"Skipping line due to ValueError: {line}")

        # Save the collected CSV text to disk
        with open(output_path, 'w') as csv_file:
            csv_file.write(csv_buffer.getvalue())

        print("CSV data saved to ", output_path)

        # Close the CSV buffer
        csv_buffer.close()
else:
    print(f"Form submission failed with status code {response.status_code}")


Form submitted successfully.
CSV data saved to  ./Dataset-2019.csv


In [11]:
import pandas as pd
import os
import glob

# Merge every CSV file in the current directory into a single file, unless the
# merged file already exists.

# Define the filename for the merged data
merged_filename = 'merged-data.csv'

# Check if the file already exists
if os.path.exists(merged_filename):
    print(f"Aborting merge: '{merged_filename}' already exists.")
else:
    # glob returns paths in arbitrary order; sorting keeps the row order of the
    # merged file the same from run to run.
    files = sorted(glob.glob('./*.csv'))

    # Read every CSV except the merge target itself. Comparing base names keeps
    # the exclusion independent of the path separator glob uses.
    dataframes = [
        pd.read_csv(file)
        for file in files
        if os.path.basename(file) != merged_filename
    ]

    if not dataframes:
        # pd.concat raises ValueError on an empty list, so report it instead
        print("No CSV files found to merge.")
    else:
        # Concatenate all the DataFrames in the list
        merged_data = pd.concat(dataframes, ignore_index=True)

        # Save the merged DataFrame to a new CSV file
        merged_data.to_csv(merged_filename, index=False)

        print(f"Merged data saved to '{merged_filename}'")


Aborting merge: 'merged-data.csv' already exists.
