To import the raw data in FITS format, I will use the [Astropy](https://www.astropy.org) package.

In [1]:
# pip install astropy
from astropy.io import fits

In [9]:
import os
import requests

# Root URL of the FITS archive. Later cells append "/<year>" to list a year's
# files and "/<year>/<file>" to download an individual file.
base_url = "https://umbra.nascom.nasa.gov/goes/fits"

- As we will see, importing even one year of data is challenging. 
- For this example I will focus on the year 2018.

In [45]:
import pandas as pd
import numpy as np
year = 2018
# Read the first HTML table of the archive's listing page for the chosen
# year, keep only its "Name" column, and drop rows that have no name.
df = (
    pd.read_html(f"{base_url}/{year}")[0]
    .loc[:, ["Name"]]
    .dropna()
)
# Keep just the entries whose name ends in "fits".
files = df.loc[df["Name"].str.endswith("fits"), "Name"]

In [3]:
# Open one sample file to inspect its structure. The path is relative, so the
# file must already exist under ./fits/ -- nothing in the visible cells
# downloads it. The handle is never closed in the visible cells; it stays
# open because the next cell reads from `f`.
f = fits.open('fits/go1020050101.fits')

In [18]:
# Display the "FLUX" column of the "FLUXES" extension of the sample file.
f["FLUXES"].data["FLUX"]

array([[[1.2839962e-07, 5.1667759e-09],
        [1.2717541e-07, 5.0859854e-09],
        [1.2924733e-07, 5.0656310e-09],
        ...,
        [1.8686031e-07, 3.8584749e-09],
        [1.8686031e-07, 3.4860725e-09],
        [1.8685836e-07, 3.4859755e-09]]], dtype='>f4')

In [57]:
import urllib.request
# Fetch every daily file listed in `files` and turn each into a DataFrame with
# the columns Year, Month, Day, Time, Flux1, Flux2. The per-day frames are
# collected in `xl`.
xl = []
for file in files:
    # The file stem (name without ".fits") ends in MMDD.
    stem = file[:-len(".fits")]
    month = int(stem[-4:-2])
    day = int(stem[-2:])
    # Re-running this cell should not fetch a whole year of data again, so a
    # file is only downloaded when it is not on disk yet. The transfer goes to
    # a temporary name first and is renamed on success, so an interrupted
    # download is never mistaken for a complete file on the next run.
    if not os.path.exists(file):
        partial = f"{file}.part"
        urllib.request.urlretrieve(f"{base_url}/{year}/{file}", partial)
        os.replace(partial, file)
    # `hdul` rather than `io`, to avoid shadowing the standard-library module.
    with fits.open(file) as hdul:
        fld = np.asarray(hdul["FLUXES"].data)
        # The time unit is seconds from midnight UTC, observations
        # are approximately once every two seconds
        tim = fld["Time"][0, :]
        # The flux levels in watts per square meter
        flx = fld["Flux"][0, :, :]
        # One row per observation: time followed by the two flux channels.
        x = pd.DataFrame(
            np.hstack((tim[:, None], flx)),
            columns=["Time", "Flux1", "Flux2"],
        ).assign(Year=year, Month=month, Day=day)
        # Put the date columns first.
        x = x[["Year", "Month", "Day", "Time", "Flux1", "Flux2"]]
        xl.append(x)

In [58]:
# Stack the per-day frames row-wise into one table. Each day keeps its own
# 0-based index, so index values repeat across days.
df = pd.concat(xl)
df

Unnamed: 0,Year,Month,Day,Time,Flux1,Flux2
0,2018,1,1,-1.021,3.705000e-08,1.178400e-08
1,2018,1,1,1.025,3.705000e-08,1.120500e-08
2,2018,1,1,3.072,3.705000e-08,1.294000e-08
3,2018,1,1,5.122,3.191700e-08,1.351900e-08
4,2018,1,1,7.169,2.935100e-08,1.351900e-08
...,...,...,...,...,...,...
42162,2018,12,31,86388.529,1.032900e-08,1.274700e-08
42163,2018,12,31,86390.579,7.705800e-09,1.386100e-08
42164,2018,12,31,86392.625,7.705800e-09,1.386100e-08
42165,2018,12,31,86394.675,7.705800e-09,1.051800e-08


In [74]:
# Keep only the December rows and save them as a CSV; pandas infers gzip
# compression from the ".gz" suffix. The file name is built from `year`
# instead of a hard-coded 2018, so it stays in sync with the data if a
# different year is loaded (for year = 2018 the name is unchanged).
df12 = df[df["Month"] == 12]
df12.to_csv(f"goes_{year}_12.csv.gz")