In [2]:
%matplotlib inline
import numpy as np
import glob
import tqdm
import json
import re

In [3]:
def get_iaga_max_stations(tiny=False, years=None, base_dir="../full_data_panos/iaga"):
    """Return the largest station count found in any matching supermag .npz file.

    Parameters
    ----------
    tiny : bool, default False
        If True, scan the ``supermag_iaga_tiny*.npz`` files; otherwise scan the
        files matched by ``supermag_iaga_[!tiny]*.npz``.
    years : iterable, optional
        Year sub-directories of ``base_dir`` to scan. Defaults to 2010..2018
        inclusive.
    base_dir : str, default "../full_data_panos/iaga"
        Directory that contains one sub-directory per year.

    Returns
    -------
    int
        ``max(len(x["stations"]))`` over all matched files.

    Raises
    ------
    AssertionError
        If no file matches.
    """
    if years is None:
        years = range(2010, 2019)

    # NOTE: "[!tiny]" is a glob character class, not a negation of the word
    # "tiny": it matches any single character other than 't', 'i', 'n' or 'y'.
    # Files whose name continues with one of those letters are skipped too.
    pattern = "supermag_iaga_tiny*.npz" if tiny else "supermag_iaga_[!tiny]*.npz"

    # Within each year, order files by the integer formed from all digits in
    # their path (raw string: "\D" is not a valid escape in a normal literal).
    files = [
        f
        for y in years
        for f in sorted(
            glob.glob(f"{base_dir}/{y}/{pattern}"),
            key=lambda name: int(re.sub(r"\D", "", name)),
        )
    ]
    assert len(files) > 0, f"no files matching {pattern!r} under {base_dir!r}"

    print("loading supermag iaga data...")
    max_stations = 0
    for f in tqdm.tqdm(files):
        # allow_pickle=True unpickles object arrays, which can execute
        # arbitrary code: only point this at trusted files.
        # The context manager closes the underlying zip file handle.
        with np.load(f, allow_pickle=True) as x:
            max_stations = max(max_stations, len(x["stations"]))
    return max_stations

In [4]:
# Scan the non-tiny files (tiny defaults to False) and display the largest station count.
get_iaga_max_stations()

loading supermag iaga data...


100%|██████████████████████████████████████████████████████████████████████████████████| 108/108 [00:04<00:00, 24.77it/s]


175

In [5]:
def get_iaga_data(path, tiny=False, load_data=True, max_stations=None):
    """Load and concatenate the supermag .npz files found under ``path``.

    Parameters
    ----------
    path : str
        Prefix that is concatenated directly with the file pattern, so a
        directory must end with a path separator.
    tiny : bool, default False
        If True, load ``supermag_iaga_tiny*.npz``; otherwise load the files
        matched by ``supermag_iaga_[!tiny]*.npz``.
    load_data : bool, default True
        If False, the ``"data"`` arrays are not read and an empty list is
        returned in their place.
    max_stations : int, optional
        Size the station axis (axis 1) is NaN-padded to. Defaults to the
        largest ``len(x["stations"])`` among the loaded files.

    Returns
    -------
    dates : numpy.ndarray
        ``x["dates"]`` of every file, concatenated in file order.
    data : numpy.ndarray or list
        ``x["data"]`` of every file, padded along axis 1 and concatenated
        along axis 0; ``[]`` when ``load_data`` is False.
    features : numpy.ndarray
        ``x["features"]`` of the last file loaded.

    Raises
    ------
    AssertionError
        If no file matches.
    ValueError
        If ``max_stations`` is smaller than a file's station axis.
    """
    # NOTE: "[!tiny]" is a glob character class, not a negation of the word
    # "tiny": it matches any single character other than 't', 'i', 'n' or 'y'.
    pattern = "supermag_iaga_tiny*.npz" if tiny else "supermag_iaga_[!tiny]*.npz"

    # Order files by the integer formed from all digits in their full path
    # (raw string: "\D" is not a valid escape in a normal literal).
    files = sorted(
        glob.glob(path + pattern),
        key=lambda f: int(re.sub(r"\D", "", f)),
    )
    assert len(files) > 0, f"no files matching {path + pattern!r}"

    data = []
    dates = []
    station_counts = []
    features = None

    print("loading supermag iaga data...")
    for f in tqdm.tqdm(files):
        # allow_pickle=True unpickles object arrays, which can execute
        # arbitrary code: only point this at trusted files.
        # The context manager closes the underlying zip file handle.
        with np.load(f, allow_pickle=True) as x:
            if load_data:
                data.append(x["data"])
            dates.append(x["dates"])
            features = x["features"]  # the last file's value is returned
            station_counts.append(len(x["stations"]))

    if max_stations is None:
        max_stations = max(station_counts)

    # Pad every chunk with NaN along the station axis so all chunks share
    # the same shape and can be stacked along axis 0.
    for i, d in enumerate(data):
        n_missing = max_stations - d.shape[1]
        if n_missing < 0:
            raise ValueError(
                f"max_stations={max_stations} is smaller than the {d.shape[1]} "
                f"stations present in {files[i]!r}"
            )
        padding = np.full([d.shape[0], n_missing, d.shape[2]], np.nan)
        data[i] = np.concatenate([d, padding], axis=1)

    dates = np.concatenate(dates)
    if load_data:
        data = np.concatenate(data)
    return dates, data, features

In [6]:
def get_iaga_data_as_list(year, tiny=False):
    """Load supermag data for one year or for a sequence of years.

    Parameters
    ----------
    year : str, int, or list/tuple/numpy.ndarray of those
        A single year loads ``../full_data_panos/iaga/{year}/`` directly.
        A sequence loads every year, pads them to a common station count and
        concatenates them along axis 0.
    tiny : bool, default False
        Forwarded to ``get_iaga_data``.

    Returns
    -------
    tuple
        ``(dates, data, features)``; for a sequence, ``features`` is the value
        returned for the last year.

    Raises
    ------
    ValueError
        If ``year`` is an empty sequence.
    TypeError
        If ``year`` is neither a single year nor a sequence of years.
    """
    if isinstance(year, (str, int, np.integer)):
        return get_iaga_data(f"../full_data_panos/iaga/{year}/", tiny=tiny)

    if isinstance(year, (list, tuple, np.ndarray)):
        # Fail fast, before the station-count scan touches the disk.
        if len(year) == 0:
            raise ValueError("year must contain at least one year.")

        # NOTE(review): the station count always comes from the non-tiny
        # files, even when tiny=True — confirm this is intended.
        max_stations = get_iaga_max_stations()

        dates = []
        data = []
        feat = None
        for y in year:
            dt, dat, feat = get_iaga_data(
                f"../full_data_panos/iaga/{y}/", tiny=tiny, max_stations=max_stations
            )
            print(dt.shape, dat.shape, feat.shape)
            dates.append(dt)
            data.append(dat)
        return np.concatenate(dates, axis=0), np.concatenate(data, axis=0), feat

    raise TypeError("year must be either a list of years, or a single year.")


In [7]:
# Years 2010 through 2018 inclusive, as a list of NumPy integer scalars.
yearlist = [*np.arange(2010, 2019, dtype=int)]
print(yearlist)

[2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018]


In [8]:
# Load every year in yearlist; unpack the concatenated dates, the padded data array, and the features array.
dt,data,ft = get_iaga_data_as_list(yearlist)

loading supermag iaga data...


100%|████████████████████████████████████████████████████████████████████████████████| 108/108 [00:00<00:00, 2343.24it/s]


loading supermag iaga data...


100%|████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:30<00:00,  2.56s/it]


(525600,) (525600, 175, 6) (6,)
loading supermag iaga data...


100%|████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:31<00:00,  2.61s/it]


(525600,) (525600, 175, 6) (6,)
loading supermag iaga data...


100%|████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:34<00:00,  2.86s/it]


(527040,) (527040, 175, 6) (6,)
loading supermag iaga data...


100%|████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:31<00:00,  2.65s/it]


(525600,) (525600, 175, 6) (6,)
loading supermag iaga data...


100%|████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:28<00:00,  2.39s/it]


(525600,) (525600, 175, 6) (6,)
loading supermag iaga data...


100%|████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:30<00:00,  2.52s/it]


(525600,) (525600, 175, 6) (6,)
loading supermag iaga data...


100%|████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:30<00:00,  2.55s/it]


(527040,) (527040, 175, 6) (6,)
loading supermag iaga data...


100%|████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:30<00:00,  2.56s/it]


(525600,) (525600, 175, 6) (6,)
loading supermag iaga data...


100%|████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:29<00:00,  2.49s/it]


(525600,) (525600, 175, 6) (6,)


In [9]:
# Shape of the combined array after concatenating all years along axis 0.
data.shape

(4733280, 175, 6)

In [22]:
# Load the saved indices from ../train.txt, parsed as JSON.
# NOTE(review): the file is named "train" but the result is bound to `test_inds` — confirm which is intended.
with open("../train.txt") as f:
    test_inds = json.load(f)

In [23]:
# Column-wise maximum of the loaded indices (presumably to check them against data.shape[0] — confirm).
np.max(np.asarray(test_inds),axis=0)

array([4732679, 4733279])

In [25]:
# 4733280 is data.shape[0] and 4733279 is the largest index displayed above, both copied by hand from the outputs;
# a difference of 1 means the largest index equals the last valid row, data.shape[0] - 1.
4733280-4733279

1