# AMPERE database

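Create a compact database from the AMPERE NetCDF files. The co-latitude/MLT grid is identical in every file, so it is stored once and only the per-record values are kept for each time step, to see whether the data set can be made smaller.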

## Variables 

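- year [nrecs], year of each record.
- doy [nrecs], day of year of each record.
- time [nrecs], time of day of each record in decimal hours.
- jPar [nrecs, nobs], current values on the grid, one value per grid point for each record.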
- mlt_hr [nrecs, nobs], AACGM Magnetic Local Time (MLT) in hours.
- cLat_deg [nrecs, nobs], co-latitude in AACGM coordinates in degrees.

In [1]:
import pathlib
import zipfile

import os as os
import pandas as pd
import numpy as np
import netCDF4 as nc

In [9]:
# Scratch directory: archive members are unpacked here temporarily and the
# output HDF5 files are written here.
tmp_dir = 'C:/data'
# Root directory that is searched recursively for AMPERE zip archives.
amp_dir = 'E:/OneDrive/data/AMPERE'
# Reference file whose co-latitude/MLT grid every other file is compared to.
# Derived from amp_dir so the two cannot drift apart when the data moves.
amp_fn = f'{amp_dir}/ampere.20091002.k060_m08.north.grd.nc'
amp_path = pathlib.Path(amp_dir)
# Variables a NetCDF file must contain to be added to the database.
amp_keys = ['cLat_deg','mlt_hr','year','doy','time','jPar']

In [10]:
# Open a single AMPERE file to get the reference position arrays
# (co-latitude and MLT of every grid point). These are compared against
# the position arrays of every other file to make sure all files share
# the same grid.
#
# The context manager closes the file even if one of the reads fails;
# every variable is copied into memory before the file is closed.
with nc.Dataset(amp_fn) as amp_dat:
    # the grid is taken from the first record only
    f_colat = amp_dat['cLat_deg'][:][0, :]
    f_mlt = amp_dat['mlt_hr'][:][0, :]

    f_year = amp_dat['year'][:]
    f_doy = amp_dat['doy'][:]
    f_t = amp_dat['time'][:]

    f_j = amp_dat['jPar'][:]


In [11]:
def _join_chunks(chunks, axis=None):
    """Join per-file arrays the way repeated ``np.append`` calls would.

    Parameters
    ----------
    chunks : list of arrays, one per processed file (must not be empty).
    axis : axis to join along; ``None`` flattens every chunk first, which
        is what ``np.append`` does when no axis is given.

    Returns
    -------
    The single chunk unchanged when there is only one, otherwise one
    concatenated array.
    """
    if len(chunks) == 1:
        return chunks[0]
    if axis is None:
        return np.concatenate([np.ravel(c) for c in chunks])
    return np.concatenate(chunks, axis=axis)


# Stays None when no file could be used.
a_y = None

# Per-file pieces are collected in lists and joined once after the loop;
# appending to a growing array would re-copy all previous data per file.
year_chunks, doy_chunks, time_chunks, jpar_chunks = [], [], [], []

# sorted() makes the processing order, and therefore the record order,
# independent of the order in which the file system lists the archives.
for zf in sorted(amp_path.rglob("*north*.zip")):
    print(f'Processing {zf}')
    with zipfile.ZipFile(zf, "r") as f:
        for ncf in f.namelist():
            if ncf.endswith('/'):
                # directory entry inside the archive, nothing to read
                continue

            # extract() returns the path the member was actually written to
            nc_file = f.extract(ncf, path=tmp_dir)
            try:
                data = nc.Dataset(nc_file)
                try:
                    # make sure all the keys we need are in the data
                    if all(key in data.variables for key in amp_keys):
                        print(f'Extracting/processing {ncf} to temp file at {tmp_dir}')
                        t_colat = data['cLat_deg'][:][0, :]
                        t_mlt = data['mlt_hr'][:][0, :]

                        # only keep files on the same grid as the reference file
                        if np.array_equal(f_colat, t_colat) and np.array_equal(f_mlt, t_mlt):
                            year_chunks.append(data['year'][:])
                            doy_chunks.append(data['doy'][:])
                            time_chunks.append(data['time'][:])
                            jpar_chunks.append(data['jPar'][:])
                        else:
                            print(f'Skipping {ncf}: grid differs from the reference file')
                finally:
                    # the file must be closed before it can be deleted
                    data.close()
            finally:
                # delete the temp file, even when reading it failed
                os.remove(nc_file)

if jpar_chunks:
    a_y = _join_chunks(year_chunks)
    a_doy = _join_chunks(doy_chunks)
    a_t = _join_chunks(time_chunks)
    # records are stacked along the first axis, grid points stay in columns
    a_j = _join_chunks(jpar_chunks, axis=0)
else:
    print('No usable AMPERE files were found')
        



Processing E:\OneDrive\data\AMPERE\2009\ampere.200910.k060_m08.north.grd.zip
Extracting/processing ampere.20091002.k060_m08.north.grd.nc to temp file at C:/data
Extracting/processing ampere.20091004.k060_m08.north.grd.nc to temp file at C:/data
Extracting/processing ampere.20091005.k060_m08.north.grd.nc to temp file at C:/data
Extracting/processing ampere.20091006.k060_m08.north.grd.nc to temp file at C:/data
Extracting/processing ampere.20091007.k060_m08.north.grd.nc to temp file at C:/data
Extracting/processing ampere.20091009.k060_m08.north.grd.nc to temp file at C:/data
Extracting/processing ampere.20091014.k060_m08.north.grd.nc to temp file at C:/data
Extracting/processing ampere.20091019.k060_m08.north.grd.nc to temp file at C:/data
Extracting/processing ampere.20091024.k060_m08.north.grd.nc to temp file at C:/data
Extracting/processing ampere.20091026.k060_m08.north.grd.nc to temp file at C:/data
Extracting/processing ampere.20091028.k060_m08.north.grd.nc to temp file at C:/data

In [14]:
# One row per record and one column per grid point; the columns are
# labelled with the grid-point index as a string ('0', '1', ...).
amp_df = pd.DataFrame(
    a_j.astype(float),
    columns=np.arange(a_j.shape[1]).astype(str),
)

In [15]:
# Build one timestamp per record from year, day of year and decimal hours.
# All records are parsed in a single vectorized call instead of one
# pd.to_datetime call per record.
rec_year = np.asarray(a_y).astype(np.int64)   # int64 so year*1000 cannot overflow
rec_doy = np.asarray(a_doy).astype(np.int64)
rec_hours = np.asarray(a_t).astype(float)

# 'YYYYDDD' strings, e.g. 2009 and day 275 -> '2009275'
day_start = pd.to_datetime((rec_year * 1000 + rec_doy).astype(str), format='%Y%j')

# round to the nearest minute to remove float noise from the decimal hours
amp_df['time'] = (day_start + pd.to_timedelta(rec_hours, unit='h')).round('min')

In [16]:
# Grid coordinates taken from the reference file, one row per grid point.
amp_coor = pd.DataFrame(
    dict(
        aacgm_colat=f_colat.astype(float),
        mlt=f_mlt.astype(float),
    )
)

In [18]:
# Preview the first two records of the value table.
amp_df.head(n=2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1191,1192,1193,1194,1195,1196,1197,1198,1199,time
0,0.056975,0.037803,0.038518,0.049165,0.059672,0.060608,0.044058,0.006738,-0.046455,-0.101901,...,0.063134,0.062676,0.029469,-0.010243,-0.029591,-0.020619,0.002271,0.015732,0.006109,2009-10-02 00:00:00
1,0.059664,0.0998,0.142275,0.156618,0.131288,0.076095,0.011862,-0.043416,-0.082854,-0.10951,...,0.067157,0.063972,0.031553,-0.006913,-0.027982,-0.024404,-0.007964,0.001923,-0.00606,2009-10-02 00:02:00


In [19]:
# Preview the first two grid points of the coordinate table.
amp_coor.head(n=2)

Unnamed: 0,aacgm_colat,mlt
0,1.0,0.0
1,2.0,0.0


In [20]:
# Write both tables to their own HDF5 file in the scratch directory.
# mode='w' overwrites an existing file; complevel=9 is the highest
# compression level.
hdf_options = dict(mode='w', complevel=9)

amp_df.to_hdf(f'{tmp_dir}/ampere_jpar.hdf5', key='AMPERE_J', **hdf_options)
amp_coor.to_hdf(f'{tmp_dir}/ampere_coor.hdf5', key='AMPERE_coor', **hdf_options)