In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import cdflib

In [2]:
base_f_loc = '/storage/silver/stfc_cg/hf832176/data/THEMIS/'

## Basic commands:

data = some .cdf file.

data.cdf_info() - lists variables and more

data.varattsget() - lists variables

data.varattsget(variable_name) - lists variable info

In [49]:
spacecraft = 'tha'

In [50]:
def get_file_paths(base_f_loc, instrument, spacecraft):
    f_path = base_f_loc+instrument+'/'+spacecraft
    files = !ls {f_path}?*.cdf
    return files

# <center>SST data: Spectra</center>

In [51]:
instrument = 'sst'
files = get_file_paths(base_f_loc, instrument, spacecraft)

sub_dfs = []
for i in files:
    # Load data
    try:
        data = cdflib.CDF(i)
    except OSError:
        print('File failed to load: '+i)
        pass
    # Read electron spectrogram
    eflux = data.varget(spacecraft+'_psef_en_eflux')
    # Read spectrogram quality flag (>0 is bad)
    quality = data.varget(spacecraft+'_psef_data_quality')
    
    # Read energy levels for data
    ene = data.varget(spacecraft+'_psef_en_eflux_yaxis')
    e_col_names = ['E_'+str(i+1) for i in range(len(ene[0]))]
    # Create datetimes for timestamps
    try:
        dti = [dt.datetime(1970,1,1) + dt.timedelta(seconds=i)
           for i in data.varget(spacecraft+'_psef_time')]
    except TypeError:
        print('No data: '+i)
    
    df = pd.DataFrame(eflux,index=dti,columns=e_col_names)
    df['quality'] = quality
    
    sub_dfs.append(df)

Variable name not found.
File failed to load: /storage/silver/stfc_cg/hf832176/data/THEMIS/sst/tha_l2_sst_20071002_v01.cdf


In [52]:
# instrument = 'sst'
# files = get_file_paths(base_f_loc, instrument, spacecraft)
# data = cdflib.CDF(files[0])
# data.varattsget(spacecraft+'_psef_en_eflux')

In [53]:
comb_df = pd.concat(sub_dfs)
comb_df.info()
comb_df.to_csv(base_f_loc+spacecraft+'/'+spacecraft+'_'+instrument+'.csv')
comb_df.to_pickle(base_f_loc+spacecraft+'/'+spacecraft+'_'+instrument+'.pkl')

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 142364 entries, 2007-09-26 16:19:45.142290 to 2008-05-31 23:59:16.693261
Data columns (total 17 columns):
E_1        142364 non-null float64
E_10       142364 non-null float64
E_11       142364 non-null float64
E_12       0 non-null float64
E_13       0 non-null float64
E_14       0 non-null float64
E_15       0 non-null float64
E_16       0 non-null float64
E_2        142364 non-null float64
E_3        142364 non-null float64
E_4        142364 non-null float64
E_5        142364 non-null float64
E_6        142364 non-null float64
E_7        142364 non-null float64
E_8        142364 non-null float64
E_9        142364 non-null float64
quality    142289 non-null object
dtypes: float64(16), object(1)
memory usage: 19.6+ MB


In [54]:
# instrument = 'sst'
# spacecraft = 'tha'
# files = get_file_paths(base_f_loc, instrument, spacecraft)
# data = cdflib.CDF(files[0])
# ene = data.varget(spacecraft+'_psef_en_eflux_yaxis')
# ene[0]
# df = pd.DataFrame([i for i in range(len(ene[0]))],index=ene[0],columns=['eV'])
# df.to_csv(base_f_loc+spacecraft+'/'+spacecraft+'_'+instrument+'_energy_levels.csv')
# df.to_pickle(base_f_loc+spacecraft+'/'+spacecraft+'_'+instrument+'_energy_levels.pkl')

# <center> STATE: Position, orientation etc. </center>

In [55]:
instrument = 'state'
files = get_file_paths(base_f_loc, instrument, spacecraft)

sub_dfs = []
for i in files:
    # Load data
    try:
        data = cdflib.CDF(i)
    except OSError:
        print('File failed to load: '+i)
        pass
    # Read GSM position
    pos_gsm = data.varget(spacecraft+'_pos_gsm')
    pos_gei = data.varget(spacecraft+'_pos')
    
    spin_ra = data.varget(spacecraft+'_spinras') # right ascension
    spin_dec = data.varget(spacecraft+'_spindec') # declination
    spin_per = data.varget(spacecraft+'_spinper') # spin period
    spin_phase = data.varget(spacecraft+'_spinphase') #spin phase
    
    # Create datetimes for timestamps
    try:
        dti = [dt.datetime(1970,1,1) + dt.timedelta(seconds=i)
           for i in data.varget(spacecraft+'_state_time')]
    except TypeError:
        print('No data: '+i)
    
    df = pd.DataFrame(pos_gsm,index=dti,columns=['pos_x_gsm','pos_y_gsm','pos_z_gsm'])
    df['spin_ra'] = spin_ra
    df['spin_dec'] = spin_dec
    df['spin_per'] = spin_per
    df['spin_phase'] = spin_phase
    df['pos_x_gei'] = pos_gei[:,0]
    df['pos_y_gei'] = pos_gei[:,1]
    df['pos_z_gei'] = pos_gei[:,2]
    
    sub_dfs.append(df)

In [56]:
# instrument = 'state'
# spacecraft = 'tha'
# files = get_file_paths(base_f_loc, instrument, spacecraft)

# data = cdflib.CDF(files[0])

# data.varattsget('tha_pos')

In [57]:
comb_df = pd.concat(sub_dfs)
comb_df.info()
comb_df.to_csv(base_f_loc+spacecraft+'/'+spacecraft+'_'+instrument+'.csv')
comb_df.to_pickle(base_f_loc+spacecraft+'/'+spacecraft+'_'+instrument+'.pkl')

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 349920 entries, 2007-09-15 00:00:00 to 2008-05-31 23:59:00
Data columns (total 10 columns):
pos_x_gsm     349920 non-null float32
pos_y_gsm     349920 non-null float32
pos_z_gsm     349920 non-null float32
spin_ra       349920 non-null float32
spin_dec      349920 non-null float32
spin_per      349920 non-null float32
spin_phase    349920 non-null float32
pos_x_gei     349920 non-null float32
pos_y_gei     349920 non-null float32
pos_z_gei     349920 non-null float32
dtypes: float32(10)
memory usage: 16.0 MB


# <center> FGM: B-field etc. </center>

In [58]:
instrument = 'fgm'
data_rate = 'l' # or 'h'
files = get_file_paths(base_f_loc, instrument, spacecraft)

sub_dfs = []
for i in files:
    # Load data
    try:
        data = cdflib.CDF(i)
    except OSError:
        print('File failed to load: '+i)
        pass
    # Read GSM position
    try:
        b_gsm = data.varget(spacecraft+'_fg'+data_rate+'_gsm')
    except ValueError as err:
        print("Value error: {0}".format(err))
        print(i)
        pass
    except EOFError as err:
        print("EOF error: {0}".format(err))
        print(i)
        pass
    
    try:    
        quality = data.varget(spacecraft+'_fg'+data_rate+'_gsmQ')
    except ValueError as err:
        print("Value error: {0}".format(err))
        print(i)
        pass
    except EOFError as err:
        print("EOF error: {0}".format(err))
        print(i)
        pass
    
    # Create datetimes for timestamps
    try:
        dti = [dt.datetime(1970,1,1) + dt.timedelta(seconds=i)
               for i in data.varget(spacecraft+'_fg'+data_rate+'_time')]
    except TypeError:
        print('No data: '+i)
    except ValueError as err:
        print("Value error: {0}".format(err))
        print(i)
        pass
    except EOFError as err:
        print("EOF error: {0}".format(err))
        print(i)
        pass

    
    df = pd.DataFrame(b_gsm,index=dti,columns=['Bx_gsm','By_gsm','Bz_gsm'])
    df['quality'] = quality
    
    sub_dfs.append(df)

No data: /storage/silver/stfc_cg/hf832176/data/THEMIS/fgm/tha_l2_fgm_20070915_v01.cdf
No data: /storage/silver/stfc_cg/hf832176/data/THEMIS/fgm/tha_l2_fgm_20070916_v01.cdf
No data: /storage/silver/stfc_cg/hf832176/data/THEMIS/fgm/tha_l2_fgm_20071002_v01.cdf


In [59]:
comb_df = pd.concat(sub_dfs)
comb_df.info()
comb_df.to_csv(base_f_loc+spacecraft+'/'+spacecraft+'_'+instrument+'.csv')
comb_df.to_pickle(base_f_loc+spacecraft+'/'+spacecraft+'_'+instrument+'.pkl')

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 52847424 entries, 2008-05-31 00:00:00 to 2008-06-01 00:00:59.728973
Data columns (total 4 columns):
Bx_gsm     float32
By_gsm     float32
Bz_gsm     float32
quality    object
dtypes: float32(3), object(1)
memory usage: 1.4+ GB
