### 2019-06-26

In this Notebook I will be creating a general script to take in all the .cdf files for the neutral and ion spectrometer DE2 instruments (Neutral Atmospheric Composition Spectrometer (NACS) and Retarding Potential Analyzer RPA, respectively) and then produce a dataframe that contains measures for the mass density. 

This dataframe can then be exported to .csv and compared to the RMIT/SERC model in MATLAB

In [1]:
# Import packages
import numpy as np
import pandas as pd
import cdflib
from pathlib import Path
from datetime import datetime

In [2]:
# Glob all the .cdf files that have the 2 sec measurements:
NACS_cdf_list = list(Path.cwd().glob('neutral_gas_nacs/**/*.cdf'))
RPA_cdf_list = list(Path.cwd().glob('plasma_rpa/ion2s_cdaweb/**/*.cdf'))

In [3]:
# Check the length of the two lists:
print(f'The NACS file list contains {len(NACS_cdf_list)}, the RPA file list contains {len(RPA_cdf_list)}.')

The NACS file list contains 539, the RPA file list contains 450.


In [4]:
# Need to find the days that overlap and create a list of 'good' files to run
NACS_cdf_good = []
RPA_cdf_good = []
for n_i in range(0,len(NACS_cdf_list)):
    for r_i in range(0,len(RPA_cdf_list)):
        if str(NACS_cdf_list[n_i])[-16:] == str(RPA_cdf_list[r_i])[-16:]:
            NACS_cdf_good.append(NACS_cdf_list[n_i])
            RPA_cdf_good.append(RPA_cdf_list[r_i])
            break
        else:
            pass

In [5]:
# Create functions for making the dataframes
def reshape_fn(arr_list):
    reshaped_list = []
    for arr in arr_list:
        reshaped_list.append(np.reshape(arr,(len(arr),1)))
    return reshaped_list

def df_maker(path):
    # Open File
    cdf = cdflib.CDF(path)
    # Create a pandas dataframe from the data in the .cdf file:
    vrs = cdf.cdf_info()['zVariables']
    data_arrs = [cdf.varget(variable = var) for var in vrs]
    data_reshp = reshape_fn(data_arrs)
    data = np.hstack(data_reshp)
    df = pd.DataFrame(data)
    # Label columns
    df.columns = vrs
    # Convert any unreasonable values to NaN's and ensure all numbers are converted to floats
    df[df < -1.000000e+30] = None
    conv_float = lambda x: float(x)
    df = df.applymap(conv_float)
    return df

#df_nacs = pd.DataFrame()
#df_rpa = pd.DataFrame()

#for path in NACS_cdf_good:
#    df_nacs = df_nacs.append(df_maker(path),ignore_index=True)

#for path in RPA_cdf_good:
#    df_rpa = df_rpa.append(df_maker(path),ignore_index=True)


Writing up the functions above helped quite a bit in processing the data faster, but still need to write up additional functions to do the processing for each set of NACS-RPA files before it is appended to a master dataframe with both instruments' measurements present

### 2019-06-27 / 2019-06-28

I'll try to incorporate the processing of data to the final mass density measurements before creating the data frame and saving to .csv 

In [6]:
# After creating the dataframes for the given day, I need to calculate the densities and store in one dataframe. 
# This is done in two functions below:

def df_make(path, vrs):
    # Open File
    cdf = cdflib.CDF(path)
    # Create a pandas dataframe from the data in the .cdf file:
    data_arrs = [cdf.varget(variable = var) for var in vrs]
    data_reshp = reshape_fn(data_arrs)
    data = np.hstack(data_reshp)
    df = pd.DataFrame(data)
    # Label columns
    df.columns = vrs
    # Convert any unreasonable values to NaN's and ensure all numbers are converted to floats
    df[df < -1.000000e+30] = None
    conv_float = lambda x: float(x)
    df = df.applymap(conv_float)
    # Round Epoch times to the nearest second:
    epoch_round = lambda x: round(x/1000)*1000
    df['Epoch'] = df['Epoch'].apply(epoch_round)
    return df

def rpa_nacs_comb_day(path_rpa, path_nacs, vrs_rpa, vrs_nacs):
    
    # Make dfs:
    df_nacs = df_make(path_rpa, vrs_rpa)
    df_rpa = df_make(path_nacs, vrs_nacs)
    
    # Find indexes of same times:
    rpa_nacs_idxs = []
    for idx1,ep1 in enumerate(df_rpa['Epoch']):
        for idx2,ep2 in enumerate(df_nacs['Epoch']):
            if (ep1 == ep2):# and (ep1 != ep0):
                rpa_nacs_idxs.append((idx1,idx2))
                #ep0 = ep2
                idx0 = idx2
                break
    # Create dataframes to store 16-sec window averages
    RPA_df = pd.DataFrame(columns=df_rpa.columns)
    NACS_df = pd.DataFrame(columns=df_nacs.columns)
    
    # Search through the list of index tuples and find only those that have the right time spans (16 sec increments)
    pi1, pi2 = rpa_nacs_idxs[0]
    idx_range = range(8,len(rpa_nacs_idxs),8)
            
    # Run through the results to see whether they meet the 16-sec window criterion, calculate the mean if so:
    for i,v in enumerate(rpa_nacs_idxs[8::8]):
        i1, i2 = v
        if (i1 - pi1 == 16) and (i2 - pi2 == 16):
            r_means = {}
            n_means = {}
            for var in df_rpa.columns:
                r_means[var] = df_rpa[rpa_nacs_idxs[(i-1)*8][0]:rpa_nacs_idxs[i*8][0]][var].mean()
            for var in df_nacs.columns:
                n_means[var] = df_nacs[rpa_nacs_idxs[(i-1)*8][1]:rpa_nacs_idxs[i*8][1]][var].mean()
            RPA_df = RPA_df.append(r_means, ignore_index=True)
            NACS_df = NACS_df.append(n_means, ignore_index=True)
        pi1, pi2 = i1, i2    
    
        #   if i == 0:
        #   if (rpa_nacs_idxs[8][0] - rpa_nacs_idxs[0][0] == 16) and (rpa_nacs_idxs[8][1] - rpa_nacs_idxs[0][1] == 16):
        #       r_means = {}
        #       n_means = {}
        #       for var in df_rpa.columns:
        #           r_means[var] = df_rpa[rpa_nacs_idxs[0][0]:rpa_nacs_idxs[8][0]][var].mean()
        #       for var in df_nacs.columns:
        #           n_means[var] = df_nacs[rpa_nacs_idxs[0][1]:rpa_nacs_idxs[8][1]][var].mean()
        #       RPA_df = RPA_df.append(r_means, ignore_index=True)
        #       NACS_df = NACS_df.append(n_means, ignore_index=True)
        #else:
    
    
    # One atomic mass unit in kilograms
    amukg = 1.66054e-27

    # Create a list of AMU for each atomic or molecular species in the var list we have:
    amus_nacs = [16,28,4,14,40]

    # Total density of neutrals in kg.m^-3 for NACS instrument
    tot_den_nacs = np.zeros((len(NACS_df),1))
    for idx in range(0,len(NACS_df)):
        tot_den = 0.0
        for i,var in enumerate(df_nacs.columns[1:6]):
            den = NACS_df.iloc[idx][var]
            if np.isnan(den):
                pass
            else:
                tot_den += NACS_df.iloc[idx][var]*1e6*amus_nacs[i]*amukg
        tot_den_nacs[idx] = tot_den
    try:
        NACS_df['total_mass_den'] = tot_den_nacs
    except:
        
        print(tot_den_nacs)
    # Create a list of AMU for each atomic or molecular species in the var list we have:
    amus_rpa = [16,1,4,30] # Molecular ion taken as the average of N2, O2 and NO (assuming equal proportions)

    # Total density of ions in kg.m^-3 for PRA instrument
    tot_den_rpa = np.zeros((len(RPA_df),1))
    for idx in range(0,len(RPA_df)):
        tot_den = 0.0
        for i,var in enumerate(df_rpa.columns[1:5]):
            den = RPA_df.iloc[idx][var]
            if np.isnan(den):
                pass
            else:
                tot_den += RPA_df.iloc[idx][var]*1e6*amus_rpa[i]*amukg
        tot_den_rpa[idx] = tot_den

    RPA_df['total_mass_den'] = (tot_den_rpa)

    pos_vars = ['Epoch']
    for el in nacs_var_list[6:9]:
        pos_vars.append(el)
    pos_vars.append('total_mass_den')

    df_tot_day = pd.DataFrame()

    for var in pos_vars:
        if var == 'total_mass_den':
            df_tot_day[var] = (RPA_df[var] + NACS_df[var])
            df_tot_day['total_neutral_den'] = NACS_df[var]
            df_tot_day['total_ion_den'] = RPA_df[var]
        else:
            df_tot_day[var] = (RPA_df[var] + NACS_df[var]) / 2

    # Convert glat and glon to radians:
    df_tot_day['glat'] = df_tot_day['glat'] * (np.pi/180)
    df_tot_day['glon'] = df_tot_day['glon'] * (np.pi/180)

    dat_list = []
    for i,epoch in enumerate(df_tot_day['Epoch']):
        if pd.isnull(epoch):
            print(f'This value {epoch} is null. This is record number {i} in the file {path_rpa} or {path_nacs}')
        dat_list.append(pd.to_datetime(cdflib.cdfepoch.encode(epoch), infer_datetime_format=True))

    d_t = pd.DatetimeIndex(dat_list)
    df_tot_day['datetime'] = d_t
    df_tot_day['year'] = d_t.year
    df_tot_day['doy'] = d_t.dayofyear
    df_tot_day['d_hr'] = d_t.hour + d_t.minute/60 + d_t.second/3600
    return df_tot_day

In [7]:
# Create list of the variables of interest and create an array that contains the relevant data:
rpa_var_list = [
    'Epoch',
    'O',
    'H',
    'He',
    'molecularIons',
    'glat',
    'glon',
    'alt'
]

nacs_var_list = [
 'Epoch',
 'O_density',
 'N2_density',
 'He_density',
 'N_density',
 'Ar_density',
 'alt',
 'glat',
 'glon',
]

In [8]:
# Run through all files and save the daily total density values for 

all_var_list = [
    'Epoch',
    'alt',
    'glat',
    'glon',
    'total_mass_den',
    'total_neutral_den',
    'total_ion_den',
    'datetime',
    'year',
    'doy',
    'd_hr'
]

df_all = pd.DataFrame(columns = all_var_list)

#for rpa_path, nacs_path in zip(RPA_cdf_good, NACS_cdf_good):
#    df_tot_day = rpa_nacs_comb_day(rpa_path, nacs_path, rpa_var_list, nacs_var_list)
#    df_all.append(df_tot_day, ignore_index=True)

In [9]:
#out_path = Path(Path.cwd(),'DE2_Tot_Den_All_Data.csv')
#df_nacs.to_csv(out_path,index=False)

In [10]:
np.nan

nan

In [11]:
pd.isnull(np.nan)

True

In [12]:
rpa = df_make(RPA_cdf_good[15],rpa_var_list)
nacs = df_make(NACS_cdf_good[15],nacs_var_list)

In [13]:
rpa.head(15)

Unnamed: 0,Epoch,O,H,He,molecularIons,glat,glon,alt
0,62535813280000,140556.0,,,,-33.360001,126.940002,815.590027
1,62535813282000,140670.0,,,,-33.240002,126.93,814.890015
2,62535813284000,30679.0,40362.0,17398.0,,-33.130001,126.93,814.200012
3,62535813290000,145175.0,,,,-32.77,126.900002,812.130005
4,62535813291000,146130.0,,,,-32.709999,126.900002,811.780029
5,62535813294000,146372.0,,,,-32.540001,126.879997,810.73999
6,62535813295000,147044.0,,,,-32.48,126.879997,810.400024
7,62535813298000,146931.0,,,,-32.299999,126.870003,809.349976
8,62535813299000,148248.0,,,,-32.240002,126.860001,809.01001
9,62535813302000,148709.0,,,,-32.07,126.849998,807.960022


In [14]:
nacs.head(15)

Unnamed: 0,Epoch,O_density,N2_density,He_density,N_density,Ar_density,alt,glat,glon
0,62535813172000,3364367.0,0.0,0.0,0.0,0.0,851.460022,-39.630001,127.379997
1,62535813173000,3488655.0,0.0,0.0,0.0,0.0,851.140015,-39.580002,127.379997
2,62535813174000,3347600.0,0.0,0.0,0.0,0.0,850.820007,-39.52,127.370003
3,62535813175000,3386703.0,0.0,0.0,0.0,0.0,850.48999,-39.459999,127.370003
4,62535813180000,3394127.0,0.0,0.0,0.0,0.0,848.869995,-39.16,127.349998
5,62535813181000,3449103.0,0.0,0.0,0.0,0.0,848.549988,-39.110001,127.349998
6,62535813182000,3438863.0,0.0,0.0,0.0,0.0,848.219971,-39.049999,127.339996
7,62535813183000,3489551.0,0.0,0.0,0.0,0.0,847.900024,-38.990002,127.339996
8,62535813188000,3526799.0,0.0,0.0,0.0,0.0,846.27002,-38.689999,127.32
9,62535813189000,3459343.0,0.0,0.0,0.0,0.0,845.940002,-38.639999,127.309998


In [15]:
A = rpa_nacs_comb_day(RPA_cdf_good[400], NACS_cdf_good[400], rpa_var_list, nacs_var_list)

[]


ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

In [None]:
rpa = df_make(RPA_cdf_good[400],rpa_var_list)
nacs = df_make(NACS_cdf_good[400],nacs_var_list)

In [None]:
rpa.head(5)

In [None]:
nacs.head(5)

In [None]:
rpa_nacs_idxs = []
ep0 = 0
for idx1,ep1 in enumerate(rpa['Epoch']):
    for idx2,ep2 in enumerate(nacs['Epoch']):
        if (ep1 == ep2) and (ep1 != ep0):
            rpa_nacs_idxs.append((idx1,idx2))
            ep0 = ep2
            idx0 = idx2
            break

In [None]:
rpa['Epoch'][rpa_nacs_idxs[3][0]] - rpa['Epoch'][rpa_nacs_idxs[2][0]]

Based on the various things I have tried, it seems like the major problem here is that the alignment of times process is problematic. 

Rounding the times to the nearest second can create rows with the same time, if they are close enough to each other.

Also, in some files the difference between records is 2 sec, in some it is 1 sec. Therefore the number of indexes forward to use when taking the 16-sec averages will differ from file to file.


### 2019-07-01

Need to write up a search algorithm to find the number of indexes between one record and one 16-sec in the future.

This could be done at the start and then tested for the same number of places throughout the rest of the file. Would also need to find the next available record after a given 16-sec window is checked.


In [None]:
def df_make(path, vrs):
    # Open File
    cdf = cdflib.CDF(path)
    # Create a pandas dataframe from the data in the .cdf file:
    data_arrs = [cdf.varget(variable = var) for var in vrs]
    data_reshp = reshape_fn(data_arrs)
    data = np.hstack(data_reshp)
    df = pd.DataFrame(data)
    # Label columns
    df.columns = vrs
    # Convert any unreasonable values to NaN's and ensure all numbers are converted to floats
    df[df < -1.000000e+30] = None
    conv_float = lambda x: float(x)
    df = df.applymap(conv_float)
    # Round Epoch times to the nearest second:
    epoch_round = lambda x: round(x/1000)*1000
    df['Epoch'] = df['Epoch'].apply(epoch_round)
    return df

def rpa_nacs_comb_day(path_rpa, path_nacs, vrs_rpa, vrs_nacs):
    
    # Make dfs:
    df_nacs = df_make(path_rpa, vrs_rpa)
    df_rpa = df_make(path_nacs, vrs_nacs)
    
    # Find indexes of same times:
    rpa_nacs_idxs = []
    for idx1,ep1 in enumerate(df_rpa['Epoch']):
        for idx2,ep2 in enumerate(df_nacs['Epoch']):
            if (ep1 == ep2) and (ep1 != ep0):
                rpa_nacs_idxs.append((idx1,idx2))
                ep0 = ep2
                idx0 = idx2
                break
    # Create dataframes to store 16-sec window averages
    RPA_df = pd.DataFrame(columns=df_rpa.columns)
    NACS_df = pd.DataFrame(columns=df_nacs.columns)
    
    # Search through the list of index tuples and find only those that have the right time spans (16 sec increments)
    for i in range(0,20):
        if RPA_df['Epoch'][i] - RPA_df['Epoch'][0] == 16000.0:
            
        
    
    pi1, pi2 = rpa_nacs_idxs[0]
    idx_range = range(8,len(rpa_nacs_idxs),8)
            
    # Run through the results to see whether they meet the 16-sec window criterion, calculate the mean if so:
    for i,v in enumerate(rpa_nacs_idxs[8::8]):
        i1, i2 = v
        if (i1 - pi1 == 16) and (i2 - pi2 == 16):
            r_means = {}
            n_means = {}
            for var in df_rpa.columns:
                r_means[var] = df_rpa[rpa_nacs_idxs[(i-1)*8][0]:rpa_nacs_idxs[i*8][0]][var].mean()
            for var in df_nacs.columns:
                n_means[var] = df_nacs[rpa_nacs_idxs[(i-1)*8][1]:rpa_nacs_idxs[i*8][1]][var].mean()
            RPA_df = RPA_df.append(r_means, ignore_index=True)
            NACS_df = NACS_df.append(n_means, ignore_index=True)
        pi1, pi2 = i1, i2    
    
        #   if i == 0:
        #   if (rpa_nacs_idxs[8][0] - rpa_nacs_idxs[0][0] == 16) and (rpa_nacs_idxs[8][1] - rpa_nacs_idxs[0][1] == 16):
        #       r_means = {}
        #       n_means = {}
        #       for var in df_rpa.columns:
        #           r_means[var] = df_rpa[rpa_nacs_idxs[0][0]:rpa_nacs_idxs[8][0]][var].mean()
        #       for var in df_nacs.columns:
        #           n_means[var] = df_nacs[rpa_nacs_idxs[0][1]:rpa_nacs_idxs[8][1]][var].mean()
        #       RPA_df = RPA_df.append(r_means, ignore_index=True)
        #       NACS_df = NACS_df.append(n_means, ignore_index=True)
        #else:
    
    
    # One atomic mass unit in kilograms
    amukg = 1.66054e-27

    # Create a list of AMU for each atomic or molecular species in the var list we have:
    amus_nacs = [16,28,4,14,40]

    # Total density of neutrals in kg.m^-3 for NACS instrument
    tot_den_nacs = np.zeros((len(NACS_df),1))
    for idx in range(0,len(NACS_df)):
        tot_den = 0.0
        for i,var in enumerate(df_nacs.columns[1:6]):
            den = NACS_df.iloc[idx][var]
            if np.isnan(den):
                pass
            else:
                tot_den += NACS_df.iloc[idx][var]*1e6*amus_nacs[i]*amukg
        tot_den_nacs[idx] = tot_den
    try:
        NACS_df['total_mass_den'] = tot_den_nacs
    except:
        
        print(tot_den_nacs)
    # Create a list of AMU for each atomic or molecular species in the var list we have:
    amus_rpa = [16,1,4,30] # Molecular ion taken as the average of N2, O2 and NO (assuming equal proportions)

    # Total density of ions in kg.m^-3 for PRA instrument
    tot_den_rpa = np.zeros((len(RPA_df),1))
    for idx in range(0,len(RPA_df)):
        tot_den = 0.0
        for i,var in enumerate(df_rpa.columns[1:5]):
            den = RPA_df.iloc[idx][var]
            if np.isnan(den):
                pass
            else:
                tot_den += RPA_df.iloc[idx][var]*1e6*amus_rpa[i]*amukg
        tot_den_rpa[idx] = tot_den

    RPA_df['total_mass_den'] = (tot_den_rpa)

    pos_vars = ['Epoch']
    for el in nacs_var_list[6:9]:
        pos_vars.append(el)
    pos_vars.append('total_mass_den')

    df_tot_day = pd.DataFrame()

    for var in pos_vars:
        if var == 'total_mass_den':
            df_tot_day[var] = (RPA_df[var] + NACS_df[var])
            df_tot_day['total_neutral_den'] = NACS_df[var]
            df_tot_day['total_ion_den'] = RPA_df[var]
        else:
            df_tot_day[var] = (RPA_df[var] + NACS_df[var]) / 2

    # Convert glat and glon to radians:
    df_tot_day['glat'] = df_tot_day['glat'] * (np.pi/180)
    df_tot_day['glon'] = df_tot_day['glon'] * (np.pi/180)

    dat_list = []
    for i,epoch in enumerate(df_tot_day['Epoch']):
        if pd.isnull(epoch):
            print(f'This value {epoch} is null. This is record number {i} in the file {path_rpa} or {path_nacs}')
        dat_list.append(pd.to_datetime(cdflib.cdfepoch.encode(epoch), infer_datetime_format=True))

    d_t = pd.DatetimeIndex(dat_list)
    df_tot_day['datetime'] = d_t
    df_tot_day['year'] = d_t.year
    df_tot_day['doy'] = d_t.dayofyear
    df_tot_day['d_hr'] = d_t.hour + d_t.minute/60 + d_t.second/3600
    return df_tot_day

In [None]:
# Mostly covered in DE2_Data_Analysis but the main investigation was:

print("For the neutrals measuring instrument, NACS, the difference between the recorded measurements are as follows:")
for i in range(1,8):
    print(f"Difference between record {i} and {i-1} is {df_nacs['Epoch'][i]-df_nacs['Epoch'][i-1]}")
print(f"Overall difference between record 0 and 7 is: {df_nacs['Epoch'][8]-df_nacs['Epoch'][0]}")
      
for i in range(0,12):
    print(f"Difference between record {i} and 0 is {df_nacs['Epoch'][i] - df_nacs['Epoch'][0]} therefore and")

### 2019-07-02

What I can do instead to simplify processing it is to just use all of the measurements that align to within 1 sec. That way I don't need to worry about using a 16-sec window finder. If necessary, I can average out the total mass density at the end. The table at the end will be larger than it would have been, but still smaller than keeping all the data as before.

In [None]:
def df_make2(path, vrs):
    # Open File
    cdf = cdflib.CDF(path)
    # Create a pandas dataframe from the data in the .cdf file:
    data_arrs = [cdf.varget(variable = var) for var in vrs]
    data_reshp = reshape_fn(data_arrs)
    data = np.hstack(data_reshp)
    df = pd.DataFrame(data)
    # Label columns
    df.columns = vrs
    # Convert any unreasonable values to NaN's and ensure all numbers are converted to floats
    df[df < -1.000000e+30] = None
    conv_float = lambda x: float(x)
    df = df.applymap(conv_float)
    # Round Epoch times to the nearest second:
    epoch_round = lambda x: round(x/1000)*1000
    df['Epoch'] = df['Epoch'].apply(epoch_round)
    return df

def rpa_nacs_comb_day2(path_rpa, path_nacs, vrs_rpa, vrs_nacs):
    
    # Make dfs:
    df_nacs = df_make2(path_rpa, vrs_rpa)
    df_rpa = df_make2(path_nacs, vrs_nacs)
    
    # Create dataframes to store only aligned records
    # Find all aligned records and add them to new data frame:
    
    RPA_df = pd.DataFrame(columns=df_rpa.columns)
    NACS_df = pd.DataFrame(columns=df_nacs.columns)
    
    pn_ep = -5
    for r_idx,r_ep in enumerate(df_rpa['Epoch']):
        #print(r_ep)
        for n_idx,n_ep in enumerate(df_nacs['Epoch']):
            #print(n_ep)
            if (r_ep == n_ep) and (r_ep != pn_ep):
                rpa_nans = 0
                nacs_nans = 0
                for var_r in rpa_var_list[1:5]:
                    if np.isnan(df_rpa.iloc[r_idx][var_r]):
                        rpa_nans += 1
                for var_n in nacs_var_list[1:6]:
                    if np.isnan(df_nacs.iloc[n_idx][var_n]):
                        nacs_nans += 1                    
                if (rpa_nans == 4) or (nacs_nans == 5):
                    break
                else:
                    RPA_df = RPA_df.append(df_rpa.iloc[r_idx])
                    NACS_df = NACS_df.append(df_nacs.iloc[n_idx])
                    pn_ep = n_ep
                    break
    
    # One atomic mass unit in kilograms
    amukg = 1.66054e-27

    # Create a list of AMU for each atomic or molecular species in the var list we have:
    amus_nacs = [16,28,4,14,40]

    # Total density of neutrals in kg.m^-3 for NACS instrument
    tot_den_nacs = np.zeros((len(NACS_df),1))
    for idx in range(0,len(NACS_df)):
        tot_den = 0.0
        for i,var in enumerate(df_nacs.columns[1:6]):
            den = NACS_df.iloc[idx][var]
            if np.isnan(den):
                pass
            else:
                tot_den += NACS_df.iloc[idx][var]*1e6*amus_nacs[i]*amukg
        tot_den_nacs[idx] = tot_den
    try:
        NACS_df['total_mass_den'] = tot_den_nacs
    except:
        print(tot_den_nacs)
    # Create a list of AMU for each atomic or molecular species in the var list we have:
    amus_rpa = [16,1,4,30] # Molecular ion taken as the average of N2, O2 and NO (assuming equal proportions)

    # Total density of ions in kg.m^-3 for PRA instrument
    tot_den_rpa = np.zeros((len(RPA_df),1))
    for idx in range(0,len(RPA_df)):
        tot_den = 0.0
        for i,var in enumerate(df_rpa.columns[1:5]):
            den = RPA_df.iloc[idx][var]
            if np.isnan(den):
                pass
            else:
                tot_den += RPA_df.iloc[idx][var]*1e6*amus_rpa[i]*amukg
        tot_den_rpa[idx] = tot_den
    try:
        RPA_df['total_mass_den'] = tot_den_rpa
    except:
        print(tot_den_rpa)
        
    pos_vars = ['Epoch']
    for el in nacs_var_list[6:9]:
        pos_vars.append(el)
    pos_vars.append('total_mass_den')

    df_tot_day = pd.DataFrame()

    for var in pos_vars:
        if var == 'total_mass_den':
            df_tot_day[var] = (RPA_df[var] + NACS_df[var])
            df_tot_day['total_neutral_den'] = NACS_df[var]
            df_tot_day['total_ion_den'] = RPA_df[var]
        else:
            df_tot_day[var] = (RPA_df[var] + NACS_df[var]) / 2

    # Convert glat and glon to radians:
    df_tot_day['glat'] = df_tot_day['glat'] * (np.pi/180)
    df_tot_day['glon'] = df_tot_day['glon'] * (np.pi/180)

    dat_list = []
    for i,epoch in enumerate(df_tot_day['Epoch']):
        if pd.isnull(epoch):
            print(f'This value {epoch} is null. This is record number {i} in the file {path_rpa} or {path_nacs}')
        else:
            dat_list.append(pd.to_datetime(cdflib.cdfepoch.encode(epoch), infer_datetime_format=True))

    d_t = pd.DatetimeIndex(dat_list)
    df_tot_day['datetime'] = d_t
    df_tot_day['year'] = d_t.year
    df_tot_day['doy'] = d_t.dayofyear
    df_tot_day['d_hr'] = d_t.hour + d_t.minute/60 + d_t.second/3600
    return df_tot_day

In [None]:
df_all = pd.DataFrame()

for rpa_path, nacs_path in zip(RPA_cdf_good, NACS_cdf_good):
    df_tot_day = rpa_nacs_comb_day2(rpa_path, nacs_path, rpa_var_list, nacs_var_list)
    df_all.append(df_tot_day, ignore_index=True)

In [None]:
DF_ERR_RPA = df_make2(Path('C://Users/Ronald Maj/Sat_data/DE2/plasma_rpa/ion2s_cdaweb/1981/de2_ion2s_rpa_19810808_v01.cdf'),rpa_var_list)
DF_ERR_NACS = df_make2(Path('C://Users/Ronald Maj/Sat_data/DE2/neutral_gas_nacs/neutral1s_nacs_cdaweb/1981/de2_neutral1s_nacs_19810808_v01.cdf'),nacs_var_list)

In [None]:
DF_ERR_RPA.head(10)

In [None]:
DF_ERR_NACS.iloc[9]['alt']

In [None]:
df_nacs = DF_ERR_NACS
df_rpa = DF_ERR_RPA

In [None]:
RPA_df = pd.DataFrame(columns=df_rpa.columns)
NACS_df = pd.DataFrame(columns=df_nacs.columns)

pn_ep = -5
for r_idx,r_ep in enumerate(df_rpa['Epoch']):
    #print(r_ep)
    for n_idx,n_ep in enumerate(df_nacs['Epoch']):
        #print(n_ep)
        if (r_ep == n_ep) and (r_ep != pn_ep):
            #print(f'Here now! {r_ep} = {n_ep} I believe')
            #print(f'This is the first value: {df_rpa.iloc[r_idx]}')
            #print(f'This is the 2nd value: {df_nacs.iloc[n_idx]}')
            RPA_df = RPA_df.append(df_rpa.iloc[r_idx])
            NACS_df = NACS_df.append(df_nacs.iloc[n_idx])
            pn_ep = n_ep
            break

In [None]:
RPA_df.head(5)

In [None]:
NACS_df.head(5)

In [None]:
RPA_df.head(5)['Epoch'][610]

In [None]:
NACS_df.head(5)['Epoch'][2398]

In [None]:
RPA_df.head(5)['Epoch'][610] == NACS_df.head(5)['Epoch'][2398]

In [None]:
# One atomic mass unit in kilograms
amukg = 1.66054e-27

# Create a list of AMU for each atomic or molecular species in the var list we have:
amus_nacs = [16,28,4,14,40]

# Total density of neutrals in kg.m^-3 for NACS instrument
tot_den_nacs = np.zeros((len(NACS_df),1))
for idx in range(0,len(NACS_df)):
    tot_den = 0.0
    for i,var in enumerate(df_nacs.columns[1:6]):
        den = NACS_df.iloc[idx][var]
        if np.isnan(den):
            pass
        else:
            tot_den += NACS_df.iloc[idx][var]*1e6*amus_nacs[i]*amukg
    tot_den_nacs[idx] = tot_den
try:
    NACS_df['total_mass_den'] = tot_den_nacs
except:
    print(tot_den_nacs)
# Create a list of AMU for each atomic or molecular species in the var list we have:
amus_rpa = [16,1,4,30] # Molecular ion taken as the average of N2, O2 and NO (assuming equal proportions)

# Total density of ions in kg.m^-3 for PRA instrument
tot_den_rpa = np.zeros((len(RPA_df),1))
for idx in range(0,len(RPA_df)):
    tot_den = 0.0
    for i,var in enumerate(df_rpa.columns[1:5]):
        den = RPA_df.iloc[idx][var]
        if np.isnan(den):
            pass
        else:
            tot_den += RPA_df.iloc[idx][var]*1e6*amus_rpa[i]*amukg
    tot_den_rpa[idx] = tot_den
try:
    RPA_df['total_mass_den'] = tot_den_rpa
except:
    print(tot_den_rpa)

In [None]:
tot_den_rpa[0:10]

In [None]:
tot_den_nacs[0:10]

In [None]:
NACS_df.head(6)

In [None]:
len(tot_den_nacs)

In [None]:
pos_vars = ['Epoch']
for el in nacs_var_list[6:9]:
    pos_vars.append(el)
pos_vars.append('total_mass_den')

In [None]:
pos_vars

In [None]:
df_tot_day = pd.DataFrame()

for var in pos_vars:
    print(var)
    if var == 'total_mass_den':
        #print(f'RPA length of values:{len(RPA_df[var].values)}')
        #print(f'NACS values:{len(NACS_df[var].values)}')
        df_tot_day[var] = (RPA_df[var].values + NACS_df[var].values)
        #print(df_tot_day[var])
        df_tot_day['total_neutral_den'] = NACS_df[var]
        df_tot_day['total_ion_den'] = RPA_df[var]
    else:
        df_tot_day[var] = (RPA_df[var] + NACS_df[var]) / 2
        print(len(df_tot_day))

In [None]:
df_tot_day.head(5)

In [None]:
rpa_var_list[1:5]

In [None]:
nacs_var_list[1:6]

### 2019-07-03

So the problem from yesterday was that (I think) there were cases where there were no measurements from any one of the species for a given instrument. In those cases the total mass density was not being recorded and the number of total mass densities was not equal to the number of epoch records.

So now need to figure out how to fix that problem

In [None]:
len(RPA_df)

In [None]:
len(tot_den_rpa)

So the length of the original dataframe is in fact 260, but when the new one is made, it is 520 - double.

There must be a problem with double assignment

In [None]:
df_tot_day = pd.DataFrame()

for var in pos_vars:
    print(var)
    if var == 'total_mass_den':
        print(f'RPA length of values:{len(RPA_df[var].values)}')
        print(f'NACS values:{len(NACS_df[var].values)}')
        df_tot_day[var] = (RPA_df[var].values + NACS_df[var].values)
        print(df_tot_day[var])
        df_tot_day['total_neutral_den'] = NACS_df[var]
        df_tot_day['total_ion_den'] = RPA_df[var]
    else:
        df_tot_day[var] = (RPA_df[var] + NACS_df[var]) / 2
        print(len(df_tot_day))

In [None]:
len(RPA_df['alt'].values)

In [None]:
pos_vars

In [None]:
for var in pos_vars:
    print(len(RPA_df[var].values))

In [None]:
df_tot_day = pd.DataFrame()
print(len(df_tot_day))

for var in pos_vars:
    print(var)
    print(len(RPA_df[var].values))
    if var == 'total_mass_den':
        print(f'RPA length of values:{len(RPA_df[var].values)}')
        print(f'NACS values:{len(NACS_df[var].values)}')
        df_tot_day[var] = (RPA_df[var].values + NACS_df[var].values)
        print(df_tot_day[var])
        df_tot_day['total_neutral_den'] = NACS_df[var]
        df_tot_day['total_ion_den'] = RPA_df[var]
    else:
        df_tot_day[var] = (RPA_df[var].values + NACS_df[var].values) / 2
        print(len(df_tot_day))

Got it - it was the fact that the values were not being referenced. The dataframe assignment was creating an index value and because they did not match up, they were not being added together properly. Instead a new dataframe was created with the neutral and ion records separate.

In [None]:
for r_idx,r_ep in enumerate(df_rpa['Epoch']):
    for var in rpa_var_list[1:5]:
        if var == 'O':
            print(np.isnan(df_rpa.iloc[r_idx][var]))

#df_rpa.iloc[1000]['O']

In [None]:
def df_make2(path, vrs):
    # Open File
    cdf = cdflib.CDF(path)
    # Create a pandas dataframe from the data in the .cdf file:
    data_arrs = [cdf.varget(variable = var) for var in vrs]
    data_reshp = reshape_fn(data_arrs)
    data = np.hstack(data_reshp)
    df = pd.DataFrame(data)
    # Label columns
    df.columns = vrs
    # Convert any unreasonable values to NaN's and ensure all numbers are converted to floats
    df[df < -1.000000e+30] = None
    conv_float = lambda x: float(x)
    df = df.applymap(conv_float)
    # Round Epoch times to the nearest second:
    epoch_round = lambda x: round(x/1000)*1000
    df['Epoch'] = df['Epoch'].apply(epoch_round)
    return df

def rpa_nacs_comb_day2(path_rpa, path_nacs, vrs_rpa, vrs_nacs):
    
    # Make dfs:
    df_rpa = df_make2(path_rpa, vrs_rpa)
    df_nacs = df_make2(path_nacs, vrs_nacs)
    
    # Create dataframes to store only aligned records
    # Find all aligned records and add them to new data frame:
    
    RPA_df = pd.DataFrame(columns=df_rpa.columns)
    NACS_df = pd.DataFrame(columns=df_nacs.columns)
    
    pn_ep = -5
    
    for r_idx,r_ep in enumerate(df_rpa['Epoch']):
        if (not df_nacs[df_nacs['Epoch'] == r_ep].index.empty) and (r_ep != pn_ep):
            n_idx = df_nacs[df_nacs['Epoch'] == r_ep].index[0]
            rpa_nans = 0
            nacs_nans = 0
            for var_r in rpa_var_list[1:5]:
                if np.isnan(df_rpa.iloc[r_idx][var_r]):
                    rpa_nans += 1
            for var_n in nacs_var_list[1:6]:
                if np.isnan(df_nacs.iloc[n_idx][var_n]):
                    nacs_nans += 1                    
            if (rpa_nans == 4) or (nacs_nans == 5):
                break
            else:
                RPA_df = RPA_df.append(df_rpa.iloc[r_idx])
                NACS_df = NACS_df.append(df_nacs.iloc[n_idx])
                pn_ep = n_ep
                break
    
    # One atomic mass unit in kilograms
    amukg = 1.66054e-27

    # Create a list of AMU for each atomic or molecular species in the var list we have:
    amus_nacs = [16,28,4,14,40]

    # Total density of neutrals in kg.m^-3 for NACS instrument
    tot_den_nacs = np.zeros((len(NACS_df),1))
    for idx in range(0,len(NACS_df)):
        tot_den = 0.0
        for i,var in enumerate(df_nacs.columns[1:6]):
            den = NACS_df.iloc[idx][var]
            if np.isnan(den):
                pass
            else:
                tot_den += NACS_df.iloc[idx][var]*1e6*amus_nacs[i]*amukg
        tot_den_nacs[idx] = tot_den
    try:
        NACS_df['total_mass_den'] = tot_den_nacs
    except:
        print(NACS_df)
        print(f'in the file {path_rpa} or {path_nacs}')
        
    # Create a list of AMU for each atomic or molecular species in the var list we have:
    amus_rpa = [16,1,4,30] # Molecular ion taken as the average of N2, O2 and NO (assuming equal proportions)

    # Total density of ions in kg.m^-3 for PRA instrument
    tot_den_rpa = np.zeros((len(RPA_df),1))
    for idx in range(0,len(RPA_df)):
        tot_den = 0.0
        for i,var in enumerate(df_rpa.columns[1:5]):
            den = RPA_df.iloc[idx][var]
            if np.isnan(den):
                pass
            else:
                tot_den += RPA_df.iloc[idx][var]*1e6*amus_rpa[i]*amukg
        tot_den_rpa[idx] = tot_den
    try:
        RPA_df['total_mass_den'] = tot_den_rpa
    except:
        print(tot_den_rpa)
        
    pos_vars = ['Epoch']
    for el in nacs_var_list[6:9]:
        pos_vars.append(el)
    pos_vars.append('total_mass_den')

    df_tot_day = pd.DataFrame()

    for var in pos_vars:
        if var == 'total_mass_den':
            df_tot_day[var] = (RPA_df[var].values + NACS_df[var].values)
            df_tot_day['total_neutral_den'] = NACS_df[var].values
            df_tot_day['total_ion_den'] = RPA_df[var].values
        else:
            df_tot_day[var] = (RPA_df[var].values + NACS_df[var].values) / 2

    # Convert glat and glon to radians:
    df_tot_day['glat'] = df_tot_day['glat'] * (np.pi/180)
    df_tot_day['glon'] = df_tot_day['glon'] * (np.pi/180)

    dat_list = []
    for i,epoch in enumerate(df_tot_day['Epoch']):
        if pd.isnull(epoch):
            print(f'This value {epoch} is null. This is record number {i} in the file {path_rpa} or {path_nacs}')
        else:
            dat_list.append(pd.to_datetime(cdflib.cdfepoch.encode(epoch), infer_datetime_format=True))

    d_t = pd.DatetimeIndex(dat_list)
    df_tot_day['datetime'] = d_t
    df_tot_day['year'] = d_t.year
    df_tot_day['doy'] = d_t.dayofyear
    df_tot_day['d_hr'] = d_t.hour + d_t.minute/60 + d_t.second/3600
    return df_tot_day

In [None]:
df_all = pd.DataFrame()

for rpa_path, nacs_path in zip(RPA_cdf_good, NACS_cdf_good):
    if RPA_cdf_good.index(rpa_path) < 100:
        continue
    else:
        if RPA_cdf_good.index(rpa_path)%10 == 0:
            print(f'Processing file {RPA_cdf_good.index(rpa_path)} of {len(RPA_cdf_good)}')
        df_tot_day = rpa_nacs_comb_day2(rpa_path, nacs_path, rpa_var_list, nacs_var_list)
        df_all = df_all.append(df_tot_day, ignore_index=True)

In [None]:
err_cdf_rpa = Path(Path.cwd(),'plasma_rpa/ion2s_cdaweb/1982/de2_ion2s_rpa_19820305_v01.cdf')
err_cdf_nacs = Path(Path.cwd(),'neutral_gas_nacs/neutral1s_nacs_cdaweb/1982/de2_neutral1s_nacs_19820305_v01.cdf')

df_rpa = df_make2(err_cdf_rpa, rpa_var_list)
df_nacs = df_make2(err_cdf_nacs, nacs_var_list)

In [None]:
len(df_nacs)

In [None]:
len(df_rpa)

In [None]:
    RPA_df = pd.DataFrame(columns=df_rpa.columns)
    NACS_df = pd.DataFrame(columns=df_nacs.columns)
    
    pn_ep = -5
    
    for r_idx,r_ep in enumerate(df_rpa['Epoch']):
        if (not df_nacs[df_nacs['Epoch'] == r_ep].index.empty) and (r_ep != pn_ep):
            n_idx = df_nacs[df_nacs['Epoch'] == r_ep].index[0]
            rpa_nans = 0
            nacs_nans = 0
            for var_r in rpa_var_list[1:5]:
                if np.isnan(df_rpa.iloc[r_idx][var_r]):
                    rpa_nans += 1
            for var_n in nacs_var_list[1:6]:
                if np.isnan(df_nacs.iloc[n_idx][var_n]):
                    nacs_nans += 1                    
            if (rpa_nans == 4) or (nacs_nans == 5):
                break
            else:
                RPA_df = RPA_df.append(df_rpa.iloc[r_idx])
                NACS_df = NACS_df.append(df_nacs.iloc[n_idx])
                pn_ep = n_ep
                break

In [None]:
min(df_nacs['Epoch'])

In [None]:
max(df_rpa['Epoch']) - min(df_nacs['Epoch'])

Difference the maximum of rpa and minimum of nacs is positive, therefore there should be time that overlaps.

### 2019-07-04

Increased the speed of searching through the dataframe to find same time epoch yesterday, but now need to solve the new issue of the empty dataframe

In [None]:
RPA_df

In [None]:
NACS_df

In [None]:
df_nacs[df_nacs['Epoch'] == r_ep].index[0]

In [None]:
r_ep

In [None]:
r_idx

In [None]:
df_rpa.iloc[r_idx]

In [None]:
df_rpa[np.isnan(df_rpa['O'])]

The entire for-loop may be breaking because the first record doesn't have any ion measurement present.

Therefore instead of 'break' need to have a 'continue' statement.

In [None]:
RPA_df = pd.DataFrame(columns=df_rpa.columns)
NACS_df = pd.DataFrame(columns=df_nacs.columns)

pn_ep = -5

for r_idx,r_ep in enumerate(df_rpa['Epoch']):
    if (not df_nacs[df_nacs['Epoch'] == r_ep].index.empty) and (r_ep != pn_ep):
        n_idx = df_nacs[df_nacs['Epoch'] == r_ep].index[0]
        rpa_nans = 0
        nacs_nans = 0
        for var_r in rpa_var_list[1:5]:
            if np.isnan(df_rpa.iloc[r_idx][var_r]):
                rpa_nans += 1
        for var_n in nacs_var_list[1:6]:
            if np.isnan(df_nacs.iloc[n_idx][var_n]):
                nacs_nans += 1                    
        if (rpa_nans == 4) or (nacs_nans == 5):
            continue
        else:
            RPA_df = RPA_df.append(df_rpa.iloc[r_idx])
            NACS_df = NACS_df.append(df_nacs.iloc[n_idx])
            pn_ep = n_ep
            continue

In [None]:
len(RPA_df)

Now that that works, can re-run the files from before.

The files will probably take longer to run now but there should be no prematurely finished files. At the moment the process of looking through the file would end after a case of no ion or no neutral densities was reached.

In [18]:
def df_make2(path, vrs):
    # Open File
    cdf = cdflib.CDF(path)
    # Create a pandas dataframe from the data in the .cdf file:
    data_arrs = [cdf.varget(variable = var) for var in vrs]
    data_reshp = reshape_fn(data_arrs)
    data = np.hstack(data_reshp)
    df = pd.DataFrame(data)
    # Label columns
    df.columns = vrs
    # Convert any unreasonable values to NaN's and ensure all numbers are converted to floats
    df[df < -1.000000e+30] = None
    conv_float = lambda x: float(x)
    df = df.applymap(conv_float)
    # Round Epoch times to the nearest second:
    epoch_round = lambda x: round(x/1000)*1000
    df['Epoch'] = df['Epoch'].apply(epoch_round)
    return df

def rpa_nacs_comb_day2(path_rpa, path_nacs, vrs_rpa, vrs_nacs):
    
    # Make dfs:
    df_rpa = df_make2(path_rpa, vrs_rpa)
    df_nacs = df_make2(path_nacs, vrs_nacs)
    
    # Create dataframes to store only aligned records
    # Find all aligned records and add them to new data frame:
    
    RPA_df = pd.DataFrame(columns=df_rpa.columns)
    NACS_df = pd.DataFrame(columns=df_nacs.columns)
    
    pn_ep = -5
    
    for r_idx,r_ep in enumerate(df_rpa['Epoch']):
        if (not df_nacs[df_nacs['Epoch'] == r_ep].index.empty) and (r_ep != pn_ep):
            n_idx = df_nacs[df_nacs['Epoch'] == r_ep].index[0]
            rpa_nans = 0
            nacs_nans = 0
            for var_r in rpa_var_list[1:5]:
                if np.isnan(df_rpa.iloc[r_idx][var_r]):
                    rpa_nans += 1
            for var_n in nacs_var_list[1:6]:
                if np.isnan(df_nacs.iloc[n_idx][var_n]):
                    nacs_nans += 1                    
            if (rpa_nans == 4) or (nacs_nans == 5):
                continue
            else:
                RPA_df = RPA_df.append(df_rpa.iloc[r_idx])
                NACS_df = NACS_df.append(df_nacs.iloc[n_idx])
                pn_ep = df_nacs.iloc[n_idx]['Epoch']
    
    # One atomic mass unit in kilograms
    amukg = 1.66054e-27

    # Create a list of AMU for each atomic or molecular species in the var list we have:
    amus_nacs = [16,28,4,14,40]

    # Total density of neutrals in kg.m^-3 for NACS instrument
    tot_den_nacs = np.zeros((len(NACS_df),1))
    for idx in range(0,len(NACS_df)):
        tot_den = 0.0
        for i,var in enumerate(df_nacs.columns[1:6]):
            den = NACS_df.iloc[idx][var]
            if np.isnan(den):
                pass
            else:
                tot_den += NACS_df.iloc[idx][var]*1e6*amus_nacs[i]*amukg
        tot_den_nacs[idx] = tot_den
    try:
        NACS_df['total_mass_den'] = tot_den_nacs
    except:
        print(NACS_df)
        print(f'in the file {path_rpa} or {path_nacs}')
        
    # Create a list of AMU for each atomic or molecular species in the var list we have:
    amus_rpa = [16,1,4,30] # Molecular ion taken as the average of N2, O2 and NO (assuming equal proportions)

    # Total density of ions in kg.m^-3 for PRA instrument
    tot_den_rpa = np.zeros((len(RPA_df),1))
    for idx in range(0,len(RPA_df)):
        tot_den = 0.0
        for i,var in enumerate(df_rpa.columns[1:5]):
            den = RPA_df.iloc[idx][var]
            if np.isnan(den):
                pass
            else:
                tot_den += RPA_df.iloc[idx][var]*1e6*amus_rpa[i]*amukg
        tot_den_rpa[idx] = tot_den
    try:
        RPA_df['total_mass_den'] = tot_den_rpa
    except:
        print(tot_den_rpa)
        
    pos_vars = ['Epoch']
    for el in nacs_var_list[6:9]:
        pos_vars.append(el)
    pos_vars.append('total_mass_den')

    df_tot_day = pd.DataFrame()

    for var in pos_vars:
        if var == 'total_mass_den':
            df_tot_day[var] = (RPA_df[var].values + NACS_df[var].values)
            df_tot_day['total_neutral_den'] = NACS_df[var].values
            df_tot_day['total_ion_den'] = RPA_df[var].values
        else:
            df_tot_day[var] = (RPA_df[var].values + NACS_df[var].values) / 2

    # Convert glat and glon to radians:
    df_tot_day['glat'] = df_tot_day['glat'] * (np.pi/180)
    df_tot_day['glon'] = df_tot_day['glon'] * (np.pi/180)

    dat_list = []
    for i,epoch in enumerate(df_tot_day['Epoch']):
        if pd.isnull(epoch):
            print(f'This value {epoch} is null. This is record number {i} in the file {path_rpa} or {path_nacs}')
        else:
            dat_list.append(pd.to_datetime(cdflib.cdfepoch.encode(epoch), infer_datetime_format=True))

    d_t = pd.DatetimeIndex(dat_list)
    df_tot_day['datetime'] = d_t
    df_tot_day['year'] = d_t.year
    df_tot_day['doy'] = d_t.dayofyear
    df_tot_day['d_hr'] = d_t.hour + d_t.minute/60 + d_t.second/3600
    return df_tot_day

In [19]:
df_all = pd.DataFrame()

for rpa_path, nacs_path in zip(RPA_cdf_good, NACS_cdf_good):
    if RPA_cdf_good.index(rpa_path)%10 == 0:
        print(f'Processing file {RPA_cdf_good.index(rpa_path)} of {len(RPA_cdf_good)}')
    df_tot_day = rpa_nacs_comb_day2(rpa_path, nacs_path, rpa_var_list, nacs_var_list)
    df_all = df_all.append(df_tot_day, ignore_index=True)

Processing file 0 of 444
Processing file 10 of 444
Processing file 20 of 444
Processing file 30 of 444
Processing file 40 of 444
Processing file 50 of 444
Processing file 60 of 444
Processing file 70 of 444
Processing file 80 of 444
Processing file 90 of 444
Processing file 100 of 444
Processing file 110 of 444
Processing file 120 of 444
Processing file 130 of 444
Processing file 140 of 444
Processing file 150 of 444
Processing file 160 of 444
Processing file 170 of 444
Processing file 180 of 444
Processing file 190 of 444
Processing file 200 of 444
Processing file 210 of 444
Processing file 220 of 444
Processing file 230 of 444
Processing file 240 of 444
Processing file 250 of 444
Processing file 260 of 444
Processing file 270 of 444
Processing file 280 of 444
Processing file 290 of 444
Processing file 300 of 444
Processing file 310 of 444
Processing file 320 of 444
Processing file 330 of 444
Processing file 340 of 444
Processing file 350 of 444
Processing file 360 of 444
Processing f

In [20]:
#df_all.to_csv('All_Data_DE2_Ion_Neturals.csv')

In [21]:
len(df_all)

4465337