In [None]:
import numpy as np
import pandas as pd
import os
from os import listdir
from os.path import isfile, join
import fnmatch
import re

'''
Open and process COMSOL data files. Save to one large dataframe.

DF columns:
positionIdx, freq(MHz), Ex_near, Ey_near, Ez_near, Enorm_near, Ex_all, Ey_all, Ez_all, Enorm_all, LPV

Rows:
one row per (position, frequency) pair

shape = (positions * frequency steps) x (11)
size = (2 + (10*4)) bytes * nPositions * nFrequencies   (one int16 column + ten float32 columns)
18 positions, 1000 frequencies: size = 0.76 MB

Also save text file with look-up table of
positionIdx, X, Y, Z, Alpha, Beta, Gamma
(TODO: the look-up table is not written by this cell yet)
'''

# Folder holding the COMSOL text exports. Paths are built later by plain string
# concatenation (directory + fileName), so the trailing `/` is required.
directory = '../dipoleMoveInBox_AfCalc_1.10.32/data/empty/' #include trailing `/`

#get list of regular files in directory (sub-directories are skipped)
files = [f for f in listdir(directory) if isfile(join(directory, f))]
assert files != [], f"empty list in {directory}"

#generate list of file names that match pattern strings
#assumes order of "near, all, lpv"!!!!
#Each pattern must match exactly one file (case-insensitive, anchored at the start
#of the name by re.match). A pattern with zero or several matches is reported and
#skipped, so fileNames may end up shorter than pattern_strs.
pattern_strs    = ['.*allE.*Near.*','.*allE.*all.*', '.*LPV.*']
fileNames       = []
for pattern_str in pattern_strs:
    pattern     = re.compile(pattern_str, re.IGNORECASE)
    matches     = [s for s in files if pattern.match(s)]
    # explicit check instead of try/assert/except: asserts vanish under `python -O`
    # and a bare except would also hide unrelated errors
    if len(matches) != 1:
        print(f"No files or multple files match ' {pattern_str}. Check directory {directory}\n")
        continue
    fileNames.append(matches[0])
assert len(fileNames) != 0, f'no files in {directory} match'

print(f'{len(fileNames)} File names will be used to create DF:')
for file in fileNames:
    print(file)

dfOut = pd.DataFrame()


def _parse_comsol_header(headerLine):
    """Convert COMSOL's column-name line into a list of column names.

    The raw line carries a '%' marker and its names contain spaces
    (e.g. 'abs(emw.Ex) (V/m), Point: (...'), so it cannot be split on
    whitespace as-is. The replacements below remove those inner spaces;
    they are applied in the listed order.

    Parameters
    ----------
    headerLine : str
        The column-name line of a COMSOL text export.

    Returns
    -------
    list of str
        One whitespace-free name per column.
    """
    replacements = [
        ('%', ''),
        (' (cm)', '(cm)'),
        (' (MHz)', '(MHz)'),
        # field headers must start with 'E<i>' so they can be selected by prefix later
        ('abs(emw.Ex) (V/m), Point: (', ' Ex_atPoint('),
        ('abs(emw.Ey) (V/m), Point: (', ' Ey_atPoint('),
        ('abs(emw.Ez) (V/m), Point: (', ' Ez_atPoint('),
        ('abs(emw.normE) (V/m), Point: (', ' Enorm_atPoint('),
        (', ', ','),
        ('(V)', ''),
    ]
    for old, new in replacements:
        headerLine = headerLine.replace(old, new)
    return headerLine.split()


def _single_column(df, prefix, fileName):
    """Return, as a Series, the one column of `df` whose name starts with `prefix`.

    Parameters
    ----------
    df : pandas.DataFrame
        Parsed contents of one data file.
    prefix : str
        Prefix the wanted column name must start with.
    fileName : str
        Only used to make the error message useful.

    Raises
    ------
    ValueError
        If zero or several columns start with `prefix`.
    """
    matches = [col for col in df if col.startswith(prefix)]
    if len(matches) != 1:
        raise ValueError(
            f"expected exactly one column starting with '{prefix}' in {fileName}, found {matches}")
    return df[matches[0]]


# Work around COMSOL's text export format: the column names sit on the 5th line
# (index 4) in a form that is not whitespace-delimited, so they are cleaned up
# separately and the data rows are parsed without a header.
for fileName in fileNames:
    filePath = directory + fileName

    # only the first 5 lines are needed here; read_csv below parses the data rows
    with open(filePath, 'r') as f:
        headLines = [f.readline() for _ in range(5)]
    if not headLines[4]:
        raise ValueError(f'{fileName} has fewer than 5 lines; cannot find the column-name line')
    header = _parse_comsol_header(headLines[4])

    # read the file and skip the first 5 rows, get column names from header defined above
    # (sep=r'\s+' is the documented replacement for the deprecated delim_whitespace=True)
    df = pd.read_csv(filePath, sep=r'\s+', skiprows=5, header=None)
    df.columns = header

    # columns are attached to dfOut by index, so every file must have the same number of rows;
    # otherwise pandas would silently truncate or pad with NaN
    if not dfOut.empty and len(df) != len(dfOut):
        raise ValueError(
            f'{fileName} has {len(df)} rows but previously loaded files have {len(dfOut)}')

    #overwrite position and frequency columns to make life easy
    #(each file rewrites them; they are assumed to be identical across files)
    dfOut['positionIdx'] = _single_column(df, 'position', fileName).astype(np.int16)
    dfOut['freq(MHz)'] = _single_column(df, 'freq', fileName).astype(np.float32)

    ############## Data extraction. Modify here to add new files ##############

    # file type is decided case-insensitively, matching the re.IGNORECASE file search
    lowerName = fileName.lower()

    #extract field from non LPV files
    if 'lpv' not in lowerName:
        fieldLoc = '_near' if 'near' in lowerName else '_all'
        for fieldCompStr in ('Ex', 'Ey', 'Ez', 'Enorm'):
            filter_col = [col for col in df if col.startswith(fieldCompStr)]
            if not filter_col:
                # warn but still create the column; a mean over zero columns is expected to be NaN
                print(f'no data for {fieldCompStr} in {fileName}')
            # row-wise mean over all point columns of this component (NaNs are skipped by default)
            dfOut[fieldCompStr+fieldLoc] = df[filter_col].mean(axis=1).astype(np.float32)
    #else extract LPV from LPV file
    else:
        dfOut['LPV'] = _single_column(df, 'abs(emw.Vport', fileName).astype(np.float32)

# NOTE: the file is a pickle despite the '.npy' extension; the name is kept because
# the next cell reads it back from this exact path.
dfOut.to_pickle(directory+'testDf.npy')
dfOut.head()

In [None]:
# Round-trip check: load the pickle written at the end of the previous cell.
# NOTE: unpickling can run arbitrary code, so only read files you produced yourself.
picklePath = directory + 'testDf.npy'
readDf = pd.read_pickle(picklePath)

readDf.head()


In [None]:
#note that Ex^2+Ey^2+Ez^2 != comsol Enorm^2; the histograms below show Enorm - sqrt(Ex^2+Ey^2+Ez^2)

import matplotlib.pyplot as plt
%matplotlib widget
plt.close('all')


def plot_norm_mismatch(frame, fieldLoc):
    """Histogram of Enorm minus sqrt(Ex^2 + Ey^2 + Ez^2) for one field location.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns Ex_<fieldLoc>, Ey_<fieldLoc>, Ez_<fieldLoc>
        and Enorm_<fieldLoc>.
    fieldLoc : str
        Column suffix without the underscore, e.g. 'all' or 'near'.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the histogram was drawn on.
    """
    componentNorm = (frame[f'Ex_{fieldLoc}']**2
                     + frame[f'Ey_{fieldLoc}']**2
                     + frame[f'Ez_{fieldLoc}']**2)**0.5
    mismatch = (frame[f'Enorm_{fieldLoc}'] - componentNorm).to_numpy()

    fig, ax = plt.subplots()
    ax.hist(mismatch, log=True, bins=1000)
    # labels so the two figures can be told apart when skimming the notebook
    ax.set(title=f'Enorm_{fieldLoc} vs. norm of components',
           xlabel='Enorm - sqrt(Ex^2 + Ey^2 + Ez^2) (V/m)',
           ylabel='count')
    return ax


# same order as before: first the '_all' columns, then the '_near' columns
for fieldLoc in ('all', 'near'):
    plot_norm_mismatch(dfOut, fieldLoc)



In [None]:
# Sanity check: the NaN-ignoring mean of the raw per-point values below should
# reproduce the first stored value for this quantity (presumably the first
# Ex_all entry of dfOut - confirm). The 64 values were taken from E_all Ex in file
# 18pos_250_400MHz_750freqpts_cons5.9e6_perm1000_realDimentions_nonAWE_180Ohm_E_1x_0y_0z_wallDrive

nan = np.nan
a = [
    nan, nan, 15.061594171613452, nan,
    nan, nan, 10.987955833422253, nan,
    nan, nan, 2.402071739079808, nan,
    nan, nan, 17.531840610789363, nan,
    6.059617775723454, 6.097829178943222, 14.62387249377226, 5.127934420273548,
    3.9582918181254776, 8.216028057593306, 37.52904136210943, 17.393536574981063,
    5.726554203215482, 18.999572962628427, 15.356668069873258, 31.986858040073557,
    1.8424256615682568, 8.7167632101463, 7.3527999514281985, 15.84118339660775,
    6.441727281674445, 13.717055460403888, 0.9461027454174641, 4.744513698654297,
    17.677079369442954, 28.99802365141824, 7.77210998352825, 6.222064305264859,
    27.133605882005973, 11.115905093526512, 22.511096594558257, 5.8355181109054675,
    13.904215794126994, 11.056934809785641, 8.428648650491725, 1.6549639451922653,
    nan, 15.436930383416772, nan, nan,
    nan, 15.78751348039556, nan, nan,
    nan, 6.480328710269199, nan, nan,
    nan, 17.777891698875962, nan, nan,
]
np.nanmean(np.asarray(a))