In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plot
import seaborn as sns

import os
from pathlib import Path

In [None]:
# Instructions that findFile prints above every directory listing.
SEARCH_MSG = (
    "> Select a folder to add it to the path and search inside.\n"
    "> Select a file to finish the search process.\n"
    "> To select option [#], enter #.\n"
    "[E] Exit\n"
    "[U] Search up one level"
)

def findFile(input_path):
    """Interactively browse folders starting at ``input_path`` until a file is chosen.

    Prints the current path, the ``SEARCH_MSG`` instructions and a numbered
    listing of the folder's entries, then reads a choice with ``input()``:

    * ``E`` / ``e`` stops the search,
    * ``U`` / ``u`` restarts the search in the parent folder,
    * a listed number opens that folder (and searches inside it) or selects
      that file.

    Any other input is rejected and the user is prompted again.

    Parameters
    ----------
    input_path : str or path-like
        Folder in which to start the search.

    Returns
    -------
    pathlib.Path or str
        The selected file as a ``Path``. Otherwise one of the status strings
        ``"Invalid Path"`` (``input_path`` does not exist), ``"Exiting."``
        (the user chose ``E``) or ``"Error reading choice, exiting."`` (the
        selected entry is neither a folder nor a file).
    """
    current_path = Path(input_path)  # recast for safety
    if not current_path.exists():
        return "Invalid Path"
    print(f"Current path: {current_path}")
    print(SEARCH_MSG)

    # List the entries with [#] as identifier. iterdir() yields entries in
    # arbitrary order, so sort them to keep the numbering stable between runs.
    dirList = sorted(current_path.iterdir())
    print("> Files in current folder:")
    for c, entry in enumerate(dirList):
        print('[' + str(c) + '] ' + str(entry.name))
    print("")

    valid_choices = ["u", "e"] + [str(n) for n in range(len(dirList))]
    choice = input()
    while choice.lower() not in valid_choices:
        choice = input("Couldn't read input, trying again. Target #: ")

    if choice.lower() == "e":
        return "Exiting."
    if choice.lower() == "u":
        return findFile(current_path.parent)

    # Entries produced by iterdir() already start with current_path, so use
    # the entry as-is. Joining it onto current_path again duplicates the
    # folder part whenever current_path is relative ("sub" -> "sub/sub/x").
    selected = dirList[int(choice)]

    if selected.is_dir():
        print(f"Selected folder to search: {selected}")
        return findFile(selected)
    if selected.is_file():
        print(f"Selected file: {selected}")
        return selected

    # Reached only when the entry is neither a folder nor a regular file.
    return "Error reading choice, exiting."

# Interactive way to choose the input file: browse from the notebook's working
# directory and print whatever findFile returns (the selected Path, or one of
# its status strings).
# NOTE(review): data_path is assigned again in the next cell, which replaces
# the value chosen here.
data_path = findFile(Path.cwd())
print(data_path)

In [None]:
# Location of the input CSV, built from the parent of the working directory.
data_path = Path.cwd().parent.joinpath(
    'input files/su25/clean M3 wavelengths targeted global.csv'
)
data_path

In [60]:
# Load the CSV at data_path; later cells read its first and last columns.
df = pd.read_csv(data_path)

In [61]:
# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,Wavelength (µm) M3 targeted images,"average every 4 channels, step n, sort","average every 4 channels, step n, sort, round 0.0001",Wavelength (µm) M3 global images
0,0.44602,0.46099,0.461,0.461
1,0.456,0.50092,0.5009,0.5009
2,0.46598,0.54084,0.5408,0.5408
3,0.47596,0.580765,0.5808,0.5808
4,0.48595,0.62069,0.6207,0.6207


In [62]:
# Breakpoint table for the averaging loop in the next cell: change the step
# size once the current wavelength is past a breakpoint.
# "wl" and "step" are parallel lists indexed by one shared position (bp_idx).
# The loop moves to the next position when the current wavelength is greater
# than the "wl" value at the current position, and then advances through the
# wavelengths by the "step" value at the new position.
# Position 0 (wl 0, step 0) acts as a starting placeholder: with the data
# loaded here it is left on the first iteration (see the printed transitions).
breakpoints = {
    "wl"   : [0, 0.44, 0.68, 0.71, 1.53, 1.56, 1.60], #, 1.64, 9999],
    "step" : [0,    4,    4,    3,    2,    3,    4]  #,   5,    4]
}
# Input series: the first column of df (the wavelengths that get averaged).
wavelengths = np.array(df[df.columns[0]])
# Reference series: the last column of df with missing entries dropped; the
# next cell compares the generated averages against it.
empirical = np.array(df[df.columns[-1]].dropna())

In [63]:
# Generate averaged wavelengths and compare them with `empirical`:
#   1. average WINDOW consecutive values of `wavelengths` starting at index i
#   2. round the average to DECIMALS decimals
#   3. move to the next breakpoint whenever the current wavelength is past the
#      active breakpoint's "wl", then advance i by the active "step"
# Afterwards print both lengths, the summed difference and every mismatch.

WINDOW = 4    # number of values averaged per output entry
DECIMALS = 4  # decimals kept in each average

print(f"     wl: step change, index change")

i = 0
bp_idx = 0
output = []
while i + WINDOW <= len(wavelengths):
    # average over the next WINDOW values
    avg = np.round(np.mean(wavelengths[i:i+WINDOW]), DECIMALS)

    # advance while a next breakpoint exists
    # and the current wl is past the active breakpoint's wl
    while bp_idx + 1 < len(breakpoints["step"]) and wavelengths[i] > breakpoints["wl"][bp_idx]:
        bp_idx += 1
        print(f"{wavelengths[i]:0.5f}: step {breakpoints['step'][bp_idx-1]}->{breakpoints['step'][bp_idx]}, idx {bp_idx-1}->{bp_idx}")

    # A non-positive step never moves i forward, so the loop would not end
    # (e.g. the step-0 placeholder staying active). Fail loudly instead.
    step = breakpoints["step"][bp_idx]
    if step <= 0:
        raise ValueError(
            f"breakpoint {bp_idx} has step {step} at wavelength {wavelengths[i]}; "
            "the step must be positive to advance"
        )

    # save and increment by values consumed
    output.append(avg)
    i += step
output = np.array(output)

print(f"\nlengths:\n" + f"output    {len(output)}\n" + f"empirical {len(empirical)}\n")

# Compare only the positions both arrays have, so a length mismatch shows up
# in the lengths printed above instead of raising a broadcasting error here.
n_common = min(len(output), len(empirical))
print(f"sum diff:  {sum(output[:n_common] - empirical[:n_common]):f}")

print(f"\n" + f"idx: generated vs empirical")
for i in range(n_common):
    if empirical[i] != output[i]:
        print(f"{i}: {output[i]:.04f} | {empirical[i]:.04f}")

     wl: step change, index change
0.44602: step 0->4, idx 0->1
0.44602: step 4->4, idx 1->2
0.68556: step 4->3, idx 2->3
0.71551: step 3->2, idx 3->4
1.53400: step 2->3, idx 4->5
1.56390: step 3->4, idx 5->6

lengths:
output    85
empirical 85

sum diff:  0.000100

idx: generated vs empirical
21: 1.0099 | 1.0100
61: 2.0580 | 2.0579
69: 2.3774 | 2.3773
78: 2.7366 | 2.7367
82: 2.8964 | 2.8963


In [None]:
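### Inconsistencies between the generated values and the manually entered ones
## Manual value looks rounded up (not truncated, and not rounded to nearest)
# mean(0.99497, 1.005, 1.0149, 1.0249) = 1.0099425 -> rounds to 1.0099, but the manual value is 1.0100 > 1.0099
## Manual value looks rounded down
# mean(2.3624, 2.3724, 2.3823, 2.3923) = 2.37735 -> rounds to 2.3774, but the manual value is 2.3773 < 2.3774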

In [None]:
### file writing: