In [1]:
import re
import pandas as pd

from energy_ranges import (
    ERange,
    KRange,
    merge_ranges,
    wavenumber_to_energy,
)

ModuleNotFoundError: No module named 'pint'

# Dev

In [2]:
def clean_column_names(columns):
    """Normalise column labels into lowercase, underscore-separated names.

    For every string label: lowercase it, turn spaces into underscores, then
    drop each character that is not a word character (letter, digit or
    underscore). Labels that are not strings are returned unchanged.

    Returns a new list with one entry per input label, in the same order.
    """
    cleaned = []
    for label in columns:
        if not isinstance(label, str):
            # Non-string labels (e.g. numeric headers) pass through as-is.
            cleaned.append(label)
            continue
        underscored = label.lower().replace(' ', '_')
        cleaned.append(re.sub(r'[^\w]', '', underscored))
    return cleaned


def convert_energy_range(energy_range_str):
    """Parse a whitespace-separated range string into a list of numbers.

    Each token is handled independently:
    - a token containing 'k' has that letter removed, is parsed as a float
      and is then passed through ``wavenumber_to_energy``;
    - any other token is parsed directly with ``float``.

    Returns the converted values as a list, in input order.
    """
    def _parse_token(token):
        if 'k' not in token:
            return float(token)
        wavenumber = float(token.replace('k', ''))
        return wavenumber_to_energy(wavenumber)

    return [_parse_token(token) for token in energy_range_str.split()]

def convert_energy_steps(energy_steps_str):
    """Parse a whitespace-separated step string into a list of floats.

    A 'k' inside a token is stripped and the remaining number is kept as-is;
    no unit conversion is applied to such steps.

    Returns the parsed values as a list, in input order.
    """
    # Stripping 'k' is a no-op for tokens that do not contain it, so a single
    # expression covers both kinds of token.
    return [float(token.replace('k', '')) for token in energy_steps_str.split()]

def convert_float_list(str_list):
    """Parse a whitespace-separated string of numbers into a list of floats.

    Parameters
    ----------
    str_list : str
        Numbers separated by whitespace, e.g. ``"0.5 0.5 1"``.

    Returns
    -------
    list of float
        The parsed values in input order; an empty list for an empty or
        whitespace-only string.

    Raises
    ------
    ValueError
        If any token cannot be parsed by ``float``.
    """
    # The previous implementation called ``float.append(...)`` (the builtin
    # type) instead of appending to the result list, so it raised
    # AttributeError on any non-empty input.
    return [float(item) for item in str_list.split()]

def validate_data(df):
    """Return a type-normalised copy of the sample table; ``df`` is not modified.

    Steps, performed in this order (progress lines are printed along the way):

    1. 'measure_this_slot' and 'is_standard', when present, are mapped from
       'Yes'/'No' to True/False.
    2. 'sample_name' is cast to ``str``.
    3. 'number_of_repetitions' is converted with ``pd.to_numeric`` (invalid
       entries coerced to NaN, integers downcast).
    4. 'kweight' and 'auto_gain_energy' are converted with ``pd.to_numeric``
       (invalid entries coerced to NaN).
    5. 'energy_ranges', 'energy_steps' and 'integration_times' are turned from
       strings into lists of numbers.
    6. 'absorption_edge', when present, must contain only 'K', 'L1', 'L2'
       or 'L3'.

    The columns in steps 2-5 are accessed unconditionally.

    Raises
    ------
    ValueError
        If 'absorption_edge' is present and holds any other value.
    """
    table = df.copy()

    # Optional Yes/No flag columns -> booleans.
    yes_no_to_bool = {'Yes': True, 'No': False}
    for flag_column in ('measure_this_slot', 'is_standard'):
        if flag_column in table.columns:
            table[flag_column] = table[flag_column].map(yes_no_to_bool)
    print("start")

    # Text columns. Other candidates (note, element_symbol, absorption_edge,
    # auto_gain_mode) are currently not coerced.
    string_columns = ['sample_name']
    table[string_columns] = table[string_columns].astype(str)
    print(f'checked {string_columns}')

    # Integer columns: unparsable entries become NaN.
    int_columns = ['number_of_repetitions']
    table[int_columns] = table[int_columns].apply(
        pd.to_numeric, errors='coerce', downcast='integer'
    )
    print(f'checked {int_columns}')

    # Float columns: unparsable entries become NaN. Other candidates
    # (detector_x, sample_x, sample_y, slit_width, slit_height) are currently
    # not coerced.
    float_columns = ['kweight', 'auto_gain_energy']
    table[float_columns] = table[float_columns].apply(pd.to_numeric, errors='coerce')
    print(f'checked {float_columns}')

    # String -> list-of-numbers columns, converted one after another.
    # NOTE(review): 'integration_times' goes through convert_energy_range, so a
    # token containing 'k' there would be treated as a wavenumber — confirm
    # that this is intended.
    list_conversions = (
        ('energy_ranges', convert_energy_range, 'checked energy range'),
        ('energy_steps', convert_energy_steps, 'checked energy steps'),
        ('integration_times', convert_energy_range, 'checked integration times'),
    )
    for column, parser, done_message in list_conversions:
        table[column] = table[column].apply(parser)
        print(done_message)

    # Optional column: every entry must be one of the known edge labels.
    if 'absorption_edge' in table.columns:
        allowed_edges = {'K', 'L1', 'L2', 'L3'}
        edges_ok = table['absorption_edge'].isin(allowed_edges).all()
        if not edges_ok:
            raise ValueError("Invalid absorption_edge. It must be 'K', 'L1', 'L2', or 'L3'.")

    return table

## TODO: add pytest tests for the functions above

In [10]:
def check_wheel_type(file_name, printit=True):
    """Read the slot count stored in the top-left cell of the Excel file.

    The file is loaded without a header row and the value at row 0, column 0
    is returned unchanged. When ``printit`` is true, a one-line summary of the
    wheel type is printed first.
    """
    sheet = pd.read_excel(file_name, header=None)
    num_slot = sheet.iloc[0, 0]
    if printit:
        print(f"wheel_type: {num_slot}-slot, round")
    return num_slot

In [17]:
file_name = 'sample_wheel_18slots_20241121.xlsx'

# header=1 takes the second spreadsheet row as the column names; skiprows=[2]
# drops the row right after it (presumably the example row — confirm against
# the spreadsheet template).
df = pd.read_excel(file_name, header=1, skiprows=[2])

# Normalise the headers: lowercase, spaces -> underscores, other symbols removed.
df.columns = clean_column_names(df.columns)

# The slot count is stored in the sheet's top-left cell.
num_slots = check_wheel_type(file_name)

# Keep only the first `num_slots` rows and validate them.
data = validate_data(df.head(num_slots))

wheel_type: 18-slot, round
start
checked ['sample_name']
checked ['number_of_repetitions']
checked ['kweight', 'auto_gain_energy']
checked energy range
checked energy steps
checked integration times


In [19]:
data

Unnamed: 0,slot_number,ring,sample_name,measure_this_slot,number_of_repetitions,element_symbol,absorption_edge,is_standard,note,energy_ranges,energy_steps,kweight,integration_times,auto_gain_energy,auto_gain_mode,detector_x,sample_x,sample_y,slit_width,slit_height
0,0.0,Outer,MnO,True,1,Mn,K,False,,"[-200.0, -30.0, -10.0, 25.0, 857.2459761284085]","[10.0, 2.0, 0.3, 0.05]",0.0,"[0.5, 0.5, 0.5, 0.5]",-50.0,upper,,,,,
1,1.0,Outer,NiMnO,True,1,Mn,K,False,,"[-200.0, -30.0, -10.0, 25.0, 857.2459761284085]","[11.0, 2.0, 0.3, 0.05]",0.0,"[0.5, 0.5, 0.5, 0.5]",-50.0,upper,,,,,
2,2.0,Outer,NiMnO,True,1,Mn,K,False,,"[-200.0, -30.0, -10.0, 25.0, 857.2459761284085]","[12.0, 2.0, 0.3, 0.05]",0.0,"[0.5, 0.5, 0.5, 0.5]",-50.0,upper,,,,,
3,3.0,Outer,NiMnO,True,1,Mn,K,False,,"[-200.0, -30.0, -10.0, 25.0, 857.2459761284085]","[13.0, 2.0, 0.3, 0.05]",0.0,"[0.5, 0.5, 0.5, 0.5]",-50.0,upper,,,,,
4,4.0,Outer,NiMnO,True,1,Mn,K,False,,"[-200.0, -30.0, -10.0, 25.0, 857.2459761284085]","[14.0, 2.0, 0.3, 0.05]",0.0,"[0.5, 0.5, 0.5, 0.5]",-50.0,upper,,,,,
5,5.0,Outer,NiMnO,True,1,Mn,K,False,,"[-200.0, -30.0, -10.0, 25.0, 857.2459761284085]","[15.0, 2.0, 0.3, 0.05]",0.0,"[0.5, 0.5, 0.5, 0.5]",-50.0,upper,,,,,
6,6.0,Outer,NiMnO,True,1,Mn,K,False,,"[-200.0, -30.0, -10.0, 25.0, 857.2459761284085]","[16.0, 2.0, 0.3, 0.05]",0.0,"[0.5, 0.5, 0.5, 0.5]",-50.0,upper,,,,,
7,7.0,Outer,NiMnO,True,1,Mn,K,False,,"[-200.0, -30.0, -10.0, 25.0, 857.2459761284085]","[17.0, 2.0, 0.3, 0.05]",0.0,"[0.5, 0.5, 0.5, 0.5]",-50.0,upper,,,,,
8,8.0,Outer,NiMnO,True,1,Mn,K,False,,"[-200.0, -30.0, -10.0, 25.0, 857.2459761284085]","[18.0, 2.0, 0.3, 0.05]",0.0,"[0.5, 0.5, 0.5, 0.5]",-50.0,upper,,,,,
9,9.0,Outer,NiMnO,True,1,Mn,K,False,,"[-200.0, -30.0, -10.0, 25.0, 857.2459761284085]","[19.0, 2.0, 0.3, 0.05]",0.0,"[0.5, 0.5, 0.5, 0.5]",-50.0,upper,,,,,
