In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 19 13:19:17 2025

@author: BJLuttgenau
"""

import os
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import pandas as pd
import time
from _functions import *

In [5]:
# =================== SETTINGS & FOLDERS ===================

# Set directory where the data files are located
directory = '/home/kas/Projects/accelerate_data_process/data example small/'  # <-- adapt this to your file name/path
os.chdir(directory)

# Path to the tab-delimited txt file that contains the two columns "sample_name" and "group_name":
#   "sample_name" (matching the filenames) and "group_name"
#txt_file = '/home/kas/Projects/accelerate_data_process/data example small/sample_holder_position_readout_2025-06-08.txt'  # <-- adapt this path

In [6]:
# =================== GLOBAL BEAM INTERVAL PARAMETERS ===================
# Define absolute times (in seconds)
BEAM_ON_USED_S          = 30.0  # how many seconds of beam-on intervals to include for determining values for RGA spectrum
BEAM_OFF_BEFORE_S       = 20.0  # how many seconds (before beam-on) to exclude for the first beam-off region
BEAM_OFF_AFTER_S        = 30.0  # how many seconds (after beam-on) to exclude for the second beam-off region

In [7]:
# ===================================================================================

SAVE_IMAGES = False  # True if you want to save or False if you don't want to save

SAVE_EVERY_Pressure_IMAGE = False  # True if you want to save or False if you don't want to save Pressure(t) for every m/z

In [11]:
# Regex pattern to match the desired file format
#pattern = re.compile(r"sample_holder_position_readout_\d{4}-\d{2}-\d{2}\.txt")

#__________
# Find all matching files
matching_files = [f for f in os.listdir(directory) if f.startswith('sample_holder_position_readout_') and f.endswith('.txt')]

if not matching_files:
    raise FileNotFoundError("No matching 'sample_holder_position_readout_YYYY-MM-DD.txt' files found.")

# Sort files by date (assuming filenames are date-sorted)
matching_files.sort(reverse=True)

print(f"Using {len(matching_files)} matching files.")

# Initialize empty DataFrame to collect all entries
df_all_groups = pd.DataFrame()

# Read and concatenate all matching files
for fname in matching_files:
    fpath = os.path.join(directory, fname)
    df_temp = pd.read_csv(fpath, sep='\t')
    df_all_groups = pd.concat([df_all_groups, df_temp], ignore_index=True)

# Build dictionary: group_name -> list of unique sample_names
sample_groups = {}
for grp, subdf in df_all_groups.groupby('group_name'):
    sample_groups[grp] = sorted(subdf['sample_name'].unique())

sample_groups

Using 2 matching files.


{'CP33': ['CP33 1', 'CP33 2'],
 'CP33H': ['CP33H 1', 'CP33H 2'],
 'CP8.5': ['CP8.5 1', 'CP8.5 2'],
 'CP8.5H': ['CP8.5H 1', 'CP8.5H 2'],
 'CSAR': ['CSAR 1', 'CSAR 2'],
 'G': ['G 1', 'G 2'],
 'GH': ['GH 1', 'GH 2'],
 'P2VP': ['P2VP 1', 'P2VP 2'],
 'P35': ['P35 1', 'P35 2'],
 'P4VP': ['P4VP 1', 'P4VP 2'],
 'P950': ['P950 1', 'P950 2'],
 'P950-AlO2': ['P950-AlO2 1', 'P950-AlO2 2'],
 'P950-AlO4': ['P950-AlO4 1', 'P950-AlO4 2'],
 'P950-Hf10x4': ['P950-Hf10x4 1', 'P950-Hf10x4 2'],
 'P950-Hf15x1': ['P950-Hf15x1 1', 'P950-Hf15x1 2'],
 'P950-InO2': ['P950-InO2 1', 'P950-InO2 2'],
 'P950-InO4': ['P950-InO4 1', 'P950-InO4 2'],
 'PEG': ['PEG 1', 'PEG 2'],
 'PHEMA': ['PHEMA 1', 'PHEMA 2'],
 'PS': ['PS 1', 'PS 2'],
 'SU8': ['SU8 1', 'SU8 2'],
 'ZEP': ['ZEP 1', 'ZEP 2'],
 'bare Si': ['bare Si'],
 'bare Si BNL': ['bare Si BNL 1', 'bare Si BNL 2']}

In [5]:


# Plotting parameters
SMALL_SIZE = 12
MEDIUM_SIZE = 16
BIGGER_SIZE = 20

# Create folders for saving outputs

folders = ['Analysis_results-ascii',
           'Analysis_results-plots',
           "Analysis_results-ascii/TEY_normalized_averaged",
           "Analysis_results-ascii/TEY_normalized",
           "Analysis_results-ascii/MS",
           "Analysis_results-ascii/MS(t)",
           "Analysis_results-ascii/MS(t)_averaged",
           "Analysis_results-ascii/MS_averaged",
           "Analysis_results-ascii/Total_outgassing",
           "Analysis_results-ascii/Total_outgassing_averaged"]

for folder in folders:
    path = os.path.join(directory, folder)
    try:
        os.makedirs(path, exist_ok=True)
        print(f"Folder '{folder}' is ready.")
    except Exception as e:
        print(f"Failed to create folder '{folder}': {e}")

output_folder = directory +"/Analysis_results-ascii"
output_folder_plots = directory+"/Analysis_results-plots"

Folder 'rawdataplots' is ready.
Folder 'plots' is ready.
Folder 'outgassing_data' is ready.
Folder 'Analysis_results-ascii' is ready.
Folder 'Analysis_results-plots' is ready.
Folder 'Analysis_results-ascii/TEY_normalized_averaged' is ready.
Folder 'Analysis_results-ascii/TEY_normalized' is ready.
Folder 'Analysis_results-ascii/MS' is ready.
Folder 'Analysis_results-ascii/MS(t)' is ready.
Folder 'Analysis_results-ascii/MS(t)_averaged' is ready.
Folder 'Analysis_results-ascii/MS_averaged' is ready.
Folder 'Analysis_results-ascii/Total_outgassing' is ready.
Folder 'Analysis_results-ascii/Total_outgassing_averaged' is ready.


In [6]:
# ===== REGEX PATTERNS =====
darkpd_pattern = re.compile(r"DarkPD_(-?\d+\.\d+)uA")
pd_pattern = re.compile(r"_PD_(-?\d+\.\d+)uA")
scanspeed_pattern = re.compile(r"scanspeed_(\d+)")
scantime_pattern = re.compile(r"scantime_(\d+)")

# ===== FIND ALL MAIN FILES =====
main_files = [
    f for f in os.listdir(directory)
    if f.startswith("sample_holder_position_readout")
    and f.endswith(".txt")
    and "_post_analysis" not in f
]


if not main_files:
    raise FileNotFoundError("No files starting with 'sample_holder_position_readout' found.")

# ===== SETUP OUTPUT SUBFOLDER =====
ascii_output_dir = output_folder #os.path.join(directory, "Analysis_results-ascii")
os.makedirs(ascii_output_dir, exist_ok=True)

# ===== PROCESS EACH MAIN FILE =====
for main_fname in main_files:
    main_file = os.path.join(directory, main_fname)

    # Create copy for post-analysis
    base_name, ext = os.path.splitext(main_fname)  # Use fname here to avoid duplicating path
    post_fname = base_name + "_post_analysis" + ext
    post_file = os.path.join(ascii_output_dir, post_fname)

    if os.path.exists(post_file):
        print(f"Post-analysis file already exists: {post_file}")
    else:
        shutil.copy2(main_file, post_file)
        print(f"Created copy: {post_file}")

        # Read the copied file
        df = pd.read_csv(post_file, sep="\t")

        # Ensure new columns exist
        df["DarkPD,A"] = None
        df["PD,A"] = None
        df["scanspeed"] = None
        df["scantime,s"] = None

        # ===== PROCESS TEY FILES =====
        for fname in os.listdir(directory):
            if "TEY" in fname and fname.endswith(".txt"):
                sample_name = fname.split("_TEY")[0]

                darkpd_match = darkpd_pattern.search(fname)
                pd_match = pd_pattern.search(fname)

                if darkpd_match and pd_match:
                    darkpd_val = float(darkpd_match.group(1)) * 1e-6
                    pd_val = float(pd_match.group(1)) * 1e-6

                    df.loc[df["sample_name"] == sample_name, "DarkPD,A"] = float(f"{darkpd_val:.5g}")
                    df.loc[df["sample_name"] == sample_name, "PD,A"] = float(f"{pd_val:.5g}")

        # ===== PROCESS RGA FILES =====
        for fname in os.listdir(directory):
            if "RGA_" in fname and fname.endswith(".txt"):
                sample_name = fname.split("_RGA_")[0]

                scanspeed_match = scanspeed_pattern.search(fname)
                scantime_match = scantime_pattern.search(fname)

                if scanspeed_match:
                    df.loc[df["sample_name"] == sample_name, "scanspeed"] = int(scanspeed_match.group(1))
                if scantime_match:
                    df.loc[df["sample_name"] == sample_name, "scantime,s"] = int(scantime_match.group(1))

        # ===== SAVE UPDATED COPY =====
        df.to_csv(post_file, sep="\t", index=False)
        print(f"Updated file saved to: {post_file}")

Post-analysis file already exists: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/sample_holder_position_readout_2025-06-08_post_analysis.txt
Post-analysis file already exists: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/sample_holder_position_readout_2025-06-08 bare Si_post_analysis.txt


In [7]:
pd_pattern = re.compile(r"_PD_([-+]?\d*\.?\d+)uA", re.IGNORECASE)
folder_path_TEY_norm = directory +"/Analysis_results-ascii/TEY_normalized"
output_data_folder = directory +"/Analysis_results-ascii/TEY_normalized_averaged"

for filename in os.listdir(directory):
    if "TEY_" in filename and filename.lower().endswith(".txt"):
        match = pd_pattern.search(filename)
        if not match:
            print(f"⚠ Skipping {filename}: PD value not found.")
            continue
        
        pd_value_uA = float(match.group(1))
        file_path = os.path.join(directory, filename)
        
        try:
            df = pd.read_csv(file_path, sep="\t")
            if "Time,s" not in df.columns or "TEY,A" not in df.columns:
                print(f"⚠ Skipping {filename}: Missing required columns.")
                continue
            
            norm_tey = df["TEY,A"] / pd_value_uA * 1e6
            
            # Fix spikes only after 62 seconds
            norm_tey_fixed = fix_spikes_with_time(df["Time,s"].values, norm_tey.values, start_time=62, threshold=5E-3)
            
            norm_tey_rounded = pd.Series(norm_tey_fixed).apply(lambda v: round_sig(v, 5))
            
            df_normalized = pd.DataFrame({
                "Time(s)": df["Time,s"],
                "Normalized_TEY": norm_tey_rounded
            })
            
            base_name = filename.split("_TEY_Dark")[0]
            new_filename = f"{base_name}_TEY_normalized.txt"
            output_path = os.path.join(folder_path_TEY_norm, new_filename)
            df_normalized.to_csv(output_path, sep="\t", index=False)
            print(f"✅ Saved normalized file: {new_filename}")
        
        except Exception as e:
            print(f"❌ Error processing {filename}: {e}")

✅ Saved normalized file: PS 2_TEY_normalized.txt
✅ Saved normalized file: PEG 2_TEY_normalized.txt
✅ Saved normalized file: PHEMA 2_TEY_normalized.txt
✅ Saved normalized file: PS 1_TEY_normalized.txt
✅ Saved normalized file: PEG 1_TEY_normalized.txt
✅ Saved normalized file: SU8 2_TEY_normalized.txt
✅ Saved normalized file: SU8 1_TEY_normalized.txt
✅ Saved normalized file: PHEMA 1_TEY_normalized.txt
✅ Saved normalized file: ZEP 1_TEY_normalized.txt
✅ Saved normalized file: ZEP 2_TEY_normalized.txt


In [8]:
#-------------------TEY averaging of normalized data---------------------------
mapping_file = txt_file

# Load mapping file
mapping_df = pd.read_csv(mapping_file, sep="\t", dtype=str)
mapping_df = mapping_df.iloc[:, [3, 6]]  # 4th and 7th columns (0-based index)
mapping_df.columns = ["sample_name", "group_name"]

# Dictionary: group -> list of file paths
group_files = {}

for fname in os.listdir(folder_path_TEY_norm):
    if fname.endswith("_TEY_normalized.txt"):
        sample_name = fname.replace("_TEY_normalized.txt", "")
        group_name = mapping_df.loc[mapping_df["sample_name"] == sample_name, "group_name"]
        
        if not group_name.empty:
            gname = group_name.values[0]
            group_files.setdefault(gname, []).append(os.path.join(folder_path_TEY_norm, fname))

# Process each group
for group, files in group_files.items():
    data_arrays = []
    
    for fpath in files:
        df = pd.read_csv(fpath, sep="\t")
        df = df.iloc[:, [0, 1]]  # Only Time and TEY intensity
        df.columns = ["Time,s", "TEY"]
        data_arrays.append(df.to_numpy())
    
    # Find minimum number of rows (in case files differ in length)
    min_len = min(arr.shape[0] for arr in data_arrays)
    data_arrays = [arr[:min_len] for arr in data_arrays]
    
    # Stack into 3D array: shape (files, rows, 2)
    stacked = np.stack(data_arrays, axis=0)  # files × rows × columns
    
    # Average times and TEYs line-by-line
    avg_time = stacked[:, :, 0].mean(axis=0)
    avg_tey = stacked[:, :, 1].mean(axis=0)
    std_tey = stacked[:, :, 1].std(axis=0, ddof=0)  # population std
    
    # Build DataFrame
    result_df = pd.DataFrame({
        "Time(s)": avg_time,
        "Averaged_TEY": avg_tey,
        "Std_TEY": std_tey
    })
    
    # Save in same folder
    output_path = os.path.join(output_data_folder, f"{group}_TEY_normalized_averaged.txt")
    result_df.to_csv(output_path, sep="\t", index=False, float_format="%.5g")

    print(f"Saved: {output_path}")

Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/TEY_normalized_averaged/PEG_TEY_normalized_averaged.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/TEY_normalized_averaged/PS_TEY_normalized_averaged.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/TEY_normalized_averaged/ZEP_TEY_normalized_averaged.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/TEY_normalized_averaged/PHEMA_TEY_normalized_averaged.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/TEY_normalized_averaged/SU8_TEY_normalized_averaged.txt


In [20]:
#-------------------- Search for Maximal TEY value--------------------------------------


results = []

for filename in os.listdir(folder_path_TEY_norm):
    if "TEY_" in filename and filename.endswith('.txt'):
        filepath = os.path.join(folder_path_TEY_norm, filename)
        try:
            df = pd.read_csv(filepath, sep=r"\s+", header=0)
        except Exception as e:
            print(f"Could not read {filename}: {e}")
            continue

        # Check for the updated column names
        print(df.columns)
        if 'Time(s)' not in df.columns or 'Normalized_TEY' not in df.columns:
            print(f"File {filename} missing required columns")
            continue

        subset = df[(df['Time(s)'] >= 59.5) & (df['Time(s)'] <= 60.5)]

        if subset.empty:
            print(f"No data in time window for {filename}")
            continue

        max_val = subset['Normalized_TEY'].max()

        sample_name = filename.split("_TEY_")[0]

        results.append((sample_name, max_val))

output_df = pd.DataFrame(results, columns=['sample', 'TEY_t=0'])

output_path = os.path.join(output_folder, 'TEY_at_t=0.txt')
output_df.to_csv(output_path, sep='\t', index=False)


print("Done! Results saved to 'TEY_at_t0.txt'.")

Index(['Time(s)', 'Normalized_TEY'], dtype='object')
Index(['Time(s)', 'Normalized_TEY'], dtype='object')
Index(['Time(s)', 'Normalized_TEY'], dtype='object')
Index(['Time(s)', 'Normalized_TEY'], dtype='object')
Index(['Time(s)', 'Normalized_TEY'], dtype='object')
Index(['Time(s)', 'Normalized_TEY'], dtype='object')
Index(['Time(s)', 'Normalized_TEY'], dtype='object')
Index(['Time(s)', 'Normalized_TEY'], dtype='object')
Index(['Time(s)', 'Normalized_TEY'], dtype='object')
Index(['Time(s)', 'Normalized_TEY'], dtype='object')
Done! Results saved to 'TEY_at_t0.txt'.


In [10]:
sample_groups

{'CP33': ['CP33 1', 'CP33 2'],
 'CP33H': ['CP33H 1', 'CP33H 2'],
 'CP8.5': ['CP8.5 1', 'CP8.5 2'],
 'CP8.5H': ['CP8.5H 1', 'CP8.5H 2'],
 'CSAR': ['CSAR 1', 'CSAR 2'],
 'G': ['G 1', 'G 2'],
 'GH': ['GH 1', 'GH 2'],
 'P2VP': ['P2VP 1', 'P2VP 2'],
 'P35': ['P35 1', 'P35 2'],
 'P4VP': ['P4VP 1', 'P4VP 2'],
 'P950': ['P950 1', 'P950 2'],
 'P950-AlO2': ['P950-AlO2 1', 'P950-AlO2 2'],
 'P950-AlO4': ['P950-AlO4 1', 'P950-AlO4 2'],
 'P950-Hf10x4': ['P950-Hf10x4 1', 'P950-Hf10x4 2'],
 'P950-Hf15x1': ['P950-Hf15x1 1', 'P950-Hf15x1 2'],
 'P950-InO2': ['P950-InO2 1', 'P950-InO2 2'],
 'P950-InO4': ['P950-InO4 1', 'P950-InO4 2'],
 'PEG': ['PEG 1', 'PEG 2'],
 'PHEMA': ['PHEMA 1', 'PHEMA 2'],
 'PS': ['PS 1', 'PS 2'],
 'SU8': ['SU8 1', 'SU8 2'],
 'ZEP': ['ZEP 1', 'ZEP 2'],
 'bare Si': ['bare Si'],
 'bare Si BNL': ['bare Si BNL 1', 'bare Si BNL 2']}

In [11]:
# Scan folder for TEY and RGA files
TEY_files = [f for f in os.listdir(directory) if f.endswith(".txt") and "_TEY_" in f]
rga_files = [f for f in os.listdir(directory) if f.endswith(".txt") and "_RGA_" in f]
TEY_files.sort()
rga_files.sort()

if len(TEY_files) != len(rga_files):
    raise ValueError(f"Mismatch in file counts: {len(TEY_files)} TEY files vs {len(rga_files)} RGA files.")

# Dictionaries to store per-sample data
sample_outgassing = {}  # sample_name -> {'avg': array, 'std': array, 'sum_avg': float, 'sum_std': float}
sample_TEY = {}         # sample_name -> (time_array, TEY_normalized) 
sample_ion = {}         # sample_name -> { m/z : (time_array, corrected_data), 'sum': (time_array, sum_corrected_data) }

In [12]:


# Process each sample (each pair of TEY and RGA files)
for rga_file, TEY_file in zip(rga_files, TEY_files):
    rga_file_path = os.path.join(directory, rga_file)
    TEY_file_path = os.path.join(directory, TEY_file)
    
    # Extract sample name from the file name
    sample_name = extract_sample_name(rga_file)
    print(f"Processing sample: {sample_name}")
    
    # Determine beam-on indices based on TEY and RGA files
    beam_on_indices = determine_intervals(TEY_file_path, rga_file_path)
    
    # Load RGA data
    rga_data = np.genfromtxt(rga_file_path, delimiter='\t', skip_header=2, dtype=str)
    ncols = rga_data.shape[1]  # total columns; first column is time, remaining are m/z channels
    
    # Plot total raw sum (no background subtraction)
    #plot_all_columns_and_sum(rga_data, sample_name)
    
    # Prepare placeholders for outgassing averages
    outgassing_avg_list = []
    outgassing_std_list = []
    
    # We'll also keep a running sum of the background-corrected data for each time point
    # so we can have a "sum-of-all-channels" background-corrected trace.
    # Initialize after we know the time array from the first channel.
    sum_corrected_data = None
    sum_beam_on_interval = None
    sum_beam_off1_interval = None
    sum_beam_off2_interval = None
    
    # Process each mass channel and collect outgassing + ion signal data
    sample_ion[sample_name] = {}
    for col in range(1, ncols):
        (beam_on_interval, beam_off1_interval, beam_off2_interval,
            avg_values, std_values, time_array, corrected_data) = process_and_plot_column(rga_data, col, sample_name, beam_on_indices)
        
        outgassing_avg_list.append(avg_values[0])
        outgassing_std_list.append(std_values[0])

        #print(outgassing_avg_list)
        
        # Store the time + corrected data for individual sample, per m/z
        # As requested: time(s), corrected ion signal, and repeated std
        # (the same std_value for each time point)
        data_with_std = np.column_stack((
            time_array, 
            corrected_data, 
            np.full_like(corrected_data, std_values[0])
        ))
        #rga_out_path = os.path.join(
        #    folders['outgassing'], f'{sample_name}_mz_{col}_corrected_signal.txt'
        #)
        #np.savetxt(
        #    rga_out_path,
        #    data_with_std,
        #    fmt='%.6e',
        #    delimiter='\t',
        #    header=(f"Ion signal for {sample_name}, m/z={col}\n"
        #            "Time(s)\tCorrected Signal(Torr)\tStd(Torr)"),
        #    comments=''
        #)
        
        # Store in the dictionary
        sample_ion[sample_name][col] = (time_array, corrected_data,np.full_like(corrected_data, std_values[0]))
        
        # Accumulate the sum
        if sum_corrected_data is None:
            sum_corrected_data = np.copy(corrected_data)
            sum_beam_on_interval = beam_on_interval
            sum_beam_off1_interval = beam_off1_interval
            sum_beam_off2_interval = beam_off2_interval
        else:
            sum_corrected_data += corrected_data
    
    # After processing all columns, store the average outgassing data
    sample_outgassing[sample_name] = {
        'avg': np.array(outgassing_avg_list),  # shape (n_mz_channels,)
        'std': np.array(outgassing_std_list)
    }
    
    # Save individual sample outgassing data (average + std across m/z)
    # i.e. the typical "mass_number, avg, std"
    mass_numbers = np.arange(1, ncols)  # mass channels 1 ... (ncols-1)
    data_to_save = np.column_stack((mass_numbers, 
                                    sample_outgassing[sample_name]['avg'],
                                    sample_outgassing[sample_name]['std']))
    header = (f"Outgassing data {sample_name}\n"
                "Mass number\tAvg Values (Torr)\tStd Values (Torr)")
    #file_path = os.path.join(folders['outgassing'], f'{sample_name}_outgassing_data_mean_std.txt')
    #np.savetxt(file_path, data_to_save, delimiter='\t', header=header, 
    #            fmt='%d\t%.6e\t%.6e', comments='')
    #print(f"Data saved to {file_path}")
    
    # ======= Also save the sum of the corrected data for each sample =======
    if sum_corrected_data is not None:
        # Compute a single standard deviation from beam-off region of the sum
        sum_data_beam_off = np.concatenate((
            sum_corrected_data[sum_beam_off1_interval[0]:sum_beam_off1_interval[1]],
            sum_corrected_data[sum_beam_off2_interval[0]:sum_beam_off2_interval[1]]
        ))
        sum_std_value = np.std(sum_data_beam_off)
        sum_data_with_std = np.column_stack((
            time_array, 
            sum_corrected_data, 
            np.full_like(sum_corrected_data, sum_std_value)
        ))
        #sum_file = os.path.join(folders['outgassing'], f'{sample_name}_sum_corrected_signal.txt')
        #np.savetxt(
        ##    sum_file,
         #   sum_data_with_std,
         #   fmt='%.6e',
         #   delimiter='\t',
         #   header=(f"Sum of corrected signals for {sample_name}\n"
         #           "Time(s)\tSumCorrected(Torr)\tStd(Torr)"),
         #   comments=''
        #)
        #print(f"Sum of corrected signals saved to {sum_file}")
        
        # Also store average over beam on region, and that std, in sample_outgassing:
        sum_data_beam_on = sum_corrected_data[sum_beam_on_interval[0]: sum_beam_on_interval[1]]
        sum_avg_value = np.mean(sum_data_beam_on)
        sample_outgassing[sample_name]['sum_avg'] = sum_avg_value
        sample_outgassing[sample_name]['sum_std'] = sum_std_value
        sample_ion[sample_name]['sum_std'] = sum_std_value
    else:
        sample_outgassing[sample_name]['sum_avg'] = 0
        sample_outgassing[sample_name]['sum_std'] = 0
        sample_ion[sample_name]['sum_std'] = 0
    
    # ======= Plot Individual Outgassing Spectrum (Linear Scale) =======
    #plt.figure(figsize=plot_size)
    #outgassing_avg = sample_outgassing[sample_name]['avg']
    #outgassing_std = sample_outgassing[sample_name]['std']
    
    # Filter out negative avg values or when errorbar is larger than the avg
    #outgassing_avg[outgassing_avg < 0] = 0
    #outgassing_std[outgassing_std > outgassing_avg] = 0

Processing sample: PEG 1
Processing sample: PEG 2
Processing sample: PHEMA 1
Processing sample: PHEMA 2
Processing sample: PS 1
Processing sample: PS 2
Processing sample: SU8 1
Processing sample: SU8 2
Processing sample: ZEP 1
Processing sample: ZEP 2


In [13]:
sample_outgassing  # sample_name -> {'avg': array, 'std': array, 'sum_avg': float, 'sum_std': float}
sample_ion
#.keys()

{'PEG 1': {1: (array([  0. ,   6.5,  12.9,  19.4,  25.8,  32.3,  38.7,  45.2,  51.6,
           58.1,  64.5,  71. ,  77.4,  83.9,  90.3,  96.8, 103.2, 109.7,
          116.1, 122.6, 129. , 135.5, 141.9, 148.4, 154.8, 161.3, 167.8,
          174.2, 180.7, 187.1, 193.6, 200. , 206.5, 212.9, 219.4, 225.8,
          232.3, 238.7, 245.2, 251.6, 258.1, 264.5, 271. , 277.4, 283.9,
          290.3, 296.8, 303.2, 309.7, 316.1, 322.6, 329. , 335.5, 341.9,
          348.4, 354.8]),
   array([-3.30348346e-10, -6.64056242e-11,  1.54831825e-10,  8.92745467e-11,
          -1.61588004e-10,  7.96547175e-11, -9.83078334e-11,  5.72348884e-11,
           6.07723375e-11,  1.27150592e-11,  9.27525083e-11,  2.23095230e-10,
           2.44132679e-10,  1.99175401e-10, -5.79871500e-11,  1.32555572e-10,
           2.19593021e-10,  1.75935743e-10,  2.52973192e-10,  5.87159134e-11,
           9.12533625e-11,  4.07960842e-11, -7.89664667e-11,  9.39762551e-11,
           7.80137042e-11, -6.75435741e-11,  2.48599148e

In [14]:

save_sample_ion_to_txt(sample_ion,os.path.join(output_folder,'MS(t)'))
save_mass_spectra_with_pandas(sample_outgassing, os.path.join(output_folder,'MS'))
save_sample_ion_to_total_outgassing_txt(sample_ion,os.path.join(output_folder,'Total_outgassing'))
save_grouped_mass_spectra(sample_outgassing, os.path.join(output_folder,'MS_averaged'))
save_gouped_sample_ion_to_txt(sample_ion,os.path.join(output_folder,'MS(t)_averaged'))
save_grouped_sample_ion_to_total_outgassing_txt(sample_ion,os.path.join(output_folder,'Total_outgassing_averaged'))

Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/MS(t)/PEG 1.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/MS(t)/PEG 2.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/MS(t)/PHEMA 1.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/MS(t)/PHEMA 2.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/MS(t)/PS 1.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/MS(t)/PS 2.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/MS(t)/SU8 1.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/MS(t)/SU8 2.txt
Saved: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-ascii/MS(t)/ZEP 1.txt
Saved: /home/kas/

In [15]:
input_folder1 = output_folder+ "/TEY_normalized"
output_folder1 = output_folder_plots+"/TEY_normalized"

input_folder2 = output_folder+ "/TEY_normalized_averaged"
output_folder2 = output_folder_plots+"/TEY_normalized_averaged"

plot_ascii_files(input_folder1, output_folder1)
plot_ascii_files(input_folder2, output_folder2)
plot_MS(sample_outgassing,output_folder_plots+"/MS")
plot_MS_from_folder(os.path.join(output_folder,'MS_averaged'),output_folder_plots+"/MS_averaged")
plot_total_outgassing_from_folder(os.path.join(output_folder,'Total_outgassing_averaged'),output_folder_plots+"/Total_outgassing_averaged")

Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized/PEG 2_TEY_normalized.png
Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized/PEG 1_TEY_normalized.png
Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized/PS 1_TEY_normalized.png
Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized/ZEP 1_TEY_normalized.png


Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized/PS 2_TEY_normalized.png
Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized/PHEMA 2_TEY_normalized.png
Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized/SU8 2_TEY_normalized.png
Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized/PHEMA 1_TEY_normalized.png
Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized/SU8 1_TEY_normalized.png
Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized/ZEP 2_TEY_normalized.png
Saved plot: /home/kas/Projects/accelerate_data_process/data example small//Analysis_results-plots/TEY_normalized_averaged/PS_TEY_normalized_averaged.png
Saved plot: /hom

In [None]:

def plot_ascii_files(input_folder, output_folder, extensions=(".txt", ".dat")):
    # input_folder1 = folder_path+ "/Analysis_results-ascii/TEY_normalized"
    # Create output folder if it doesn’t exist
    os.makedirs(output_folder, exist_ok=True)

    # Loop through files in the input folder
    for filename in os.listdir(input_folder):
        if filename.endswith(extensions):
            filepath = os.path.join(input_folder, filename)

            try:
                # Read file with pandas (handles tab/space delimiters)
                df = pd.read_csv(filepath, sep="\t")

                # Extract column names
                cols = df.columns.tolist()

                if len(cols) < 2:
                    print(f"Skipping {filename}, not enough columns")
                    continue

                x = df[cols[0]]
                y = df[cols[1]]

                plt.figure(figsize=(6,4))

                if len(cols) == 2:
                    plt.plot(x, y, linestyle="-", label=cols[1])
                elif len(cols) >= 3:
                    std = df[cols[2]]
                    plt.plot(x, y, color="blue", label=cols[1])
                    plt.fill_between(x, y-std, y+std, color="blue", alpha=0.3,
                                     label=f"{cols[1]} ± {cols[2]}")

                plt.xlabel(cols[0])
                plt.ylabel(cols[1])
                plt.title(filename)
                plt.legend()
                plt.tight_layout()

                # Save plot in output folder
                outpath = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}.png")
                plt.savefig(outpath, dpi=150)
                plt.close()
                print(f"Saved plot: {outpath}")

            except Exception as e:
                print(f"Could not process {filename}: {e}")
