In [None]:
import pandas as pd
import tkinter as tk
from tkinter import filedialog, messagebox
from openpyxl import load_workbook

# Function to apply rules and save the file
def _apply_rules(df):
    """Apply the fixed column rules to *df* in place.

    Returns the list of column names the rules touch, so the caller knows
    which columns have to be written back to the worksheet.
    """
    # Rule 1: "Carcass preservation method" is always "N/A"
    df['Carcass preservation method'] = 'N/A'

    # Rule 2: "Project" is always "NBISC-BTSC"
    df['Project'] = 'NBISC-BTSC'

    # Rule 3: "Radiation beam type" - missing values and the placeholder
    # texts "=NA()", "#N/A", "NaN" and "" all become "sham". The explicit
    # isna() mask avoids depending on how replace() matches an NA dict key.
    beam = df['Radiation beam type']
    no_beam = beam.isna() | beam.isin(['=NA()', '#N/A', 'NaN', ''])
    df['Radiation beam type'] = beam.mask(no_beam, 'sham')

    # Rule 5: "Sample storage temperature" - "Irrelevant" becomes "N/A"
    df['Sample storage temperature'] = (
        df['Sample storage temperature'].replace('Irrelevant', 'N/A')
    )

    return [
        'Carcass preservation method',
        'Project',
        'Radiation beam type',
        'Sample storage temperature',
    ]


def process_file(input_file, output_file):
    """Apply the column rules to an Excel file and save the result.

    The first two rows of the sheet are treated as metadata, row 3 as the
    header and row 4 onwards as data. The original workbook is reloaded and
    only the cells of the rule columns are overwritten, so the metadata rows
    and every other column keep their original values and cell types.

    Args:
        input_file: Path of the workbook to read.
        output_file: Path the modified workbook is saved to.

    Any exception is reported to the user in an error dialog rather than
    propagated; on success an information dialog is shown.
    """
    try:
        # Load the Excel file, skipping the first two rows (metadata) and using row 3 as the header
        df = pd.read_excel(input_file, skiprows=2, header=0, dtype=str, engine='openpyxl')

        # Debug: Print the first few rows to verify the structure
        print("First few rows of the file (after skipping metadata and using row 3 as header):")
        print(df.head())

        edited_columns = _apply_rules(df)

        # Load the original workbook to preserve metadata. read_excel reads
        # the first worksheet by default, so write to that same sheet rather
        # than whichever sheet happens to be active.
        workbook = load_workbook(input_file)
        sheet = workbook.worksheets[0]

        # Write back only the rule columns, starting at row 4. The frame was
        # read with dtype=str, so rewriting untouched columns would turn
        # their numbers, dates and formulas into plain text.
        for name in edited_columns:
            column_number = df.columns.get_loc(name) + 1
            for row_number, value in enumerate(df[name], start=4):
                if pd.isna(value):
                    # Leave the original cell untouched instead of
                    # overwriting it with a float NaN.
                    continue
                sheet.cell(row=row_number, column=column_number, value=value)

        # Save the modified workbook
        workbook.save(output_file)
        messagebox.showinfo("Success", f"File processed and saved as {output_file}")
    except Exception as e:
        # Top-level GUI boundary: surface any failure to the user.
        messagebox.showerror("Error", f"An error occurred: {e}")

# Function to handle file selection and processing
def select_file():
    """Ask for an input workbook and an output path, then process the file.

    Opens an "open file" dialog followed by a "save as" dialog. If the user
    cancels either dialog, the function returns without doing anything.
    Otherwise the two chosen paths are handed to ``process_file``.
    """
    # Prompt user to select the input Excel file. Only .xlsx is offered:
    # process_file reads and rewrites the workbook with openpyxl, which
    # cannot open legacy .xls files, so offering them could only end in an
    # error dialog.
    input_file = filedialog.askopenfilename(
        title="Select Input Excel File",
        filetypes=[("Excel files", "*.xlsx")]
    )
    if not input_file:
        return

    # Prompt user to specify the output file name and location
    output_file = filedialog.asksaveasfilename(
        title="Save As",
        defaultextension=".xlsx",
        filetypes=[("Excel files", "*.xlsx")]
    )
    if not output_file:
        return

    # Process the file
    process_file(input_file, output_file)

# Build the single-window interface
root = tk.Tk()
root.title("Excel File Processor")

# One button starts the whole workflow: pick input, pick output, process
select_button = tk.Button(
    master=root,
    text="Select Excel File and Process",
    command=select_file,
)
select_button.pack(pady=20)

# Hand control to Tk's event loop; this call returns once the window is closed
root.mainloop()

2025-02-13 14:17:05.127 python[8764:2643684] +[IMKClient subclass]: chose IMKClient_Modern
2025-02-13 14:17:05.127 python[8764:2643684] +[IMKInputSession subclass]: chose IMKInputSession_Modern
2025-02-13 14:17:06.933 python[8764:2643684] The class 'NSOpenPanel' overrides the method identifier.  This method is implemented by class 'NSWindow'
2025-02-13 14:17:37.040 python[8764:2643684] The class 'NSSavePanel' overrides the method identifier.  This method is implemented by class 'NSWindow'


First few rows of the file (after skipping metadata and using row 3 as header):
      Project Carcass preservation method Sample storage temperature  \
0  NBISC-BTSC                         NaN                 Irrelevant   
1  NBISC-BTSC                         NaN                 Irrelevant   
2  NBISC-BTSC                         NaN                 Irrelevant   
3  NBISC-BTSC                         NaN                 Irrelevant   
4  NBISC-BTSC                         NaN                 Irrelevant   

  Radiation beam type Unnamed: 4  
0                 NaN        NaN  
1                 NaN        NaN  
2                 NaN        NaN  
3                 NaN        NaN  
4                 NaN        NaN  
