# In [10]:
# Updated function to process each file for horizontal concatenation and sort by temperature
import os
import pandas as pd

# Name of the combined CSV file written into the output directory.
out_file_name = "15dC6.csv"
# Directory containing the input .txt files (set this to the correct folder path).
input_path = "data/20221208 15dC6/"
# Mirror the input folder under "result/". Only the first "data" is replaced so
# that a sub-folder whose own name contains "data" is left untouched.
output_path = input_path.replace("data", "result", 1)
os.makedirs(output_path, exist_ok=True)

def process_and_sort_file_horizontal(file_path):
    """Load one spectrum file and label its columns with the file's metadata.

    The file name is expected to look like ``"<temperature> <uv_status>.txt"``
    (for example ``"30 UV.txt"``). The file is read as tab-separated text
    without a header row, and its two columns are renamed to
    ``"<temperature>_<uv_status>_Wavelength"`` and
    ``"<temperature>_<uv_status>_Absorbance"``.

    Args:
        file_path: Path to the tab-separated text file.

    Returns:
        A ``(data, numeric_temp)`` tuple. ``data`` is the relabelled
        DataFrame. ``numeric_temp`` is the temperature as an ``int`` (or a
        ``float`` for decimal values such as ``"25.5"``), or ``None`` when the
        temperature part of the name is not a finite number.

    Raises:
        ValueError: If the file name does not contain a temperature and a UV
            status separated by whitespace, or (raised by pandas) if the file
            does not have exactly two columns.
    """
    import math  # local import: only needed for the finite-number check below

    # Strip only the real extension; cutting at the first dot would truncate
    # decimal temperatures such as "25.5 UV.txt".
    file_name = os.path.basename(file_path)
    stem = os.path.splitext(file_name)[0]

    # Split on any run of whitespace, at most once, so repeated spaces or a
    # multi-word status do not break the two-value unpacking.
    parts = stem.strip().split(maxsplit=1)
    if len(parts) != 2:
        raise ValueError(
            f"Cannot parse '<temperature> <uv_status>' from file name {file_name!r}"
        )
    temp, uv_status = parts

    # Reading the data from the file
    data = pd.read_csv(file_path, sep="\t", header=None)

    # Creating a new header with temperature and UV status
    data.columns = [f"{temp}_{uv_status}_{col}" for col in ['Wavelength', 'Absorbance']]

    # Extracting temperature as a number: integers stay integers, decimal
    # values fall back to float, anything else is reported as None.
    try:
        numeric_temp = int(temp)
    except ValueError:
        try:
            numeric_temp = float(temp)
        except ValueError:
            numeric_temp = None
        else:
            # "nan" / "inf" parse as floats but are useless as sort keys.
            if not math.isfinite(numeric_temp):
                numeric_temp = None

    return data, numeric_temp

# List all txt files in the directory. os.listdir() returns entries in
# arbitrary order, so the names are sorted to make every run reproducible.
files = [os.path.join(input_path, f) for f in sorted(os.listdir(input_path)) if f.endswith('.txt')]
if not files:
    # Fail with a clear message instead of an IndexError further down.
    raise FileNotFoundError(f"No .txt files found in {input_path!r}")

# Process each file and collect the (data, temperature) pairs for horizontal concatenation
all_data_processed = [process_and_sort_file_horizontal(file) for file in files]

# Sorting data by temperature in descending order. A temperature of None
# (file name could not be parsed) cannot be compared with numbers, so the key
# puts those entries after all numeric ones; among themselves they keep their
# file-name order because sorted() is stable.
all_data_sorted = sorted(
    all_data_processed,
    key=lambda item: (item[1] is not None, item[1] if item[1] is not None else 0),
    reverse=True,
)

# Concatenating the data horizontally (one Wavelength/Absorbance pair per file)
all_data_horizontal_sorted = pd.concat([data for data, temp in all_data_sorted], axis=1)

# Saving the combined and sorted data to a new CSV file
all_data_horizontal_sorted.to_csv(output_path+out_file_name, index=False)

# Drop every wavelength column except the one belonging to the highest temperature.
# The combined table is read back from the CSV file written above.
sorted_horizontal_data = pd.read_csv(output_path+out_file_name)

# The table was written in descending temperature order, so its first column
# is assumed to be the wavelength axis of the highest-temperature dataset.
highest_temp_column = sorted_horizontal_data.columns[0]

# Keep all non-wavelength columns plus that single wavelength column.
columns_to_keep = []
for col in sorted_horizontal_data.columns:
    if col == highest_temp_column or not col.endswith('Wavelength'):
        columns_to_keep.append(col)
reduced_data = sorted_horizontal_data[columns_to_keep]

# NOTE(review): this path is not used below; the reduced table is written
# next to the combined CSV instead. Confirm nothing else relies on it.
output_csv_reduced_path = "/mnt/data/combined_spectral_data_reduced.csv"

# Saving the reduced data to a new CSV file
reduced_data.to_csv(output_path+"temp_col_reduced_"+out_file_name, index=False)


