In [13]:
import pandas as pd
import os
import re

In [14]:
def extract_temperature(filename):
    """
    Extract the numerical part of the temperature from the filename.
    Example: 'polishedYellow_30-3000cm-1_100%_532exc_15s_6K_point2' -> 6
    """
    match = re.search(r'(\d+)K', filename)
    return int(match.group(1)) if match else None

def extract_degree(filename):
    match = re.search(r'(\d+)_degree_polarizer', filename)
    return int(match.group(1)) if match else None

def determine_test_type(filename):
    if 'cm-1' in filename.lower() and '30-2200cm-1' in filename:
        return 'raman'
    elif 'nm' in filename.lower() and '550-630nm' in filename:
        return 'pl'
    else:
        return 'unknown'


In [11]:

# Your specified file path
directory = r'C:\Users\Q9gJYx\OneDrive - Duke University\Desktop\NaYb_polo_2m'

data_raman = {}
data_pl = {}

for filename in os.listdir(directory):
    if filename.endswith('.txt'):
        test_type = determine_test_type(filename)
        if test_type == 'unknown':
            continue

        temperature = extract_temperature(filename)
        file_path = os.path.join(directory, filename)

        data = pd.read_csv(file_path, sep='\t', header=None, names=['X', 'Y'])
        data.set_index('X', inplace=True)

        if test_type == 'raman':
            if temperature not in data_raman:
                data_raman[temperature] = []
            data_raman[temperature].append(data['Y'])
        elif test_type == 'pl':
            if temperature not in data_pl:
                data_pl[temperature] = []
            data_pl[temperature].append(data['Y'])

# Combine and average data with the same temperature
combined_data_raman = pd.DataFrame()
combined_data_pl = pd.DataFrame()

for temp, y_values in data_raman.items():
    combined_data_raman[temp] = pd.concat(y_values, axis=1).mean(axis=1)

for temp, y_values in data_pl.items():
    combined_data_pl[temp] = pd.concat(y_values, axis=1).mean(axis=1)

# Convert all column names to strings and then check if they are digits
combined_data_raman.columns = [int(str(col)) if str(col).isdigit() else col for col in combined_data_raman.columns]
combined_data_pl.columns = [int(str(col)) if str(col).isdigit() else col for col in combined_data_pl.columns]

# Sort the DataFrames by column names (which are now temperatures)
combined_data_raman = combined_data_raman.reindex(sorted(combined_data_raman.columns), axis=1)
combined_data_pl = combined_data_pl.reindex(sorted(combined_data_pl.columns), axis=1)

print("Sorted Raman Test Data:")
print(combined_data_raman)
print("\nSorted PL Test Data:")
print(combined_data_pl)

# After processing and printing the data
# Specify the Excel file name
excel_file_name = 'test_data_output1.xlsx'

# Create a Pandas Excel writer using openpyxl as the engine
with pd.ExcelWriter(excel_file_name, engine='openpyxl') as writer:
    # Write each DataFrame to a different worksheet
    combined_data_raman.to_excel(writer, sheet_name='Raman Test Data')
    combined_data_pl.to_excel(writer, sheet_name='PL Test Data')

print(f"Data exported to {excel_file_name}")

Sorted Raman Test Data:
Empty DataFrame
Columns: []
Index: []

Sorted PL Test Data:
Empty DataFrame
Columns: []
Index: []
Data exported to test_data_output1.xlsx


In [15]:

# Your specified file path
directory = r'C:\Users\Q9gJYx\OneDrive - Duke University\Desktop\NaYb_polo_2m'

data_raman = {}
data_pl = {}

for filename in os.listdir(directory):
    if filename.endswith('.txt'):
        
        degree = extract_degree(filename)
        file_path = os.path.join(directory, filename)

        data = pd.read_csv(file_path, sep='\t', header=None, names=['X', 'Y'])
        data.set_index('X', inplace=True)
        
        if degree not in data_raman:
            data_raman[degree] = []
        data_raman[degree].append(data['Y'])

# Combine and average data with the same temperature
combined_data_raman = pd.DataFrame()

for degree, y_values in data_raman.items():
    combined_data_raman[degree] = pd.concat(y_values, axis=1).mean(axis=1)

# Convert all column names to strings and then check if they are digits
combined_data_raman.columns = [int(str(col)) if str(col).isdigit() else col for col in combined_data_raman.columns]

# Sort the DataFrames by column names (which are now temperatures)
combined_data_raman = combined_data_raman.reindex(sorted(combined_data_raman.columns), axis=1)

print("Sorted Raman Test Data:")
print(combined_data_raman)

# After processing and printing the data
# Specify the Excel file name
excel_file_name = 'test_data_output1.xlsx'

# Create a Pandas Excel writer using openpyxl as the engine
with pd.ExcelWriter(excel_file_name, engine='openpyxl') as writer:
    # Write each DataFrame to a different worksheet
    combined_data_raman.to_excel(writer, sheet_name='Raman Test Data')

print(f"Data exported to {excel_file_name}")

Sorted Raman Test Data:
            NaN     0.0     20.0    40.0    60.0    80.0    100.0   120.0  \
X                                                                           
30.1612     660.0   686.0   666.0   610.0  1230.0   778.0   818.0  1022.0   
30.6877     860.0   920.0   868.0   788.0  1566.0  1010.0  1058.0  1364.0   
31.2163    1146.0  1184.0  1112.0  1028.0  2050.0  1370.0  1396.0  1938.0   
31.7428    1488.0  1554.0  1472.0  1376.0  2716.0  1824.0  1872.0  2452.0   
32.2692    1970.0  2024.0  1898.0  1736.0  3460.0  2378.0  2524.0  3176.0   
...           ...     ...     ...     ...     ...     ...     ...     ...   
1998.3600    86.0    70.0    58.0    62.0    68.0    72.0    70.0    60.0   
1998.7600    80.0    64.0    64.0    56.0    72.0    64.0    56.0    66.0   
1999.1600    76.0    78.0    52.0    62.0    74.0    62.0    62.0    62.0   
1999.5600    76.0    64.0    60.0    62.0    76.0    68.0    60.0    72.0   
1999.9600    78.0    68.0    64.0    60.0    62.0   

In [16]:
# Replace 'your_file.xlsx' with the path to your Excel file
input_file = r'test_data_output1.xlsx'
output_file = 'transposed_file.xlsx'

# Read the Excel file
df = pd.read_excel(input_file)

# Transpose the DataFrame
transposed_df = df.T

# Write the transposed DataFrame to a new Excel file
# Note: You need to reset the index if you want to keep the row labels as the first column in the output file.
transposed_df.reset_index().to_excel(output_file, index=False)

In [17]:
# Replace 'your_file.xlsx' with the path to your Excel file
input_file = r'transposed_file.xlsx'
output_file = 'processed_file.xlsx'

# Read the Excel file
df = pd.read_excel(input_file)

# Copy the first row (background)
background_row = df.iloc[0]

# Subtract the background row from all rows, including itself
# If you want to keep the background row unchanged, use df.iloc[1:] in the loop instead
for index, row in df.iterrows():
    df.iloc[index] = row - background_row

# If you don't want the first row to be the subtracted background row itself, you can start from the second row like this:
# df = df.iloc[1:].subtract(background_row, axis='columns')

# Write the processed DataFrame to a new Excel file
df.to_excel(output_file, index=False)


TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [18]:
# Replace 'your_file.xlsx' with the path to your Excel file
input_file = r'processed_file.xlsx'
output_file = 'resumed_file.xlsx'

# Read the Excel fileddddddddddddddddddd
df = pd.read_excel(input_file)

# Transpose the DataFrame
transposed_df = df.T

# Write the transposed DataFrame to a new Excel file
# Note: You need to reset the index if you want to keep the row labels as the first column in the output file.
transposed_df.reset_index().to_excel(output_file, index=False)