## Code to extract and save S.D. and D.L. values from an EMPA summary file

## 1) Separate block

In [None]:
import re
import pandas as pd

import os

file_path = './4-4-23.txt'  # Replace with the actual file path

# Base name used for output files: the input path without a leading "./" and
# without its extension. The previous two chained assignments both started
# from file_path, so the second one discarded the "./" removal done by the
# first; normpath + splitext performs both steps in one expression.
file_name = os.path.splitext(os.path.normpath(file_path))[0]



# Split the summary file into sections and preview the measurement table of
# each section as a DataFrame.
#
# A new section starts at every line beginning with "Unknown Specimen"; any
# lines before the first such line form a leading section of their own.

# Accumulators: finished sections and the section currently being collected
sections = []
current_section = []

# Read the file line by line
with open(file_path, 'r') as file:
    lines = file.readlines()

# Identify and extract sections
for line in lines:
    if re.match(r'^Unknown Specimen', line):
        print("match: ", line)
        # A header line closes the section collected so far (if any)
        if current_section:
            sections.append(current_section)
            current_section = []
    # Every line, header included, is stored stripped in the current section
    current_section.append(line.strip())

# Save the last section
if current_section:
    sections.append(current_section)

# Process each section
for section_content in sections:
    # The desired block runs from the 'Element\tPeak(mm)' header line up to
    # (not including) the 'Element\tf(chi)' header line
    start_index = next((i for i, row in enumerate(section_content) if row.startswith('Element\tPeak(mm)')), None)
    end_index = next((i for i, row in enumerate(section_content) if row.startswith('Element\tf(chi)')), None)
    print("start =", start_index)
    print("end =", end_index)
    # Check if both start and end indices are found
    if start_index is not None and end_index is not None and end_index > start_index:
        # Extract the desired block
        block_content = section_content[start_index:end_index]

        # The header is the first line of the block; an "ID" column is
        # prepended to name the extra leading field of the data rows
        header = block_content[0].split('\t')
        header.insert(0, "ID")

        try:
            # Data rows start right after the header line. This previously
            # started at index 2, which skipped the first data row and
            # disagreed with the S.D. / D.L. extraction cells.
            df = pd.DataFrame([re.split(r'\s+', row.strip()) for row in block_content[1:]], columns=header)

            # Print or further process the DataFrame
            print(df)
        except ValueError as e:
            # Raised by pandas when the rows do not fit the header
            print("ValueError:", e)
            print("Header:", header)
            print("Block content:", block_content)
    else:
        print("Start or end index not found or invalid in the section.")



## 2) S.D. extract

In [None]:
import re
import pandas as pd

#file_path = './test2.txt'  # Replace with the actual file path

# Collect the S.D.(%) value of every element for every section of the file.
#
# Results (used by the save cell):
#   sd_values    -- dict mapping element -> list of S.D.(%) strings
#   comment_list -- Comment value of each section that contributed values

# Initialize variables
sections = []           # (comment, stripped lines) for every section
current_section = []
current_comment = None  # Most recent value seen on a "Comment :" line

# Read the file line by line
with open(file_path, 'r') as file:
    lines = file.readlines()

# Identify and extract sections
for line in lines:
    if re.match(r'^Unknown Specimen', line):
        print("match: ", line)
        # A header line closes the section collected so far (if any)
        if current_section:
            sections.append((current_comment, current_section))
            current_section = []
    # Add line to the current section
    current_section.append(line.strip())

    # A comment line has "Comment :" as its third tab-separated field and the
    # value as the fourth. The raw line is split (not the stripped one), so
    # the value is stripped to drop a trailing newline or padding.
    fields = line.split("\t")
    if len(fields) > 3 and fields[2] == "Comment :":
        current_comment = fields[3].strip()
        print("comm", current_comment)

# Save the last section
if current_section:
    sections.append((current_comment, current_section))

# Initialize a dictionary to store lists of S.D.(%) values for each element
sd_values = {}
# Initialize a list to store Comment values
comment_list = []

# Process each section
for comment, section_content in sections:
    # The desired block runs from the 'Element\tPeak(mm)' header line up to
    # (not including) the 'Element\tf(chi)' header line
    start_index = next((i for i, row in enumerate(section_content) if row.startswith('Element\tPeak(mm)')), None)
    end_index = next((i for i, row in enumerate(section_content) if row.startswith('Element\tf(chi)')), None)
    print("start =", start_index)
    print("end =", end_index)
    # Check if both start and end indices are found
    if start_index is not None and end_index is not None and end_index > start_index:
        # Extract the desired block
        block_content = section_content[start_index:end_index]

        # The header is the first line of the block; an "ID" column is
        # prepended to name the extra leading field of the data rows
        header = block_content[0].split('\t')
        header.insert(0, "ID")

        try:
            # Create a DataFrame for the current block
            df = pd.DataFrame([re.split(r'\s+', row.strip()) for row in block_content[1:]], columns=header)

            # Extract S.D.(%) values for each element in the current block
            sd_column_values = df.set_index('Element')['S.D.(%)'].to_dict()

            # Update the sd_values dictionary with S.D.(%) values for the current block
            for element, sd_value in sd_column_values.items():
                sd_values.setdefault(element, []).append(sd_value)

            # Print or further process the DataFrame
            print(df)

            # Record the comment only once the values were stored, so that
            # comment_list stays aligned with the rows held in sd_values
            comment_list.append(comment)
        except ValueError as e:
            # Raised by pandas when the rows do not fit the header
            print("ValueError:", e)
            print("Header:", header)
            print("Block content:", block_content)
    else:
        print("Start or end index not found or invalid in the section.")

# Print the extracted lists of S.D.(%) values for each element
print("S.D.(%) values:")
for element, sd_value_list in sd_values.items():
    print(f"{element}: {sd_value_list}")


## 3) S.D. save

In [None]:
# Echo the collected S.D.(%) data: the raw dictionary first, then one line
# per element
print("S.D.(%) values:")
print(sd_values)
for element in sd_values:
    print(f"{element}: {sd_values[element]}")

# Build a table with one column per element, put the section comments in
# front as the first column, and write it out tab-separated
output_sd_file_path = 'sd_output.txt'
df_sd = pd.DataFrame(sd_values)
df_sd.insert(loc=0, column='Comment', value=comment_list)
df_sd.to_csv(output_sd_file_path, sep='\t', index=False)
print(f'S.D. values saved to {output_sd_file_path}')

## 4)  D.L. extract

In [None]:
import re
import pandas as pd

#file_path = './test2.txt'  # Replace with the actual file path

# Collect the D.L.(ppm) value of every element for every section of the file.
#
# Results (used by the save cell):
#   dl_values    -- dict mapping element -> list of D.L.(ppm) strings
#   comment_list -- Comment value of each section that contributed values

# Initialize variables
sections = []           # (comment, stripped lines) for every section
current_section = []
current_comment = None  # Most recent value seen on a "Comment :" line

# Read the file line by line
with open(file_path, 'r') as file:
    lines = file.readlines()

# Identify and extract sections
for line in lines:
    if re.match(r'^Unknown Specimen', line):
        print("match: ", line)
        # A header line closes the section collected so far (if any)
        if current_section:
            sections.append((current_comment, current_section))
            current_section = []
    # Add line to the current section
    current_section.append(line.strip())

    # A comment line has "Comment :" as its third tab-separated field and the
    # value as the fourth. The raw line is split (not the stripped one), so
    # the value is stripped to drop a trailing newline or padding.
    fields = line.split("\t")
    if len(fields) > 3 and fields[2] == "Comment :":
        current_comment = fields[3].strip()
        print("comm", current_comment)

# Save the last section
if current_section:
    sections.append((current_comment, current_section))

# Initialize a dictionary to store lists of D.L.(ppm) values for each element
dl_values = {}
# Initialize a list to store Comment values
comment_list = []

# Process each section
for comment, section_content in sections:
    # The desired block runs from the 'Element\tPeak(mm)' header line up to
    # (not including) the 'Element\tf(chi)' header line
    start_index = next((i for i, row in enumerate(section_content) if row.startswith('Element\tPeak(mm)')), None)
    end_index = next((i for i, row in enumerate(section_content) if row.startswith('Element\tf(chi)')), None)
    print("start =", start_index)
    print("end =", end_index)
    # Check if both start and end indices are found
    if start_index is not None and end_index is not None and end_index > start_index:
        # Extract the desired block
        block_content = section_content[start_index:end_index]

        # The header is the first line of the block; an "ID" column is
        # prepended to name the extra leading field of the data rows
        header = block_content[0].split('\t')
        header.insert(0, "ID")

        try:
            # Create a DataFrame for the current block
            df = pd.DataFrame([re.split(r'\s+', row.strip()) for row in block_content[1:]], columns=header)

            # Extract D.L.(ppm) values for each element in the current block
            dl_column_values = df.set_index('Element')['D.L.(ppm)'].to_dict()

            # Update the dl_values dictionary with D.L.(ppm) values for the current block
            for element, dl_value in dl_column_values.items():
                dl_values.setdefault(element, []).append(dl_value)

            # Print or further process the DataFrame
            print(df)

            # Record the comment only once the values were stored, so that
            # comment_list stays aligned with the rows held in dl_values
            comment_list.append(comment)
        except ValueError as e:
            # Raised by pandas when the rows do not fit the header
            print("ValueError:", e)
            print("Header:", header)
            print("Block content:", block_content)
    else:
        print("Start or end index not found or invalid in the section.")

# Print the extracted lists of D.L.(ppm) values for each element
print("D.L.(ppm) values:")
for element, dl_value_list in dl_values.items():
    print(f"{element}: {dl_value_list}")

## 5) D.L. save file

In [None]:
# Echo the collected D.L. data: the raw dictionary first, then one line per
# element
print("D.L. values:")
print(dl_values)
for element in dl_values:
    print(f"{element}: {dl_values[element]}")

# Build a table with one column per element, put the section comments in
# front as the first column, and write it out tab-separated
output_dl_file_path = 'dl_output.txt'
df_dl = pd.DataFrame(dl_values)
df_dl.insert(loc=0, column='Comment', value=comment_list)
df_dl.to_csv(output_dl_file_path, sep='\t', index=False)
print(f'D.L. values saved to {output_dl_file_path}')

## EXTRACT coordinates

In [41]:
# EXTRACT X, Y and Z coordinates
#
# Scans the summary file for comment lines (sample names) and "Stage" lines
# (coordinates) and writes them side by side to <file_name>_coordinate.csv.
import pandas as pd

# Read the whole file; the context manager closes the handle afterwards
# (the handle was previously opened and never closed)
with open(file_path, "r") as file_in2:
    lines = file_in2.readlines()

names = []
Xs = []
Ys = []
Zs = []

for line in lines:
    line_s = line.split()
    # Lines with fewer than five whitespace-separated tokens carry neither a
    # sample name nor coordinates
    if len(line_s) < 5:
        continue
    if line_s[4] == "Comment":
        # The sample name is the seventh token of a comment line
        if len(line_s) > 6:
            names.append(line_s[6])
    elif line_s[0] == "Stage" and len(line_s) > 7:
        # X, Y and Z are tokens 3, 5 and 7; they are appended together so a
        # truncated line can never leave the three lists with unequal lengths
        Xs.append(line_s[3])
        Ys.append(line_s[5])
        Zs.append(line_s[7])

# Report how many entries were collected; the four counts must be equal for
# the DataFrame below to be constructible
print("Sample: ",len(names))
print("X: ", len(Xs))
print("Y: ", len(Ys))
print("Z: ", len(Zs))
out = pd.DataFrame(
    {'Sample': names,
     'X':Xs,
     'Y':Ys,
     'Z':Zs
     })

out.to_csv(file_name+'_coordinate.csv', index=False)

Sample:  16
X:  16
Y:  16
Z:  16


## FINISH