# In [None]:
import xml.etree.ElementTree as ET
import csv
import numpy as np
import os

def xml_to_csv(xml_file_path, csv_file_path):
    """Write the attributes of the root's ``{Default}default`` children to CSV.

    The column names are the attribute keys of the first matching element;
    every element then contributes one row, with '' for any attribute it
    lacks.  The same table (header row first) is returned as a NumPy array.

    When no element matches, an empty CSV file is still created and an
    empty array is returned.
    """
    records = ET.parse(xml_file_path).getroot().findall('{Default}default')

    # Build the whole table in memory first: header row, then data rows.
    table = []
    if records:
        column_names = list(records[0].attrib.keys())
        table.append(column_names)
        table.extend(
            [record.attrib.get(name, '') for name in column_names]
            for record in records
        )

    # newline='' lets the csv module control the line terminators itself.
    with open(csv_file_path, mode='w', newline='') as out:
        csv.writer(out).writerows(table)

    return np.array(table)

def _find_column(header, name, label, not_found_error=None):
    """Return the index of the single cell in *header* equal to *name*.

    Exits if the name occurs more than once.  When the name is absent the
    call exits with *not_found_error* if one is given; otherwise it prints
    a notice and returns None so the caller can treat the column as optional.
    """
    matches = np.where(header == name)[0]
    if matches.size == 1:
        return matches[0]
    if matches.size > 1:
        raise SystemExit(f"more than one {label} column found")
    if not_found_error is not None:
        raise SystemExit(not_found_error)
    print(f"{label} column not found")
    return None


def _bcc_runs(is_bcc):
    """Return (start_indices, end_indices) of the contiguous True runs."""
    # +1 / -1 in the difference mark False->True / True->False transitions.
    changes = np.diff(is_bcc.astype(int))

    # diff[i] describes the step from i to i+1, hence the +1 for starts.
    start_indices = np.where(changes == 1)[0] + 1
    end_indices = np.where(changes == -1)[0]

    # Runs touching either edge of the array have no transition to detect.
    if is_bcc[0]:
        start_indices = np.insert(start_indices, 0, 0)
    if is_bcc[-1]:
        end_indices = np.append(end_indices, len(is_bcc) - 1)

    if len(start_indices) != len(end_indices):
        raise SystemExit("Number of start and end indices do not match")
    return start_indices, end_indices


def _concentrations(data, row, element_columns):
    """Return the element fractions of *row*; a missing column counts as 0."""
    return [0 if col is None else float(data[row, col]) for col in element_columns]


def getBCCclusters(data):
    """Find the contiguous runs of rows whose phase_name is 'Bcc'.

    *data* is a 2-D array of strings whose first row holds the column names
    (the layout returned by xml_to_csv).  The 'phase_name' and 'T' columns
    are mandatory; the 'x_x0028_<El>_x0029_' columns for Cr, Ti, V and W are
    optional and count as 0 when absent.

    For every run one row

        [number of runs in this table, Cr, Ti, V, W, low T, high T]

    is produced, where the element values are those of the run and the
    temperatures are the smaller/larger of the values at the run's first and
    last row.  The rows are returned as a list.  For backward compatibility
    each row is also appended to a module-level ``clusters`` object when one
    exists; the function no longer fails when it does not.

    Raises SystemExit (with a message) when the table is empty, a mandatory
    column is missing, a column name is duplicated, or the element values
    differ between the first and last row of a run.
    """
    data = np.asarray(data)
    if data.ndim != 2 or data.shape[0] == 0:
        # e.g. the empty array produced for an XML file without any records
        raise SystemExit("input table is empty or not two-dimensional")

    header = data[0]
    phase_column = _find_column(
        header, 'phase_name', 'phase_name', "phase_name column not found")
    temperature_column = _find_column(
        header, 'T', 'temperature', "Temperature column not found")
    element_columns = [
        _find_column(header, f'x_x0028_{element}_x0029_', element)
        for element in ('Cr', 'Ti', 'V', 'W')
    ]

    # The header cell itself never equals 'Bcc', so row 0 is never in a run.
    phases = data[:, phase_column]
    start_indices, end_indices = _bcc_runs(phases == 'Bcc')
    num_clusters = len(start_indices)

    # Legacy accumulator: the driver script defines a global list `clusters`.
    sink = globals().get('clusters')

    found = []
    for start, end in zip(start_indices, end_indices):
        at_start = _concentrations(data, start, element_columns)
        at_end = _concentrations(data, end, element_columns)

        if at_start != at_end:
            print(*at_start, data[start, temperature_column])
            print(*at_end, data[end, temperature_column])
            raise SystemExit("Element concentrations are not constant in the BCC cluster")

        # Only the end points are inspected; the order of T within the run
        # is not assumed, so take min/max of the two.
        t_first = float(data[start, temperature_column])
        t_last = float(data[end, temperature_column])
        row = [num_clusters, *at_start, min(t_first, t_last), max(t_first, t_last)]

        found.append(row)
        if sink is not None:
            sink.append(row)

    return found

def getTableFiles(file_path):
    """Return the lines of the UTF-8 text file *file_path* as a list.

    Leading and trailing whitespace (including the newline) is stripped
    from every line; blank lines are kept as empty strings.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return [raw_line.strip() for raw_line in handle]

if __name__ == '__main__':

    # Module-level accumulator: getBCCclusters() appends one row per BCC
    # cluster to a global with exactly this name, so it must be defined
    # here before the first call.
    clusters = []

    fileNames = getTableFiles('table_list.txt')

    for xml_file_path in fileNames:
        # Blank lines in the list file come back as empty strings; there is
        # nothing to parse for them.
        if not xml_file_path:
            continue

        # Derive the CSV name by swapping only a trailing '.table' suffix.
        # Any other name gets '.csv' appended, so the output path can never
        # coincide with (and overwrite) the input file.
        base, ext = os.path.splitext(xml_file_path)
        csv_file_path = (base if ext == '.table' else xml_file_path) + '.csv'

        # Convert the XML table to CSV, then collect its BCC clusters.
        data_array = xml_to_csv(xml_file_path, csv_file_path)
        getBCCclusters(data_array)

    clusters = np.array(clusters)
    np.savetxt('clusters.csv',clusters,delimiter=',',header='# clusters , Cr [%] , Ti [%] , V [%] , W [%] , low_temperature [C] , high_temperature [C]',comments='')
    
