In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os, sys
sys.path.append('../')
import re
import torch
from utils.utils import *
from tqdm import tqdm
# set_seed is not defined in this notebook; it presumably comes from the
# wildcard import of utils.utils above and fixes the random seeds so that
# re-runs are reproducible -- TODO confirm which libraries it seeds.
set_seed(42)

In [None]:
def extract_number(filename):
    """Return the integer index embedded in an ``output_<N>.npy`` file name.

    Parameters
    ----------
    filename : str
        File name (or path) to inspect.

    Returns
    -------
    int or float
        ``N`` as an int when the ``output_<N>.npy`` pattern is found, otherwise
        ``float('inf')`` so that unmatched names sort after every numbered
        file when this function is used as a sort key.
    """
    found = re.search(r'output_(\d+)\.npy', filename)
    return int(found.group(1)) if found else float('inf')

# Load the dataset

In [None]:
# Dataset configuration. The input directory name is built from the constants
# below instead of repeating the numbers, so changing N_atoms updates both the
# input path here and the save directory that is later derived from N_atoms.
N_atoms = 1024
N_steps = 2000000  # number that appears after "steps_" in the directory name
output_dir = f'../data/output_atoms_{N_atoms}_steps_{N_steps}'

In [None]:
def preprocess(output_dir, overwrite_micro=False):
    """Convert the raw per-run files into consolidated ``.npy`` arrays.

    Every sub-directory of ``output_dir`` is treated as one run. The second
    ``_``-separated field of its name is parsed as the integer temperature
    ``T`` of the run. The following files are written into each run directory:

    * ``macro_val.npy`` -- the six ``delta_*`` columns of
      ``chemical_order.csv`` as an array of shape ``(n_rows, 6)``.
    * ``time.npy``      -- the ``time`` column of ``log.csv`` with ``0.0``
      inserted in front as the initial time.
    * ``T_state.npy``   -- int32 array holding ``T`` once per row of
      ``macro_val``.
    * ``step.npy``      -- the ``step`` column of ``chemical_order.csv``.
    * ``micro_val.npy`` -- all ``config_data/*.npy`` arrays stacked in the
      numeric order of their ``output_<N>.npy`` names. It is only built when
      the file does not exist yet or ``overwrite_micro`` is true, because the
      notebook loads it later but it can be expensive to regenerate.

    Parameters
    ----------
    output_dir : str
        Directory containing one sub-directory per run.
    overwrite_micro : bool, optional
        Rebuild ``micro_val.npy`` even if it already exists. Default False.

    Raises
    ------
    IndexError, ValueError
        If a run directory name has no integer second ``_``-separated field.
    FileNotFoundError
        If an expected input file or directory of a run is missing.
    """
    macro_columns = [
        'delta_NbNb',
        'delta_NbMo',
        'delta_NbTa',
        'delta_MoMo',
        'delta_MoTa',
        'delta_TaTa',
    ]

    # Only directories can be runs; stray files such as .DS_Store would
    # otherwise crash the name parsing below. Sorting gives a reproducible
    # processing order (os.listdir order is arbitrary).
    run_folders = sorted(
        name for name in os.listdir(output_dir)
        if os.path.isdir(os.path.join(output_dir, name))
    )

    for folder in tqdm(run_folders):
        T = int(folder.split('_')[1])
        folder_path = os.path.join(output_dir, folder)
        df = pd.read_csv(os.path.join(folder_path, 'chemical_order.csv'))

        # ========= macroscopic data =========
        macro_val = np.array([df[col].values for col in macro_columns]).T

        # ========= T,steps,time data =========
        df_time = pd.read_csv(os.path.join(folder_path, 'log.csv'))
        time = np.insert(df_time['time'].values, 0, 0.0)  # Insert initial time step
        T_state = np.full(macro_val.shape[0], T, dtype=np.int32)
        step = df['step'].values

        # ========= microscopic data =========
        # Built on demand: a later cell loads micro_val.npy, so it must exist
        # after preprocessing, but an existing file is reused unless asked.
        micro_path = os.path.join(folder_path, 'micro_val.npy')
        if overwrite_micro or not os.path.exists(micro_path):
            config_path = os.path.join(folder_path, 'config_data')
            config_files = sorted(
                (f for f in os.listdir(config_path) if f.endswith('.npy')),
                key=extract_number,
            )
            micro_val = np.array(
                [np.load(os.path.join(config_path, f)) for f in config_files]
            )
            np.save(micro_path, micro_val)

        # ========= save data =========
        np.save(os.path.join(folder_path, 'macro_val.npy'), macro_val)
        np.save(os.path.join(folder_path, 'time.npy'), time)
        np.save(os.path.join(folder_path, 'T_state.npy'), T_state)
        np.save(os.path.join(folder_path, 'step.npy'), step)

In [None]:
# Write the per-run .npy arrays into each run directory of the dataset.
preprocess(output_dir=output_dir)

In [None]:

# Collect the per-run arrays written by preprocess(), one list entry per run.
micro_state = []
macro_state = []
T_state = []
time_state = []
step_state = []

# os.listdir() returns entries in arbitrary order, so the runs are sorted to
# give the stacked arrays the same row order on every machine and re-run.
# Plain files (e.g. .DS_Store) are skipped: only run directories hold arrays.
run_folders = sorted(
    name for name in os.listdir(output_dir)
    if os.path.isdir(os.path.join(output_dir, name))
)

for folder in tqdm(run_folders):
    folder_path = os.path.join(output_dir, folder)

    micro_state.append(np.load(os.path.join(folder_path, 'micro_val.npy')))
    macro_state.append(np.load(os.path.join(folder_path, 'macro_val.npy')))
    time_state.append(np.load(os.path.join(folder_path, 'time.npy')))
    T_state.append(np.load(os.path.join(folder_path, 'T_state.npy')))
    step_state.append(np.load(os.path.join(folder_path, 'step.npy')))

# Stack along a new leading "run" axis. np.stack raises ValueError unless
# every run contributed an array of the same shape.
micro_state = np.stack(micro_state, axis=0)
macro_state = np.stack(macro_state, axis=0)
time_state = np.stack(time_state, axis=0)
T_state = np.stack(T_state, axis=0)
step_state = np.stack(step_state, axis=0)

In [None]:
# Shapes of the stacked arrays (micro_state is left out of this summary).
tuple(arr.shape for arr in (macro_state, T_state, time_state, step_state))

In [None]:
# Map each temperature (as a plain Python int) to the largest final time
# reached by any run at that temperature. Column 0 of T_state identifies the
# temperature of a run; column -1 of time_state is its last recorded time.
# A comprehension is used so that no loop variable named T is left behind to
# overwrite the notebook-level T used by the plotting cell.
T_unique = np.unique(T_state[:, 0])
final_time = {
    temp.item(): time_state[T_state[:, 0] == temp, -1].max().item()
    for temp in T_unique
}

In [None]:
def scale_function(T, final_times=None):
    """Return the time-scale factor ``1 / final_time`` for temperature(s) ``T``.

    Parameters
    ----------
    T : scalar or array-like
        Temperature(s). Each value is truncated with ``int()`` before being
        used as a lookup key.
    final_times : dict, optional
        Mapping ``{temperature: final time}``. When omitted, the
        notebook-level ``final_time`` dictionary is used.

    Returns
    -------
    float or numpy.ndarray
        A Python float for scalar input, otherwise a float array with the
        same shape as ``T``.

    Raises
    ------
    KeyError
        If a temperature is not present in the lookup table.
    """
    if final_times is None:
        final_times = final_time

    # Convert T to numpy array if it isn't already
    T = np.asarray(T)

    # Handle scalar case
    if T.ndim == 0:
        return 1 / final_times[int(T.item())]

    # Handle array case: look each distinct temperature up once and scatter
    # the factors back, instead of one Python-level dict lookup per element.
    # The input is flattened first so `inverse` is 1-D in every NumPy version.
    temps, inverse = np.unique(T.ravel(), return_inverse=True)
    factors = np.array([1 / final_times[int(t)] for t in temps], dtype=float)
    return factors[inverse].reshape(T.shape)

In [None]:
# Multiply every temperature's final time by its own scale factor. Because
# scale_function(T) is 1 / final_time[T], each value should come out as ~1,
# which makes this a sanity check of the scaling.
final_time_scaled = {
    temp: scale_function(temp) * t_end for temp, t_end in final_time.items()
}

In [None]:
# Scatter the scaled final time of each temperature.
# NOTE(review): the plotted values are the *scaled* final times, so the "(ps)"
# unit in the y label may not apply -- confirm.
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(
    list(final_time_scaled.keys()),
    list(final_time_scaled.values()),
    label="data",
    color="C0",
)
ax.set_xlabel("Temperature (K)")
ax.set_ylabel("Final time (ps)")
ax.legend()
ax.grid(True, which="both", ls="--", alpha=0.5)
plt.show()

In [None]:
# Plot the six macroscopic columns against time for every run at temperature T.
T = 800
indices = np.where(T_state[:, 0] == T)[0]
if indices.size == 0:
    # np.max below would otherwise fail with an unhelpful "zero-size array" error.
    raise ValueError(f'No runs found for T = {T}')

fig = plt.figure(figsize=(40, 6))
for i in range(6):
    axes = fig.add_subplot(1, 6, i+1)
    for j in indices:
        axes.plot(time_state[j], macro_state[j, :, i])
    axes.set_xlabel('Time')  # the x data is time_state, not the step counter
    axes.set_ylabel(f'Delta_{i}')
    # Dashed zero line spanning the longest run at this temperature.
    axes.hlines(0, 0, np.max(time_state[indices]), colors='black', linestyles='dashed', linewidth=1)
    axes.set_ylim(-3.5, 3.5)
    # Explicit Axes calls instead of the pyplot "current axes" state machine.
    axes.tick_params(axis='y', labelsize=20)
    axes.grid()
# Figure-level title: plt.title() here would only label the last panel.
fig.suptitle(f'T = {T} K')
fig.tight_layout()

In [None]:
# Look up one scale factor per entry of T_state, then rescale the time axis
# element-wise with it.
scale_value = scale_function(T_state)
time_state_scaled = np.multiply(time_state, scale_value)

In [None]:
# Rebind every array to a torch tensor copy: float32 for the macro/time
# arrays, int32 for the micro/T/step arrays.
macro_state, time_state, time_state_scaled = (
    torch.tensor(arr, dtype=torch.float32)
    for arr in (macro_state, time_state, time_state_scaled)
)
micro_state, T_state, step_state = (
    torch.tensor(arr, dtype=torch.int32)
    for arr in (micro_state, T_state, step_state)
)

In [None]:
# save the data
save_dir = f'../data/atoms_{N_atoms}'
# exist_ok=True creates the directory only when it is missing, without a
# separate check-then-create step.
os.makedirs(save_dir, exist_ok=True)

# ========= save training data =========
# Each tensor is written to <save_dir>/<name>.pt.
tensors_to_save = {
    'micro_state': micro_state,
    'macro_state': macro_state,
    'T_state': T_state,
    'time_state': time_state,
    'step_state': step_state,
    'time_state_scaled': time_state_scaled,
}
for name, tensor in tensors_to_save.items():
    torch.save(tensor, f'{save_dir}/{name}.pt')
