In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/newdata

/content/drive/MyDrive/newdata


In [3]:
import sys
from pathlib import Path
from tqdm import tqdm
from pandas import HDFStore
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, RepeatVector




In [4]:
directory_path = '/content/drive/MyDrive/newdata'
file_list = os.listdir(directory_path)
file_list

['mod01_out02_doe05_run001.akh5',
 'mod01_out02_doe05_run002.akh5',
 'mod01_out02_doe05_run003.akh5',
 'mod01_out02_doe05_run004.akh5',
 'mod01_out02_doe05_run005.akh5',
 'mod01_out02_doe05_run006.akh5',
 'mod01_out02_doe05_run007.akh5',
 'mod01_out02_doe05_run008.akh5',
 'mod01_out02_doe05_run009 (1).akh5',
 'mod01_out02_doe05_run010.akh5',
 'mod01_out02_doe05_run011.akh5',
 'mod01_out02_doe05_run012.akh5',
 'mod01_out02_doe05_run013.akh5',
 'mod01_out02_doe05_run014.akh5',
 'mod01_out02_doe05_run015.akh5',
 'mod01_out02_doe05_run016.akh5',
 'mod01_out02_doe05_run017.akh5',
 'mod01_out02_doe05_run018.akh5',
 'mod01_out02_doe05_run019.akh5',
 'mod01_out02_doe05_run020.akh5',
 'mod01_out02_doe05_run021.akh5',
 'mod01_out02_doe05_run022.akh5',
 'mod01_out02_doe05_run023.akh5',
 'mod01_out02_doe05_run024.akh5',
 'mod01_out02_doe05_run025.akh5',
 'mod01_out02_doe05_run026.akh5',
 'mod01_out02_doe05_run027.akh5',
 'mod01_out02_doe05_run028.akh5',
 'mod01_out02_doe05_run029.akh5',
 'mod01_ou

In [5]:
def generate_data(obj):
    mass = obj.data['mass']
    shell_thickness = obj.data['SHELL_THICKNESS']
    data_y = obj.data['NODE_COOR_Z']
    y_coordinates = data_y.iloc[:1, :]
    output_sequence = data_y.diff(axis=0).iloc[1:, :]
    return mass, shell_thickness, y_coordinates, output_sequence



In [6]:
def preprocess_feature_engineering(mass, shell_thickness, y_coordinates, output_sequence):

    scaler_mass = StandardScaler()
    scaler_shell_thickness = StandardScaler()
    scaler_y_coordinates = StandardScaler()

    mass_scaled = scaler_mass.fit_transform(mass.values.reshape(-1, 1))
    shell_thickness_scaled = scaler_shell_thickness.fit_transform(shell_thickness.values.reshape(-1, 1))
    y_coordinates_scaled = scaler_y_coordinates.fit_transform(y_coordinates)


    scaler_minmax = MinMaxScaler()
    y_coordinates_minmax = scaler_minmax.fit_transform(y_coordinates)



    return mass_scaled, shell_thickness_scaled, y_coordinates_scaled, output_sequence


In [7]:
from pathlib import Path
import shutil
import h5py
import numpy as np

import pandas as pd
from pandas import Series as Ds
from pandas import DataFrame as Df

import plotly.graph_objects as go
import plotly.express as px



def make_submission_folder(submission_template_folder=None, model_name=None):

    src = str(submission_template_folder)
    dst = Path(src).parent.joinpath(model_name)
    shutil.copytree(src, dst, dirs_exist_ok=True)
    return



class AkH5file():
    ''' AKH5 file is a hdf5 file containing node coordinates,
    element connectivities and attributes as function of time.
    This class provides methods to read and update coordinates of the file.
    '''
    _fig_height = 400
    _fig_width  = 400


    def __init__(self,file_path) -> None:
        self.root_name = 'All_Nodes'
        self.file_path = file_path
        self.file_name = Path(file_path).name

        self._open_file()
        self._read_all_datasets()
        self._close_file()


    def _open_file(self,mode='r'):
        self._h5file = h5py.File(self.file_path, mode=mode)
        self._root   = self._h5file[self.root_name]


    def _close_file(self):
        self._h5file.close()


    def _read_all_datasets(self):
        ''' Reading all data sets contained under All_Nodes group'''
        dataset_names = list(self._h5file[self.root_name].keys())
        self.data={}
        for name in dataset_names:
            self.data[name]=Df(self._root[name])


    @property
    def summary(self):
        summary = Ds(dtype=object)
        for name, dset in self.data.items():
            summary[name] = dset.shape
        return summary



    def __repr__(self) -> str:
        repr = self.summary.to_string()
        return repr


    def write_node_coords(self, coords_X=None , coords_Y=None, coords_Z=None):
        ''' Updates NODE_COOR_X Y and Z tables in ahh5 file.
        Inputs coords_x y z shape should be (n_time * n_nodes)
        '''
        self._open_file(mode='r+')
        # opts={'compression':'gzip','compression_opts':0}
        opts ={}

        del self._root['TIME']
        self._root.create_dataset(name='TIME',data=np.arange(0,82.5,2.5),**opts) # Writing in h5 file
        self.data['TIME']  = Df(np.arange(0,82.5,2.5)) # Just to keep data field in sync

        del self._root['NODE_COOR_X']
        self._root.create_dataset(name='NODE_COOR_X',data=coords_X,**opts) # Writing in h5 file
        self.data['NODE_COOR_X']  = Df(coords_X) # Just to keep data field in sync

        del self._root['NODE_COOR_Y']
        self._root.create_dataset(name='NODE_COOR_Y',data=coords_Y ,**opts) # Writing in h5 file
        self.data['NODE_COOR_Y']  = Df(coords_Y) # Just to keep data field in sync

        del self._root['NODE_COOR_Z']
        self._root.create_dataset(name='NODE_COOR_Z',data=coords_Z,**opts) # Writing in h5 file
        self.data['NODE_COOR_Z']  = Df(coords_Z) # Just to keep data field in sync

        self._close_file()


    def get_signal_rel_dist_nodes(self,node_id,node_id_ref,remove_offset=False):
        ''' Returns distance between nodes as function of time '''
        idx_nod = (self.data['DEFNODE']==node_id).values.ravel()
        idx_ref = (self.data['DEFNODE']==node_id_ref).values.ravel()

        X_nod = self.data['NODE_COOR_X'].values[:,idx_nod].ravel()
        Y_nod = self.data['NODE_COOR_Y'].values[:,idx_nod].ravel()
        Z_nod = self.data['NODE_COOR_Z'].values[:,idx_nod].ravel()

        X_ref = self.data['NODE_COOR_X'].values[:,idx_ref].ravel()
        Y_ref = self.data['NODE_COOR_Y'].values[:,idx_ref].ravel()
        Z_ref = self.data['NODE_COOR_Z'].values[:,idx_ref].ravel()

        X_vec = X_nod - X_ref
        Y_vec = Y_nod - Y_ref
        Z_vec = Z_nod - Z_ref
        res = (X_vec**2 + Y_vec**2 + Z_vec**2 )**.5
        if remove_offset:
            res = res - res[0]
        return res


    def plot3d(self,time=0):
        ''' Plots 3d node positions at a given time.
        The closest time is used for display
        '''
        # Display options
        opts_markers = dict(mode='markers', marker_symbol='square', marker_size=2, opacity=0.2, marker_color='green')
        opts_axis   = dict(showgrid=False, title='',showline=False,zeroline=False,ticks = '',
                           showticklabels = False, autorange=True)

        times_all    = self.data['TIME'].copy()
        times_all.columns = ['time']
        time_closest = self._filter_closest_df(times_all,'time',time)['time']
        time_index   = time_closest.index[0]

        x = self.data['NODE_COOR_X'].loc[time_index,:]
        y = self.data['NODE_COOR_Y'].loc[time_index,:]
        z = self.data['NODE_COOR_Z'].loc[time_index,:]

        # Mapping node id to index
        self.map_idx_uid_nodes = self.data['DEFNODE'].to_dict()[0]
        self.map_uid_idx_nodes = {v:k for k,v in self.map_idx_uid_nodes.items()}

        conn1 = self.data['SHELL2NODE'].iloc[:,[1,2,3]].applymap(lambda x: self.map_uid_idx_nodes[x])
        conn2 = self.data['SHELL2NODE'].iloc[:,[3,4,1]].applymap(lambda x: self.map_uid_idx_nodes[x])
        conn2.columns = conn1.columns
        conn = pd.concat([conn1,conn2])

        i= conn.iloc[:,0]
        j= conn.iloc[:,1]
        k= conn.iloc[:,2]

        geom = go.Mesh3d( x=x, y=y, z=z,  i=i, j=j, k=k,opacity=0.3)
        fig  = go.Figure(data=[geom])

        fig.update_scenes(aspectmode='data', xaxis=opts_axis, yaxis=opts_axis, zaxis=opts_axis)
        fig.update_layout({'scene':{"camera": {"projection":{"type": "orthographic"}}}})
                                                # {"type": "orthographic"}}}})
        fig.update_layout(height=self._fig_height, width=self._fig_width, margin=dict(l=0, r=0, t=35, b=0),
                          title=f'{self.file_name} : {time} ms')
        fig.update_layout(scene_camera=dict(eye=dict(x=2.5, y=0, z=0)))

        return fig


    def plot3danim(self,times=(0,200),skip=1):
        ''' Plots 3d animated mmesh.
        '''
        # Time frames to display
        time_values = self.data['TIME'].copy()
        time_values.columns=['time']
        time_values = time_values.query('@times[0] <= time <= @times[1]')['time'].values[::skip]

        # Spatial ranges
        x_range = [self.data['NODE_COOR_X'].min().min(),self.data['NODE_COOR_X'].max().max()]
        y_range = [self.data['NODE_COOR_Y'].min().min(),self.data['NODE_COOR_Y'].max().max()]
        z_range = [self.data['NODE_COOR_Z'].min().min(),self.data['NODE_COOR_Z'].max().max()]

        # Creating figure and frames
        fig  = go.Figure( data   = [self.plot3d(time=time_values[0]).data[0]],
                          frames = [go.Frame(data=[self.plot3d(time=t).data[0]],
                                    name=f'time{t:.2f}') for t in time_values])

        # Customise figure aspect
        opts_axis   = dict(showgrid=False, title='',showline=False,zeroline=False,ticks = '',
                           showticklabels = False, autorange=False)
        opts_axis_x = dict(range=x_range) | opts_axis
        opts_axis_y = dict(range=y_range) | opts_axis
        opts_axis_z = dict(range=z_range) | opts_axis

        fig.update_scenes(aspectmode='data', xaxis=opts_axis_x, yaxis=opts_axis_y, zaxis=opts_axis_z)
        fig.update_layout(height=self._fig_height, width=self._fig_width, margin=dict(l=0, r=0, t=35, b=0))
        fig.update_layout({'scene':{"camera": {"projection": {"type": "orthographic"}}}})
        fig.update_layout(scene_camera=dict(eye=dict(x=2.5, y=0, z=0)))

        # Create animation control
        fig.update_layout(updatemenus=[dict(type="buttons",
            buttons=[dict(label="Play",method="animate",
                    args=[None, {"frame": {"duration": 1, "redraw": True},
                    "fromcurrent": True, "transition": {"duration": 0}}])])])

        return fig




    # Helper functions --------------------------

    @staticmethod
    def _filter_closest_df(df,filtering_column,target_value):
        ''' Helper function to select dataframe rows where the filtering_column value is the closest to the target value'''
        closest_value = df.iloc[(df[filtering_column]-target_value).abs().argsort()[:1]][filtering_column].values[0]
        df_selection  = df[df[filtering_column]==closest_value]
        return df_selection


In [8]:
for afile in file_list:
    print(afile)

mod01_out02_doe05_run001.akh5
mod01_out02_doe05_run002.akh5
mod01_out02_doe05_run003.akh5
mod01_out02_doe05_run004.akh5
mod01_out02_doe05_run005.akh5
mod01_out02_doe05_run006.akh5
mod01_out02_doe05_run007.akh5
mod01_out02_doe05_run008.akh5
mod01_out02_doe05_run009 (1).akh5
mod01_out02_doe05_run010.akh5
mod01_out02_doe05_run011.akh5
mod01_out02_doe05_run012.akh5
mod01_out02_doe05_run013.akh5
mod01_out02_doe05_run014.akh5
mod01_out02_doe05_run015.akh5
mod01_out02_doe05_run016.akh5
mod01_out02_doe05_run017.akh5
mod01_out02_doe05_run018.akh5
mod01_out02_doe05_run019.akh5
mod01_out02_doe05_run020.akh5
mod01_out02_doe05_run021.akh5
mod01_out02_doe05_run022.akh5
mod01_out02_doe05_run023.akh5
mod01_out02_doe05_run024.akh5
mod01_out02_doe05_run025.akh5
mod01_out02_doe05_run026.akh5
mod01_out02_doe05_run027.akh5
mod01_out02_doe05_run028.akh5
mod01_out02_doe05_run029.akh5
mod01_out02_doe05_run030.akh5
mod01_out02_doe05_run031.akh5
mod01_out02_doe05_run032.akh5
mod01_out02_doe05_run033.akh5
mod01_

In [9]:
import numpy as np

def pad_array(arr, target_length):
    if len(arr) < target_length:
        return np.pad(arr, (0, target_length - len(arr)), constant_values=0)
    elif len(arr) > target_length:
        return arr[:target_length]
    else:
        return arr

processed_dataframes = []

for afile in tqdm(file_list, desc="Processing Datasets"):
    obj = AkH5file(afile)
    mass, shell_thickness, y_coordinates, output_sequence = generate_data(obj)


    mass_scaled, shell_thickness_scaled, y_coordinates_scaled, output_sequence = \
        preprocess_feature_engineering(mass, shell_thickness, y_coordinates, output_sequence)


    max_length = max(len(arr) for arr in [mass_scaled, shell_thickness_scaled, y_coordinates_scaled, output_sequence.values])


    mass_scaled = pad_array(mass_scaled.flatten(), max_length)
    shell_thickness_scaled = pad_array(shell_thickness_scaled.flatten(), max_length)
    y_coordinates_scaled = pad_array(y_coordinates_scaled.flatten(), max_length)
    output_sequence = pad_array(output_sequence.values.flatten(), max_length)


    df = pd.DataFrame({
        'mass_scaled': mass_scaled,
        'shell_thickness_scaled': shell_thickness_scaled,
        'y_coordinates_scaled': y_coordinates_scaled,
        'output_sequence': output_sequence,

    })

    processed_dataframes.append(df)


    print(len(mass_scaled), len(shell_thickness_scaled), len(y_coordinates_scaled), len(output_sequence))


Processing Datasets:   2%|▏         | 1/60 [00:01<01:23,  1.41s/it]

15718 15718 15718 15718


Processing Datasets:   3%|▎         | 2/60 [00:03<01:33,  1.60s/it]

15718 15718 15718 15718


Processing Datasets:   5%|▌         | 3/60 [00:05<01:45,  1.85s/it]

15718 15718 15718 15718


Processing Datasets:   7%|▋         | 4/60 [00:07<01:54,  2.05s/it]

15718 15718 15718 15718


Processing Datasets:   8%|▊         | 5/60 [00:09<01:46,  1.94s/it]

15718 15718 15718 15718


Processing Datasets:  10%|█         | 6/60 [00:10<01:34,  1.75s/it]

15718 15718 15718 15718


Processing Datasets:  12%|█▏        | 7/60 [00:17<02:50,  3.23s/it]

15718 15718 15718 15718


Processing Datasets:  13%|█▎        | 8/60 [00:18<02:25,  2.80s/it]

15718 15718 15718 15718


Processing Datasets:  15%|█▌        | 9/60 [00:20<02:10,  2.56s/it]

15718 15718 15718 15718


Processing Datasets:  17%|█▋        | 10/60 [00:22<01:53,  2.27s/it]

15718 15718 15718 15718


Processing Datasets:  18%|█▊        | 11/60 [00:23<01:38,  2.01s/it]

15718 15718 15718 15718


Processing Datasets:  20%|██        | 12/60 [00:25<01:26,  1.81s/it]

15718 15718 15718 15718


Processing Datasets:  22%|██▏       | 13/60 [00:27<01:22,  1.76s/it]

15718 15718 15718 15718


Processing Datasets:  23%|██▎       | 14/60 [00:28<01:19,  1.73s/it]

15718 15718 15718 15718


Processing Datasets:  25%|██▌       | 15/60 [00:30<01:13,  1.62s/it]

15718 15718 15718 15718


Processing Datasets:  27%|██▋       | 16/60 [00:32<01:22,  1.87s/it]

15718 15718 15718 15718


Processing Datasets:  28%|██▊       | 17/60 [00:34<01:19,  1.84s/it]

15718 15718 15718 15718


Processing Datasets:  30%|███       | 18/60 [00:36<01:17,  1.85s/it]

15718 15718 15718 15718


Processing Datasets:  32%|███▏      | 19/60 [00:37<01:12,  1.76s/it]

15718 15718 15718 15718


Processing Datasets:  33%|███▎      | 20/60 [00:39<01:06,  1.65s/it]

15718 15718 15718 15718


Processing Datasets:  35%|███▌      | 21/60 [00:42<01:29,  2.30s/it]

15718 15718 15718 15718


Processing Datasets:  37%|███▋      | 22/60 [00:44<01:17,  2.03s/it]

15718 15718 15718 15718


Processing Datasets:  38%|███▊      | 23/60 [00:46<01:12,  1.97s/it]

15718 15718 15718 15718


Processing Datasets:  40%|████      | 24/60 [00:47<01:09,  1.93s/it]

15718 15718 15718 15718


Processing Datasets:  42%|████▏     | 25/60 [00:49<01:03,  1.83s/it]

15718 15718 15718 15718


Processing Datasets:  43%|████▎     | 26/60 [00:50<00:57,  1.70s/it]

15718 15718 15718 15718


Processing Datasets:  45%|████▌     | 27/60 [00:52<00:57,  1.74s/it]

15718 15718 15718 15718


Processing Datasets:  47%|████▋     | 28/60 [00:54<00:53,  1.67s/it]

15718 15718 15718 15718


Processing Datasets:  48%|████▊     | 29/60 [00:55<00:49,  1.60s/it]

15718 15718 15718 15718


Processing Datasets:  50%|█████     | 30/60 [00:57<00:47,  1.57s/it]

15718 15718 15718 15718


Processing Datasets:  52%|█████▏    | 31/60 [00:58<00:44,  1.52s/it]

15718 15718 15718 15718


Processing Datasets:  53%|█████▎    | 32/60 [01:00<00:46,  1.67s/it]

15718 15718 15718 15718


Processing Datasets:  55%|█████▌    | 33/60 [01:02<00:44,  1.65s/it]

15718 15718 15718 15718


Processing Datasets:  57%|█████▋    | 34/60 [01:03<00:41,  1.61s/it]

15718 15718 15718 15718


Processing Datasets:  58%|█████▊    | 35/60 [01:05<00:39,  1.57s/it]

15718 15718 15718 15718


Processing Datasets:  60%|██████    | 36/60 [01:06<00:35,  1.48s/it]

15718 15718 15718 15718


Processing Datasets:  62%|██████▏   | 37/60 [01:08<00:34,  1.50s/it]

15718 15718 15718 15718


Processing Datasets:  63%|██████▎   | 38/60 [01:09<00:32,  1.49s/it]

15718 15718 15718 15718


Processing Datasets:  65%|██████▌   | 39/60 [01:10<00:30,  1.46s/it]

15718 15718 15718 15718


Processing Datasets:  67%|██████▋   | 40/60 [01:16<00:52,  2.61s/it]

15718 15718 15718 15718


Processing Datasets:  68%|██████▊   | 41/60 [01:17<00:43,  2.27s/it]

15718 15718 15718 15718


Processing Datasets:  70%|███████   | 42/60 [01:19<00:36,  2.01s/it]

15718 15718 15718 15718


Processing Datasets:  72%|███████▏  | 43/60 [01:20<00:31,  1.88s/it]

15718 15718 15718 15718


Processing Datasets:  73%|███████▎  | 44/60 [01:22<00:28,  1.78s/it]

15718 15718 15718 15718


Processing Datasets:  75%|███████▌  | 45/60 [01:23<00:24,  1.65s/it]

15718 15718 15718 15718


Processing Datasets:  77%|███████▋  | 46/60 [01:28<00:37,  2.71s/it]

15718 15718 15718 15718


Processing Datasets:  78%|███████▊  | 47/60 [01:30<00:30,  2.32s/it]

15718 15718 15718 15718


Processing Datasets:  80%|████████  | 48/60 [01:31<00:24,  2.02s/it]

15718 15718 15718 15718


Processing Datasets:  82%|████████▏ | 49/60 [01:32<00:20,  1.85s/it]

15718 15718 15718 15718


Processing Datasets:  83%|████████▎ | 50/60 [01:34<00:17,  1.76s/it]

15718 15718 15718 15718


Processing Datasets:  85%|████████▌ | 51/60 [01:35<00:14,  1.67s/it]

15718 15718 15718 15718


Processing Datasets:  87%|████████▋ | 52/60 [01:37<00:12,  1.62s/it]

15718 15718 15718 15718


Processing Datasets:  88%|████████▊ | 53/60 [01:39<00:12,  1.75s/it]

15718 15718 15718 15718


Processing Datasets:  90%|█████████ | 54/60 [01:41<00:10,  1.72s/it]

15718 15718 15718 15718


Processing Datasets:  92%|█████████▏| 55/60 [01:42<00:08,  1.62s/it]

15718 15718 15718 15718


Processing Datasets:  93%|█████████▎| 56/60 [01:44<00:06,  1.61s/it]

15718 15718 15718 15718


Processing Datasets:  95%|█████████▌| 57/60 [01:45<00:04,  1.60s/it]

15718 15718 15718 15718


Processing Datasets:  97%|█████████▋| 58/60 [01:47<00:03,  1.53s/it]

15718 15718 15718 15718


Processing Datasets:  98%|█████████▊| 59/60 [01:48<00:01,  1.54s/it]

15718 15718 15718 15718


Processing Datasets: 100%|██████████| 60/60 [01:50<00:00,  1.84s/it]

15718 15718 15718 15718





In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, RepeatVector, TimeDistributed, Dense


X_train_list = []
y_train_list = []

In [None]:
max_length = max(len(df) for df in processed_dataframes)


for df in processed_dataframes:
    df_padded = df.copy()
    for col in df.columns:
        df_padded[col] = np.pad(df[col].values, (0, max_length - len(df[col])), constant_values=0)

    X_train_list.append(df_padded[['mass_scaled', 'shell_thickness_scaled', 'y_coordinates_scaled']])
    y_train_list.append(df_padded['output_sequence'])


In [None]:
X_train = np.array([X.values for X in X_train_list])
y_train = np.array([y.values for y in y_train_list])

In [None]:
y_train

array([[-0.07568741, -0.05715084, -0.05715048, ...,  0.87127686,
         1.2034302 ,  1.4001465 ],
       [-0.11901188, -0.10325432, -0.10325468, ...,  0.62213135,
         1.02771   ,  1.2298584 ],
       [-0.08788395, -0.07227612, -0.07227612, ...,  0.5354614 ,
         0.76068115,  0.93078613],
       ...,
       [-0.07354832, -0.05494118, -0.05494106, ...,  0.79455566,
         1.0390015 ,  1.1972046 ],
       [-0.09612083, -0.07766151, -0.07766187, ...,  0.9920044 ,
         1.3783569 ,  1.5652466 ],
       [-0.09513283, -0.07666206, -0.07666254, ...,  0.95635986,
         1.2802734 ,  1.4658813 ]], dtype=float32)

In [None]:
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import numpy as np
from tensorflow.keras import backend as K

# Define or import processed_dataframes, X_train_list, y_train_list, max_length

# Reshape and pad the data
X_train_padded = []
max_length = max(len(df) for df in processed_dataframes)

for df in processed_dataframes:
    df_padded = df.copy()
    for col in df.columns:
        df_padded[col] = np.pad(df[col].values, (0, max_length - len(df[col])), constant_values=0)

    X_train_padded.append(df_padded[['mass_scaled', 'shell_thickness_scaled', 'y_coordinates_scaled']])

X_train_padded = np.array([X.values for X in X_train_padded])

# Define the number of epochs and batch size
epochs = 100  # Change the number of epochs as needed
batch_size = 10

# Generator
latent_dim = 100
generator_input = Input(shape=(latent_dim,))
x = Dense(128, activation='relu')(generator_input)
x = Dense(256, activation='relu')(x)
generator_output = Dense(max_length * X_train_padded.shape[2], activation='linear')(x)
generator_output = Reshape((max_length, X_train_padded.shape[2]))(generator_output)
generator = Model(generator_input, generator_output)

# Discriminator
discriminator_input = Input(shape=(max_length, X_train_padded.shape[2]))
x = Flatten()(discriminator_input)
x = Dense(512, activation='relu')(x)
x = Dense(256, activation='relu')(x)
discriminator_output = Dense(1, activation='sigmoid')(x)
discriminator = Model(discriminator_input, discriminator_output)
discriminator.compile(optimizer=Adam(learning_rate=0.0002), loss='mse')  # Change loss to 'mse'

# Combined Model (GAN)
gan_input = Input(shape=(latent_dim,))
gan_output = discriminator(generator(gan_input))
gan = Model(gan_input, gan_output)
gan.compile(optimizer=Adam(learning_rate=0.0002), loss='mse')  # Change loss to 'mse'

# Training Loop
for epoch in range(epochs):
    print(f'Epoch {epoch + 1}/{epochs}:')
    for i in range(0, len(X_train_padded), batch_size):
        X_train_batch = X_train_padded[i:i+batch_size]

        # Sample random noise as input to the generator
        noise = np.random.normal(0, 1, (len(X_train_batch), latent_dim))

        # Generate a batch of new sequences
        generated_sequences = generator.predict(noise)

        # Reshape X_train_batch to match the shape of generated_sequences
        X_train_batch_reshaped = X_train_batch.reshape(-1, max_length, X_train_padded.shape[2])

        # Combine generated sequences with real sequences
        X_combined = np.concatenate([X_train_batch_reshaped, generated_sequences])
        y_combined = np.concatenate([np.ones((len(X_train_batch_reshaped), 1)), np.zeros((len(generated_sequences), 1))])

        # Train discriminator
        d_loss = discriminator.train_on_batch(X_combined, y_combined)

        # Train generator (via GAN)
        noise = np.random.normal(0, 1, (len(X_train_batch_reshaped), latent_dim))
        y_gan = np.ones((len(X_train_batch_reshaped), 1))
        g_loss = gan.train_on_batch(noise, y_gan)

    # Print progress after each epoch
    print(f'Discriminator Loss: {d_loss}, Generator Loss: {g_loss}')

    # Clear session to release memory
    K.clear_session()


Epoch 1/100:
Discriminator Loss: 0.25961408019065857, Generator Loss: 0.11466202884912491
Epoch 2/100:
Discriminator Loss: 0.40659254789352417, Generator Loss: 0.02945793606340885
Epoch 3/100:
Discriminator Loss: 0.45734113454818726, Generator Loss: 0.008169583044946194
Epoch 4/100:
Discriminator Loss: 0.4602012634277344, Generator Loss: 0.008130321279168129
Epoch 5/100:
Discriminator Loss: 0.460366815328598, Generator Loss: 0.006932151969522238
Epoch 6/100:
Discriminator Loss: 0.4696977734565735, Generator Loss: 0.009964671917259693
Epoch 7/100:
Discriminator Loss: 0.48271188139915466, Generator Loss: 0.003977140877395868
Epoch 8/100:
Discriminator Loss: 0.48632144927978516, Generator Loss: 0.002307899296283722
Epoch 9/100:
Discriminator Loss: 0.48233795166015625, Generator Loss: 0.0036666966043412685
Epoch 10/100:
Discriminator Loss: 0.48706358671188354, Generator Loss: 0.0037481579929590225
Epoch 11/100:
Discriminator Loss: 0.49212998151779175, Generator Loss: 0.003938969224691391
E

In [24]:
pip install Keras



In [25]:
pip install Tensorflow



In [29]:
from tensorflow.keras.layers import Input, Dense, Flatten, Reshape, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.losses import mse
import numpy as np

# Define or import processed_dataframes, X_train_list, y_train_list, max_length
max_length = max(len(df) for df in processed_dataframes)

# Reshape and pad the data
X_train_padded = []
for df in processed_dataframes:
    df_padded = df.copy()
    for col in df.columns:
        df_padded[col] = np.pad(df[col].values, (0, max_length - len(df[col])), constant_values=0)
    X_train_padded.append(df_padded[['mass_scaled', 'shell_thickness_scaled', 'y_coordinates_scaled']])

X_train_padded = np.array([X.values for X in X_train_padded])

# Define the number of epochs and batch size
epochs = 100  # Change the number of epochs as needed
batch_size = 10

# Define latent dimension
latent_dim = 100

# Encoder
encoder_input = Input(shape=(max_length, X_train_padded.shape[2]))
x = Flatten()(encoder_input)
x = Dense(512, activation='relu')(x)
x = Dense(256, activation='relu')(x)
z_mean = Dense(latent_dim)(x)
z_log_var = Dense(latent_dim)(x)

# Sampling function
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., stddev=1.)
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# Decoder
decoder_input = Input(shape=(latent_dim,))
x = Dense(256, activation='relu')(decoder_input)
x = Dense(512, activation='relu')(x)
decoder_output = Dense(max_length * X_train_padded.shape[2], activation='linear')(x)
decoder_output = Reshape((max_length, X_train_padded.shape[2]))(decoder_output)

# Instantiate Encoder and Decoder models
encoder = Model(encoder_input, [z_mean, z_log_var, z])
decoder = Model(decoder_input, decoder_output)

# VAE Model
vae_output = decoder(z)
vae = Model(encoder_input, vae_output)

# Define VAE loss
# Define VAE loss
def vae_loss(x, x_decoded_mean, z_log_var=z_log_var, z_mean=z_mean):
    reconstruction_loss = mse(K.flatten(x), K.flatten(x_decoded_mean))
    kl_loss = -0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return reconstruction_loss + kl_loss

# Compile VAE
vae.compile(optimizer=Adam(learning_rate=0.0002), loss=vae_loss)

# Custom training loop
for epoch in range(epochs):
    print(f'Epoch {epoch + 1}/{epochs}:')
    for i in range(0, len(X_train_padded), batch_size):
        X_train_batch = X_train_padded[i:i+batch_size]

        # Perform a forward pass
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = encoder(X_train_batch)
            x_decoded = decoder(z)

            # Compute loss
            loss = vae_loss(X_train_batch, x_decoded, z_log_var, z_mean)

        # Compute gradients
        grads = tape.gradient(loss, vae.trainable_weights)

        # Update weights
        vae.optimizer.apply_gradients(zip(grads, vae.trainable_weights))

    # Print progress after each epoch
    print(f'Epoch {epoch + 1}/{epochs}, VAE Loss: {loss.numpy()}')


Epoch 1/100:
Epoch 1/100, VAE Loss: [0.40388718 0.40361783 0.40430206 0.4040738  0.40384385 0.4046628
 0.40412468 0.40449002 0.4045557  0.404346  ]
Epoch 2/100:
Epoch 2/100, VAE Loss: [0.2765308  0.27636918 0.27655396 0.2764894  0.27653793 0.27674457
 0.27646613 0.27653882 0.27672955 0.27667856]
Epoch 3/100:
Epoch 3/100, VAE Loss: [0.215251   0.21519777 0.21525402 0.21523227 0.21521878 0.21535327
 0.21527883 0.21530841 0.21533024 0.21528286]
Epoch 4/100:
Epoch 4/100, VAE Loss: [0.15921922 0.15925169 0.1592567  0.15925843 0.15922911 0.15920566
 0.15924294 0.15925217 0.1592131  0.15922026]
Epoch 5/100:
Epoch 5/100, VAE Loss: [0.10588536 0.10588279 0.10588159 0.10588291 0.10588406 0.10588531
 0.10588114 0.10588098 0.105886   0.10588646]
Epoch 6/100:
Epoch 6/100, VAE Loss: [0.04855942 0.04855993 0.0485578  0.04855762 0.04856092 0.04855419
 0.04855935 0.04855689 0.04855318 0.04855546]
Epoch 7/100:
Epoch 7/100, VAE Loss: [0.01279282 0.01279313 0.01279205 0.01279197 0.0127936  0.01279016
 0.0

In [36]:
from tensorflow.keras.layers import Input, Dense, Flatten, Reshape, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.losses import mse
import numpy as np

# Define or import processed_dataframes, X_train_list, y_train_list, max_length
max_length = max(len(df) for df in processed_dataframes)

# Reshape and pad the data
X_train_padded = []
for df in processed_dataframes:
    df_padded = df.copy()
    for col in df.columns:
        df_padded[col] = np.pad(df[col].values, (0, max_length - len(df[col])), constant_values=0)
    X_train_padded.append(df_padded[['mass_scaled', 'shell_thickness_scaled', 'y_coordinates_scaled']])

X_train_padded = np.array([X.values for X in X_train_padded])

# Define the number of epochs and batch size
epochs = 100  # Change the number of epochs as needed
batch_size = 10

# Define latent dimension
latent_dim = 100

# Encoder
encoder_input = Input(shape=(max_length, X_train_padded.shape[2]))
x = Flatten()(encoder_input)
x = Dense(512, activation='relu')(x)
x = Dense(256, activation='relu')(x)
z_mean = Dense(latent_dim)(x)
z_log_var = Dense(latent_dim)(x)

# Sampling function
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., stddev=1.)
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# Decoder
decoder_input = Input(shape=(latent_dim,))
x = Dense(256, activation='relu')(decoder_input)
x = Dense(512, activation='relu')(x)
decoder_output = Dense(max_length * X_train_padded.shape[2], activation='linear')(x)
decoder_output = Reshape((max_length, X_train_padded.shape[2]))(decoder_output)

# Instantiate Encoder and Decoder models
encoder = Model(encoder_input, [z_mean, z_log_var, z])
decoder = Model(decoder_input, decoder_output)

# VAE Model
vae_output = decoder(z)
vae = Model(encoder_input, vae_output)

# Define VAE loss
def vae_loss(x, x_decoded_mean, z_log_var=z_log_var, z_mean=z_mean):
    reconstruction_loss = mse(K.flatten(x), K.flatten(x_decoded_mean))
    kl_loss = -0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return reconstruction_loss + kl_loss

# Compile VAE
vae.compile(optimizer=Adam(learning_rate=0.0002), loss=vae_loss)

# Custom training loop
for epoch in range(epochs):
    print(f'Epoch {epoch + 1}/{epochs}:')
    for df_idx in range(len(processed_dataframes)):
        X_train_batch = np.expand_dims(X_train_padded[df_idx], axis=0)

        # Perform a forward pass
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = encoder(X_train_batch)
            x_decoded = decoder(z)

            # Compute loss
            loss = vae_loss(X_train_batch, x_decoded, z_log_var, z_mean)

        # Compute gradients
        grads = tape.gradient(loss, vae.trainable_weights)

        # Update weights
        vae.optimizer.apply_gradients(zip(grads, vae.trainable_weights))

    # Print progress after each epoch
    print(f'Epoch {epoch + 1}/{epochs}, VAE Loss: {loss.numpy()}')


Epoch 1/100:
Epoch 1/100, VAE Loss: [0.01537677]
Epoch 2/100:
Epoch 2/100, VAE Loss: [0.004081]
Epoch 3/100:
Epoch 3/100, VAE Loss: [0.00719746]
Epoch 4/100:
Epoch 4/100, VAE Loss: [0.00328739]
Epoch 5/100:
Epoch 5/100, VAE Loss: [0.00155223]
Epoch 6/100:
Epoch 6/100, VAE Loss: [0.00198486]
Epoch 7/100:
Epoch 7/100, VAE Loss: [0.00758549]
Epoch 8/100:
Epoch 8/100, VAE Loss: [0.01460239]
Epoch 9/100:
Epoch 9/100, VAE Loss: [0.00094177]
Epoch 10/100:
Epoch 10/100, VAE Loss: [0.00410134]
Epoch 11/100:
Epoch 11/100, VAE Loss: [0.01943804]
Epoch 12/100:
Epoch 12/100, VAE Loss: [0.00089334]
Epoch 13/100:
Epoch 13/100, VAE Loss: [0.00372485]
Epoch 14/100:
Epoch 14/100, VAE Loss: [0.00079911]
Epoch 15/100:
Epoch 15/100, VAE Loss: [0.00252374]
Epoch 16/100:
Epoch 16/100, VAE Loss: [0.00250087]
Epoch 17/100:
Epoch 17/100, VAE Loss: [0.00104605]
Epoch 18/100:
Epoch 18/100, VAE Loss: [0.00070155]
Epoch 19/100:
Epoch 19/100, VAE Loss: [0.00295269]
Epoch 20/100:
Epoch 20/100, VAE Loss: [0.00537141]


In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, RepeatVector, TimeDistributed, Dense
import numpy as np

X_train_list, X_val_list, y_train_list, y_val_list = train_test_split(
    processed_dataframes, processed_dataframes, test_size=0.1, random_state=42
)


X_train = np.array([df[['mass_scaled', 'shell_thickness_scaled', 'y_coordinates_scaled']].values for df in X_train_list])
y_train = np.array([df['output_sequence'].values for df in y_train_list])
X_val = np.array([df[['mass_scaled', 'shell_thickness_scaled', 'y_coordinates_scaled']].values for df in X_val_list])
y_val = np.array([df['output_sequence'].values for df in y_val_list])


max_length = max(len(seq) for seq in y_train)


X_train_padded = np.array([np.pad(X_seq, ((0, max_length - len(X_seq)), (0, 0)), constant_values=0) for X_seq in X_train])
y_train_padded = np.array([np.pad(y_seq, (0, max_length - len(y_seq)), constant_values=0) for y_seq in y_train])
X_val_padded = np.array([np.pad(X_seq, ((0, max_length - len(X_seq)), (0, 0)), constant_values=0) for X_seq in X_val])
y_val_padded = np.array([np.pad(y_seq, (0, max_length - len(y_seq)), constant_values=0) for y_seq in y_val])


model = Sequential()
model.add(LSTM(64, input_shape=(X_train_padded.shape[1], X_train_padded.shape[2]), return_sequences=True))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(16, return_sequences=False))
model.add(RepeatVector(max_length))
model.add(LSTM(16, return_sequences=True))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(64, return_sequences=True))
model.add(TimeDistributed(Dense(500, activation='relu')))
model.add(TimeDistributed(Dense(500, activation='relu')))
model.add(TimeDistributed(Dense(X_train_padded.shape[2], activation='linear')))


model.compile(optimizer='adam', loss='mean_squared_error')


y_train_padded_reshaped = np.expand_dims(y_train_padded, axis=-1)
y_val_padded_reshaped = np.expand_dims(y_val_padded, axis=-1)


model.fit(X_train_padded, y_train_padded_reshaped, epochs=50, batch_size=10, validation_data=(X_val_padded, y_val_padded_reshaped))

