In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Layer
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

In [12]:
# Read the pattern dataset from disk
df = pd.read_csv('../dPattern24.csv')  # Update with the path to your dataset

# Coordinate feature names, in order: x1..x9 followed by y1..y9
coordinate_columns = [f'{axis}{i}' for axis in ('x', 'y') for i in range(1, 10)]

# Fit a min-max scaler on the coordinate columns and write the values,
# rescaled to the range [-1, 1], back into the same columns of df
scaler = MinMaxScaler(feature_range=(-1, 1))
df[coordinate_columns] = scaler.fit_transform(df[coordinate_columns])


def prepare_inputs(df, coord_cols=None, categories=(1, 2, 3)):
    """Pair every test item with the category prototypes of its subject.

    Rows whose 'Pattern_Token' is 'new_low', 'new_med' or 'new_high' are
    treated as test items; rows whose 'Pattern_Token' is 'prototype' are
    treated as prototypes. Prototypes are matched to test items through
    'sbjCode' and selected per 'Category'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'Pattern_Token', 'sbjCode', 'Category' and the
        coordinate columns.
    coord_cols : sequence of str, optional
        Names of the coordinate columns. Defaults to the module-level
        ``coordinate_columns`` list.
    categories : sequence, optional
        Category labels, in the order their prototypes are stacked.
        Defaults to ``(1, 2, 3)``.

    Returns
    -------
    test_items : numpy.ndarray
        Float array of shape ``(n_items, n_coords)``.
    prototypes : numpy.ndarray
        Float array of shape ``(n_items, n_categories, n_coords)``;
        ``prototypes[i, k]`` is the prototype of ``categories[k]`` for the
        subject of ``test_items[i]``.

    Notes
    -----
    Test items whose subject lacks a prototype for any requested category
    are skipped. When a subject has several prototype rows for one
    category, the first one (in frame order) is used so that every entry
    has the same shape. Empty results keep their trailing dimensions.
    """
    if coord_cols is None:
        # Fall back to the notebook-level column list defined with the scaler.
        coord_cols = coordinate_columns
    coord_cols = list(coord_cols)
    categories = list(categories)

    test_items_df = df[df['Pattern_Token'].isin(['new_low', 'new_med', 'new_high'])]
    prototypes_df = df[df['Pattern_Token'] == 'prototype']

    # Rows without a subject or category can never be matched; dropping them
    # also keeps NaN out of the lookup keys.
    prototypes_df = prototypes_df.dropna(subset=['sbjCode', 'Category'])

    # Index the prototypes once by (subject, category) instead of re-filtering
    # the whole frame for every test item. setdefault keeps the first row.
    proto_lookup = {}
    proto_keys = zip(prototypes_df['sbjCode'], prototypes_df['Category'])
    proto_values = prototypes_df[coord_cols].to_numpy(dtype=float)
    for key, coords in zip(proto_keys, proto_values):
        proto_lookup.setdefault(key, coords)

    test_items = []
    prototypes = []

    # Converting the coordinate block in one go yields float rows rather than
    # the object-dtype rows produced by iterating a mixed-dtype frame.
    item_subjects = test_items_df['sbjCode'].tolist()
    item_coords = test_items_df[coord_cols].to_numpy(dtype=float)
    for subject, coords in zip(item_subjects, item_coords):
        subject_protos = [proto_lookup.get((subject, c)) for c in categories]
        if any(proto is None for proto in subject_protos):
            continue  # incomplete prototype set for this subject
        test_items.append(coords)
        prototypes.append(np.stack(subject_protos))

    if not test_items:
        n_coords = len(coord_cols)
        return np.empty((0, n_coords)), np.empty((0, len(categories), n_coords))

    return np.stack(test_items), np.stack(prototypes)


# Build the paired arrays from df
test_items, prototypes = prepare_inputs(df)

# Preview: both array shapes, then the first entry of each array
print(test_items.shape, prototypes.shape)
print(test_items[0], prototypes[0], sep='\n')

NameError: name 'test_items_df' is not defined

In [2]:
class DistanceLayer(Layer):
    """Turn distances between a test item and three prototypes into probabilities.

    The layer receives ``[test_item, proto1, proto2, proto3]`` and returns a
    softmax over the negated Euclidean distances, so the closest prototype
    receives the highest probability.

    Parameters
    ----------
    epsilon : float, optional
        Lower bound applied to the squared distance before the square root.
        The derivative of the square root is unbounded at zero, which can
        produce NaN gradients when a test item coincides with a prototype;
        clamping avoids that. Defaults to ``1e-7``.
    **kwargs
        Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, epsilon=1e-7, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def _euclidean(self, a, b):
        """Return the clamped Euclidean distance between a and b along the last axis."""
        squared = tf.reduce_sum(tf.square(a - b), axis=-1)
        return tf.sqrt(tf.maximum(squared, self.epsilon))

    def call(self, inputs):
        """Compute probabilities from a list of four tensors.

        ``inputs`` is unpacked as ``[test_item, proto1, proto2, proto3]``.
        The result has one entry per prototype on its last axis.
        """
        test_item, proto1, proto2, proto3 = inputs

        # One distance per prototype, stacked on a new trailing axis
        distances = tf.stack(
            [self._euclidean(test_item, proto) for proto in (proto1, proto2, proto3)],
            axis=-1,
        )

        # Negate so that a smaller distance maps to a larger probability
        return tf.nn.softmax(-distances, axis=-1)

    def get_config(self):
        """Include ``epsilon`` so the layer can be re-created from its config."""
        config = super().get_config()
        config.update({'epsilon': self.epsilon})
        return config

# Define the encoder
def create_encoder(input_dim=18, hidden_units=(128, 64), bottleneck_units=32):
    """Build a fully connected encoder model.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. Defaults to 18.
    hidden_units : sequence of int, optional
        Width of each ReLU hidden layer, in order. Defaults to ``(128, 64)``.
    bottleneck_units : int, optional
        Width of the final ReLU layer whose output is the encoding.
        Defaults to 32.

    Returns
    -------
    Model
        Keras model mapping ``(batch, input_dim)`` inputs to
        ``(batch, bottleneck_units)`` encodings.
    """
    inputs = Input(shape=(input_dim,))
    x = inputs
    for units in hidden_units:
        x = Dense(units, activation='relu')(x)
    bottleneck = Dense(bottleneck_units, activation='relu')(x)
    return Model(inputs, bottleneck)

# A single encoder instance, applied to every input so the weights are shared
encoder = create_encoder()

# Four 18-dimensional inputs: the test item followed by three prototypes
test_item_input, proto1_input, proto2_input, proto3_input = (
    Input(shape=(18,)) for _ in range(4)
)

# Run each input through the same encoder, in input order
test_item_encoded, proto1_encoded, proto2_encoded, proto3_encoded = map(
    encoder,
    (test_item_input, proto1_input, proto2_input, proto3_input),
)

# DistanceLayer converts the four encodings into the predicted category output
predicted_category = DistanceLayer()(
    [test_item_encoded, proto1_encoded, proto2_encoded, proto3_encoded]
)

model = Model(
    inputs=[test_item_input, proto1_input, proto2_input, proto3_input],
    outputs=predicted_category,
)

# Compile model - the loss function should match your exact needs, possibly custom
model.compile(optimizer='adam', loss='categorical_crossentropy')

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 18)]                 0         []                            
                                                                                                  
 input_3 (InputLayer)        [(None, 18)]                 0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, 18)]                 0         []                            
                                                                                                  
 input_5 (InputLayer)        [(None, 18)]                 0         []                            
                                                                                            