In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
import pandas
import os
import numpy as np
import pickle as pk
import sklearn as sk
# Set CUDA_VISIBLE_DEVICES to '-1' for this process.
# NOTE(review): presumably intended to hide all GPUs so TensorFlow runs on CPU only;
# a later cell still queries and prints the GPU device lists -- confirm this is wanted.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [None]:
# Load the code-representation table with pandas' default CSV settings.
data = pandas.read_csv("./../codes/tuke-codes-representation")

# Skip the first two rows, then drop every row that contains a missing value.
# DataFrame.dropna() returns a new frame, so its result must be assigned;
# calling it as a bare statement leaves `data` unchanged.
data = data[2:].dropna()

# Report which GPU devices TensorFlow can see.
gpus = tf.config.list_physical_devices('GPU')
print(gpus)
logical_gpus = tf.config.list_logical_devices('GPU')
print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")

In [None]:
# Replace the dotted identifier in column 0 with a small integer id.
# The key is the letters of the first dot-separated part plus the
# second-to-last part; ids are handed out from 1 in order of first appearance.
# NOTE: this cell rewrites column 0 in place, so it cannot be re-run without
# reloading `data` first.
typeMap = {}
for row in range(len(data)):
    parts = data.iloc[row, 0].split(".")
    stem = "".join(ch for ch in parts[0] if ch.isalpha())
    key = stem + parts[-2]
    if key not in typeMap:
        typeMap[key] = len(typeMap) + 1
    data.iloc[row, 0] = typeMap[key]
data.head()

In [None]:
# One entry per type id 1..593: the rows of that type (all columns after the
# first) as a list of lists.
# NOTE(review): 594 is hard-coded; presumably it is the number of distinct ids + 1 -- confirm.
frames = []
for type_id in range(1, 594):
    rows_of_type = data[data["type"] == type_id]
    frames.append(rows_of_type.iloc[:, 1:].values.tolist())

# Persist the grouped rows; the `with` block closes the file on exit.
with open("data.pkl", "wb") as sink:
    pk.dump(frames, sink)

In [None]:
# Build pairwise samples from every ordered combination of frames (the first
# frame is skipped). Rows are paired position by position up to the length of
# the shorter frame; the label is whether the two frames compare equal as lists.
x = []
y = []
candidates = frames[1:]
for left_rows in candidates:
    for right_rows in candidates:
        same_frame = left_rows == right_rows
        # zip stops at the shorter of the two frames.
        for left_vec, right_vec in zip(left_rows, right_rows):
            x.append([left_vec, right_vec])
            y.append(same_frame)

In [None]:
# Fix TensorFlow's global seed and reset Keras state before building the model.
tf.random.set_seed(12)
tf.keras.backend.clear_session()

# Pairs become a float array indexed as [sample, pair member, ...]; labels become integers.
x = np.array(x).astype("float64")
y = np.array(y).astype("int")

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=23, shuffle=True
)

# Split each pair into the two model inputs.
x_train_0 = x_train[:, 0]
x_train_1 = x_train[:, 1]
# The held-out inputs must come from x_test (not x_train) so that they line up
# with y_test and validation is done on data the model has not been fitted on.
x_test_0 = x_test[:, 0]
x_test_1 = x_test[:, 1]

def euclidean_distance(vects):
    """Return the row-wise Euclidean distance between two tensors.

    ``vects`` is a pair ``(a, b)``. The squared differences are summed over
    axis 1 with ``keepdims=True``, and the sum is clamped from below at the
    Keras epsilon before the square root is taken.
    """
    first, second = vects
    difference = first - second
    squared_norm = tf.reduce_sum(tf.square(difference), axis=1, keepdims=True)
    clamped = tf.maximum(squared_norm, tf.keras.backend.epsilon())
    return tf.sqrt(clamped)

def create_network(input_size):
    """Build the two-input siamese model (uncompiled).

    Both inputs are vectors of length ``input_size``. Each goes through the
    same encoder (Flatten -> Dense(64, relu) -> Dropout(0.5) -> Dense(32, relu)),
    so the two branches share weights. The Euclidean distance between the two
    encodings is then fed to a single sigmoid unit that produces the output score.

    Returns the ``Model`` mapping ``[input_1, input_2]`` to that score.
    """
    input_1 = Input(shape=(input_size,))
    input_2 = Input(shape=(input_size,))

    # Shared encoder applied to both inputs.
    encoder = tf.keras.Sequential([
        Flatten(input_shape=(input_size,)),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(32, activation='relu'),
    ])
    encodings = [encoder(input_1), encoder(input_2)]

    # Distance between the encodings, then a sigmoid unit on top of it.
    distance = Lambda(euclidean_distance)(encodings)
    score = Dense(1, activation='sigmoid')(distance)

    return Model(inputs=[input_1, input_2], outputs=score)

# Siamese model over pairs of 16-element input vectors.
model = create_network(16)

# Binary cross-entropy loss, Adam with a 1e-4 learning rate, accuracy as the tracked metric.
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Show the layer / parameter overview.
model.summary()

In [None]:
# Train for 40 epochs, validating on the held-out pairs after each epoch.
history = model.fit(
    x=[x_train_0, x_train_1],
    y=y_train,
    epochs=40,
    validation_data=([x_test_0, x_test_1], y_test),
)

In [None]:

# Evaluate on the held-out pairs produced by train_test_split. The two model
# inputs are sliced directly from x_test so they align with y_test.
# model.evaluate returns the loss followed by the compiled metrics (accuracy),
# not predictions, so the result is unpacked into descriptive names.
x_eval_0, x_eval_1 = x_test[:, 0], x_test[:, 1]
test_loss, test_accuracy = model.evaluate([x_eval_0, x_eval_1], y_test)


In [None]:
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, auc
import matplotlib.pyplot as plt

# Held-out pairs from train_test_split, split into the two model inputs.
# These are taken from x_test / y_test, which are defined in this notebook.
x_eval_0, x_eval_1 = x_test[:, 0], x_test[:, 1]

# Predicted scores for the held-out pairs, flattened to one value per sample.
y_pred_prob = model.predict([x_eval_0, x_eval_1]).ravel()

# Hard class labels obtained by rounding the scores.
y_pred = np.round(y_pred_prob)

# Precision, recall and F1-score per class.
print(classification_report(y_test, y_pred))

# Area under the ROC curve, computed from the raw scores.
roc_auc = roc_auc_score(y_test, y_pred_prob)
print("AUC-ROC:", roc_auc)

# Points of the ROC curve.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)

# Plot the ROC curve against the diagonal reference line.
plt.figure()
lw = 2  # Line width
plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Training curves from the History object returned by model.fit:
# accuracy on the left panel, loss on the right, each with its validation curve.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 6))

panels = (
    (acc_ax, 'accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'val_loss', 'Model loss', 'Loss'),
)
for ax, train_key, val_key, panel_title, y_label in panels:
    ax.plot(history.history[train_key])
    ax.plot(history.history[val_key])
    ax.set_title(panel_title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')

fig.tight_layout()
plt.show()