# Graph Network Based Correlation 

## Import Extraction Library

In [28]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import librosa
from PIL import Image

## Import Data

In [29]:
# Path to the UrbanSound8K dataset
dataset_path = '/home/nathanael-seay/Downloads/Urban Sound/UrbanSound8K'
metadata_path = os.path.join(dataset_path, 'metadata', 'UrbanSound8K.csv')
audio_folder_path = os.path.join(dataset_path, 'audio')

## Import Metadata

In [30]:
# Load the metadata file
metadata = pd.read_csv(metadata_path)

## Load YAMNet

In [31]:
# Load YAMNet model from TensorFlow Hub
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

## Extraction Function

In [32]:
def extract_yamnet_features(file_name, spectrogram_save_path):
    try:
        # Load the audio file
        audio_data, sr = librosa.load(file_name, sr=16000)  # YAMNet expects 16kHz audio
        # Ensure audio is 1D
        if len(audio_data.shape) > 1:
            audio_data = np.mean(audio_data, axis=1)
        # Extract YAMNet embeddings
        scores, embeddings, spectrogram = yamnet_model(audio_data)
        # Average the embeddings
        embeddings = tf.reduce_mean(embeddings, axis=0).numpy()

        # Normalize and convert the spectrogram data
        spectrogram_data = spectrogram.numpy()
        spectrogram_data -= spectrogram_data.min()
        spectrogram_data /= spectrogram_data.max()
        spectrogram_data *= 255.0
        spectrogram_data = spectrogram_data.astype(np.uint8)

         # Save the spectrogram without plotting using Pillow
        image = Image.fromarray(spectrogram_data)
        image.save(spectrogram_save_path)
        return embeddings
    
    except Exception as e:
        print(f"Error encountered while parsing file: {file_name}, {e}")
        return None

## Extract Features

In [33]:
# Ensure the spectrograms directory exists
os.makedirs('spectrograms', exist_ok=True)

# Extract features and store in a list
features = []
labels = []

for index, row in metadata.iterrows():
    file_name = os.path.join(audio_folder_path, f'fold{row["fold"]}', row["slice_file_name"])
    class_label = row["class"]
    spectrogram_save_path = os.path.join('spectrograms', f'{row["slice_file_name"]}.png')
    data = extract_yamnet_features(file_name, spectrogram_save_path)
    if data is not None:
        features.append(data)
        labels.append(class_label)

# Save Features

In [7]:
# Convert to a DataFrame
features_df = pd.DataFrame(features)
labels_df = pd.Series(labels, name='label')

# Combine features and labels
final_df = pd.concat([features_df, labels_df], axis=1)

# Save the DataFrame to a CSV file
final_df.to_csv('urbansound8k_yamnet_features.csv', index=False)

## Pre-process data

In [8]:
import pandas as pd
import numpy as np
from sklearn.neighbors import kneighbors_graph
import tensorflow as tf

# Load the feature dataset
data = pd.read_csv('urbansound8k_yamnet_features.csv')

# Separate features and labels
X = data.drop(columns=['label'])
y = data['label']

# Number of neighbors
k_neighbors = 5

# Construct the KNN graph
knn_graph = kneighbors_graph(X, n_neighbors=k_neighbors, mode='connectivity', include_self=False)
adj_matrix = knn_graph.toarray()

# Create adjacency matrix and edge list
edges = np.array(knn_graph.nonzero()).T
node_features = X.values

print("Adjacency Matrix:")
print(adj_matrix)
print("\nEdge List:")
print(edges)


Adjacency Matrix:
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]]

Edge List:
[[   0 6563]
 [   0 8349]
 [   0 8129]
 ...
 [8731 1953]
 [8731 1954]
 [8731 5956]]


## Split Data into train-test Datasets

In [9]:
import random

# Function to generate negative samples
def generate_negative_samples(adj_matrix, num_samples):
    neg_samples = []
    while len(neg_samples) < num_samples:
        u, v = random.sample(range(adj_matrix.shape[0]), 2)
        if adj_matrix[u, v] == 0:
            neg_samples.append((u, v))
    return np.array(neg_samples)

# Split edges into train/test sets
num_edges = len(edges)
num_test = int(0.2 * num_edges)
num_train = num_edges - num_test

np.random.shuffle(edges)
train_edges = edges[:num_train]
test_edges = edges[num_train:]

# Generate negative samples
train_neg_edges = generate_negative_samples(adj_matrix, num_train)
test_neg_edges = generate_negative_samples(adj_matrix, num_test)

print("\nTrain Edges:")
print(train_edges)
print("\nTest Edges:")
print(test_edges)
print("\nTrain Negative Edges:")
print(train_neg_edges)
print("\nTest Negative Edges:")
print(test_neg_edges)



Train Edges:
[[ 677 7982]
 [6565 1136]
 [3772 3791]
 ...
 [ 769  775]
 [2925 8389]
 [4854 4564]]

Test Edges:
[[5325 5328]
 [8318 3192]
 [1007 3660]
 ...
 [3104 3107]
 [1625 1591]
 [3966  921]]

Train Negative Edges:
[[6572 1707]
 [8124 1319]
 [1873 2193]
 ...
 [6490 7057]
 [4623 7439]
 [5898 3678]]

Test Negative Edges:
[[3017 4575]
 [2760 7395]
 [8687  424]
 ...
 [3137 2329]
 [8538 7265]
 [1080  281]]


## GCN Model

In [10]:
class GCNLayer(tf.keras.layers.Layer):
    def __init__(self, input_dim, output_dim):
        super(GCNLayer, self).__init__()
        self.dense = tf.keras.layers.Dense(output_dim)
    
    def call(self, adj_matrix, node_features):
        # Perform the graph convolution operation
        support = tf.matmul(adj_matrix, node_features)
        output = self.dense(support)
        return tf.nn.relu(output)

class GCN(tf.keras.Model):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GCN, self).__init__()
        self.gcn1 = GCNLayer(input_dim, hidden_dim)
        self.gcn2 = GCNLayer(hidden_dim, output_dim)

    def call(self, adj_matrix, node_features):
        h = self.gcn1(adj_matrix, node_features)
        h = self.gcn2(adj_matrix, h)
        return h

# Create the model
input_dim = node_features.shape[1]
hidden_dim = 128
output_dim = 64

model = GCN(input_dim, hidden_dim, output_dim)


## Train Model

In [14]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

@tf.function
def compute_loss(pos_score, neg_score):
    labels = tf.concat([tf.ones_like(pos_score), tf.zeros_like(neg_score)], 0)
    logits = tf.concat([pos_score, neg_score], 0)
    return loss_fn(labels, logits)

@tf.function
def train_step(model, adj_matrix, node_features, train_edges, train_neg_edges):
    with tf.GradientTape() as tape:
        # Positive edge predictions
        h = model(adj_matrix, node_features)
        pos_u, pos_v = train_edges[:, 0], train_edges[:, 1]
        pos_score = tf.reduce_sum(tf.gather(h, pos_u) * tf.gather(h, pos_v), axis=1)
        
        # Negative edge predictions
        neg_u, neg_v = train_neg_edges[:, 0], train_neg_edges[:, 1]
        neg_score = tf.reduce_sum(tf.gather(h, neg_u) * tf.gather(h, neg_v), axis=1)
        
        # Compute loss
        loss = compute_loss(pos_score, neg_score)
    
    # Backpropagation
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    loss = train_step(model, adj_matrix, node_features, train_edges, train_neg_edges)
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss.numpy()}')


Epoch 10/100, Loss: 0.6931463479995728
Epoch 20/100, Loss: 0.6931463479995728
Epoch 30/100, Loss: 0.6931463479995728
Epoch 40/100, Loss: 0.6931463479995728
Epoch 50/100, Loss: 0.6931463479995728
Epoch 60/100, Loss: 0.6931463479995728
Epoch 70/100, Loss: 0.6931463479995728
Epoch 80/100, Loss: 0.6931463479995728
Epoch 90/100, Loss: 0.6931463479995728
Epoch 100/100, Loss: 0.6931463479995728


## Evaluate Model

In [15]:
@tf.function
def compute_auc(pos_score, neg_score):
    scores = tf.concat([pos_score, neg_score], 0)
    labels = tf.concat([tf.ones_like(pos_score), tf.zeros_like(neg_score)], 0)
    return tf.keras.metrics.AUC()(labels, scores)

@tf.function
def test_step(model, adj_matrix, node_features, test_edges, test_neg_edges):
    h = model(adj_matrix, node_features)
    
    pos_u, pos_v = test_edges[:, 0], test_edges[:, 1]
    pos_score = tf.reduce_sum(tf.gather(h, pos_u) * tf.gather(h, pos_v), axis=1)
    
    neg_u, neg_v = test_neg_edges[:, 0], test_neg_edges[:, 1]
    neg_score = tf.reduce_sum(tf.gather(h, neg_u) * tf.gather(h, neg_v), axis=1)
    
    return compute_auc(pos_score, neg_score)

auc = test_step(model, adj_matrix, node_features, test_edges, test_neg_edges)
print(f'Test AUC: {auc.numpy()}')


ValueError: in user code:

    File "/tmp/ipykernel_27194/749866727.py", line 17, in test_step  *
        return compute_auc(pos_score, neg_score)
    File "/tmp/ipykernel_27194/749866727.py", line 5, in compute_auc  *
        return tf.keras.metrics.AUC()(labels, scores)
    File "/home/nathanael-seay/AIenv/lib/python3.11/site-packages/keras/src/metrics/confusion_metrics.py", line 1288, in __init__  **
        self._build(None)
    File "/home/nathanael-seay/AIenv/lib/python3.11/site-packages/keras/src/metrics/confusion_metrics.py", line 1312, in _build
        self.true_positives = self.add_variable(
    File "/home/nathanael-seay/AIenv/lib/python3.11/site-packages/keras/src/metrics/metric.py", line 192, in add_variable
        variable = backend.Variable(
    File "/home/nathanael-seay/AIenv/lib/python3.11/site-packages/keras/src/backend/common/variables.py", line 165, in __init__
        self._initialize(value)
    File "/home/nathanael-seay/AIenv/lib/python3.11/site-packages/keras/src/backend/tensorflow/core.py", line 31, in _initialize
        self._value = tf.Variable(

    ValueError: tf.function only supports singleton tf.Variables created on the first call. Make sure the tf.Variable is only created once or created outside tf.function. See https://www.tensorflow.org/guide/function#creating_tfvariables for more information.
