Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

explanation issues #71

Open
ibrahim-patwary opened this issue Oct 31, 2023 · 1 comment
Open

explanation issues #71

ibrahim-patwary opened this issue Oct 31, 2023 · 1 comment

Comments

@ibrahim-patwary
Copy link

First of all, here is my model code:

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import networkx as nx
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
from torch_geometric.data import Data
from torch_geometric.nn import SAGEConv
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

from captum.attr import Saliency, IntegratedGradients
import random

# Load the tabular data and fill missing values with the per-column mean
excel_file_path = "/content/drive/MyDrive/GNN/chest_x_ray_dataset.xlsx"
raw_df = pd.read_excel(excel_file_path)
df = raw_df.fillna(raw_df.mean())

# The target column is assumed to be named 'class'; every other column is a feature
X = df.drop(columns='class').values  # Features
y = df['class'].values  # Target variable

# Rescale each feature with MinMaxScaler (fit and transform in a single step)
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)

# Apply a log transformation to the scaled features; the +1 shift avoids log(0)
X_log_transformed = np.log(1 + X_normalized)

# Generate a k-nearest-neighbour graph from the log-transformed features
K = 10  # Number of nearest neighbors to consider (adjust as needed)
knn = NearestNeighbors(n_neighbors=K, algorithm='ball_tree')
knn.fit(X_log_transformed)
# Called without a query matrix, so the neighbours of the fitted samples are returned
knn_indices = knn.kneighbors(return_distance=False)

# One node per dataframe row, inserted in row order
graph = nx.Graph()
graph.add_nodes_from(range(len(df)))

# Connect each sample to its neighbours, skipping self-loops
graph.add_edges_from(
    (node, neighbor)
    for node, neighbors in enumerate(knn_indices)
    for neighbor in neighbors
    if node != neighbor
)

# Attach the class label of every sample to its node
labels = dict(enumerate(y))
nx.set_node_attributes(graph, labels, 'label')

# Create the PyTorch Geometric Data object for the graph data.
#
# graph.edges() lists every undirected edge exactly once, whereas PyTorch
# Geometric interprets edge_index as a list of *directed* edges. Both
# directions are therefore stored explicitly so that message passing is
# symmetric, matching the undirected networkx graph.
# reshape(-1, 2) keeps the array two-dimensional even when there are no edges,
# so the resulting edge_index always has shape (2, num_edges).
edge_array = np.array(list(graph.edges()), dtype=np.int64).reshape(-1, 2).T
forward_edges = torch.tensor(edge_array, dtype=torch.long)
edge_index = torch.cat([forward_edges, forward_edges.flip(0)], dim=1)

x = torch.tensor(X_log_transformed, dtype=torch.float)  # Use the log-transformed data
y = torch.tensor(y, dtype=torch.long)
data = Data(x=x, edge_index=edge_index, y=y)

# Define a custom GNN model with a more complex architecture

class CustomGNN(torch.nn.Module):
    """Eight-layer GraphSAGE network producing per-node class scores.

    Seven hidden ``SAGEConv`` layers are each followed by batch
    normalisation, ReLU and dropout (p=0.3); a final ``SAGEConv`` layer maps
    the hidden representation to ``num_classes`` outputs per node.

    Args:
        num_features: Number of input features per node.
        hidden_channels: Width of every hidden layer.
        num_classes: Number of output scores per node.
    """

    def __init__(self, num_features, hidden_channels, num_classes):
        # The constructor must be spelled ``__init__``; a method called
        # ``init`` is never invoked by Python, leaving the layers undefined.
        super().__init__()
        self.conv1 = SAGEConv(num_features, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.conv4 = SAGEConv(hidden_channels, hidden_channels)
        self.conv5 = SAGEConv(hidden_channels, hidden_channels)
        self.conv6 = SAGEConv(hidden_channels, hidden_channels)  # Additional layer
        self.conv7 = SAGEConv(hidden_channels, hidden_channels)  # Additional layer
        self.conv8 = SAGEConv(hidden_channels, num_classes)  # Adjust output layer
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(hidden_channels)
        self.bn2 = nn.BatchNorm1d(hidden_channels)
        self.bn3 = nn.BatchNorm1d(hidden_channels)
        self.bn4 = nn.BatchNorm1d(hidden_channels)
        self.bn5 = nn.BatchNorm1d(hidden_channels)
        self.bn6 = nn.BatchNorm1d(hidden_channels)  # Additional layer
        self.bn7 = nn.BatchNorm1d(hidden_channels)  # Additional layer
        self.dropout = nn.Dropout(0.3)

    def forward(self, x, edge_index, batch=None):
        """Compute per-node class scores.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity passed to every ``SAGEConv`` layer.
            batch: Accepted for backward compatibility with existing callers
                and ignored; this is a node-level model with no pooling step.

        Returns:
            The raw (un-normalised) output of the last ``SAGEConv`` layer.
        """
        hidden_layers = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
            (self.conv5, self.bn5),
            (self.conv6, self.bn6),
            (self.conv7, self.bn7),
        )
        for conv, bn in hidden_layers:
            x = conv(x, edge_index)
            x = bn(x)
            x = self.relu(x)
            x = self.dropout(x)
        # ``batch`` is deliberately not forwarded: the third positional
        # argument of SAGEConv.forward is ``size``, not a batch vector.
        return self.conv8(x, edge_index)

# Initialize the custom GNN model with the best hyperparameters
best_hidden_channels = 256  # Replace with your best value
model = CustomGNN(
    num_features=X_log_transformed.shape[1],  # one input channel per feature column
    hidden_channels=best_hidden_channels,
    num_classes=6,
)

# Weight initialization

def weight_init(m):
    """Apply Xavier-normal initialisation to a module's weight matrix.

    Intended for use with ``model.apply(weight_init)``, which calls this
    function once for every sub-module.

    The previous check only matched ``nn.Conv2d``, which never occurs in a
    graph network built from ``SAGEConv`` layers, so the initialisation was
    silently skipped. Any module exposing a ``weight`` tensor with at least
    two dimensions is now initialised (this still covers ``nn.Conv2d``);
    modules without such a weight, e.g. batch-norm layers with 1-D weights or
    activations, are left untouched.

    Args:
        m: The module to initialise in place.
    """
    weight = getattr(m, "weight", None)
    # Xavier initialisation needs fan-in and fan-out, i.e. >= 2 dimensions.
    if isinstance(weight, torch.Tensor) and weight.dim() >= 2:
        nn.init.xavier_normal_(weight.data)

# Run weight_init on the model and each of its sub-modules
model.apply(weight_init)

# Define an optimizer with the best learning rate
best_lr = 0.005689229656484651  # Replace with your best value
optimizer = optim.Adam(model.parameters(), lr=best_lr)

# Define a loss function
criterion = nn.CrossEntropyLoss()

# Learning rate scheduling: halve the learning rate when the monitored value
# (the loss, hence mode='min') stops improving
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=10,
    verbose=True,
)

# Training loop with early stopping
best_accuracy = 0.0
patience = 150  # epochs without an accuracy improvement before stopping
early_stopping_counter = 0

for epoch in range(350):
    # --- optimisation step (dropout and batch-norm in training mode) ---
    model.train()
    optimizer.zero_grad()

    # Provide the batch argument when calling the model
    out = model(data.x, data.edge_index, data.batch)

    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()

    # Detach the scalar once so the scheduler and the log line work with a
    # plain float rather than a tensor still attached to the autograd graph.
    loss_value = loss.item()

    # --- evaluation pass (dropout disabled, no gradients) ---
    # NOTE(review): accuracy is measured on the same nodes that are trained
    # on; consider a held-out mask if a generalisation estimate is needed.
    model.eval()
    with torch.no_grad():
        # Provide the batch argument when calling the model
        out = model(data.x, data.edge_index, data.batch)
        y_pred = out.argmax(dim=1)
        # scikit-learn expects array-likes on the host, so convert explicitly.
        accuracy = accuracy_score(data.y.cpu().numpy(), y_pred.cpu().numpy())

    scheduler.step(loss_value)  # Adjust learning rate based on loss

    print(f'Epoch {epoch}, Loss: {loss_value:.4f}, Accuracy: {accuracy:.4f}')

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= patience:
            print("Early stopping")
            break

print(f"Best Accuracy: {best_accuracy:.4f}")

Now I want to explain my predictions using your library, but I can't work out how to apply it to a custom dataset (like mine). Can you provide me with a simple code example for it?
How can I implement the same thing as in "vis_shapegraph.ipynb"?

Lastly, I am facing a library installation issue such as:
ERROR: Could not find a version that satisfies the requirement torch-cluster (from versions: 0.1.1, 0.2.3, 0.2.4, 1.0.1, 1.0.3, 1.1.1, 1.1.2, 1.1.3, 1.1.4, 1.1.5, 1.2.1, 1.2.2, 1.2.3, 1.2.4, 1.3.0, 1.4.0, 1.4.1, 1.4.2, 1.4.3a1, 1.4.3, 1.4.4, 1.4.5, 1.5.2, 1.5.3, 1.5.4, 1.5.5, 1.5.6, 1.5.7, 1.5.8, 1.5.9, 1.6.0, 1.6.1)
ERROR: No matching distribution found for torch-cluster

@ibrahim-patwary
Copy link
Author

Thank you so much for your valuable response :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant