### import the necessary modules

In [None]:
# Shell escape: install the giotto-tda distribution before the `gtda` imports below are run.
!pip install giotto-tda

In [9]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, PersistenceImage

from torch_geometric.datasets import TUDataset

import networkx as nx

ModuleNotFoundError: No module named 'gtda'

### step 1: load the dataset

In [None]:
# Load the MUTAG dataset of the TU collection, stored under data/TUDataset.
# The dataset name has to be passed as a string.
dataset = TUDataset(root='data/TUDataset', name='MUTAG')

# Collect the individual graph objects of the dataset into a plain list.
graphs = list(dataset)

### step 2: node filtration values

- we assign a metric to track each graph, here, we deploy degree
- setting up a threshold t and then progressively increasing it. As we do this, we only read/learn from the graphs with degree <= t, and therefore, allow more graphs as t is made to increase.
- learning and keeping a track of these graphs progressively is done in the next step - persistent homology

In [None]:
# Build an undirected NetworkX graph from the first graph of the dataset.
first_graph = dataset[0]
G = nx.Graph()

# Register every node up front so that a node without any edge still shows up
# (with degree 0) in the filtration below.
G.add_nodes_from(range(first_graph.num_nodes))

# .t().tolist() yields [u, v] pairs of plain Python ints, so the endpoints can
# be used as node ids directly (no .item() call is needed or available).
G.add_edges_from((u, v) for u, v in first_graph.edge_index.t().tolist())

# Filtration value of each node: its degree, keyed by node id.
degree_filtration = dict(G.degree())

### step 3: persistent homology pipeline

In [None]:
def _geodesic_distances(data):
    """Return the all-pairs shortest-path (hop count) matrix of one graph.

    `data` is a single element of `dataset`; its `edge_index` and `num_nodes`
    are used to rebuild the graph as an undirected NetworkX graph.
    """
    graph = nx.Graph()
    graph.add_nodes_from(range(data.num_nodes))
    graph.add_edges_from((u, v) for u, v in data.edge_index.t().tolist())
    return nx.floyd_warshall_numpy(graph)


# One diagram per graph of the dataset, so that the number of samples matches
# the number of labels used when training the classifier.
# metric="precomputed" makes the transformer read each input as a distance
# matrix; with the default Euclidean metric the rows of the matrix would be
# interpreted as point coordinates instead of graph structure.
# NOTE: this is a Vietoris-Rips filtration on hop distances; it does not use
# the `degree_filtration` values computed in step 2.
# NOTE(review): unreachable node pairs get an infinite distance; presumably
# every graph here is connected - confirm.
VR = VietorisRipsPersistence(metric="precomputed")
diagrams = VR.fit_transform([_geodesic_distances(data) for data in dataset])

### step 4: vectorize persistence diagrams

In [None]:
# Turn every persistence diagram into a fixed-size image.
# sigma is the standard deviation of the Gaussian kernel; weight_function
# receives the 1D array of sampled persistence values and returns one weight
# per value, so np.square weights features by their squared persistence.
persistence_image = PersistenceImage(sigma=1.0, weight_function=np.square)

# Fit and transform with the estimator created above.
X = persistence_image.fit_transform(diagrams)

### step 5: train the model

In [None]:
# One class label per graph, read graph by graph in dataset order instead of
# reaching into the dataset's collated internal storage.
# X is expected to hold one row per graph in the same order.
y = np.array([data.y.item() for data in dataset])

# Flatten each sample's features into a single vector for the classifier.
features = X.reshape(len(X), -1)

# Fixed seeds make the reported accuracy reproducible between runs; the split
# is stratified so both parts keep the class proportions of the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42, stratify=y
)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

### step 6: analyze accuracy

In [None]:
# Share of held-out samples whose predicted label equals the true label.
test_accuracy = accuracy_score(y_test, y_pred)
print("accuracy: ", test_accuracy)