Merge branch 'dev' of https://github.com/pygod-team/pygod into dev

pygod-team · Dec 19, 2023 · d1954f3 · d1954f3
2 parents b6f5d02 + 246309f
commit d1954f3
Show file tree

Hide file tree

Showing 24 changed files with 77 additions and 63 deletions.
diff --git a/.github/workflows/testing-cron.yml b/.github/workflows/testing-cron.yml
@@ -16,7 +16,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     steps:
     - uses: actions/checkout@v3
@@ -30,7 +30,7 @@ jobs:
         python -m pip install --upgrade pip
         pip install torch --index-url https://download.pytorch.org/whl/cpu
         pip install torch_geometric
-        pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cpu.html
+        pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
         pip install pytest
         pip install coverage
         pip install coveralls

diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
@@ -21,7 +21,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
 
     steps:
     - uses: actions/checkout@v3
@@ -35,7 +35,7 @@ jobs:
         python -m pip install --upgrade pip
         pip install torch --index-url https://download.pytorch.org/whl/cpu
         pip install torch_geometric
-        pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cpu.html
+        pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
         pip install pytest
         pip install coverage
         pip install coveralls

diff --git a/README.rst b/README.rst
@@ -105,12 +105,12 @@ Installation
 ^^^^^^^^^^^^
 
 **Note on PyG and PyTorch Installation**\ :
-PyGOD depends on `PyTorch Geometric (PyG) <https://www.pyg.org/>`_ and `PyTorch <https://pytorch.org/>`_.
+PyGOD depends on `torch <https://pytorch.org/get-started/locally/>`_ and `torch_geometric (including its optional dependencies) <https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html#>`_.
 To streamline the installation, PyGOD does **NOT** install these libraries for you.
 Please install them from the above links for running PyGOD:
 
 * torch>=2.0.0
-* pytorch_geometric>=2.3.0
+* torch_geometric>=2.3.0
 
 It is recommended to use **pip** for installation.
 Please make sure **the latest version** is installed, as PyGOD is updated frequently:
@@ -130,7 +130,7 @@ Alternatively, you could clone and run setup.py file:
 
 **Required Dependencies**\ :
 
-* Python 3.8+
+* python>=3.8
 * numpy>=1.24.3
 * scikit-learn>=1.2.2
 * scipy>=1.10.1
@@ -145,19 +145,16 @@ API Cheatsheet & Reference
 
 Full API Reference: (https://docs.pygod.org). API cheatsheet for all detectors:
 
-* **fit(data)**\ : Fit detector.
-* **decision_function(data)**\ : Predict raw anomaly score of PyG data using the fitted detector.
+* **fit(data)**\ : Fit the detector with train data.
+* **predict(data)**\ : Predict on test data (train data if not provided) using the fitted detector.
 
 Key Attributes of a fitted detector:
 
 * **decision_score_**\ : The outlier scores of the input data. Outliers tend to have higher scores.
 * **label_**\ : The binary labels of the input data. 0 stands for inliers and 1 for outliers.
+* **threshold_**\ : The determined threshold for binary classification. Scores above the threshold are outliers.
 
-For the inductive setting:
-
-* **predict(data)**\ : Predict if nodes in PyG data G is an outlier or not using the fitted detector.
-
-**Input of PyGOD**: Please pass in a `PyTorch Geometric (PyG) <https://www.pyg.org/>`_ data object.
+**Input of PyGOD**: Please pass in a `PyG Data object <https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.data.Data.html#torch_geometric.data.Data>`_.
 See `PyG data processing examples <https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html#data-handling-of-graphs>`_.
 
 
@@ -177,7 +174,7 @@ DONE                2020   MLP+AE       Yes          [#Bandyopadhyay2020Outlier]
 AdONE               2020   MLP+AE       Yes          [#Bandyopadhyay2020Outlier]_
 AnomalyDAE          2020   GNN+AE       Yes          [#Fan2020AnomalyDAE]_
 GAAN                2020   GAN          Yes          [#Chen2020Generative]_
-OCGNN               2021   GNN+AE       Yes          [#Wang2021One]_
+OCGNN               2021   GNN          Yes          [#Wang2021One]_
 CoLA                2021   GNN+AE+SSL   Yes          [#Liu2021Anomaly]_
 GUIDE               2021   GNN+AE       Yes          [#Yuan2021Higher]_
 CONAD               2022   GNN+AE+SSL   Yes          [#Xu2022Contrastive]_

diff --git a/benchmark/README.md b/benchmark/README.md
@@ -4,7 +4,7 @@ Official implementation of paper [BOND: Benchmarking Unsupervised Outlier Node D
 
 ## Usage
 
-**Please update to the latest PyGOD version before the experiments.**
+**Please update to the latest PyGOD version and install additional dependencies for benchmark via ```pip install -r requirements.txt``` before the experiments.**
 
 To obtain the main result of each model on each dataset, run:
 

diff --git a/benchmark/main.py b/benchmark/main.py
@@ -3,7 +3,7 @@
 import argparse
 import warnings
 from pygod.metric import *
-from pygod.utils.utility import load_data
+from pygod.utils import load_data
 from utils import init_model
 
 
@@ -19,7 +19,7 @@ def main(args):
             score = model.decision_function(data.x)
         else:
             model.fit(data)
-            score = model.decision_scores_
+            score = model.decision_score_
 
         y = data.y.bool()
         k = sum(y)
@@ -32,6 +32,10 @@ def main(args):
         ap.append(eval_average_precision(y, score))
         rec.append(eval_recall_at_k(y, score, k))
 
+    auc = torch.tensor(auc)
+    ap = torch.tensor(ap)
+    rec = torch.tensor(rec)
+
     print(args.dataset + " " + model.__class__.__name__ + " " +
           "AUC: {:.4f}±{:.4f} ({:.4f})\t"
           "AP: {:.4f}±{:.4f} ({:.4f})\t"

diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt
@@ -0,0 +1,2 @@
+tqdm
+pyod
diff --git a/benchmark/time.py b/benchmark/time.py
@@ -28,7 +28,7 @@ def main(args):
         else:
             model.fit(data)
             t = time.time() - start_time
-            score = model.decision_scores_
+            score = model.decision_score_
 
         if os.path.isdir('./tmp'):
             shutil.rmtree('./tmp')

diff --git a/benchmark/type.py b/benchmark/type.py
@@ -21,7 +21,7 @@ def main(args):
             score = model.decision_function(data.x)
         else:
             model.fit(data)
-            score = model.decision_scores_
+            score = model.decision_score_
 
         yc = data.y >> 0 & 1
         ys = data.y >> 1 & 1

diff --git a/benchmark/utils.py b/benchmark/utils.py
@@ -34,11 +34,7 @@ def init_model(args):
     else:
         hid_dim = [32, 64, 128, 256]
 
-    if args.dataset[:3] == 'inj' or args.dataset[:3] == 'gen':
-        # auto balancing on injected dataset
-        alpha = [None]
-    else:
-        alpha = [0.8, 0.5, 0.2]
+    alpha = [0.8, 0.5, 0.2]
 
     if model_name == "adone":
         return AdONE(hid_dim=choice(hid_dim),
@@ -70,7 +66,7 @@ def init_model(args):
                      lr=choice(lr),
                      epoch=epoch,
                      gpu=gpu,
-                     alpha=choice(alpha),
+                     weight=choice(alpha),
                      batch_size=batch_size,
                      num_neigh=num_neigh)
     elif model_name == 'dominant':
@@ -80,7 +76,7 @@ def init_model(args):
                         lr=choice(lr),
                         epoch=epoch,
                         gpu=gpu,
-                        alpha=choice(alpha),
+                        weight=choice(alpha),
                         batch_size=batch_size,
                         num_neigh=num_neigh)
     elif model_name == 'done':
@@ -100,7 +96,7 @@ def init_model(args):
                     lr=choice(lr),
                     epoch=epoch,
                     gpu=gpu,
-                    alpha=choice(alpha),
+                    weight=choice(alpha),
                     batch_size=batch_size,
                     num_neigh=num_neigh)
     elif model_name == 'gcnae':

diff --git a/docs/api_cc.rst b/docs/api_cc.rst
@@ -3,19 +3,16 @@ API CheatSheet
 
 The following APIs are applicable for all detectors for easy use.
 
-* :func:`pygod.detector.Detector.fit`: Fit detector.
-* :func:`pygod.detector.Detector.decision_function`: Predict raw anomaly scores of PyG data using the fitted detector
+* :func:`pygod.detector.Detector.fit`: Fit the detector with train data.
+* :func:`pygod.detector.Detector.predict`: Predict on test data (train data if not provided) using the fitted detector.
 
 Key Attributes of a fitted detector:
 
 * :attr:`pygod.detector.Detector.decision_score_`: The outlier scores of the input data. Outliers tend to have higher scores.
 * :attr:`pygod.detector.Detector.label_`: The binary labels of the input data. 0 stands for inliers and 1 for outliers.
+* :attr:`pygod.detector.Detector.threshold_`: The determined threshold for binary classification. Scores above the threshold are outliers.
 
-For the inductive setting:
-
-* :func:`pygod.detector.Detector.predict`: Predict if a particular sample is an outlier or not using the fitted detector.
-
-**Input of PyGOD**: Please pass in a `PyTorch Geometric (PyG) <https://www.pyg.org/>`_ data object.
+**Input of PyGOD**: Please pass in a `PyG Data object <https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.data.Data.html#torch_geometric.data.Data>`_.
 See `PyG data processing examples <https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html#data-handling-of-graphs>`_.
 
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -100,7 +100,7 @@ DONE                2020   MLP+AE       Yes          :class:`pygod.detector.DONE
 AdONE               2020   MLP+AE       Yes          :class:`pygod.detector.AdONE`
 AnomalyDAE          2020   GNN+AE       Yes          :class:`pygod.detector.AnomalyDAE`
 GAAN                2020   GAN          Yes          :class:`pygod.detector.GAAN`
-OCGNN               2021   GNN+AE       Yes          :class:`pygod.detector.OCGNN`
+OCGNN               2021   GNN          Yes          :class:`pygod.detector.OCGNN`
 CoLA                2021   GNN+AE+SSL   Yes          :class:`pygod.detector.CoLA`
 GUIDE               2021   GNN+AE       Yes          :class:`pygod.detector.GUIDE`
 CONAD               2022   GNN+AE+SSL   Yes          :class:`pygod.detector.CONAD`

diff --git a/docs/install.rst b/docs/install.rst
@@ -21,17 +21,17 @@ Alternatively, you could clone and run setup.py file:
 
 **Required Dependencies**\ :
 
-* Python 3.8+
+* python>=3.8
 * numpy>=1.24.3
 * scikit-learn>=1.2.2
 * scipy>=1.10.1
 * networkx>=3.1
 
 
 **Note on PyG and PyTorch Installation**\ :
-PyGOD depends on `PyTorch Geometric (PyG) <https://www.pyg.org/>`_ and `PyTorch <https://pytorch.org/>`_.
+PyGOD depends on `torch <https://pytorch.org/get-started/locally/>`_ and `torch_geometric (including its optional dependencies) <https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html#>`_.
 To streamline the installation, PyGOD does **NOT** install these libraries for you.
 Please install them from the above links for running PyGOD:
 
 * torch>=2.0.0
-* pytorch_geometric>=2.3.0
+* torch_geometric>=2.3.0
diff --git a/docs/tigergraph_pygod_demo.ipynb b/docs/tigergraph_pygod_demo.ipynb
@@ -1,13 +1,13 @@
 {
  "cells": [
   {
+   "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "# PyGOD Demo on TigerGraph ML Workbench\n",
     "This notebook demonstrates how to run Python Graph Outlier Detection (PyGOD) package on TigerGraph Database and TigerGraph ML workbench. Please install the TigerGraph server (https://docs.tigergraph.com/tigergraph-server/current/intro/) on your local machine or remote server first, read the data ingestion tutorial from Tigergraph (https://github.com/TigerGraph-DevLabs/mlworkbench-docs/tree/main/tutorials/basics) and download necessary data files.\n",
     "We use the Cora data for demo."
-   ],
-   "cell_type": "markdown",
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
@@ -24,11 +24,11 @@
    ]
   },
   {
+   "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "## Data Ingestion\n"
-   ],
-   "cell_type": "markdown",
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "code",
@@ -414,7 +414,7 @@
     }
    ],
    "source": [
-    "outlier_scores = model.decision_scores_ # raw outlier scores on the input data\n",
+    "outlier_scores = model.decision_score_ # raw outlier scores on the input data\n",
     "print(outlier_scores)"
    ]
   },
@@ -475,4 +475,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
diff --git a/pygod/__init__.py b/pygod/__init__.py
@@ -1,5 +1,8 @@
 from . import detector
+from . import nn
+from . import generator
+from . import metric
 from . import utils
 from .version import __version__
 
-__all__ = ['detectors', 'nn', 'generator', 'metric', 'utils']
+__all__ = ['detector', 'nn', 'generator', 'metric', 'utils']
diff --git a/pygod/detector/anomalous.py b/pygod/detector/anomalous.py
@@ -114,15 +114,15 @@ def decision_function(self, data, label=None):
         return self.decision_score_
 
     def process_graph(self, data):
-        x = data.x
-        s = data.s
+        x = data.x.to(self.device)
+        s = data.s.to(self.device)
 
         s = torch.max(s, s.T)
         laplacian = torch.diag(torch.sum(s, dim=1)) - s
 
-        w_init = torch.randn_like(x.T)
+        w_init = torch.randn_like(x.T).to(self.device)
         r_init = torch.inverse((1 + self.weight_decay)
-            * torch.eye(x.shape[0]) + self.gamma * laplacian) @ x
+            * torch.eye(x.shape[0]).to(self.device) + self.gamma * laplacian) @ x
 
         return x, s, laplacian, w_init, r_init
 

diff --git a/pygod/detector/gae.py b/pygod/detector/gae.py
@@ -149,7 +149,7 @@ def init_model(self, **kwargs):
                        recon_s=self.recon_s,
                        sigmoid_s=self.sigmoid_s,
                        backbone=self.backbone,
-                       **kwargs)
+                       **kwargs).to(self.device)
 
     def forward_model(self, data):
 

diff --git a/pygod/generator/__init__.py b/pygod/generator/__init__.py
@@ -1 +1,4 @@
-from .outlier_generator import *
+from .outlier_generator import gen_contextual_outlier
+from .outlier_generator import gen_structural_outlier
+
+__all__ = ['gen_contextual_outlier', 'gen_structural_outlier']
diff --git a/pygod/metric/__init__.py b/pygod/metric/__init__.py
@@ -1 +1,8 @@
-from .metric import *
+from .metric import eval_average_precision
+from .metric import eval_f1
+from .metric import eval_precision_at_k
+from .metric import eval_recall_at_k
+from .metric import eval_roc_auc
+
+__all__ = ['eval_average_precision', 'eval_f1', 'eval_precision_at_k',
+           'eval_recall_at_k', 'eval_roc_auc']
diff --git a/pygod/nn/__init__.py b/pygod/nn/__init__.py
@@ -8,6 +8,10 @@
 from .guide import GUIDEBase
 from .ocgnn import OCGNNBase
 from .gadnr import GADNRBase
+from . import conv
+from . import decoder
+from . import encoder
+from . import functional
 
 __all__ = [
     "AdONEBase", "AnomalyDAEBase", "CoLABase", "DOMINANTBase", "DONEBase",

diff --git a/pygod/nn/functional.py b/pygod/nn/functional.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-"""Funtional Interface for PyGOD"""
+"""Functional Interface for PyGOD"""
 # Author: Kay Liu <zliu234@uic.edu>
 # License: BSD 2 clause
 
@@ -26,14 +26,14 @@ def double_recon_loss(x,
     The first dimension is kept for outlier scores of each node.
 
     For feature reconstruction, we use mean squared error loss:
-    :math:`\symbf{E_a} = \|\symbf{X}-\symbf{X}'\odot H\|`,
+    :math:`\symbf{E_a} = \|\symbf{X}-\symbf{X}'\|\odot H`,
     where :math:`H=\begin{cases}1 - \eta &
     \text{if }x_{ij}=0\\ \eta & \text{if }x_{ij}>0\end{cases}`, and
     :math:`\eta` is the positive weight for feature.
 
     For structure reconstruction, we use mean squared error loss by
-    default: :math:`\symbf{E_s} = \|\symbf{S}-\symbf{S}'\odot
-    \Theta\|`, where :math:`\Theta=\begin{cases}1 -
+    default: :math:`\symbf{E_s} = \|\symbf{S}-\symbf{S}'\|\odot
+    \Theta`, where :math:`\Theta=\begin{cases}1 -
     \theta & \text{if }s_{ij}=0\\ \theta & \text{if }s_{ij}>0
     \end{cases}`, and :math:`\theta` is the positive weight for
     structure. Alternatively, we can use binary cross entropy loss

diff --git a/pygod/utils/__init__.py b/pygod/utils/__init__.py
@@ -1,2 +1,2 @@
 from .utility import *
-from .score_converter import *
+from .score_converter import to_edge_score, to_graph_score
diff --git a/pygod/version.py b/pygod/version.py
@@ -20,4 +20,4 @@
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 
-__version__ = '0.4.0'
+__version__ = '1.0.0'