Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 82 additions & 63 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,74 +1,93 @@
name: CI

on:
on:
push:
path:
- 'ge/*'
- 'tests/*'
paths:
- "ge/**"
- "tests/**"
- "examples/**"
- ".github/workflows/**"
- "setup.py"
- "README.md"
pull_request:
path:
- 'ge/*'
- 'tests/*'

paths:
- "ge/**"
- "tests/**"
- "examples/**"
- ".github/workflows/**"
- "setup.py"
- "README.md"

jobs:
build:

runs-on: ubuntu-latest
runs-on: ubuntu-22.04
timeout-minutes: 180
strategy:
fail-fast: false
matrix:
python-version: [3.6,3.7,3.8]
tf-version: [1.4.0,1.15.0,2.5.0,2.6.0,2.7.0,2.8.0,2.9.0]

exclude:
- python-version: 3.7
tf-version: 1.4.0
- python-version: 3.7
tf-version: 1.15.0
- python-version: 3.8
tf-version: 1.4.0
- python-version: 3.8
tf-version: 1.14.0
- python-version: 3.8
tf-version: 1.15.0
- python-version: 3.6
tf-version: 2.7.0
- python-version: 3.6
tf-version: 2.8.0
- python-version: 3.6
tf-version: 2.9.0
- python-version: 3.9
tf-version: 1.4.0
- python-version: 3.9
tf-version: 1.15.0
- python-version: 3.9
tf-version: 2.2.0
include:
- python-version: "3.7"
tf-version: "1.15.5"
use-legacy-keras: "0"
- python-version: "3.10"
tf-version: "2.10.0"
use-legacy-keras: "0"
- python-version: "3.10"
tf-version: "2.15.0"
use-legacy-keras: "0"
- python-version: "3.11"
tf-version: "2.15.0"
use-legacy-keras: "0"
- python-version: "3.10"
tf-version: "2.20.0"
use-legacy-keras: "1"
- python-version: "3.11"
tf-version: "2.20.0"
use-legacy-keras: "1"
- python-version: "3.12"
tf-version: "2.20.0"
use-legacy-keras: "0"
- python-version: "3.13"
tf-version: "2.20.0"
use-legacy-keras: "0"

steps:

- uses: actions/checkout@v3

- name: Setup python environment
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- uses: actions/checkout@v4

- name: Setup Python environment
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
if [[ "${{ matrix.tf-version }}" == "2.10.0" ]]; then
python -m pip install -q "numpy<2"
fi
python -m pip install -q "tensorflow==${{ matrix.tf-version }}"
if [[ "${{ matrix.tf-version }}" == 1.* ]]; then
python -m pip install -q "protobuf==3.20.3"
fi
if [[ "${{ matrix.use-legacy-keras }}" == "1" ]]; then
python -m pip install -q "tf-keras~=2.20"
fi
python -m pip install -e ".[test]"
if [[ "${{ matrix.tf-version }}" == "2.10.0" ]]; then
python -m pip install -q "numpy<2"
fi

- name: Test with pytest
timeout-minutes: 180
env:
TF_USE_LEGACY_KERAS: ${{ matrix.use-legacy-keras }}
run: |
pytest --cov=ge --cov=examples --cov-report=xml

- name: Install dependencies
run: |
pip3 install -q tensorflow==${{ matrix.tf-version }}
pip install -q protobuf==3.19.0
pip install -q requests
pip install -e .
- name: Test with pytest
timeout-minutes: 180
run: |
pip install -q pytest
pip install -q pytest-cov
pip install -q python-coveralls
pytest --cov=ge --cov-report=xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3.1.0
with:
token: ${{secrets.CODECOV_TOKEN}}
file: ./coverage.xml
flags: pytest
name: py${{ matrix.python-version }}-tf${{ matrix.tf-version }}
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
flags: pytest
name: py${{ matrix.python-version }}-tf${{ matrix.tf-version }}
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![GitHub Issues](https://img.shields.io/github/issues/shenweichen/graphembedding.svg
)](https://github.com/shenweichen/graphembedding/issues)
![CI status](https://github.com/shenweichen/graphembedding/workflows/CI/badge.svg)
[![CI status](https://github.com/shenweichen/graphembedding/actions/workflows/ci.yml/badge.svg)](https://github.com/shenweichen/graphembedding/actions/workflows/ci.yml)
[![codecov](https://codecov.io/gh/shenweichen/graphembedding/branch/master/graph/badge.svg)](https://codecov.io/gh/shenweichen/graphembedding)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/c46407f5931f40048e28860dccf7dabc)](https://www.codacy.com/gh/shenweichen/GraphEmbedding/dashboard?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=shenweichen/GraphEmbedding&amp;utm_campaign=Badge_Grade)
[![Disscussion](https://img.shields.io/badge/chat-wechat-brightgreen?style=flat)](./README.md#disscussiongroup--related-projects)
Expand All @@ -21,13 +21,16 @@
| Struc2Vec | [KDD 2017][struc2vec: Learning Node Representations from Structural Identity](https://arxiv.org/pdf/1704.03165.pdf) | [【Graph Embedding】Struc2Vec:算法原理,实现和应用](https://zhuanlan.zhihu.com/p/56733145) |




# How to run examples
1. clone the repo and make sure you have installed `tensorflow` or `tensorflow-gpu` on your local machine.
2. run following commands

1. Clone the repo and install dependencies.
2. Run one example script.

```bash
python setup.py install
cd examples
python deepwalk_wiki.py
pip install -e .[tf]
python examples/deepwalk_wiki.py
```

## DisscussionGroup & Related Projects
Expand Down
49 changes: 33 additions & 16 deletions examples/alias.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,47 @@
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))

from ge.alias import alias_sample, create_alias_table


def gen_prob_dist(N):
p = np.random.randint(0, 100, N)
return p/np.sum(p)
def gen_prob_dist(size):
probabilities = np.random.randint(0, 100, size)
return probabilities / np.sum(probabilities)


def simulate(size=100, sample_count=10000):
truth = gen_prob_dist(size)
accept, alias = create_alias_table(truth)

sampled = np.zeros(size)
for _ in range(sample_count):
sampled[alias_sample(accept, alias)] += 1
return sampled / np.sum(sampled), truth


def simulate(N=100, k=10000,):
def main(smoke=False, show=True):
size = 20 if smoke else 100
sample_count = 300 if smoke else 10000
alias_result, truth = simulate(size=size, sample_count=sample_count)

truth = gen_prob_dist(N)
assert np.isclose(alias_result.sum(), 1.0)
assert np.isclose(truth.sum(), 1.0)

area_ratio = truth
accept, alias = create_alias_table(area_ratio)
if show:
plt.bar(list(range(len(alias_result))), alias_result, label="alias_result")
plt.bar(list(range(len(truth))), truth, label="truth")
plt.legend()
plt.show()

ans = np.zeros(N)
for _ in range(k):
i = alias_sample(accept, alias)
ans[i] += 1
return ans/np.sum(ans), truth
return alias_result, truth


if __name__ == "__main__":
alias_result, truth = simulate()
plt.bar(list(range(len(alias_result))), alias_result, label='alias_result')
plt.bar(list(range(len(truth))), truth, label='truth')
plt.legend()
main()
90 changes: 57 additions & 33 deletions examples/deepwalk_wiki.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,77 @@
from pathlib import Path
import sys

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE

PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))

from ge.classify import read_node_label, Classifier
from ge import DeepWalk
from sklearn.linear_model import LogisticRegression
from ge.classify import Classifier, read_node_label

import matplotlib.pyplot as plt
import networkx as nx
from sklearn.manifold import TSNE
WIKI_GRAPH_PATH = PROJECT_ROOT / "data" / "wiki" / "Wiki_edgelist.txt"
WIKI_LABEL_PATH = PROJECT_ROOT / "data" / "wiki" / "wiki_labels.txt"
SMOKE_GRAPH_PATH = PROJECT_ROOT / "tests" / "Wiki_edgelist.txt"


def evaluate_embeddings(embeddings):
X, Y = read_node_label('../data/wiki/wiki_labels.txt')
tr_frac = 0.8
print("Training classifier using {:.2f}% nodes...".format(
tr_frac * 100))
def evaluate_embeddings(embeddings, label_path):
x_data, y_data = read_node_label(str(label_path))
train_fraction = 0.8
print("Training classifier using {:.2f}% nodes...".format(train_fraction * 100))
clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
clf.split_train_evaluate(X, Y, tr_frac)

clf.split_train_evaluate(x_data, y_data, train_fraction)

def plot_embeddings(embeddings,):
X, Y = read_node_label('../data/wiki/wiki_labels.txt')

emb_list = []
for k in X:
emb_list.append(embeddings[k])
emb_list = np.array(emb_list)
def plot_embeddings(embeddings, label_path, show=True):
x_data, y_data = read_node_label(str(label_path))

model = TSNE(n_components=2)
node_pos = model.fit_transform(emb_list)
embedding_list = np.array([embeddings[node] for node in x_data])
node_pos = TSNE(n_components=2).fit_transform(embedding_list)

color_idx = {}
for i in range(len(X)):
color_idx.setdefault(Y[i][0], [])
color_idx[Y[i][0]].append(i)
for index, label in enumerate(y_data):
color_idx.setdefault(label[0], [])
color_idx[label[0]].append(index)

for c, idx in color_idx.items():
plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
for label, indexes in color_idx.items():
plt.scatter(node_pos[indexes, 0], node_pos[indexes, 1], label=label)
plt.legend()
plt.show()
if show:
plt.show()
else:
plt.close()


if __name__ == "__main__":
G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])
def main(smoke=False, show=True):
graph_path = SMOKE_GRAPH_PATH if smoke else WIKI_GRAPH_PATH
graph = nx.read_edgelist(
str(graph_path),
create_using=nx.DiGraph(),
nodetype=None,
data=[("weight", int)],
)

model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
model.train(window_size=5, iter=3)
model = DeepWalk(
graph,
walk_length=3 if smoke else 10,
num_walks=2 if smoke else 80,
workers=1,
)
model.train(window_size=2 if smoke else 5, iter=1 if smoke else 3, workers=1)
embeddings = model.get_embeddings()
assert len(embeddings) > 0

if not smoke:
evaluate_embeddings(embeddings, WIKI_LABEL_PATH)
plot_embeddings(embeddings, WIKI_LABEL_PATH, show=show)

evaluate_embeddings(embeddings)
plot_embeddings(embeddings)
return embeddings


if __name__ == "__main__":
main()
Loading
Loading