Skip to content

Commit

Permalink
Create a small demo dataset by parsing JSON files created from OSE wi…
Browse files Browse the repository at this point in the history
…ki GVCS product ecology (#7811)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: rusty1s <matthias.fey@tu-dortmund.de>
  • Loading branch information
3 people committed Jul 31, 2023
1 parent e8f752f commit 1199597
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

- Added the `OSE_GVCS` dataset ([#7811](https://github.com/pyg-team/pytorch_geometric/pull/7811))
- Added `output_initializer` argument to `DimeNet` models ([#7774](https://github.com/pyg-team/pytorch_geometric/pull/7774), [#7780](https://github.com/pyg-team/pytorch_geometric/pull/7780))
- Added `lexsort` implementation ([#7775](https://github.com/pyg-team/pytorch_geometric/pull/7775))
- Added possibility to run inference benchmarks on XPU device ([#7705](https://github.com/pyg-team/pytorch_geometric/pull/7705))
Expand Down
2 changes: 2 additions & 0 deletions torch_geometric/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
from .igmc_dataset import IGMCDataset
from .amazon_book import AmazonBook
from .hm import HM
from .ose_gvcs import OSE_GVCS

from .fake import FakeDataset, FakeHeteroDataset
from .sbm_dataset import StochasticBlockModelDataset
Expand Down Expand Up @@ -190,6 +191,7 @@
'IGMCDataset',
'AmazonBook',
'HM',
'OSE_GVCS',
]
synthetic_datasets = [
'FakeDataset',
Expand Down
111 changes: 111 additions & 0 deletions torch_geometric/datasets/ose_gvcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import json
import os
from collections import defaultdict
from typing import Callable, List, Optional

import torch

from torch_geometric.data import (
HeteroData,
InMemoryDataset,
download_url,
extract_tar,
)


class OSE_GVCS(InMemoryDataset):
    r"""A dataset describing the `Product ecology
    <https://wiki.opensourceecology.org/wiki/Product_Ecologies>`_ of the Open
    Source Ecology's iconoclastic `Global Village Construction Set
    <https://wiki.opensourceecology.org/wiki/
    Global_Village_Construction_Set>`_.
    GVCS is a modular, DIY, low-cost set of blueprints that enables the
    fabrication of the 50 different industrial machines that it takes to
    build a small, sustainable civilization with modern comforts.

    The dataset contains a heterogeneous graph with 50 :obj:`machine` nodes,
    composing the GVCS, and 290 directed edges, each representing one out of
    three relationships between machines.

    Each :obj:`machine` node carries a :obj:`category` attribute holding the
    index of its category in :attr:`categories`. Edges are stored under the
    edge type :obj:`('machine', rel, 'machine')` for every :obj:`rel` in
    :attr:`relationships` that occurs at least once in the raw files.

    Args:
        root (str): Root directory where the dataset should be saved.
        transform (callable, optional): Forwarded unchanged to the
            :class:`InMemoryDataset` base class. (default: :obj:`None`)
        pre_transform (callable, optional): A function that takes in the
            assembled :obj:`HeteroData` object and returns a transformed
            version. It is applied once in :meth:`process`, before the
            data is saved to disk. (default: :obj:`None`)
    """
    # Node vocabulary: a machine's node index is its position in this list.
    machines = [
        '3D Printer', '3D Scanner', 'Aluminum Extractor', 'Backhoe',
        'Bakery Oven', 'Baler', 'Bioplastic Extruder', 'Bulldozer', 'Car',
        'CEB Press', 'Cement Mixer', 'Chipper Hammermill', 'CNC Circuit Mill',
        'CNC Torch Table', 'Dairy Milker', 'Drill Press',
        'Electric Motor Generator', 'Gasifier Burner', 'Hay Cutter',
        'Hay Rake', 'Hydraulic Motor', 'Induction Furnace', 'Industrial Robot',
        'Ironworker', 'Laser Cutter', 'Metal Roller', 'Microcombine',
        'Microtractor', 'Multimachine', 'Nickel-Iron Battery', 'Pelletizer',
        'Plasma Cutter', 'Power Cube', 'Press Forge', 'Rod and Wire Mill',
        'Rototiller', 'Sawmill', 'Seeder', 'Solar Concentrator', 'Spader',
        'Steam Engine', 'Steam Generator', 'Tractor', 'Trencher', 'Truck',
        'Universal Power Supply', 'Universal Rotor', 'Welder',
        'Well-Drilling Rig', 'Wind Turbine'
    ]
    # Category vocabulary: the stored label is the position in this list.
    categories = [
        'habitat', 'agriculture', 'industry', 'energy', 'materials',
        'transportation'
    ]
    # Only these relationship types are turned into edge types.
    relationships = ['from', 'uses', 'enables']

    url = 'https://github.com/Wesxdz/ose_gvcs/raw/master/ose_gvcs.tar.gz'

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        pre_transform: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transform, pre_transform)
        self.load(self.processed_paths[0], data_cls=HeteroData)

    @property
    def raw_file_names(self) -> List[str]:
        """One JSON file name per machine: lower-cased, spaces replaced by
        underscores (hyphens are kept)."""
        return [
            f"{machine.lower().replace(' ', '_')}.json"
            for machine in self.machines
        ]

    @property
    def processed_file_names(self) -> str:
        """Name of the single processed file."""
        return 'data.pt'

    def download(self) -> None:
        """Download the archive into :obj:`root`, unpack it into
        :obj:`raw_dir` and delete the archive afterwards."""
        path = download_url(self.url, self.root)
        extract_tar(path, self.raw_dir)
        os.unlink(path)

    def process(self) -> None:
        """Parse the per-machine JSON files into one :obj:`HeteroData`
        graph and save it to :obj:`processed_paths[0]`."""
        data = HeteroData()

        # NOTE(review): node i is presumably machines[i], which relies on
        # raw_paths following the order of raw_file_names - confirm against
        # the base class.
        categories = []
        edges = defaultdict(list)

        for path in self.raw_paths:
            # JSON text is UTF-8; do not depend on the platform's locale
            # default encoding when decoding it.
            with open(path, 'r', encoding='utf-8') as f:
                product = json.load(f)
            categories.append(self.categories.index(product['category']))
            for interaction in product['ecology']:
                # NOTE Some ecology items are not GVCS machines or have other
                # relationship types we don't want included.
                rt = interaction['relationship']
                if rt not in self.relationships:
                    continue
                dst = interaction['tool']
                if dst not in self.machines:
                    continue
                # The source index is resolved only once an edge is actually
                # kept, so a file without usable edges never triggers this
                # lookup.
                src = self.machines.index(product['machine'])
                dst = self.machines.index(dst)
                edges[rt].append((src, dst))

        data['machine'].num_nodes = len(categories)
        data['machine'].category = torch.tensor(categories)

        for rel, pairs in edges.items():
            # The list of (src, dst) pairs gives an [E, 2] tensor; transpose
            # to [2, E] and materialize it, since `.t()` alone only returns
            # a non-contiguous view.
            edge_index = torch.tensor(pairs).t().contiguous()
            data['machine', rel, 'machine'].edge_index = edge_index

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        self.save([data], self.processed_paths[0])

0 comments on commit 1199597

Please sign in to comment.