Skip to content

Commit

Permalink
Merge commit '197c0cbfd529fd617c3c4e893ea8e3cd0d2493a0' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
mdekstrand committed Jan 22, 2022
2 parents 8857440 + 197c0cb commit ee43dcd
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 3 deletions.
33 changes: 33 additions & 0 deletions lkbuild/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from pathlib import Path
import requests
from zipfile import ZipFile

# Base URL of the GroupLens MovieLens download directory; the archive name
# (``<data set>.zip``) is appended to it.  HTTPS is used so the archive, which
# is extracted after download, cannot be altered in transit.
ML_LOC = "https://files.grouplens.org/datasets/movielens/"

# Maps each supported data set name to the path of its ratings file, relative
# to the directory the archive is unpacked into.  The presence of that file is
# what marks a data set as already fetched.
ML_DATASETS = {
    'ml-100k': 'ml-100k/u.data',
    'ml-1m': 'ml-1m/ratings.dat',
    'ml-10m': 'ml-10M100K/ratings.dat',
    'ml-20m': 'ml-20m/ratings.csv',
    'ml-25m': 'ml-25m/ratings.csv',
}


def fetch_ml(dir: Path, ds: str):
    """
    Download and unpack a MovieLens data set unless it is already present.

    The archive ``<ds>.zip`` is fetched from ``ML_LOC``, stored in ``dir``,
    and extracted into ``dir``.  Nothing is downloaded when the ratings file
    listed for ``ds`` in ``ML_DATASETS`` already exists under ``dir``.

    Args:
        dir: Directory that receives the archive and the unpacked files.  It
            is created (with parents) if it does not exist yet.
        ds: Name of the data set; must be a key of ``ML_DATASETS``.

    Raises:
        KeyError: If ``ds`` is not a key of ``ML_DATASETS``.
        requests.HTTPError: If the server responds with an error status.
    """
    dir = Path(dir)
    zipname = f'{ds}.zip'
    zipfile = dir / zipname
    zipurl = ML_LOC + zipname

    test_file = dir / ML_DATASETS[ds]
    if test_file.exists():
        print(test_file, 'already exists')
        return

    # A fresh checkout has no data directory; opening the archive for writing
    # would otherwise fail with FileNotFoundError.
    dir.mkdir(parents=True, exist_ok=True)

    print('downloading data set', ds)
    # The context manager releases the streamed connection even on failure.
    with requests.get(zipurl, stream=True, timeout=60) as res:
        # Fail before touching the disk so an HTTP error page is never saved
        # as if it were the zip archive.
        res.raise_for_status()
        with zipfile.open('wb') as zf:
            for block in res.iter_content(None):
                zf.write(block)

    print('unpacking data set')
    with ZipFile(zipfile, 'r') as zf:
        zf.extractall(dir)
2 changes: 1 addition & 1 deletion lkbuild/mkl-spec.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# environment mini-spec for ensuring MKL
dependencies:
- libblas=*=*mkl
- mkl=2020
- tbb
62 changes: 60 additions & 2 deletions lkbuild/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,23 @@
"""

import sys
from pathlib import Path
from invoke import task
from . import env
import yaml
import requests

# NOTE(review): Python only honours the lowercase name ``__all__``; this
# upper-case variable does not restrict what ``from ... import *`` exports.
# Confirm whether that is intended before renaming it.
__ALL__ = ['dev_lock', 'conda_platform']

# Default directory handed to the data-fetching task.
DATA_DIR = Path('data')

# Remote source and local destination used by the ``update_bibtex`` task.
BIBTEX_URL = 'https://paperpile.com/eb/YdOlWmnlit'
BIBTEX_FILE = Path('docs/lenskit.bib')

@task(iterable=['extras'])
def dev_lock(c, platform=None, extras=None, version=None, blas=None, env_file=False):
@task(iterable=['extras', 'mixins'])
def dev_lock(c, platform=None, extras=None, version=None, blas=None, mixins=None, env_file=False):
"Create a development lockfile"
plat = env.conda_platform()

Expand All @@ -32,17 +38,69 @@ def dev_lock(c, platform=None, extras=None, version=None, blas=None, env_file=Fa
cmd += f' -f lkbuild/python-{version}-spec.yml'
if blas:
cmd += f' -f lkbuild/{blas}-spec.yml'
for m in mixins:
cmd += f' -f {m}'
for e in extras:
cmd += f' -e {e}'

print('running', cmd, file=sys.stderr)
c.run(cmd)


@task(iterable=['extras'])
def env_file(c, platform=None, extras=None, version=None, blas=None, dev_deps=True,
             output=None, name=None):
    """Create an unresolved environment file

    Parses ``pyproject.toml`` plus the optional Python-version and BLAS spec
    files with conda-lock, aggregates the result, and dumps the channels and
    dependency specs as YAML.

    Args:
        c: The invoke context (unused here).
        platform: Platform to parse for; defaults to ``env.conda_platform()``.
        extras: Extras passed through to ``parse_source_files``.
        version: If set, adds ``lkbuild/python-<version>-spec.yml``.
        blas: If set, adds ``lkbuild/<blas>-spec.yml``.
        dev_deps: Passed through to ``parse_source_files``.
        output: File to write; when empty the YAML goes to standard output.
        name: If set, stored under the ``name`` key of the environment.
    """
    from conda_lock.conda_lock import aggregate_lock_specs, parse_source_files

    target = platform or env.conda_platform()

    # pyproject.toml always comes first; the optional spec files follow in a
    # fixed order (Python version, then BLAS) when their option is given.
    optional = [
        (version, f'lkbuild/python-{version}-spec.yml'),
        (blas, f'lkbuild/{blas}-spec.yml'),
    ]
    sources = [Path('pyproject.toml')]
    sources.extend(Path(spec) for enabled, spec in optional if enabled)

    parsed = parse_source_files(sources, target, dev_deps, extras)
    merged = aggregate_lock_specs(parsed)

    env_spec = {'channels': merged.channels, 'dependencies': merged.specs}
    if name:
        env_spec['name'] = name

    if not output:
        yaml.dump(env_spec, sys.stdout)
        return

    print('writing environment to', output, file=sys.stderr)
    with Path(output).open('w') as f:
        yaml.dump(env_spec, f)


@task
def conda_platform(c, gh_output=False):
    """Print the platform reported by ``env.conda_platform()``

    Args:
        c: The invoke context (unused here).
        gh_output: When true, publish the value as the GitHub Actions step
            output ``conda-platform`` instead of printing it bare.
    """
    import os

    plat = env.conda_platform()
    if not gh_output:
        print(plat)
        return

    # GitHub deprecated the ``::set-output`` workflow command in favour of
    # appending ``name=value`` lines to the file named by $GITHUB_OUTPUT.
    out_file = os.environ.get('GITHUB_OUTPUT')
    if out_file:
        with open(out_file, 'a', encoding='utf-8') as f:
            print('conda-platform=' + plat, file=f)
    else:
        # Runners that do not provide the output file still get the old command.
        print('::set-output name=conda-platform::' + plat)


@task
def update_bibtex(c):
    """Update the BibTeX file

    Downloads ``BIBTEX_URL`` and writes the response text to ``BIBTEX_FILE``
    as UTF-8.

    Args:
        c: The invoke context (unused here).

    Raises:
        requests.HTTPError: If the server responds with an error status; the
            existing file is left untouched in that case.
    """
    # requests has no default timeout; without one a stalled server hangs the task.
    res = requests.get(BIBTEX_URL, timeout=60)
    # Never overwrite the bibliography with the body of an error response.
    res.raise_for_status()
    print('updating file', BIBTEX_FILE)
    BIBTEX_FILE.write_text(res.text, encoding='utf-8')


@task
def fetch_data(c, data='ml-100k', data_dir=DATA_DIR):
    """Fetch a data set.

    Args:
        c: The invoke context (unused here).
        data: Name of the data set.  Names starting with ``ml-`` are handled
            by ``datasets.fetch_ml``.
        data_dir: Directory to download and unpack the data set into.

    Raises:
        ValueError: If ``data`` does not name a supported kind of data set.
    """
    from . import datasets

    if data.startswith('ml-'):
        # Honour the caller's directory; it was previously ignored in favour
        # of the module default.
        datasets.fetch_ml(Path(data_dir), data)
    else:
        # Report unsupported names instead of silently doing nothing.
        raise ValueError(f'unknown data set {data!r}')

0 comments on commit ee43dcd

Please sign in to comment.