Skip to content

Commit

Permalink
Merge pull request #486 from sjsrey/examples
Browse files Browse the repository at this point in the history
ENH: more granularity in example handling
  • Loading branch information
sjsrey committed Nov 14, 2022
2 parents 483caf0 + 2a9ee94 commit fd8b9d8
Show file tree
Hide file tree
Showing 7 changed files with 661 additions and 259 deletions.
49 changes: 31 additions & 18 deletions examples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -25,7 +25,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -83,7 +83,7 @@
},
{
"cell_type": "code",
"execution_count": 71,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -126,28 +126,41 @@
},
{
"cell_type": "code",
"execution_count": 72,
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: 'libpysal/examples/examples.json'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [5]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlibpysal/examples/examples.json\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m efile:\n\u001b[1;32m 2\u001b[0m examples \u001b[38;5;241m=\u001b[39m json\u001b[38;5;241m.\u001b[39mload(efile)\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'libpysal/examples/examples.json'"
]
}
],
"source": [
"with open('libpysal/examples/examples.json', 'r') as efile:\n",
" examples = json.load(efile)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['nat', 'south', 'rio', 'mexico', 'baltimore'])"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
"ename": "NameError",
"evalue": "name 'examples' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Input \u001b[0;32mIn [6]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mexamples\u001b[49m\u001b[38;5;241m.\u001b[39mkeys()\n",
"\u001b[0;31mNameError\u001b[0m: name 'examples' is not defined"
]
}
],
"source": [
Expand Down Expand Up @@ -555,7 +568,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -569,9 +582,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
40 changes: 24 additions & 16 deletions libpysal/examples/__init__.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
""" The :mod:`libpysal.examples` module includes a number of small built-in
example datasets as well as functions to fetch larger datasets.
"""The :mod:`libpysal.examples` module provides example datasets.
The datasets consist of two sets, built-ins which are installed with
this module and remotes that can be downloaded.
This module provides functionality for working with these example datasets.
"""


import pandas as pd
from .base import example_manager
from .remotes import datasets as remote_datasets
from .builtin import datasets as builtin_datasets
from typing import Union


from typing import Union
available_datasets = builtin_datasets.copy()
available_datasets.update(remote_datasets.datasets)

__all__ = ["get_path", "available", "explain", "fetch_all"]
__all__ = ["get_path", "available", "explain", "fetch_all",
"get_url", "load_example", "summary"]

example_manager.add_examples(available_datasets)

example_manager.add_examples(builtin_datasets)

def fetch_all():
"""Fetch and install all remote datasets
"""
"""Fetch and install all remote datasets."""
datasets = remote_datasets.datasets
names = list(datasets.keys())
names.sort()
Expand All @@ -32,35 +39,36 @@ def fetch_all():

def available() -> pd.DataFrame:
    """Fetch the remote datasets, then return a dataframe of available datasets."""
    fetch_all()
    listing = example_manager.available()
    return listing


def explain(name: str) -> str:
    """Return the example manager's explanation of the dataset called ``name``."""
    explanation = example_manager.explain(name)
    return explanation


def get_url(name: str) -> str:
    """Look up the url of the remote dataset called ``name``."""
    url = example_manager.get_remote_url(name)
    return url


def load_example(example_name: str) -> Union[base.Example, builtin.LocalExample]:
    """Return the example dataset instance registered as ``example_name``.

    When the first lookup yields ``None``, the remote datasets are fetched
    and the lookup is repeated once; the result of that second lookup is
    returned as-is.
    """
    loaded = example_manager.load(example_name)
    if loaded is not None:
        return loaded

    fetch_all()  # refresh remotes
    return example_manager.load(example_name)


def get_path(file_name: str) -> Union[str, None]:
    """Get the path for a file by searching installed datasets.

    Parameters
    ----------
    file_name : str
        Name of the file to locate.

    Returns
    -------
    str or None
        The path reported by the first installed dataset that has the file;
        ``None`` (after printing a notice) when no installed dataset does.
    """
    for name in example_manager.get_installed_names():
        pth = example_manager.datasets[name].get_path(file_name, verbose=False)
        if pth:
            return pth
    print("{} is not a file in any installed dataset.".format(file_name))
    # The miss case returns None, so the annotation is Union[str, None]
    # rather than plain str.
    return None


def summary():
    """Print the example manager's report on the datasets."""
    example_manager.summary()
41 changes: 28 additions & 13 deletions libpysal/examples/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import io
import os
import webbrowser
from os import environ, makedirs
from os.path import exists, expanduser, join
from platformdirs import user_data_dir
import zipfile
import requests
Expand All @@ -35,9 +33,9 @@ def get_data_home():

appname = "pysal"
appauthor = "pysal"
data_home = user_data_dir(appname, appauthor)
if not exists(data_home):
makedirs(data_home, exist_ok=True)
data_home = user_data_dir(appname, appauthor)
if not os.path.exists(data_home):
os.makedirs(data_home, exist_ok=True)
return data_home


Expand Down Expand Up @@ -122,7 +120,9 @@ class Example:
"""

def __init__(self, name, description, n, k, download_url, explain_url):
def __init__(self, name, description, n, k, download_url,
explain_url):
"""Initialize Example."""
self.name = name
self.description = description
self.n = n
Expand All @@ -134,12 +134,10 @@ def __init__(self, name, description, n, k, download_url, explain_url):

def get_local_path(self, path=get_data_home()) -> str:
"""Get the local path for example."""

return join(path, self.root)
return os.path.join(path, self.root)

def get_path(self, file_name, verbose=True) -> Union[str, None]:
"""Get the path for local file."""

file_list = self.get_file_list()
for file_path in file_list:
base_name = os.path.basename(file_path)
Expand All @@ -151,7 +149,6 @@ def get_path(self, file_name, verbose=True) -> Union[str, None]:

def downloaded(self) -> bool:
"""Check if the example has already been installed."""

path = self.get_local_path()
if os.path.isdir(path):
self.installed = True
Expand Down Expand Up @@ -180,7 +177,7 @@ def download(self, path=get_data_home()):
if not self.downloaded():
request = requests.get(self.download_url)
archive = zipfile.ZipFile(io.BytesIO(request.content))
target = join(path, self.root)
target = os.path.join(path, self.root)
print("Downloading {} to {}".format(self.name, target))
archive.extractall(path=target)
self.zipfile = archive
Expand Down Expand Up @@ -213,8 +210,8 @@ def load(self, file_name) -> io.FileIO:
class Examples:
"""Manager for pysal example datasets."""

def __init__(self):
self.datasets = {}
def __init__(self, datasets=None):
    """Initialize the manager with an optional mapping of datasets.

    Parameters
    ----------
    datasets : dict, optional
        Mapping to use as the manager's ``datasets``. It is stored by
        reference, not copied. When omitted, the manager starts with its
        own new empty dictionary.
    """
    # A ``{}`` default is created once at definition time and would be
    # shared by every manager built without arguments, so entries added to
    # one manager would appear in all the others.
    self.datasets = {} if datasets is None else datasets

def add_examples(self, examples):
"""Add examples to the set of datasets available."""
Expand Down Expand Up @@ -273,5 +270,23 @@ def get_installed_names(self) -> list:
ds = self.datasets
return [name for name in ds if ds[name].installed]

def get_remote_url(self, name):
    """Return the download url of a dataset.

    Parameters
    ----------
    name : str
        Name of the dataset to look up in ``self.datasets``.

    Returns
    -------
    str or None
        The dataset's ``download_url``; ``None`` (after printing a notice)
        when the name is unknown or the dataset has no ``download_url``
        attribute.
    """
    if name not in self.datasets:
        print(f'{name} is not an available dataset.')
        return None
    try:
        return self.datasets[name].download_url
    except AttributeError:
        # Datasets without a ``download_url`` attribute are reported as
        # built-in. Only that case is caught, so unrelated failures
        # (including KeyboardInterrupt) are no longer swallowed.
        print(f'{name} is a built-in dataset, no url.')
        return None


def summary(self):
    """Print how many datasets are available, installed, and remote."""
    catalog = self.available()
    n = len(catalog.index)
    n_installed = catalog["Installed"].sum()
    # Whatever is listed but not installed is counted as remote.
    n_remote = n - n_installed
    print(f'{n} datasets available, {n_installed} installed, {n_remote} remote.')


example_manager = Examples()
26 changes: 22 additions & 4 deletions libpysal/examples/builtin.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Handle local builtin datasets.
"""
"""Handle local builtin datasets."""

import os
from .base import get_list_of_files
Expand Down Expand Up @@ -38,11 +37,30 @@


class LocalExample:
"""
Builtin pysal example dataset
"""Builtin pysal example dataset.
Attributes
----------
name : str
Example name
dirname : str
Path holding example files
installed : boolean
If True, the example is installed locally; if False, it is remote.
description : str
Summary of the properties of the example
"""

def __init__(self, name, dirname):
"""Initialize LocalExample with name and dirname.
Parameters
----------
name : str
example name
dirname: str
path to directory holding example files
"""
self.name = name
self.dirname = dirname
self.installed = True
Expand Down
12 changes: 6 additions & 6 deletions libpysal/examples/remotes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Handle remote datasets.
"""
"""Handle remote datasets."""

from bs4 import BeautifulSoup
import requests
Expand All @@ -16,7 +15,6 @@ def poll_remotes():
Example datasets keyed by the dataset name.
"""

# Geoda Center Data Sets

url = "https://geodacenter.github.io/data-and-lab//"
Expand Down Expand Up @@ -95,17 +93,19 @@ def poll_remotes():
return datasets


#datasets = poll_remotes()

class Remotes:
    """Holder for the remote datasets, polled on first access."""

    def __init__(self):
        """Start with nothing polled yet."""
        self._datasets = None

    @property
    def datasets(self):
        """Return the remotes dictionary, calling ``poll_remotes`` if unset."""
        if self._datasets is not None:
            return self._datasets
        # Nothing stored yet: poll and keep the result for later accesses.
        self._datasets = poll_remotes()
        return self._datasets

datasets = Remotes()

datasets = Remotes()

0 comments on commit fd8b9d8

Please sign in to comment.