In [46]:
import urllib.parse

In [47]:
import json
import os
from pathlib import Path
from os import path

# Absolute path of the directory the notebook is executed from.
cwd = Path.cwd()

In [48]:
# Directory holding one sub-directory per example; both spellings refer to
# the same Path object.
EXAMPLE_DIR = cwd / "libpysal" / "examples"
example_dir = EXAMPLE_DIR

## Examples

The source distribution of libpysal contains a number of official examples that can be used to illustrate
functionality.

### Requirements

Each official example requires two things:

- an entry in `examples.json`
- a directory in `libpysal/examples/dirname` where `dirname` is the name of the example.

In the source distribution we will have a directory for every official example. Examples are either
local or remote. For local examples, their directories contain:

- README.md
- all data files

For remote examples, the source directory contains only:

- README.md

### Example entries in `examples.json`

### README.md for examples

The README.md file has to follow a common structure so that metadata can be generated to provide users with information about the example, as well as an inventory of the files that are available for use.

### Remote examples

These are examples whose data files are too large to include in the source distribution. Instead, metadata is provided that allows the data files to be downloaded, along with information about the example for discovery.

Remote examples will be downloaded into a default folder `~/PYSALDATA/dirname`. The location of this folder can be changed using the environment variable `PYSALDATA`.

## example userland functions

`available` will list the names and brief descriptions of the official examples, distinguishing between those examples that have been installed (local source and downloaded remote examples) as well as those that would have to be downloaded before using.

`explain` will provide a detailed listing for a particular example.





In [71]:
class Example:
    """An example described by one entry of ``examples.json``.

    Parameters
    ----------
    name : str
        Name (key) of the example.
    info : dict
        Entry for the example. Must contain ``'dir'`` (the example's
        directory name below ``EXAMPLE_DIR``); the presence of ``'url'``
        marks the example as remote.

    Attributes
    ----------
    name : str
        Name of the example.
    remote : bool
        True when ``info`` has a ``'url'`` key.
    dir : str
        Directory name taken from ``info['dir']``.
    contents : str or None
        Full text of the example's README.md, or None if the file is missing.
    file_contents : str or None
        Text between the "## Files" and "## Reference" headings of the
        README, or None if the README is missing or lacks those headings.
    """

    # Headings that delimit the file inventory inside README.md.
    _FILES_HEADING = "## Files"
    _REFERENCE_HEADING = "## Reference"

    def __init__(self, name, info):
        self.name = name
        self.remote = 'url' in info
        if self.remote:
            print('TODO: check if downloaded already')
        self.dir = info['dir']
        self.readme()

    def readme(self):
        """Read README.md and extract the file inventory section.

        Sets ``contents`` and ``file_contents``; both are reset to None first
        so they are always defined, even when the README is missing or
        malformed. Problems are reported with a printed message rather than
        an exception.
        """
        self.contents = None
        self.file_contents = None

        print('TODO: readme')
        pth = EXAMPLE_DIR.joinpath(self.dir, 'README.md')
        print(pth)
        if not path.exists(pth):
            print('Example missing README.md: ', self.name)
            return

        with open(pth, 'r', encoding='utf-8') as readme:
            contents = readme.read()
        self.contents = contents

        try:
            fid = contents.index(self._FILES_HEADING)
            # Search from the Files heading so an earlier "## Reference"
            # cannot produce an inverted (empty) slice.
            rid = contents.index(self._REFERENCE_HEADING, fid)
        except ValueError:
            # str.index raises ValueError when a heading is absent.
            print('README.md is broken: ', pth)
            return

        # Skip the heading plus the newline that ends its line, and drop the
        # newline immediately preceding the Reference heading.
        start = fid + len(self._FILES_HEADING) + 1
        self.file_contents = contents[start:rid - 1]
        

# read examples

In [72]:
# Parse the registry of official examples into a dict keyed by example name.
examples_json = Path('libpysal/examples/examples.json')
examples = json.loads(examples_json.read_text())

In [73]:
# Names of the examples loaded from examples.json.
examples.keys()

dict_keys(['nat', 'south', 'rio', 'mexico', 'baltimore'])

## process examples

In [74]:
# Build an Example for every registry entry and report whether it is remote
# (has a 'url') or local. `example` and `o` keep their last values after the
# loop and are inspected in later cells.
for name, info in examples.items():
    example = Example(name, info)
    if 'url' not in info:
        print('local', name)
        continue
    print('remote', name)
    o = urllib.parse.urlparse(info['url'])
    # check if example has already been downloaded

    # if not warn that it will be downloaded
       
        
    

TODO: check if downloaded already
TODO: readme
/home/jovyan/libpysal/examples/nat/README.md
Example missing README.md:  nat
remote nat
TODO: check if downloaded already
TODO: readme
/home/jovyan/libpysal/examples/south/README.md
Example missing README.md:  south
remote south
TODO: check if downloaded already
TODO: readme
/home/jovyan/libpysal/examples/rio/README.md
remote rio
TODO: readme
/home/jovyan/libpysal/examples/mexico/README.md
README.md is broken:  /home/jovyan/libpysal/examples/mexico/README.md
local mexico
TODO: readme
/home/jovyan/libpysal/examples/baltim/README.md
local baltimore


In [75]:
# File-inventory text extracted from the README of the last example processed above.
example.file_contents

'\n* baltim.dbf: attribute data. (k=17)\n* baltim.shp: Point shapefile. (n=211)\n* baltim.shx: spatial index.\n* baltim.tri.k12.kwt: kernel weights using a triangular kernel with 12 nearest neighbors in KWT format.\n* baltim_k4.gwt: nearest neighbor weights (4nn) in GWT format.\n* baltim_q.gal: queen contiguity weights in GAL format.\n* baltimore.geojson: spatial weights in geojson format.\n\n'

In [76]:
# Short alias for the file-inventory text, used in the following cells.
fc = example.file_contents

In [77]:
# Split on the bullet marker; the first element is whatever precedes the first bullet.
fc.split("\n*")

['',
 ' baltim.dbf: attribute data. (k=17)',
 ' baltim.shp: Point shapefile. (n=211)',
 ' baltim.shx: spatial index.',
 ' baltim.tri.k12.kwt: kernel weights using a triangular kernel with 12 nearest neighbors in KWT format.',
 ' baltim_k4.gwt: nearest neighbor weights (4nn) in GWT format.',
 ' baltim_q.gal: queen contiguity weights in GAL format.',
 ' baltimore.geojson: spatial weights in geojson format.\n\n']

In [81]:
# Walk the inventory line by line (skipping the text before the first
# newline) and split each bullet into a file name and its description.
for line in fc.split("\n")[1:]:
    line = line.strip()
    print(line)
    # Only lines that begin with the bullet marker are inventory entries.
    if line.startswith("*"):
        # partition splits on the FIRST colon only, so descriptions that
        # contain ':' (or bullets with no ':') no longer raise ValueError.
        name, sep, desc = line.partition(":")
        print(name, desc)

* baltim.dbf: attribute data. (k=17)
* baltim.dbf  attribute data. (k=17)
* baltim.shp: Point shapefile. (n=211)
* baltim.shp  Point shapefile. (n=211)
* baltim.shx: spatial index.
* baltim.shx  spatial index.
* baltim.tri.k12.kwt: kernel weights using a triangular kernel with 12 nearest neighbors in KWT format.
* baltim.tri.k12.kwt  kernel weights using a triangular kernel with 12 nearest neighbors in KWT format.
* baltim_k4.gwt: nearest neighbor weights (4nn) in GWT format.
* baltim_k4.gwt  nearest neighbor weights (4nn) in GWT format.
* baltim_q.gal: queen contiguity weights in GAL format.
* baltim_q.gal  queen contiguity weights in GAL format.
* baltimore.geojson: spatial weights in geojson format.
* baltimore.geojson  spatial weights in geojson format.




In [56]:
# Full README text of the last example processed by the loop.
c = example.contents

In [57]:
# Display the raw README text.
c

'# baltim\n\nBaltimore house sales prices and hedonics 1978\n\n## Files\n\n* baltim.dbf: attribute data. (k=17)\n* baltim.shp: Point shapefile. (n=211)\n* baltim.shx: spatial index.\n* baltim.tri.k12.kwt: kernel weights using a triangular kernel with 12 nearest neighbors in KWT format.\n* baltim_k4.gwt: nearest neighbor weights (4nn) in GWT format.\n* baltim_q.gal: queen contiguity weights in GAL format.\n* baltimore.geojson: spatial weights in geojson format.\n\n\n## Reference\n\n\n\n'

In [61]:
# Break the README text into individual lines for inspection below.
lines = c.split("\n")
# Character offsets of the two section headings; only the last expression
# (the offset of "## Reference") is displayed as the cell output.
c.index("## Files")
c.index("## Reference")

454

In [59]:
# Display the README split into lines.
lines

['# baltim',
 '',
 'Baltimore house sales prices and hedonics 1978',
 '',
 '## Files',
 '',
 '* baltim.dbf: attribute data. (k=17)',
 '* baltim.shp: Point shapefile. (n=211)',
 '* baltim.shx: spatial index.',
 '* baltim.tri.k12.kwt: kernel weights using a triangular kernel with 12 nearest neighbors in KWT format.',
 '* baltim_k4.gwt: nearest neighbor weights (4nn) in GWT format.',
 '* baltim_q.gal: queen contiguity weights in GAL format.',
 '* baltimore.geojson: spatial weights in geojson format.',
 '',
 '',
 '## Reference',
 '',
 '',
 '',
 '']

In [59]:
# Path component of the URL parsed into `o` by the processing loop.
# NOTE(review): execution counts are out of order, so the recorded value may
# come from an earlier run — confirm on a fresh kernel.
o.path

'baltim'

In [66]:
from os import walk

# Only the top level of the examples directory is needed, so take the first
# tuple produced by walk(); fall back to empty listings if it yields nothing.
_top, dirnames, filenames = next(walk('libpysal/examples'), (None, [], []))
f = list(filenames)
dirs = list(dirnames)

In [67]:
# Top-level sub-directories found under libpysal/examples.
dirs

['stl',
 'wmat',
 'book',
 'chicago',
 'Polygon',
 'newHaven',
 'juvenile',
 'burkitt',
 'georgia',
 'berlin',
 'mexico',
 'networks',
 'sids2',
 'baltim',
 'Polygon_Holes',
 'geodanet',
 'arcgis',
 'Point',
 'Line',
 'clearwater',
 '10740',
 'calemp',
 'street_net_pts',
 'snow_maps',
 'us_income',
 '__pycache__',
 'columbus',
 'tests',
 'tokyo',
 'desmith',
 'virginia']

In [68]:
# Print one directory name per line. The loop variable is deliberately not
# called `dir`, which would shadow the builtin for the rest of the session.
for dirname in dirs:
    print(dirname)

stl
wmat
book
chicago
Polygon
newHaven
juvenile
burkitt
georgia
berlin
mexico
networks
sids2
baltim
Polygon_Holes
geodanet
arcgis
Point
Line
clearwater
10740
calemp
street_net_pts
snow_maps
us_income
__pycache__
columbus
tests
tokyo
desmith
virginia


In [69]:
# Shell escape: list the contents of the `stl` example directory.
!ls libpysal/examples/stl/


README.md  stl_hom.csv	stl_hom.html	  stl_hom.shp  stl_hom.txt
stl.gal    stl_hom.dbf	stl_hom_rook.gal  stl_hom.shx  stl_hom.wkt
