In [3]:
from __future__ import print_function, division
import planet4 as p4
import pandas as pd
from planet4 import markings
from planet4.io import get_current_database_fname
from planet4 import clustering
import os
from os.path import join as pjoin
HOME = os.environ['HOME']

In [4]:
dbfile = get_current_database_fname()
store = pd.HDFStore(dbfile)
store

<class 'pandas.io.pytables.HDFStore'>
File path: /Users/klay6683/data/planet4/2015-06-21_planet_four_classifications_queryable.h5
/df            frame_table  (typ->appendable,nrows->13064413,ncols->23,indexers->[index],dc->[classification_id,image_id,image_name,user_name,marking,acquisition_date,local_mars_time])

In [5]:
image_names = store.select_column('df', 'image_name').unique()

In [4]:
len(image_names)

421

In [5]:
image_names[:3]

array(['ESP_011544_0985', 'ESP_021684_0985', 'ESP_011697_0980'], dtype=object)

In [7]:
from IPython.parallel import Client
client = Client()

In [8]:
dview = client.direct_view()
lview = client.load_balanced_view()

In [10]:
%%px
import pandas as pd
from planet4 import clustering, markings
from os.path import join as pjoin
import os
HOME = os.environ['HOME']

In [11]:
def do_clustering(p4img, fans):
    if fans:
        reduced = clustering.perform_dbscan(p4img.get_fans(), fans=fans)
    else:
        reduced = clustering.perform_dbscan(p4img.get_blotches(), fans=fans)
    if reduced is None:
        return None
    series = [cluster.data for cluster in reduced]
    n_members = [cluster.n_members for cluster in reduced]
    df = pd.DataFrame(series)
    df['image_id'] = p4img.imgid
    df['n_members'] = n_members
    return df
    
def process_image_name(image_name):
    dirname = pjoin(HOME, 'data/planet4/reduced')
    blotchfname = pjoin(dirname, image_name+'_reduced_blotches.hdf')
    fanfname = pjoin(dirname, image_name+'_reduced_fans.hdf')
    if os.path.exists(blotchfname) and\
            os.path.exists(fanfname):
        return image_name+' already done.'
    data = pd.read_hdf(dbfile, 'df', where="image_name="+image_name)
    img_ids = data.image_id.unique()
    blotches = []
    fans = []
    for img_id in img_ids:
        p4img = markings.ImageID(img_id)
        blotches.append(do_clustering(p4img, fans=False))
        fans.append(do_clustering(p4img, fans=True))
    blotches = pd.concat(blotches, ignore_index=True)
    blotches.to_hdf(blotchfname, 'df')
    fans = pd.concat(fans, ignore_index=True)
    fans.to_hdf(fanfname, 'df')
    return image_name

In [12]:
dview.push({'do_clustering':do_clustering,
            'dbfile':dbfile})

<AsyncResult: finished>

In [22]:
result = lview.map_async(process_image_name, image_names)

In [18]:
for res in result:
    print(res)

ESP_011544_0985 already done.
ESP_021684_0985 already done.
ESP_011697_0980


In [23]:
import time
import sys
while not result.ready():
    print("{:.1f} %".format(100*result.progress/len(image_names)))
    sys.stdout.flush()
    time.sleep(30)

0.7 %
0.7 %
1.2 %
1.2 %
1.4 %
1.7 %
2.1 %
2.4 %
2.9 %
2.9 %
3.1 %
3.1 %
3.6 %
4.0 %
4.0 %
4.3 %
5.0 %
5.0 %
5.5 %
5.7 %
5.7 %
6.4 %
6.4 %
6.4 %
6.7 %
7.4 %
7.6 %
7.6 %
8.1 %
8.3 %
8.8 %
9.0 %
9.5 %
9.5 %
9.7 %
10.2 %
10.9 %
11.2 %
11.4 %
11.6 %
11.9 %
12.4 %
12.4 %
12.8 %
13.1 %
13.3 %
13.3 %
13.5 %
14.3 %
14.5 %
15.0 %
15.7 %
15.9 %
16.4 %
17.1 %
17.3 %
17.6 %
17.8 %
18.1 %
18.1 %
18.5 %
18.5 %
19.2 %
19.7 %
20.4 %
20.9 %
20.9 %
20.9 %
21.4 %
21.9 %
22.3 %
22.3 %
22.6 %
23.5 %
24.0 %
24.7 %
25.2 %
25.4 %
25.9 %
26.1 %
26.4 %
26.8 %
27.1 %
27.3 %
27.6 %
27.8 %
28.0 %
28.7 %
29.2 %
29.9 %
30.2 %
30.9 %
31.4 %
31.8 %
32.1 %
32.3 %
32.8 %
33.7 %
34.4 %
35.4 %
35.6 %
35.9 %
36.3 %
36.8 %
37.3 %
38.2 %
38.7 %
39.2 %
39.7 %
40.4 %
40.6 %
41.3 %
42.3 %
43.0 %
43.5 %
44.4 %
44.7 %
45.4 %
46.3 %
46.8 %
47.5 %
48.2 %
49.2 %
49.9 %
50.4 %
51.1 %
52.0 %
52.3 %
52.7 %
52.7 %
53.2 %
53.2 %
53.2 %
53.7 %
53.7 %
53.9 %
54.4 %
54.4 %
54.6 %
54.9 %
55.1 %
55.3 %
55.6 %
55.8 %
56.1 %
56.3 %
56.3 %
56.8 %

In [24]:
reducedfiles = !ls ~/data/planet4/reduced
nooffiles = len(reducedfiles)
print("Produced", nooffiles, "files.")

Produced 842 files.


In [20]:
from planet4.io import is_catalog_production_good

In [21]:
is_catalog_production_good()

KeyboardInterrupt: 