In [5]:
from __future__ import print_function, division
import planet4 as p4
import pandas as pd
from planet4 import markings
from planet4.get_data import get_current_database_fname
from planet4 import clustering
import os
from os.path import join as pjoin
HOME = os.environ['HOME']

In [2]:
# Path of the classification database as reported by
# get_current_database_fname(). The store is opened read-only because this
# notebook only queries it; the parallel tasks below read the same file
# through pd.read_hdf, so no writable handle should be held open here.
dbfile = get_current_database_fname()
store = pd.HDFStore(dbfile, mode='r')
store

<class 'pandas.io.pytables.HDFStore'>
File path: /Users/klay6683/data/planet4/2015-01-25_planet_four_classifications_queryable.h5
/df            frame_table  (typ->appendable,nrows->11979081,ncols->22,indexers->[index],dc->[classification_id,image_id,image_name,user_name,marking,acquisition_date,local_mars_time])

In [3]:
# The 'df' table holds millions of classification rows, so image_name values
# repeat many times. De-duplicate (keeping first-seen order) so that slicing
# or mapping over image_names dispatches each image only once.
image_names = (store.select_column('df', 'image_name')
               .drop_duplicates()
               .reset_index(drop=True))

In [7]:
# Client for the IPython parallel cluster; the views created from it below are
# used to push names to the engines and to run tasks on them.
# NOTE(review): IPython.parallel was split out into the separate `ipyparallel`
# package in later IPython releases — confirm which one this environment has.
from IPython.parallel import Client
client = Client()

In [15]:
# dview addresses the engines directly (used below for dview.push);
# lview is the load-balanced view (used below for lview.map_async).
dview = client.direct_view()
lview = client.load_balanced_view()

In [49]:
%%px
# Executed on the engines rather than locally: process_image_name runs
# remotely and needs pd, clustering, markings, pjoin, os and HOME to exist in
# each engine's namespace.
import pandas as pd
from planet4 import clustering, markings
from os.path import join as pjoin
import os
HOME = os.environ['HOME']

In [50]:
# Example image name; it matches the prefix of the output files listed and
# written further down. NOTE(review): no code visible in this notebook reads
# this variable — confirm whether it is still needed.
img_name = "ESP_011623_0985"

def do_blotches(p4img):
    """Cluster the blotch markings of one image id into a table.

    Parameters
    ----------
    p4img : object
        Must provide ``get_blotches()`` and an ``imgid`` attribute; the caller
        in this notebook passes a ``markings.ImageID``.

    Returns
    -------
    pandas.DataFrame or None
        One row per clustered blotch, built from each result's ``.data``,
        plus an ``image_id`` column holding ``p4img.imgid``. ``None`` when
        ``clustering.perform_dbscan`` returns ``None``.
    """
    clustered = clustering.perform_dbscan(p4img.get_blotches())
    if clustered is not None:
        table = pd.DataFrame([item.data for item in clustered])
        # Tag every row with the id of the image it came from.
        table['image_id'] = p4img.imgid
        return table
    return None

def do_fans(p4img):
    """Cluster the fan markings of one image id into a table.

    Parameters
    ----------
    p4img : object
        Must provide ``get_fans()`` and an ``imgid`` attribute; the caller in
        this notebook passes a ``markings.ImageID``.

    Returns
    -------
    pandas.DataFrame or None
        One row per clustered fan, built from each result's ``.data``, plus an
        ``image_id`` column holding ``p4img.imgid``. ``None`` when
        ``clustering.perform_dbscan`` returns ``None``.
    """
    clustered = clustering.perform_dbscan(p4img.get_fans(), fans=True)
    if clustered is not None:
        table = pd.DataFrame([item.data for item in clustered])
        # Tag every row with the id of the image it came from.
        table['image_id'] = p4img.imgid
        return table
    return None
    
def process_image_name(image_name):
    """Cluster all markings of one image name and store the results as HDF files.

    Selects the rows of the 'df' table in ``dbfile`` whose ``image_name``
    equals the given name, runs ``do_blotches`` and ``do_fans`` for every
    unique ``image_id`` found, and writes the concatenated results to
    ``<HOME>/data/planet4/reduced/<image_name>_reduced_blotches.hdf`` and
    ``..._reduced_fans.hdf`` (key 'df').

    The function is meant to run on the parallel engines, so it relies on
    these names existing in the executing namespace: ``pd``, ``markings``,
    ``pjoin``, ``os``, ``HOME``, ``dbfile``, ``do_blotches`` and ``do_fans``.
    It deliberately uses no module-level helpers beyond those, since only
    pushed names are available remotely.

    Parameters
    ----------
    image_name : str
        Value to match against the ``image_name`` column.

    Returns
    -------
    None
    """
    # Quote the value so the query compares against a string literal instead
    # of depending on how an unquoted bare name happens to be resolved.
    query = "image_name == {!r}".format(str(image_name))
    data = pd.read_hdf(dbfile, 'df', where=query)
    img_ids = data.image_id.unique()
    print("Found {} unique P4 image_ids.".format(img_ids.shape[0]))
    blotches = []
    fans = []
    for img_id in img_ids:
        print(img_id)
        p4img = markings.ImageID(img_id)
        blotches.append(do_blotches(p4img))
        fans.append(do_fans(p4img))

    dirname = pjoin(HOME, 'data/planet4/reduced')
    # Several engines may get here at once: attempt the creation and only
    # re-raise if the directory still does not exist afterwards.
    try:
        os.makedirs(dirname)
    except OSError:
        if not os.path.isdir(dirname):
            raise

    # Both result kinds go to the same directory with parallel file names.
    for kind, frames in (('blotches', blotches), ('fans', fans)):
        # do_blotches/do_fans return None when clustering yields nothing;
        # pd.concat raises if every entry is None, so filter them out first.
        frames = [frame for frame in frames if frame is not None]
        if not frames:
            print("No clustered {} for {}.".format(kind, image_name))
            continue
        reduced = pd.concat(frames, ignore_index=True)
        fname = image_name + '_reduced_' + kind + '.hdf'
        reduced.to_hdf(pjoin(dirname, fname), 'df')

In [51]:
# Copy the two helper functions and the database path into every engine's
# namespace; process_image_name looks them up there when it runs remotely.
dview.push(dict(do_blotches=do_blotches,
                do_fans=do_fans,
                dbfile=dbfile))

<AsyncResult: finished>

In [52]:
# Trial run: submit process_image_name for only the first two entries of
# image_names via the load-balanced view; the call returns without waiting.
result = lview.map_async(process_image_name, image_names[:2])

In [53]:
# Collect the per-task return values; process_image_name has no return
# statement, so each entry is None and the real output is the files it writes.
result.get()

[None, None]

In [7]:
# List the HDF files present in the current working directory.
ls *.hdf

ESP_011623_0985_reduced_blotches.hdf  ESP_011623_0985_reduced_fans.hdf


In [14]:
def cluster_p4_id(image_id):
    """Cluster the blotch markings of a single image id.

    Parameters
    ----------
    image_id : str
        Identifier passed to ``markings.ImageID``.

    Returns
    -------
    pandas.DataFrame or None
        One row per clustered blotch, built from each result's ``.data``,
        plus an ``image_id`` column. ``None`` when
        ``clustering.perform_dbscan`` returns ``None``, mirroring
        ``do_blotches``; ``pd.concat`` skips such entries.
    """
    imgid = markings.ImageID(image_id)
    reduced_blotches = clustering.perform_dbscan(imgid.get_blotches())
    # Without this guard the comprehension below fails with a TypeError
    # whenever clustering produces no result.
    if reduced_blotches is None:
        return None
    df = pd.DataFrame([b.data for b in reduced_blotches])
    df['image_id'] = image_id
    return df

In [15]:
# Cluster the blotches of every image id serially, collecting one frame per id.
# print() is called as a function because this notebook imports print_function
# from __future__, which makes the Python 2 print statement a syntax error.
# NOTE(review): img_ids is not defined at notebook level in any visible cell
# (it only exists as a local inside process_image_name) — it has to be defined
# before this cell can run on a fresh kernel.
results = []
for img_id in img_ids:
    print(img_id)
    results.append(cluster_p4_id(img_id))

APF00001r0
Estimated number of clusters: 10
APF00001qv
Estimated number of clusters: 12
APF00001rk
Estimated number of clusters: 14
APF00001su
Estimated number of clusters: 4
APF00001qs
Estimated number of clusters: 5
APF00001qp
Estimated number of clusters: 11
APF00001sa
Estimated number of clusters: 19
APF00001t1
Estimated number of clusters: 12
APF00001s4
Estimated number of clusters: 2
APF00001rs
Estimated number of clusters: 21
APF00001r6
Estimated number of clusters: 10
APF00001rl
Estimated number of clusters: 4
APF00001rv
Estimated number of clusters: 6
APF00001sx
Estimated number of clusters: 3
APF00001rh
Estimated number of clusters: 8
APF00001ql
Estimated number of clusters: 10
APF00001sh
Estimated number of clusters: 10
APF00001sr
Estimated number of clusters: 12
APF00001qk
Estimated number of clusters: 10
APF00001r1
Estimated number of clusters: 5
APF00001s7
Estimated number of clusters: 9
APF00001s0
Estimated number of clusters: 5
APF00001sd
Estimated number of clusters: 1

In [16]:
# Number of per-image_id results collected by the loop above.
len(results)

108

In [17]:
# Stack the per-image_id frames into one table; ignore_index=True discards the
# individual frames' indices in favour of a single new one.
image_name_blotches = pd.concat(results, ignore_index=True)

In [18]:
# Save the combined table as CSV; the relative path puts it in the current
# working directory. The file name prefix repeats the value of img_name.
image_name_blotches.to_csv('ESP_011623_0985_reduced_blotches.csv')