In [None]:
%matplotlib inline
import seaborn as sns
sns.set_context('notebook')

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from planet4 import io, clustering, plotting, markings

In [None]:
def cluster_and_plot(image_id,
                     eps=10,
                     dynamic=False,
                     angles=False,
                     scaler='robust',
                     normalize=False,
                     dir_ext='_check1',
                     msf=0.15):
    """Cluster one image id with the DBSCAN settings given and save the pipeline plot.

    Parameters
    ----------
    image_id
        Identifier handed to ``ClusteringManager.cluster_image_id`` and to
        ``plotting.plot_image_id_pipeline``.
    eps, scaler, normalize
        Forwarded to ``ClusteringManager`` under the same keyword names.
    dynamic
        Forwarded as ``do_dynamic_min_samples``.
    angles
        Forwarded as ``include_angle``.
    dir_ext : str
        Suffix appended to ``'old_DBSCAN'`` to build ``output_dir``.
    msf
        Forwarded as ``min_samples_factor``.

    Returns
    -------
    dict
        ``{'id_': image_id}``.
    """
    # Function-scope import, presumably so the function stays self-contained
    # when it is shipped to parallel workers -- confirm.
    from planet4 import clustering, plotting

    manager_options = {
        'do_dynamic_min_samples': dynamic,
        'include_angle': angles,
        'quiet': True,
        'output_dir': 'old_DBSCAN' + dir_ext,
        'normalize': normalize,
        'scaler': scaler,
        'min_samples_factor': msf,
        'use_DBSCAN': True,
        'eps': eps,
    }
    manager = clustering.ClusteringManager(**manager_options)
    manager.cluster_image_id(image_id)

    plotting.plot_image_id_pipeline(image_id, save=True,
                                    savetitle='dbscan_', cm=manager)
    return {'id_': image_id}

In [None]:
def create_and_save_randoms():
    """Draw 100 entries from ``ids`` and write them to ``'myids.npy'``.

    ``ids`` is read from the notebook's global namespace; its definition is
    not in the visible cells, so make sure it exists before calling.
    ``np.random.choice`` samples with replacement by default, so the saved
    selection may contain duplicates.
    """
    np.save('myids.npy', np.random.choice(ids, 100))

In [None]:
# Load the id selection stored in 'myids.npy' (the file name that
# create_and_save_randoms writes to) and show how many entries it holds.
myids = np.load('myids.npy')

len(myids)

In [None]:
def do_parallel(func, list_):
    """Map ``func`` over ``list_`` on an ipyparallel load-balanced view.

    Parameters
    ----------
    func : callable
        Applied to every element of ``list_``.
    list_ : sequence
        Items to process; also passed to the progress display.

    Returns
    -------
    The object returned by ``lbview.map_async``. It used to be discarded,
    which left callers with no way to collect the mapped outputs or inspect
    remote errors.
    """
    from ipyparallel import Client
    from nbtools import display_multi_progress

    client = Client()
    lbview = client.load_balanced_view()
    results = lbview.map_async(func, list_)
    display_multi_progress(results, list_)
    return results

In [None]:
# Column lists taken from the `to_average` attribute of markings.Blotch and
# markings.Fan. Only the last bare expression (`fancols`) is echoed as the
# cell output; the bare `blotchcols` line in the middle displays nothing.
blotchcols = markings.Blotch.to_average
blotchcols
fancols = markings.Fan.to_average
fancols

In [None]:
# Choose the image to inspect. Each assignment overrides the previous one,
# so 'bvc' is the id in effect; the commented lines are further alternatives.
imgid = '1at'
imgid = 'dch'
imgid = 'bvc'
# imgid = '1dr'
# imgid = '1fe'
# Wrap the id string in an ImageID object, which provides `.data` and the
# plotting methods used in later cells.
p4id = markings.ImageID(imgid, scope='planet4')

In [None]:
data = p4id.data

# NOTE: this query is not the last expression of the cell, so its result is
# not displayed.
data.query('marking=="blotch" and radius_1 < radius_2')[blotchcols]

# invert ellipse radii
# For every row where radius_1 < radius_2: swap the two radius columns so
# that radius_1 holds the larger value, and add 90 to the angle to go with
# the swapped axes.
idx = data.radius_1 < data.radius_2
col_orig = ['radius_1','radius_2']
col_reversed = list(reversed(col_orig))
# `.values` drops the column labels so the assignment is positional
# (otherwise pandas would align on column names and undo the swap).
data.loc[idx, col_orig] = data.loc[idx, col_reversed].values
# NOTE(review): the angle is not wrapped after adding 90 -- confirm that
# downstream code accepts the resulting range.
data.loc[idx, 'angle'] += 90

In [None]:
# Cluster image 'bvc' with eps=20 (all other settings at their defaults) and
# save the pipeline plot.
cluster_and_plot('bvc', eps=20)

In [None]:
# Keep only the blotch markings. The label is 'blotch' (as in the query on
# `data` above); the previous spelling 'blotc' matched no rows and produced
# an empty frame.
blotches = data[data.marking=='blotch']

In [None]:
# Restrict the blotches to a region of interest in image coordinates and
# plot only those.
q = 'x>240 and x<430 and y>500'
p4id.plot_blotches(blotches=blotches.query(q))

In [None]:
# NOTE: `data` is rebound here from the full marking table to the blotch
# subset inside the region of interest.
data = blotches.query(q)

In [None]:
data[blotchcols]

In [None]:
# Label-based slice: rows from index label 2409476 onward.
data.loc[2409476:][blotchcols]

In [None]:
p4id.plot_blotches(blotches=data.loc[2409476:])

In [None]:
# Query string (rebinding `q`) that drops rows where angle == 90,
# radius_1 == 10 and radius_2 == 10 all hold at once -- presumably blotches
# left at their default shape; confirm.
q='not(angle==90.000 and radius_1==10.000 and radius_2==10.000)'

In [None]:
# Apply the filter in `q` and keep only the blotch columns. The explicit
# copy makes `data` an independent frame, so the new columns assigned in the
# following cell do not trigger pandas' SettingWithCopyWarning.
data = data.query(q)[blotchcols].copy()

In [None]:
# Represent each angle as a unit vector (cos, sin). The np.deg2rad call means
# the 'angle' column is treated as degrees here.
angles = data['angle']
data['xang'] = np.cos(np.deg2rad(angles))
data['yang'] = np.sin(np.deg2rad(angles))

In [None]:
data

In [None]:
# Plain array holding only the x and y columns; this is the clustering input.
current_X = data[['x','y']].values

In [None]:
current_X

In [None]:
from planet4.dbscan import DBScanner

In [None]:
# Cluster the x/y positions with eps=20 and min_samples=3.
dbscanner = DBScanner(current_X, eps=20, min_samples=3)

In [None]:
# First entry of the scanner's `reduced_data`.
dbscanner.reduced_data[0]

This means all ellipses were clustered together; with eps=10, only 3 of these 6 are picked.

In [None]:
# Positional selection of the rows of `data` given by the first entry of
# `dbscanner.reduced_data`.
clusterdata = data.iloc[dbscanner.reduced_data[0]]

So clusterdata is just the same as the input data; I simply repeat the exact same code steps here for consistency.

In [None]:
clusterdata

In [None]:
# Make the cosine component non-negative before averaging.
# NOTE(review): only xang is folded; yang keeps its sign. If the intent is to
# treat angle and angle+180 as the same orientation, yang would have to be
# negated on the same rows -- confirm.
clusterdata.xang = clusterdata.xang.abs()

In [None]:
# Column-wise mean of the cluster members.
meandata = clusterdata.mean()
meandata

In [None]:
# Mean angle in degrees, rebuilt from the averaged sine and cosine components.
np.rad2deg(np.arctan2(meandata.yang, meandata.xang))

In [None]:
from sklearn.preprocessing import StandardScaler, robust_scale

# NOTE(review): `X` is not assigned in any visible cell above (the coordinate
# array built earlier is named `current_X`) -- confirm which array is meant.
# `scaler` is fitted here but not used again in the visible cells.
scaler = StandardScaler().fit(X)

# Robust-scaled version of X; this is the array that do_hdbscan fits on.
tX = robust_scale(X)

In [None]:
import hdbscan
def do_hdbscan(min_cluster, min_samples=None):
    """Cluster ``tX`` with HDBSCAN, plot the result and return per-cluster masks.

    Relies on these notebook-level names: ``tX`` (array passed to ``fit``),
    ``X`` (array whose first two columns are plotted) and ``p4id`` (the
    ImageID object whose ``plot_fans`` is called before the scatter plot).

    Parameters
    ----------
    min_cluster : int
        Passed to ``hdbscan.HDBSCAN`` as ``min_cluster_size``.
    min_samples : int, optional
        Passed to ``hdbscan.HDBSCAN`` as ``min_samples``.

    Returns
    -------
    list of boolean arrays
        One mask per non-noise label (label != -1). A mask is True where a
        point carries that label and its membership probability exceeds 0.75.
    """
    clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster,
                                approx_min_span_tree=False,
                                min_samples=min_samples,
                               )

    # `imgid` is a plain id string in this notebook and has no plotting
    # methods; the ImageID object built from it is `p4id`.
    p4id.plot_fans()

    db = clusterer.fit(tX)
    labels = db.labels_
    # Points with a membership probability above 0.75 count as "core".
    core_samples_mask = db.probabilities_ > 0.75

    unique_labels = set(labels)
    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(unique_labels) - (1 if -1 in labels else 0)

    colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black used for noise.
            col = 'k'

        class_member_mask = (labels == k)

        # Core members are drawn with large markers, the rest with small ones.
        for selection, size in ((class_member_mask & core_samples_mask, 14),
                                (class_member_mask & ~core_samples_mask, 6)):
            xy = X[selection]
            plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
                     markeredgecolor='k', markersize=size)

    plt.title('Estimated number of clusters: %d' % n_clusters_)

    # One mask of core members per real cluster; the noise label is skipped.
    return [(labels == k) & core_samples_mask
            for k in unique_labels if k != -1]

In [None]:
# HDBSCAN with min_cluster_size=4 and min_samples=1.
reduced_data = do_hdbscan(4, 1)

In [None]:
# x/y columns of the rows selected by the first cluster mask.
blotches[reduced_data[0]]['x y'.split()]

In [None]:
tX.shape

In [None]:
# NOTE(review): at this point `db` only exists as a local inside do_hdbscan;
# a notebook-level `db` is first assigned further below (io.DBManager()).
# Confirm which object this cell expects.
plt.figure()
db.condensed_tree_.plot(select_clusters=True)

In [None]:
# NOTE(review): `cm` is a local of cluster_and_plot; no notebook-level `cm`
# is assigned in the visible cells.
cm.dbname

In [None]:
db = io.DBManager()

In [None]:
# Markings for image 'bvc' fetched through the DB manager; rebinds `data`.
data = db.get_image_id_markings('bvc')

In [None]:
# Number of distinct classification ids for this image.
n_class_old = data.classification_id.nunique()
n_class_old

In [None]:
# number of classifications that include a fan or a blotch marking
f1 = data.marking == 'fan'
f2 = data.marking == 'blotch'
n_class_fb = data[f1 | f2].classification_id.nunique()
n_class_fb

In [None]:
# Keep only blotch markings. The explicit copy makes `data` an independent
# frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
data = data[data.marking=='blotch'].copy()

In [None]:
plotting.plot_raw_blotches('bvc')

In [None]:
# Flipped vertical coordinate: y_R = 1000 - y.
data['y_R'] = 1000 - data['y']

In [None]:
data.plot(kind='scatter', x='x',y='y_R')

In [None]:
# Boolean masks bounding the box 300 < x < 400 and 300 < y_R < 400.
fx1 = data.x < 400 
fx2 = data.x > 300
fy1 = data.y_R > 300
fy2 = data.y_R < 400

In [None]:
# reset_index() replaces the index with 0..n-1 and moves the old index into a
# column. NOTE: the masks above were built on the old index labels.
data = data.reset_index()

In [None]:
# fx1..fy2 were built before `data.reset_index()`, so they still carry the
# old index labels and no longer align with `data`. reset_index keeps the row
# order, so the combined mask is applied positionally via `.values`.
data[(fx1 & fx2 & fy1 & fy2).values].angle

In [None]:
# NOTE(review): `cm` is not assigned at notebook level in the visible cells
# (it only exists as a local inside cluster_and_plot) -- confirm where it
# comes from.
cm.dbscanner.reduced_data