In [1]:
from backend.data_model.time_series import DataMultibandTimeSeries
from backend.data_model.time_series import TimeSeriesMongoDataBase
from backend.data_model.clusters import Cluster, ClustersMongoDataBase
from backend.offline.offline_algorithms import Birch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import string

In [2]:
def extract_feature_matrix(database, id_list):
    """Stack the reduced feature vectors of the requested time series.

    Looks up ``id_list`` in the ``'macho'`` catalog through
    ``database.get_many`` and collects each returned object's
    ``reduced_vector``. Series whose vector is missing (``None``) or empty
    are left out of the matrix and their ``id`` is printed so they can be
    inspected. A ``requested, kept`` count line is printed at the end.

    Parameters
    ----------
    database : object exposing ``get_many(catalog, id_list)``
        Source of the time-series objects.
    id_list : sized collection
        Ids of the time series to fetch.

    Returns
    -------
    numpy.ndarray
        ``np.array`` built from the kept vectors (one row per kept series
        when all vectors have the same length).
    """
    feature_vectors = []
    for time_series in database.get_many('macho', id_list):
        feature_vector = time_series.reduced_vector
        # A missing vector is treated like an empty one (plot_lightcurves
        # applies the same None guard); len(None) would raise TypeError.
        if feature_vector is None or len(feature_vector) == 0:
            print(time_series.id)
            continue
        feature_vectors.append(feature_vector)
    print('{0}, {1}'.format(len(id_list), len(feature_vectors)))
    return np.array(feature_vectors)


def plot_cluster_list(centers, clusters, database):
    """Draw cluster centers and the members of every cluster in one figure.

    Centers are drawn as ``'x'`` markers from columns 0 and 1 of ``centers``.
    Each entry of ``clusters`` is passed as an id list to
    ``extract_feature_matrix`` and the resulting points are drawn as ``'o'``
    markers filled with a colour taken from the ``Spectral`` colormap.
    The colours are shuffled with ``np.random.shuffle``, so the
    cluster-to-colour assignment varies between runs unless NumPy's global
    random state is seeded beforehand.
    """
    center_x, center_y = centers[:, 0], centers[:, 1]
    plt.plot(center_x, center_y, 'x')

    # One colour per cluster, evenly spaced over the colormap, then shuffled
    # in place.
    palette = plt.cm.Spectral(np.linspace(0, 1, len(clusters)))
    np.random.shuffle(palette)

    for member_ids, fill in zip(clusters, palette):
        points = extract_feature_matrix(database, member_ids)
        plt.plot(points[:, 0], points[:, 1], 'o', markerfacecolor=fill)

    plt.show()


def plot_lightcurves(lightcurve_list):
    """Scatter-plot the first two reduced features of each lightcurve.

    Only lightcurves whose ``reduced_vector`` is not ``None`` and has more
    than one component are drawn, because the plot reads components 0 and 1.
    When no lightcurve qualifies, a notice is printed and nothing is drawn.

    Parameters
    ----------
    lightcurve_list : iterable
        Objects exposing a ``reduced_vector`` attribute.
    """
    reduced_features = [
        lc.reduced_vector
        for lc in lightcurve_list
        if lc.reduced_vector is not None and len(lc.reduced_vector) > 1
    ]
    if not reduced_features:
        # np.vstack raises ValueError on an empty sequence, so bail out
        # before stacking when there is nothing to draw.
        print('no lightcurves with reduced features to plot')
        return
    reduced_features = np.vstack(reduced_features)
    plt.plot(reduced_features[:, 0], reduced_features[:, 1], '*')
    plt.show()

In [3]:
# Time-series store constructed with the name 'lightcurves'
# (presumably the MongoDB database name -- TODO confirm).
mongodb = TimeSeriesMongoDataBase('lightcurves')
# An empty query document is passed for the 'macho' catalog; presumably this
# selects every entry -- TODO confirm against find_many.
# NOTE(review): `lightcurves` is iterated here by plot_lightcurves and again
# when it is fed to Birch in a later cell. If find_many returns a one-shot
# cursor rather than a list, the second pass would see nothing -- confirm.
lightcurves = mongodb.find_many('macho', {})

# Quick look at the first two reduced features before clustering.
plot_lightcurves(lightcurves)

In [4]:
# First positional argument handed to Birch below.
threshold = 0.75
# NOTE(review): the remaining positional arguments ('d1', 'r', 10, True, 1)
# are not explained anywhere in this notebook -- confirm their meaning
# against backend.offline.offline_algorithms.Birch.
birch = Birch(threshold, 'd1', 'r', 10, True, 1)
birch.add_many_time_series(lightcurves)

local_centers, local_clusters = birch.get_cluster_list(mode='local')
print(len(local_centers))
print(len(local_clusters))
# Sizes of all local clusters on a single line. Written as one print(...)
# call so the cell parses on both Python 2 and Python 3; the emitted text
# (two spaces after each size, one extra trailing space) is the same as the
# previous `print x,` loop followed by `print ' '`.
print(''.join(str(len(cluster)) + '  ' for cluster in local_clusters) + ' ')
plot_cluster_list(local_centers, local_clusters, mongodb)

28
28
36  50  21  39  33  44  83  58  56  24  137  88  59  83  62  84  137  42  113  26  21  132  45  63  98  74  42  125   
36, 36
50, 50
21, 21
39, 39
33, 33
44, 44
83, 83
58, 58
56, 56
24, 24
137, 137
88, 88
59, 59
83, 83
62, 62
84, 84
137, 137
42, 42
113, 113
26, 26
21, 21
132, 132
45, 45
63, 63
98, 98
74, 74
42, 42
125, 125


In [5]:
# Same cluster listing and plot as for mode='local', this time requested
# with mode='global' from the already-populated `birch` object.
global_centers, global_clusters = birch.get_cluster_list(mode='global')
plot_cluster_list(global_centers, global_clusters, mongodb)

125, 125
21, 21
383, 383
44, 44
202, 202
45, 45
334, 334
574, 574
121, 121
26, 26


In [6]:
# Build one Cluster per global cluster: fetch the member time series from the
# 'macho' catalog and pair them with the matching center.
clusters = [
    Cluster.from_time_series_sequence(
        mongodb.get_many('macho', member_ids), center_point
    )
    for center_point, member_ids in zip(global_centers, global_clusters)
]

In [7]:
# Inspect the first cluster; the bare expression is shown as the cell output
# (a list of dicts with 'distance', 'id' and 'values' keys).
clusters[0].to_list_of_dicts()

[{'distance': 0.15884294685044481,
  'id': u'1.3323.258',
  'values': [-4.289561391706135, -1.4241728433167]},
 {'distance': 0.2023430854553748,
  'id': u'1.3321.339',
  'values': [-4.0413610411044862, -1.4572285067951904]},
 {'distance': 0.21204621092424383,
  'id': u'1.3321.355',
  'values': [-4.3779309031629632, -1.2343787356129396]},
 {'distance': 0.23010405231281728,
  'id': u'1.3322.60',
  'values': [-4.2538936316910529, -1.091712713434478]},
 {'distance': 0.24174687733387748,
  'id': u'1.3320.324',
  'values': [-4.1966159924855013, -1.5506568153964952]},
 {'distance': 0.25485366331936643,
  'id': u'1.3322.483',
  'values': [-3.9862329843029376, -1.4754712222996489]},
 {'distance': 0.27133198596800695,
  'id': u'1.3320.424',
  'values': [-4.13655953027523, -1.577396902849177]},
 {'distance': 0.28316225479803286,
  'id': u'1.3322.195',
  'values': [-4.2170313776032415, -1.0288241244930345]},
 {'distance': 0.29685410263132644,
  'id': u'1.3322.15',
  'values': [-3.9811573416244848,

In [8]:
# Clusters store, constructed with its default arguments; used in the next
# cell to save the Cluster objects.
clusters_mongodb = ClustersMongoDataBase()

In [9]:
# Hand the Cluster objects built from the global clusters to the clusters store.
# NOTE(review): it is not visible here whether re-running this cell overwrites
# or duplicates previously stored clusters -- confirm before re-executing.
clusters_mongodb.store_clusters(clusters)