# Step 4.3: Cluster individual traces

__Step goals:__ Cluster the generated individual traces with a Gaussian Mixture Model (GMM).

__Step overview:__
1. Load the data and prepare the features;
2. Fit the GMM and extract labels, probabilities and model parameters;
3. Save the result.

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import numpy as np
import pickle
import math
from tqdm import tqdm
import itertools
from sklearn.mixture import GaussianMixture
# Apply the 'science' plotting style to all figures created after this point.
# NOTE(review): 'science' is not one of matplotlib's built-in styles — presumably it is
# registered by the SciencePlots package; confirm it is installed in this environment.
plt.style.use('science')

## 1. Load the data

In [2]:
# Read the individual traces for the selected data type.
# `data_type` is reused by later cells to name the output files.
data_type = 'entry'
entry_individual = pd.read_csv(f"../data/interim/counts/{data_type}_individual.csv")
print(f'Clustering will be performed on : {entry_individual.shape[0]} points.')

# Build the feature matrix: the single "time_stamp" column reshaped into a
# column vector of shape (n_samples, 1), which is what the mixture model is fitted on.
time_stamps = entry_individual["time_stamp"].values
X = time_stamps.reshape(-1, 1)

Clustering will be performed on : 4571040 points.


## 2. Clustering

In [None]:
%%time
# Specify hyper-parameters
n_components = 10
random_state = 12345
n_init = 50

# Train the model
model = GaussianMixture(n_components, covariance_type='full', random_state=random_state, n_init=n_init, verbose=2).fit(X)

# Get the parameters
labels = model.predict(X)
weights = model.weights_
probs = np.around(model.predict_proba(X), 3)
means = model.means_
covariances = model.covariances_

Initialization 0


## 3. Save the result

In [None]:
# Persist the fitted model and the arrays derived from it.
# Output file names are keyed by `n_components` and `data_type`.

# Save the model
filename = f'../models/individual_traces/gmm_{data_type}_{n_components}.sav'
# Use a context manager so the file is flushed and closed as soon as the dump
# finishes, instead of leaving an open handle for the garbage collector.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# Save parameters (one .npy file per array)
np.save(f"../data/interim/gmm/individual_traces/labels_{n_components}_{data_type}.npy", labels)
np.save(f"../data/interim/gmm/individual_traces/weights_{n_components}_{data_type}.npy", weights)
np.save(f"../data/interim/gmm/individual_traces/probs_{n_components}_{data_type}.npy", probs)
np.save(f"../data/interim/gmm/individual_traces/means_{n_components}_{data_type}.npy", means)
np.save(f"../data/interim/gmm/individual_traces/covariances_{n_components}_{data_type}.npy", covariances)