In [2]:
%matplotlib notebook

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from sklearn.cluster import KMeans

# Directory whose entries are read as BLM data files further down.
startRampPath = 'data/BLM/start_ramp'

# Starts out empty; a later cell fills it with the rows selected for clustering.
rowsToCluster = pd.DataFrame()

In [3]:
def getRowWithBlmMax(blmData):
    """
    Select the row that holds the largest BLM value in the data.

    Parameters
        blmData : DataFrame -- DataFrame loaded from the BLM data file, without timestamp column
    Returns
        One-row DataFrame (original row label and columns kept) with the row
        containing the max BLM value in the provided data. If the max value
        occurs in more than one row, the first such row (in row order) is returned.
    Raises
        ValueError -- if blmData has no rows, no columns, or only NaN values
    """

    # Per-column maxima skip NaN, so the max of those maxima is the overall max
    # even when some values are missing (ndarray.max() would return NaN instead).
    blmMaxOverall = blmData.max().max()

    # True for every row in which at least one column reaches the overall max.
    # Working per row (not per column) means a max shared by several columns of
    # the same row still selects that row exactly once.
    rowHasBlmMax = (blmData == blmMaxOverall).any(axis=1).values
    if not rowHasBlmMax.any():
        raise ValueError("blmData contains no BLM values to take a maximum of")

    # argmax on a boolean array is the position of the first True, which gives
    # a deterministic tie-break when several rows contain the max.
    firstMaxPos = int(rowHasBlmMax.argmax())

    # Positional, list-based indexing keeps the result a one-row DataFrame even
    # if the row labels are not unique.
    return blmData.iloc[[firstMaxPos]]

In [7]:
# Gather the max-BLM row of every file first and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
maxBlmRows = []
for file in sorted(os.listdir(startRampPath)):  # sorted: os.listdir order is arbitrary
    # load BLM data file as a DataFrame
    blmData = pd.read_csv(os.path.join(startRampPath, file), sep=' ', header=None)

    # Delete first column (contains timestamps)
    blmData = blmData.drop(columns=0)

    maxBlmRows.append(getRowWithBlmMax(blmData))

# Rebuild rowsToCluster from scratch so re-running this cell does not stack
# duplicate rows onto the previous result. pd.concat rejects an empty list, so
# an empty directory yields an empty frame instead.
rowsToCluster = pd.concat(maxBlmRows, ignore_index=True) if maxBlmRows else pd.DataFrame()

In [4]:
# K-means starts from random centroids: fix the seed so the cluster
# memberships are reproducible from run to run, and pin n_init because its
# default value differs between scikit-learn releases.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)
kmeans.fit(rowsToCluster)
print("Cluster memberships:\n{}".format(kmeans.labels_))

Cluster memberships:
[0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [49]:
# Read the three label files as string arrays: all BLM labels, then the
# labels of the BLMs associated with Beam 1 and with Beam 2.
blmLabels, beam1BlmLabels, beam2BlmLabels = (
    np.genfromtxt(labelFile, dtype='str')
    for labelFile in (
        'data/blm_labels.txt',
        'data/beam1_blm_labels.txt',
        'data/beam2_blm_labels.txt',
    )
)

# Name the columns of rowsToCluster after the BLMs.
rowsToCluster.columns = blmLabels

# Pull the Beam 1 columns and the Beam 2 columns out of rowsToCluster.
beam1BlmData = rowsToCluster.filter(items=beam1BlmLabels)
beam2BlmData = rowsToCluster.filter(items=beam2BlmLabels)

# Relabel the Beam 2 columns with the Beam 1 labels so both frames line up
# under the same 'logical' BLM labels when concatenated.
# NB - picking beam1BlmLabels is arbitrary; beam2BlmLabels would do just as well.
beam2BlmData.columns = beam1BlmLabels

# Stack the two frames, tagging the rows of each with an outer 'B1' / 'B2' key.
mergedBlmData = pd.concat([beam1BlmData, beam2BlmData], keys=['B1', 'B2'])