In [1]:
from fcsugar import *
import pandas as pd
from bokeh.plotting import figure as figure, gridplot
from bokeh.io import output_notebook, push_notebook, show
output_notebook()
from scipy import fftpack
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import numpy as np

# out = Output()
    

class DataFrameContainer(Container):
    """Container that carries a pandas DataFrame through a node pipeline.

    Instances expose the wrapped frame as ``df`` and an initially empty
    dict as ``gui``.
    """

    def __init__(self, df: pd.DataFrame):
        """Initialise the base ``Container`` and attach ``df``.

        Parameters
        ----------
        df : pd.DataFrame
            Frame stored (not copied) on the instance as ``self.df``.
        """
        Container.__init__(self)
        self.df = df
        self.gui = {}

    @classmethod
    def from_hdf5(cls, path: str, key: str):
        """Build a container from one key of an HDF5 file opened read-only.

        Parameters
        ----------
        path : str
            Location of the HDF5 file.
        key : str
            Identifier of the stored object inside the file.

        Returns
        -------
        An instance of ``cls`` wrapping the loaded frame.
        """
        return cls(pd.read_hdf(path, key=key, mode='r'))
    
@node
def splice(container, data_column: str, start: int, stop: int):
    """Slice every entry of ``data_column`` to ``[start:stop]``.

    The sliced entries are written to the fixed column ``'spliced'``
    (overwriting it if it already exists) and the same container is
    returned.
    """
    window = slice(start, stop)

    def _cut(values):
        # Equivalent to values[start:stop].
        return values[window]

    container.df['spliced'] = container.df[data_column].apply(_cut)
    return container

@node
def normalize(container, data_column: str):
    """Min-max scale every entry of ``data_column``.

    Each entry is shifted so its minimum is 0 and divided by its range, so
    the maximum becomes 1. Results are written to the fixed column
    ``'normalize'`` and the same container is returned.

    An entry whose values are all equal has a range of 0; it is mapped to
    all zeros instead of being divided by zero (which would fill the row
    with NaN and break later numeric steps).
    """
    def _min_max(values):
        shifted = values - np.min(values)
        span = np.max(shifted)
        # When span == 0 every element of `shifted` is already 0, so
        # dividing by 1 yields zeros with the same dtype promotion as the
        # normal path. A NaN span compares unequal to 0 and is kept as is.
        return shifted / (span if span != 0 else 1)

    container.df['normalize'] = container.df[data_column].apply(_min_max)
    return container


@node
def log(container, data_column: str):
    """Apply base-10 logarithm to every entry of ``data_column``.

    Results are written to the fixed column ``'log'`` and the same
    container is returned.
    """
    def _log10(values):
        # Kept as a plain Python function so it is called once per entry.
        return np.log10(values)

    source = container.df[data_column]
    container.df['log'] = source.apply(_log10)
    return container
    

@node
def absval(container, data_column: str):
    """Take the absolute value of every entry of ``data_column``.

    Results are written to the fixed column ``'absval'`` and the same
    container is returned.
    """
    def _magnitude(values):
        # Kept as a plain Python function so it is called once per entry.
        return np.abs(values)

    source = container.df[data_column]
    container.df['absval'] = source.apply(_magnitude)
    return container

@node
def rfft(container, data_column: str):
    """Run ``scipy.fftpack.rfft`` over the entries of ``data_column``.

    All entries are stacked row-wise into one 2-D array (so they must have
    equal length), transformed in a single call, and the rows are written
    back as one array per row to the fixed column ``'fft'``. The same
    container is returned.

    NOTE(review): ``fftpack.rfft`` returns real-valued output in SciPy's
    packed real/imaginary layout rather than complex numbers - confirm
    downstream steps expect that.
    """
    stacked = np.vstack(container.df[data_column].values)
    spectra = fftpack.rfft(stacked)

    # Store as nested lists first, then turn each row back into an ndarray.
    container.df['fft'] = spectra.tolist()
    container.df['fft'] = container.df['fft'].apply(np.array)

    return container


@node
def LDA(container, data_column: str, labels_column: str, n_components: int):
    """Fit a LinearDiscriminantAnalysis model and store the projection.

    The entries of ``data_column`` are stacked row-wise into the feature
    matrix, ``labels_column`` supplies the class labels, and the result of
    ``fit_transform`` is written as one array per row to the fixed column
    ``'lda_transform'``. The same container is returned.
    """
    features = np.vstack(container.df[data_column].values)
    labels = container.df[labels_column]

    model = LinearDiscriminantAnalysis(n_components=n_components)
    projected = model.fit_transform(features, labels)

    # Store as nested lists first, then turn each row back into an ndarray.
    container.df['lda_transform'] = projected.tolist()
    container.df['lda_transform'] = container.df['lda_transform'].apply(np.array)

    return container

In [2]:
# Build a 300x300 figure with a scatter glyph seeded with placeholder points
# (0..99 on both axes); real data is pushed into it later.
f1 = figure(plot_width=300, plot_height=300)#, y_range=(-0.1, 1.1))
# Keep the renderer in `g1`: plot_data() updates its data source.
g1 = f1.scatter(np.arange(100), np.arange(100))
p = gridplot([[f1]])

# notebook_handle=True so later push_notebook() calls can refresh this output.
show(p, notebook_handle=True)

def plot_data(container):
    """Show the first two LDA components of ``container`` in the scatter plot.

    Reads ``container.df['lda_transform']``, stacks the per-row arrays into
    a 2-D array and uses column 0 as x and column 1 as y (so at least two
    components are required). The glyph held in the module-level ``g1`` is
    updated and the change is pushed to the displayed notebook output.
    """
    data = np.vstack(container.df['lda_transform'].values)
    # Replace the whole data dict in one assignment: x and y then change
    # together, so the source never holds columns of different lengths
    # (the row count usually differs from the 100 placeholder points) and
    # only one change is emitted.
    g1.data_source.data = {'x': data[:, 0], 'y': data[:, 1]}
    push_notebook()

# Load the 'data' key of ./data.h5 into a container and register plot_data on it.
# NOTE(review): connect() comes from fcsugar; presumably the callback is
# invoked with the container after the pipeline runs - confirm.
cont = DataFrameContainer.from_hdf5('./data.h5', key='data')
cont.connect(plot_data)

Textarea(value='', description='Status', layout=Layout(width='80%'))

In [3]:
# Pipeline: raw curve -> first 2990 samples -> min-max normalize -> rfft ->
# abs -> log10 -> first 1000 values -> 2-component LDA on FCLUSTER_LABELS.
# The second splice overwrites the 'spliced' column written by the first one.
cont \
>> splice('_RAW_CURVE', 0, 2990)\
>> normalize('spliced')\
>> rfft('normalize')\
>> absval('fft')\
>> log('absval')\
>> splice('log', 0, 1000)\
>> LDA('spliced', labels_column='FCLUSTER_LABELS', n_components=2)

HTML(value='<b>splice</b>')

Text(value='_RAW_CURVE', description='data_column')

IntText(value=0, description='start')

IntText(value=2990, description='stop')

HTML(value='<b>normalize</b>')

Text(value='spliced', description='data_column')

HTML(value='<b>rfft</b>')

Text(value='normalize', description='data_column')

HTML(value='<b>absval</b>')

Text(value='fft', description='data_column')

HTML(value='<b>log</b>')

Text(value='absval', description='data_column')

HTML(value='<b>splice</b>')

Text(value='log', description='data_column')

IntText(value=0, description='start')

IntText(value=1000, description='stop')

HTML(value='<b>LDA</b>')

Text(value='spliced', description='data_column')

Text(value='FCLUSTER_LABELS', description='labels_column')

IntText(value=2, description='n_components')

<__main__.DataFrameContainer at 0x7f60793da320>

# You must clear the pipeline within the container before creating a new one

In [4]:
# Empty the container's node list so the next `>>` chain starts from scratch.
cont.pipeline.clear()

In [7]:
# Same pipeline as before but without the log10 step: the second splice
# takes the first 1000 values of 'absval' directly before the LDA.
cont \
>> splice('_RAW_CURVE', 0, 2990)\
>> normalize('spliced')\
>> rfft('normalize')\
>> absval('fft')\
>> splice('absval', 0, 1000)\
>> LDA('spliced', labels_column='FCLUSTER_LABELS', n_components=2)

HTML(value='<b>splice</b>')

Text(value='_RAW_CURVE', description='data_column')

IntText(value=0, description='start')

IntText(value=2990, description='stop')

HTML(value='<b>normalize</b>')

Text(value='spliced', description='data_column')

HTML(value='<b>rfft</b>')

Text(value='normalize', description='data_column')

HTML(value='<b>absval</b>')

Text(value='fft', description='data_column')

HTML(value='<b>splice</b>')

Text(value='absval', description='data_column')

IntText(value=0, description='start')

IntText(value=1000, description='stop')

HTML(value='<b>LDA</b>')

Text(value='spliced', description='data_column')

Text(value='FCLUSTER_LABELS', description='labels_column')

IntText(value=2, description='n_components')

<__main__.DataFrameContainer at 0x7f51a05312e8>

In [8]:
# Display the nodes currently registered on the container (one per pipeline step).
cont.pipeline

[<fcsugar.core.node.<locals>.wrapper.<locals>._Node at 0x7f5144c8c0b8>,
 <fcsugar.core.node.<locals>.wrapper.<locals>._Node at 0x7f5144ca86a0>,
 <fcsugar.core.node.<locals>.wrapper.<locals>._Node at 0x7f5144c3c5c0>,
 <fcsugar.core.node.<locals>.wrapper.<locals>._Node at 0x7f5144c3cb70>,
 <fcsugar.core.node.<locals>.wrapper.<locals>._Node at 0x7f5144c3f898>,
 <fcsugar.core.node.<locals>.wrapper.<locals>._Node at 0x7f5144c40d30>]