transcriptic/analysis/kinetics.py

try:
    import plotly as py
    import plotly.graph_objs as go
    import pandas as pd
except ImportError:
    raise ImportError("Please run `pip install transcriptic[analysis] if you "
                      "would like to use the Transcriptic analysis module.")


class _Kinetics(object):
    """
    A Kinetics object generalizes the parsing of a time series of datasets
    Parameters
    ----------
    datasets: List[dataset]
        List of Datasets
    """
    def __init__(self, datasets):
        self.datasets = datasets
        self.readings = pd.concat([ds.data for ds in datasets])
        self.readings.index = pd.to_datetime([ds.attributes["warp"]["completed_at"] for ds in datasets])
        self.readings = self.readings.transpose()


class Spectrophotometry(_Kinetics):
    """
    A Spectrophotometry object is used to analyze a kinetic series of PlateRead datasets

    Attributes
    ----------
    properties: DataFrame
        DataFrame of aliquot properties for each well, useful for groupby operations during plots
    readings: DataFrame
        DataFrame of readings for each well at different time points
    operation: str
        Operation used for generating these growth curves (e.g. Absorbance)

    """
    def __init__(self, datasets):
        """
        Parameters
        ----------
        datasets: List[dataset]
            List of Datasets objects. Currently restricted to those generated by 'absorbance', 'fluorescence'
            and 'luminescence' operations

        Raises
        ------
        RuntimeError
            If no Datasets are given, if the Datasets were generated by different operations, or if the
            operation is not one of the supported ones
        """
        operation_set = {ds.operation for ds in datasets}
        if not operation_set:
            # Without this guard set.pop() below fails with an opaque KeyError
            raise RuntimeError("At least one Dataset is required.")
        if len(operation_set) > 1:
            raise RuntimeError("Input Datasets must all be of the same type.")
        self.operation = operation_set.pop()
        if self.operation not in ["absorbance", "fluorescence", "luminescence"]:
            raise RuntimeError("%s has to be of type absorbance, fluorescence or luminescence" % self.operation)
        super(Spectrophotometry, self).__init__(datasets)
        # Assume that well names are consistent across all runs
        ref_dataset = datasets[0]
        ref_container = ref_dataset.container
        container_type = ref_container.container_type
        # Check if well_map is defined
        if len(ref_container.well_map) != 0:
            self.properties = pd.DataFrame.from_dict(ref_container.well_map, orient='index')
        else:
            # No well_map: fall back to the data columns, keyed by their robotized well index
            self.properties = pd.DataFrame.from_dict({container_type.robotize(x): x
                                                      for x in ref_dataset.data.columns
                                                      if x not in ["GAIN"]},
                                                     orient='index')
        self.properties.columns = ['name']
        # Both inserts target position 1, so the final column order is name, row, column
        self.properties.insert(1, "column", (self.properties.index % container_type.col_count))
        self.properties.insert(1, "row", (self.properties.index // container_type.col_count))
        self.properties.row = self.properties.row.apply(lambda x: "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[x])
        # Re-key the table by humanized well name so it lines up with `readings`
        self.properties.index = [container_type.humanize(int(x)) for x in list(self.properties.index)]

    def plot(self, wells="*", groupby=None, title=None, xlabel=None, ylabel=None, max_legend_len=20):
        """
        This generates a plot of the kinetics curve. Note that this function is meant for use under a Jupyter notebook
        environment

        Example Usage:

        .. code-block:: python

            from transcriptic.analysis.kinetics import Spectrophotometry
            growth_curve = Spectrophotometry(myRun.data.Datasets)
            growth_curve.plot(wells=["A1", "A2", "B1", "B2"])
            growth_curve.plot(wells=["A1", "A2", "B1", "B2"], groupby="row", title="Row Groups")
            growth_curve.plot(wells=["A1", "A2", "B1", "B2"], groupby="name", ylabel="Absorbance Units")
            growth_curve.plot(groupby="name", max_legend_len=40)

        Parameters
        ----------
        wells: Optional[list or str]
            If not specified, this plots all the wells associated with the Datasets given. Otherwise, specify
            a list (or any other iterable) of well indices (["A1", "B1"]) or a specific well ("A1")
        groupby: Optional[str]
            When specified, this groups the wells with the same property value together. On the plot, each group will
            be represented by a single curve with the mean values and error bars of 1 std. dev. away from the mean
        title: Optional[str]
            Plot title. Default: "Kinetics Curve (`run-id`)"
        xlabel: Optional[str]
            Plot x-axis label. Default: "Time"
        ylabel: Optional[str]
            Plot y-axis label. Default: "RAU (`wavelength`)" for absorbance, "RFU (`excitation`/`emission`)" for
            fluorescence and "Luminescence" for luminescence
        max_legend_len: Optional[int]
            Maximum number of characters for the legend labels before truncating. Only applied to grouped
            plots. Default: 20

        Returns
        -------
        IPlot
            Result of `plotly.offline.iplot` for the assembled figure. Will be rendered nicely in Jupyter
            notebook instance

        Raises
        ------
        ValueError
            If `groupby` is not a column of the properties table, or if none of the groups contains any of
            the selected wells
        """
        # TODO: Shift init_notebook_mode() to start of notebook instance
        py.offline.init_notebook_mode()

        if isinstance(wells, str):
            wells = list(self.properties.index) if wells == "*" else [wells]
        else:
            # Accept any iterable of well names (tuple, set, pandas Index, ...), not only lists
            wells = list(wells)
        well_readings = self.readings.loc[wells]

        if not groupby:
            traces = [go.Scatter(x=self.readings.columns, y=well_readings.loc[well],
                                 name=self.properties["name"].loc[well]) for well in wells]
        else:
            if groupby not in self.properties.columns:
                raise ValueError("'%s' not found in the properties table. Please specify a column which exists" %
                                 groupby)
            selected_wells = set(well_readings.index)
            traces = []
            for _, group_frame in self.properties.groupby(groupby):
                # Keep an ordered list: recent pandas versions reject a set as a .loc indexer
                members = [well for well in group_frame.index if well in selected_wells]
                if not members:
                    continue
                reading = well_readings.loc[members]
                # Every member shares the same groupby value, so the first one labels the group
                group_label = self.properties[groupby].loc[members[0]]
                traces.append(go.Scatter(x=self.readings.columns,
                                         y=reading.mean(),
                                         name=self._truncate_name(group_label, max_legend_len),
                                         error_y=dict(type='data', array=reading.std(), visible=True)))
            if not traces:
                raise ValueError("No common groups found for specified groupby: %s" % groupby)

        # Assume all data is generated from the same run-id for now
        if not title:
            title = "Kinetics Curve (%s)" % self.datasets[0].attributes["instruction"]["run"]["id"]
        if not xlabel:
            xlabel = 'Time'
        if not ylabel:
            if self.operation == "absorbance":
                ylabel = "RAU (%s)" % self.datasets[0].attributes["instruction"]["operation"]["wavelength"]
            elif self.operation == "fluorescence":
                ylabel = "RFU (%s/%s)" % (self.datasets[0].attributes["instruction"]["operation"]["excitation"],
                                          self.datasets[0].attributes["instruction"]["operation"]["emission"])
            elif self.operation == "luminescence":
                ylabel = "Luminescence"

        layout = go.Layout(
            title=title,
            xaxis=dict(
                title=xlabel,
                titlefont=dict(
                    family='Courier New, monospace',
                    size=18,
                    color='#7f7f7f'
                )
            ),
            yaxis=dict(
                title=ylabel,
                titlefont=dict(
                    family='Courier New, monospace',
                    size=18,
                    color='#7f7f7f'
                )
            ),
            legend=dict(
                x=100,
                y=1
            )
        )

        fig = go.Figure(data=traces, layout=layout)
        return py.offline.iplot(fig)

    @staticmethod
    def _truncate_name(string, max_len=20):
        """
        Truncates string to max_len number of characters, ending it with an ellipsis if it is too long

        Non-string labels (e.g. the integer values of the "column" property) are converted to text first.
        When max_len is smaller than 3 there is no room for the ellipsis, so the text is simply cut.
        """
        # %-formatting turns ints/None into text while leaving existing strings unchanged
        text = "%s" % (string,)
        if len(text) <= max_len:
            return text
        if max_len < 3:
            return text[:max(max_len, 0)]
        return text[:(max_len - 3)] + "..."