-
Notifications
You must be signed in to change notification settings - Fork 10
/
kinetics.py
193 lines (175 loc) · 8.44 KB
/
kinetics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
try:
import plotly as py
import plotly.graph_objs as go
import pandas as pd
except ImportError:
raise ImportError("Please run `pip install transcriptic[analysis] if you "
"would like to use the Transcriptic analysis module.")
class _Kinetics(object):
    """
    A Kinetics object generalizes the parsing of a time series of datasets.

    The ``data`` frames of all datasets are stacked, each stacked row is
    labelled with the ``attributes["warp"]["completed_at"]`` timestamp of its
    dataset, and the table is then transposed so the timestamps end up as
    columns.

    Parameters
    ----------
    datasets: List[dataset]
        List of Datasets

    Attributes
    ----------
    datasets: List[dataset]
        The datasets exactly as they were passed in
    readings: DataFrame
        Transposed table of readings: one row per column of the datasets'
        ``data`` frames, one column per completion timestamp

    Raises
    ------
    ValueError
        If `datasets` is empty
    """

    def __init__(self, datasets):
        # len() instead of truthiness so that sized containers with an
        # ambiguous truth value (e.g. a pandas Series of datasets) also work.
        if len(datasets) == 0:
            raise ValueError("At least one dataset is required to build a kinetics series.")
        self.datasets = datasets

        stacked = pd.concat([ds.data for ds in datasets])
        # One timestamp is assigned per stacked row, so this presumably relies
        # on every dataset contributing exactly one row -- TODO confirm.
        stacked.index = pd.to_datetime(
            [ds.attributes["warp"]["completed_at"] for ds in datasets]
        )
        self.readings = stacked.transpose()
class Spectrophotometry(_Kinetics):
    """
    A Spectrophotometry object is used to analyze a kinetic series of PlateRead datasets

    Attributes
    ----------
    properties: DataFrame
        DataFrame of aliquot properties for each well, useful for groupby operations during plots.
        Indexed by humanized well name, with the columns "name", "row" and "column"
    readings: DataFrame
        DataFrame of readings for each well at different time points
    operation: str
        Operation used for generating these growth curves (e.g. Absorbance)
    """

    # The only operations the constructor accepts.
    _SUPPORTED_OPERATIONS = ("absorbance", "fluorescence", "luminescence")

    def __init__(self, datasets):
        """
        Parameters
        ----------
        datasets: List[dataset]
            List of Datasets objects. Currently restricted to those generated by 'absorbance', 'fluorescence'
            and 'luminescence' operations

        Raises
        ------
        ValueError
            If `datasets` is empty
        RuntimeError
            If the datasets do not all share one operation, or that operation is not supported
        """
        # len() instead of truthiness: the container may be a pandas Series.
        if len(datasets) == 0:
            raise ValueError("At least one dataset is required to build a Spectrophotometry object.")

        operation_set = {ds.operation for ds in datasets}
        if len(operation_set) > 1:
            raise RuntimeError("Input Datasets must all be of the same type.")
        self.operation = operation_set.pop()
        if self.operation not in self._SUPPORTED_OPERATIONS:
            raise RuntimeError("%s has to be of type absorbance, fluorescence or luminescence" % self.operation)
        super(Spectrophotometry, self).__init__(datasets)

        # Assume that well names are consistent across all runs
        ref_dataset = datasets[0]
        ref_container = ref_dataset.container
        container_type = ref_container.container_type

        # Use the container's well_map when it is defined; otherwise fall back
        # to the dataset's column names (skipping the "GAIN" column), keyed by
        # their robotized well index.
        if len(ref_container.well_map) != 0:
            self.properties = pd.DataFrame.from_dict(ref_container.well_map, orient='index')
        else:
            self.properties = pd.DataFrame.from_dict(
                {container_type.robotize(x): x
                 for x in ref_dataset.data.columns
                 if x not in ["GAIN"]},
                orient='index')
        self.properties.columns = ['name']

        # Derive row/column from the robotized index while it is still numeric.
        # Both inserts target position 1, so the final order is name, row, column.
        self.properties.insert(1, "column", (self.properties.index % container_type.col_count))
        self.properties.insert(1, "row", (self.properties.index // container_type.col_count))
        # Row numbers become letters; only 26 rows (A-Z) can be represented.
        self.properties["row"] = self.properties["row"].apply(lambda x: "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[x])
        # Finally re-key the table by humanized well name (e.g. "A1").
        self.properties.index = [container_type.humanize(int(x)) for x in list(self.properties.index)]

    def plot(self, wells="*", groupby=None, title=None, xlabel=None, ylabel=None, max_legend_len=20):
        """
        This generates a plot of the kinetics curve. Note that this function is meant for use under a Jupyter notebook
        environment

        Example Usage:

        .. code-block:: python

            from transcriptic.analysis.kinetics import Spectrophotometry
            growth_curve = Spectrophotometry(myRun.data.Datasets)
            growth_curve.plot(wells=["A1", "A2", "B1", "B2"])
            growth_curve.plot(wells=["A1", "A2", "B1", "B2"], groupby="row", title="Row Groups")
            growth_curve.plot(wells=["A1", "A2", "B1", "B2"], groupby="name", ylabel="Absorbance Units")
            growth_curve.plot(groupby="name", max_legend_len=40)

        Parameters
        ----------
        wells: Optional[list or str]
            If not specified (or "*" / None), this plots all the wells listed in the properties table.
            Otherwise, specify a list (or any other iterable) of well indices (["A1", "B1"]) or a
            specific well ("A1")
        groupby: Optional[str]
            When specified, this groups the wells with the same property value together. On the plot, each group will
            be represented by a single curve with the mean values and error bars of 1 std. dev. away from the mean
        title: Optional[str]
            Plot title. Default: "Kinetics Curve (`run-id`)"
        xlabel: Optional[str]
            Plot x-axis label. Default: "Time"
        ylabel: Optional[str]
            Plot y-axis label. Default: "RAU (`wavelength`)" for absorbance, "RFU (`excitation`/`emission`)"
            for fluorescence and "Luminescence" for luminescence
        max_legend_len
            Maximum number of characters for the legend labels of grouped curves before truncating. Default: 20

        Returns
        -------
        IPlot
            Result of `plotly.offline.iplot` for the assembled figure. Will be rendered nicely in Jupyter
            notebook instance

        Raises
        ------
        ValueError
            If `groupby` is not a column of the properties table, or none of the selected wells belongs to
            any group
        """
        # TODO: Shift init_notebook_mode() to start of notebook instance
        py.offline.init_notebook_mode()

        # Normalise the selection to a plain list so that every accepted form
        # (None, "*", a single well, any iterable of wells) takes the same path.
        if wells is None:
            wells = "*"
        if isinstance(wells, str):
            wells = list(self.properties.index) if wells == "*" else [wells]
        else:
            wells = list(wells)
        well_readings = self.readings.loc[wells]

        if not groupby:
            traces = [go.Scatter(x=self.readings.columns,
                                 y=well_readings.loc[well],
                                 name=self.properties["name"].loc[well])
                      for well in wells]
        else:
            if groupby not in self.properties.columns:
                raise ValueError("\'%s\' not found in the properties table. Please specify a column which exists" %
                                 groupby)
            selected = set(well_readings.index)
            reading_map = []
            for _, members in self.properties.groupby(groupby):
                # Keep an ordered list: .loc does not accept a set on current
                # pandas, and a set would make the row order arbitrary.
                common = [well for well in members.index if well in selected]
                if common:
                    reading_map.append(well_readings.loc[common])
            if not reading_map:
                raise ValueError("No common groups found for specified groupby: %s" % groupby)
            # Every well of a group shares the same property value, so the
            # first well is enough to look up the legend label.
            traces = [go.Scatter(x=self.readings.columns,
                                 y=reading.mean(),
                                 name=self._truncate_name(self.properties[groupby].loc[reading.index[0]],
                                                          max_legend_len),
                                 error_y=dict(type='data', array=reading.std(), visible=True))
                      for reading in reading_map]

        # Assume all data is generated from the same run-id for now
        if not title:
            title = "Kinetics Curve (%s)" % self.datasets[0].attributes["instruction"]["run"]["id"]
        if not xlabel:
            xlabel = 'Time'
        if not ylabel:
            ylabel = self._default_ylabel()

        axis_title_font = dict(family='Courier New, monospace', size=18, color='#7f7f7f')
        # NOTE(review): `titlefont` and legend x=100 are kept exactly as before;
        # newer plotly releases may prefer title=dict(text=..., font=...) and may
        # restrict the range of legend.x -- confirm against the plotly version in use.
        layout = go.Layout(
            title=title,
            xaxis=dict(title=xlabel, titlefont=dict(axis_title_font)),
            yaxis=dict(title=ylabel, titlefont=dict(axis_title_font)),
            legend=dict(x=100, y=1)
        )
        fig = go.Figure(data=traces, layout=layout)
        return py.offline.iplot(fig)

    def _default_ylabel(self):
        """Builds the default y-axis label from the operation of the first dataset"""
        # The instruction attributes are only read in the branches that need
        # them, so luminescence datasets do not have to provide them.
        if self.operation == "absorbance":
            return "RAU (%s)" % self.datasets[0].attributes["instruction"]["operation"]["wavelength"]
        if self.operation == "fluorescence":
            operation_attrs = self.datasets[0].attributes["instruction"]["operation"]
            return "RFU (%s/%s)" % (operation_attrs["excitation"], operation_attrs["emission"])
        if self.operation == "luminescence":
            return "Luminescence"
        return None

    @staticmethod
    def _truncate_name(string, max_len=20):
        """Truncates string to max_len number of characters, ending in an ellipsis if it is too long

        Non-string labels (e.g. the integer values of the "column" property) are
        converted to text first. When max_len is below 3 there is no room for
        the ellipsis, so the text is simply cut to max_len characters.
        """
        if not isinstance(string, str):
            string = str(string)
        if len(string) <= max_len:
            return string
        if max_len < 3:
            return string[:max(max_len, 0)]
        return string[:(max_len - 3)] + "..."