/
spectrophometry.py
171 lines (155 loc) · 8.07 KB
/
spectrophometry.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import os, requests, json, re
import plotly.plotly as py
import pandas
import matplotlib.pyplot as plt
import plotly.tools as tls
import numpy as np
from plotly.graph_objs import *
from IPython.display import HTML, Javascript, display
from transcriptic.util import natural_sort
# TODO: Update group_well params to take in list of wells ["A1","A2",...]
# TODO: Plot Beer's Law chart, get expected volumes?
class Absorbance(object):
    """
    Takes in dataset object and parses into absorbance object for easy
    statistical analysis and visualization.

    After construction the instance exposes:

    * ``df``     -- pandas DataFrame with one column per group label.
    * ``df_adj`` -- ``df`` minus the control's ``df`` (only set when
      ``control_abs`` is given).
    * ``cv``     -- per-group coefficient of variation in percent
      (``std / mean * 100``).
    * ``params`` -- measurement parameters (``wavelength``, ``reader``) and
      plate information (``params.plate``).
    """

    def __init__(self, dataset, groups, group_well_list=None, control_abs=None, name=None):
        """
        Parameters
        ----------
        dataset: dataset
            Single dataset selected from datasets object.
        groups: list[str]
            Labels for each of the respective groups.
        group_well_list: list[int] or list[list[int]], optional
            Wells (robot form) belonging to each group, in the same order as
            ``groups``. E.g. [[1,3,5],[2,4,6]]. A flat list of ints is treated
            as a single group labelled ``groups[0]``. If omitted, every well in
            the dataset is placed in ``groups[0]``.
        control_abs: Absorbance object, optional
            Absorbance object of water/control blank. If specified, will create
            adjusted dataframe (df_adj) by subtracting its df from this df.
        name: str, optional
            Display name used in plot titles and printed summaries.

        Raises
        ------
        RuntimeError
            If the dataset has no data keys or was not produced by an
            absorbance operation.
        ValueError
            If ``group_well_list`` is malformed, lists more wells than the
            dataset holds, or references a well that is not in the dataset.
        """
        self.name = name
        self.dataset = dataset
        self.control_abs = control_abs
        props = self.dataset.props

        # Validate cheaply before fetching any data.
        if "data_keys" not in props or len(props["data_keys"]) == 0:
            raise RuntimeError("No data found in given dataset.")
        if props["instruction"]["operation"]["op"] != "absorbance":
            raise RuntimeError("Data given is not from absorbance operation.")

        # NOTE(review): get_dataset, make_dottable_dict and humanize are not
        # imported in the visible part of this module -- confirm where they
        # are expected to come from.
        data_dict = get_dataset(props["id"])

        # Populate measurement params
        self.params = make_dottable_dict({
            "wavelength": props["instruction"]["operation"]["wavelength"],
            "reader": props["warp"]["device_id"],
        })

        # Populate plate field
        container = props["container_type"]
        well_count = container["well_count"]
        col_count = container["col_count"]
        self.params.plate = make_dottable_dict({
            "id": container["id"],
            "col_count": col_count,
            "well_count": well_count,
        })

        # Parse into per-group columns. Each data_dict entry is indexed with
        # [0] to obtain the reading for that well.
        df_dict = {}
        if not group_well_list:
            # No wells specified: put every well into the first group, in
            # natural well order rather than arbitrary dict order.
            df_dict[groups[0]] = [data_dict[key][0]
                                  for key in natural_sort(data_dict.keys())]
        elif all(isinstance(i, int) for i in group_well_list):
            # Flat list of ints: one group containing exactly those wells.
            if len(group_well_list) > len(data_dict):
                raise ValueError("Sum of group lengths exceeds total no. of wells.")
            df_dict[groups[0]] = self._read_wells(
                data_dict, group_well_list, well_count, col_count)
        elif all(isinstance(i, list) for i in group_well_list):
            if sum(len(i) for i in group_well_list) > len(data_dict):
                raise ValueError("Sum of group lengths exceeds total no. of wells.")
            for idx, well_list in enumerate(group_well_list):
                df_dict[groups[idx]] = self._read_wells(
                    data_dict, well_list, well_count, col_count)
            # pandas needs equally long columns: pad shorter groups with NaN.
            max_len = max(len(well_list) for well_list in group_well_list)
            for idx in range(len(group_well_list)):
                column = df_dict[groups[idx]]
                column.extend([float("NaN")] * (max_len - len(column)))
        else:
            raise ValueError("Format Error: Group Well List should be a list of list of wells in robot format")

        self.df = pandas.DataFrame(df_dict, columns=groups)
        # If control absorbance object specified, create df_adj by subtracting
        # the control df from the original.
        if control_abs:
            self.df_adj = self.df - control_abs.df
        self.cv = self.df.std() / self.df.mean() * 100

    @staticmethod
    def _read_wells(data_dict, wells, well_count, col_count):
        """Return the reading of each robot-form well in ``wells``, in order.

        Raises ValueError naming the first well that cannot be looked up.
        """
        values = []
        for well in wells:
            try:
                key = humanize(well, well_count, col_count).lower()
                values.append(data_dict[key][0])
            except (KeyError, IndexError, TypeError, ValueError):
                raise ValueError("Well %s is not in the absorbance dataset" % well)
        return values

    @staticmethod
    def _r_squared(observed, fitted):
        """Return the coefficient of determination of ``fitted`` vs ``observed``.

        Returns NaN when ``observed`` has no variance, since R^2 is undefined
        in that case.
        """
        ss_res = np.sum(np.square(observed - fitted))
        ss_tot = np.sum(np.square(observed - np.mean(observed)))
        if ss_tot == 0:
            return float("nan")
        return 1 - ss_res / ss_tot

    def plot(self, mpl=False):
        """
        Draw a box plot of the absorbance readings of every group.

        Parameters
        ----------
        mpl: bool
            If True, only the matplotlib figure is drawn and None is returned.
            Otherwise the figure is converted to plotly and the result of the
            plotly call is returned.
        """
        # Generates matplotlib obj
        mpl_fig, ax = plt.subplots()
        ax.set_ylabel("Absorbance " + self.params.wavelength)
        ax.set_xlabel("Groups")
        self.df.boxplot(ax=ax)
        if mpl:
            return None
        # NOTE(review): the original called a bare, undefined `plot`; py.plot
        # is assumed to be the intended function (py.iplot is the notebook
        # alternative) -- confirm.
        return py.plot(tls.mpl_to_plotly(mpl_fig))

    def beers_law(self, conc_list=None, use_adj=True, **kwargs):
        """
        Plot mean absorbance per group as a Beer's Law linearity check.

        Parameters
        ----------
        conc_list: list[number], optional
            Concentration of each group, in group order. If given, a scatter
            plot with a least-squares line is drawn and R^2 is printed. If
            omitted, the group means are plotted against their labels.
        use_adj: bool
            Use the control-adjusted dataframe (df_adj) when a control
            absorbance object was supplied at construction.
        **kwargs
            Passed through to the pandas plot call. ``title``, ``yerr`` and
            (without concentrations) ``xlim`` get defaults when absent.
        """
        if "title" not in kwargs:
            if self.name:
                kwargs["title"] = "Beer's Law Check (%s)" % self.name
            else:
                kwargs["title"] = "Beer's Law Check"

        # Use df_adj for beer's law if control abs object was given
        if use_adj and self.control_abs:
            dataf = self.df_adj
        else:
            dataf = self.df

        # Error bars must describe the data that is actually plotted.
        if "yerr" not in kwargs:
            kwargs["yerr"] = dataf.std()

        means = dataf.mean()

        # Use default labels if concentration not provided. The explicit
        # length test keeps numpy arrays usable as conc_list.
        if conc_list is None or len(conc_list) == 0:
            if "xlim" not in kwargs:
                kwargs["xlim"] = (-1, len(means))
            means.plot(**kwargs)
            return

        plot_obj = pandas.DataFrame({"values": means, "conc": np.asarray(conc_list)})
        gradient, intercept = np.polyfit(plot_obj["conc"], plot_obj["values"], 1)
        fitted = gradient * plot_obj["conc"] + intercept

        mpl_fig, ax = plt.subplots()
        ax.set_ylabel("Absorbance " + self.params.wavelength)
        plot_obj.plot(x="conc", y="values", kind="scatter", ax=ax, **kwargs)
        ax.plot(plot_obj["conc"], fitted, '-')

        # R^2 is computed from explicit residuals: np.polyfit's own residual
        # output is an empty array when the fit is exact or rank deficient.
        r_squared = self._r_squared(plot_obj["values"], fitted)
        print("%s R^2: %s" % (self.name, r_squared))
def compare_standards(abs_obj, std_abs_obj):
    """
    Print a comparison of a sample absorbance object against a standard.

    For every group a warning is printed when the sample CV is less than
    twice the standard CV. The sample variance, the sample CV (%) and the
    RMSE of the sample readings about the standard's group mean (expressed
    as a percentage of that mean) are then printed as tables.

    Parameters
    ----------
    abs_obj: Absorbance object
        Sample to evaluate; its ``df``, ``cv`` and ``name`` are read.
    std_abs_obj: Absorbance object
        Standard to compare against; its ``df`` and ``cv`` are read. Groups
        are matched to the sample's by position for the CV check.
    """
    # Compare against mean of standard absorbance
    # Check to ensure sample CV is at least twice the standard CV
    for indx, label in enumerate(abs_obj.cv.index):
        cv_ratio = abs_obj.cv.iloc[indx] / std_abs_obj.cv.iloc[indx]
        if cv_ratio < 2:
            print("Warning for %s: Sample CV is only %s times that of Standard CV. RMSE may be inaccurate." % (label, cv_ratio))

    # RMSE (normalized wrt to standard mean). The mean is taken *inside* the
    # square root; taking it outside would yield the mean absolute error.
    std_mean = std_abs_obj.df.mean()
    rmse = np.sqrt(np.square(abs_obj.df - std_mean).mean()) / std_mean * 100

    rmse_table = rmse.to_frame("RMSE % (normalized to standard mean)")
    variance_table = abs_obj.df.var().to_frame("Sample Variance")
    cv_table = abs_obj.cv.to_frame("Sample (%) CV")

    if abs_obj.name:
        display(HTML("<b>Standards Comparison (%s)</b>" % abs_obj.name))
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(variance_table)
    print(cv_table)
    print(rmse_table)