In [1]:
import numpy as np
import csv
import matplotlib.pyplot as plt
import scipy
import scipy.stats


In [2]:
np.set_printoptions(precision = 4, suppress = True)

size = 16*16*24

y = scipy.stats.norm.rvs(size=size)
#print(np.average(y))
#print(np.std(y))

y = np.reshape(y, (16, 16, 24), order = 'C')
#print(y)

overall_avg = np.average(y)
print(overall_avg)


[[[ 0.4624 -0.992  -0.3854 ..., -1.0856 -1.7451 -0.0396]
  [ 1.94   -0.5713 -0.3283 ...,  1.1243 -1.1493  3.0191]
  [-0.9969  0.6298  2.2533 ..., -1.1779  0.6164  0.276 ]
  ..., 
  [ 0.5461 -0.9488  1.6696 ...,  1.7534  1.9126 -1.6602]
  [ 2.5335 -1.4722  0.7522 ...,  0.2472  0.4363 -2.2248]
  [-0.8778  0.1006  0.1273 ..., -1.6756  0.596   0.7697]]

 [[ 0.5667  0.546  -1.0467 ..., -0.3076  0.0278  0.8137]
  [-0.1553  0.8456 -0.829  ...,  0.2247 -0.2984 -0.4394]
  [-0.0133  0.4357 -0.6911 ..., -0.2464  1.3893  0.7322]
  ..., 
  [-2.074  -1.4678  0.8156 ..., -1.0265 -1.5083  0.6313]
  [-0.5495  1.1004 -0.888  ...,  0.2971 -1.2351  1.1613]
  [-1.0286 -2.0976 -0.1346 ...,  0.6343 -0.3888 -0.2616]]

 [[ 0.5107 -0.5272 -0.0564 ...,  1.4395  0.3511 -0.0664]
  [ 0.5185  0.9693 -0.1875 ...,  0.3576  0.0724  1.0563]
  [ 0.0555  1.9954 -0.2083 ..., -1.5132 -0.8782 -0.5083]
  ..., 
  [-0.5546 -1.1042  1.3922 ...,  1.1063 -0.8704 -1.1178]
  [ 0.6721 -0.9004  0.0902 ...,  0.5622 -0.1806  1.7188]
  [

In [5]:
np.set_printoptions(precision = 4, suppress = True)

variances = []
averages = []

for i in range(0, 8):
    for j in range (0, 16):
        for k in range (0, 24):
            first_repl = y[i][j][k]
            second_repl = y[i + 8][j][k]
            sample_avg = (first_repl + second_repl) / 2
            sample_variance = ((first_repl - sample_avg) ** 2 + (second_repl - sample_avg) ** 2) / 1
            variances.append(sample_variance)
            averages.append(sample_avg)

#first_var = y[0][0][0]
#second_var = y[8][0][0]
#avg = (first_var + second_var) / 2
#print(((first_var - avg) ** 2 + (second_var - avg) ** 2) / 1)
#print(variances)

#according to paper wright and simon 2003, the sample variances multipled by two constants
#a and b follow an F distribution with parameters (n-k) and 2a, where n is number of
#replicates, k is number of group (experimental, control, etc)
#in my data, I have duplicates and 1 group, so n = 2, k = 1.
param = scipy.stats.f.fit(variances)
print(param)

#after fitting we want to find the value of a and b, since these are the parameters for
#the putative inverse gamma distribution that is the true distribution of the variances
#of the small molecule screen.  finding a and b will help us specify the inverse
#gamma distribution, which will improve the power of our t tests (wright and simon 2003)
#find parameter a: since the fitted distribution has parameters (n-k) and 2a, we can
#take the second parameter and divide by 2 to get a
invgammaparam_a = param[1] / 2
#we fit an F distribution to our variances, and we see that the scaling s is stored in the
#fourth parameter.  a*b*variances fits to an F distribution with area under the curve = 1
#since F is a probability distribution (scaling = 1)
#thus when we simply fit our variances to an F distribution,
#we may get a scaling s =/= 1 (area under the curve not equal 1)
#since multiplying variates by a constant changes the scaling of the fitted F distribution
#we can figure out what a*b is by knowing that multiplying the variances
#by a*b brings the scaling up to 1; hence a*b equals the multiplicative inverse
#of the current scaling.  from here we can find b because we already have a
invgammaparam_b = (1 / param[3]) / invgammaparam_a
print(invgammaparam_a)
print(invgammaparam_b)

(1.0716492764171148, 23.416651399112787, 6.6877200632563207e-08, 0.89469509586554985)
11.7083256996
0.0954619163


In [6]:
np.set_printoptions(precision = 4, suppress = True)

#array = [1, 2, -1]
#array2 = array + 1
#print(array2)

#print(variances)
#convert lists to numpy objects ndarrays to be able to easily perform math operations
variances = np.asarray(variances)
#print(variances.shape[0])
#print(variances)
averages = np.asarray(averages)

#print(variances * 2)

#variances that have been fitted to the inverse gamma distribution
rvm_variances = ((2 - 1) * variances + 2 * invgammaparam_a * (1 / (invgammaparam_a * invgammaparam_b)))/((2 - 1) + 2 * invgammaparam_a)          
#print(rvm_variances)
#print(variances)

denominator = np.sqrt(rvm_variances * 1 / 2)
#print(denominator)

#print(averages)
#calculate t statistic for each compound, using the rvm_variances
t_stats = averages / denominator
#print(t_stats)


len_t_stats = t_stats.shape[0]

p_val = []
df = 2 * invgammaparam_a
#t_stats[0] = 1

#p values for 2 tailed t tests
for i in range(0, len_t_stats):
    if t_stats[i] <= 0:
        prob = scipy.stats.t.cdf(t_stats[i], df)
        prob *= 2
        p_val.append(prob)
    else:
        prob = scipy.stats.t.sf(t_stats[i], df)
        prob *= 2
        p_val.append(prob)
#print(p_val)

p_val = np.asarray(p_val)
p_val = p_val[:, np.newaxis]
#print(t_stats)
#calculate p values from the t statistics
#for i in range()

coordinates = []

#list out the plate, row, and col coordinates to be concatenated with the p values
#this helps keep track of where each p value came from in the physical location
#on the plates after the p values are sorted in order to do FDR controlling
#such as benjamini hochberg
for i in range(0, 8):
    for j in range(0, 16):
        for k in range(0, 24):
            coordinates.append(i)
            coordinates.append(j)
            coordinates.append(k)

#print(coordinates)
coordinates = np.asarray(coordinates)
len_coord = len(coordinates)
#print(len_coord)
coordinates = np.reshape(coordinates, (len_coord / 3, 3), order = 'C')
#print(coordinates)

p_val_coord = np.concatenate((p_val, coordinates), axis = 1)
#print(p_val_coord)
#print(p_val_coord[1,3])
p_val_coord = np.ndarray.tolist(p_val_coord)
#print(p_val_coord)

#f8 represents 8 bit floating numbers
#f0 represents the first field along axis 0 (i.e. the first field along a row?)
p_val_coord.sort()
print(p_val_coord)


[[0.0002699727017543349, 5.0, 9.0, 12.0], [0.0010860292054272875, 0.0, 5.0, 9.0], [0.0021363668579751, 6.0, 11.0, 2.0], [0.0023543668955001763, 2.0, 4.0, 10.0], [0.002392311777231243, 7.0, 0.0, 2.0], [0.0025758113471494016, 5.0, 9.0, 15.0], [0.0030396801203844822, 5.0, 3.0, 7.0], [0.0031226406995283483, 0.0, 7.0, 6.0], [0.0033723946761858676, 4.0, 0.0, 4.0], [0.0040522214383743186, 3.0, 12.0, 3.0], [0.004055844466843739, 6.0, 11.0, 16.0], [0.004867002295247133, 5.0, 13.0, 21.0], [0.00493439334114124, 3.0, 1.0, 9.0], [0.005352507158132355, 5.0, 15.0, 22.0], [0.005469739454226098, 7.0, 13.0, 0.0], [0.005529921386727353, 4.0, 13.0, 0.0], [0.005578778512417799, 1.0, 1.0, 7.0], [0.005729648375775712, 5.0, 3.0, 20.0], [0.007004554803578237, 0.0, 8.0, 14.0], [0.007266755857940119, 4.0, 14.0, 12.0], [0.007603970270386279, 1.0, 8.0, 6.0], [0.007674805688852123, 1.0, 1.0, 15.0], [0.00782056154428481, 3.0, 0.0, 18.0], [0.00784293820849737, 1.0, 9.0, 10.0], [0.007919546951357307, 1.0, 13.0, 20.0],