In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from math import *
from itertools import combinations

import warnings
warnings.simplefilter("ignore")

### Delta Ct

In [3]:
df = pd.read_excel("Final PCR students f=2000 +gene names 2022-06-06.xlsx"
                  , nrows = 45).fillna("")
df = df.drop("Unnamed: 3", axis = 1)

In [4]:
Genes = np.array(["CDC42BPA", "LBR", "ROCK1", "ACTB", "GAPDH"])
Ct_all = ([df.iloc[i:i+6]["Ct"].values.astype(float) 
    for i in range(0, 46, 9) if i < 38])

In [5]:
delta_Ct = pd.DataFrame(np.zeros((len(Genes), len(Genes)),),
             columns = Genes, index = Genes)

for gene1, gene2 in combinations(Genes, 2):
    ind1, ind2 = np.where(Genes == gene1)[0][0], np.where(Genes == gene2)[0][0]
    delta = np.std(Ct_all[ind1] - Ct_all[ind2])
    
    delta_Ct.loc[gene1, gene2] = delta
    delta_Ct.loc[gene2, gene1] = delta
delta_Ct

Unnamed: 0,CDC42BPA,LBR,ROCK1,ACTB,GAPDH
CDC42BPA,0.0,0.292499,0.492161,0.362476,1.449138
LBR,0.292499,0.0,0.576387,0.397562,1.339983
ROCK1,0.492161,0.576387,0.0,0.537742,1.082692
ACTB,0.362476,0.397562,0.537742,0.0,1.449617
GAPDH,1.449138,1.339983,1.082692,1.449617,0.0


### GeoNorm

In [6]:
data = pd.read_excel("Eff ref.genes for students 20220606.xlsx").fillna(" ")
data = data.drop("Unnamed: 3", axis = 1)

data["Ct"] = data["Ct"].replace(' ', inf).astype(float)
data = data[data["Df"]!="Neg.Ctrl"]

In [7]:
def efficiency(Ct, Df):
    log_Df =  np.log10(1 / Df)
    
    lr = LinearRegression().fit(log_Df.reshape(-1, 1), Ct)
    Ct_pred = lr.predict(log_Df.reshape(-1, 1))
    a = lr.coef_[0]

    p = np.sqrt(np.sum(np.power(Ct - Ct_pred, 2)) / (len(Ct) - 1))
    E = pow(10, -1/a)
    SE = pow(a, -2) * np.log(10) * p * E
    
    return round(E, 6), round(SE, 6)

In [8]:
E_ = {}.fromkeys(data["Gene"].unique(), 0)
for gene in E_:
    table = data[(data['Gene'] == gene)]
    
    Ct = np.array(table["Ct"].values)
    Df = np.array(list(map(lambda i: float(i.strip("dil_")), table["Df"].values)))
    
    E_[gene] = efficiency(Ct, Df)[0]

In [9]:
E_

{'LBR (ref)': 2.091646,
 'ROCK1 (ref)': 2.283884,
 'CDC42BPA (ref)': 2.134281,
 'GAPDH': 2.011684,
 'ACTB': 2.023635}

In [10]:
E = np.array([[E_[i]]*6 for i in E_]).reshape(1, -1)[0]
Ct = np.array(Ct_all).reshape(1, -1)[0]
E_Ct = dict(zip(Genes, np.power(E, Ct).reshape(5, 6)))

In [11]:
GeNorm = pd.DataFrame(np.zeros((len(Genes), len(Genes))),
             columns = Genes, index = Genes)

for gene1, gene2 in combinations(Genes, 2):
    logE = np.log2(E_Ct[gene1] / E_Ct[gene2])
    GeNorm.loc[gene1, gene2] = np.std(logE)
    GeNorm.loc[gene2, gene1] = np.std(logE)
GeNorm

Unnamed: 0,CDC42BPA,LBR,ROCK1,ACTB,GAPDH
CDC42BPA,0.0,0.313485,0.530388,0.376806,1.493148
LBR,0.313485,0.0,0.653869,0.422039,1.414203
ROCK1,0.530388,0.653869,0.0,0.564056,1.109412
ACTB,0.376806,0.422039,0.564056,0.0,1.470719
GAPDH,1.493148,1.414203,1.109412,1.470719,0.0
