In [1]:
import os
import zipfile

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from patsy import dmatrices
from tqdm.auto import tqdm

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


In [2]:
from bean.mapping._supporting_fn import _get_allele_from_alignment
from bean import Allele,Edit

In [3]:
# Base conversions treated as editable: key is the base before editing, value
# is the base after editing (presumably A>G and its opposite-strand view T>C
# for an adenine base editor -- confirm against the experiment design).
edit_dict = dict(A="G", T="C")
# Complement of each DNA base.
revcomp_dict = dict(zip("ATCG", "TAGC"))

## Load variant info

In [4]:
var_id = "rs35081008_Min_ABE_465"

In [5]:
var_info_tbl = pd.read_excel("../../resources/LDLvar/20221013_LDLvar_simpleZscores_credset.xlsx", index_col=0, header=1)

In [6]:
amplicon_info = pd.read_csv("../../resources/atac_seq/030123_ATACseq_info.csv", ).set_index("gRNA")

In [7]:
# Pull the two alleles (A1/A2) of the variant of interest, annotate each with
# the base it would become after editing, and keep only the editable one(s).
var_alleles = var_info_tbl.loc[var_id, ["A1", "A2"]].reset_index()
var_alleles['edited'] = [edit_dict.get(base, np.nan) for base in var_alleles[var_id]]
var_alleles = var_alleles.loc[var_alleles.edited.notnull()]
var_alleles.columns=["allele", "ref", "edited"]

# Genomic coordinates of the variant as recorded in the variant table.
var_pos = int(var_info_tbl.loc[var_id, 'position_hg19'])
var_chrom = int(var_info_tbl.loc[var_id, "CHR"])

In [8]:
# Reference amplicon with every "N" character removed.
amplicon_seq = amplicon_info.loc[var_id, "Amplicon_fw (reference)"].replace("N", "")
amplicon_len = len(amplicon_seq)
# NOTE(review): despite its name, later cells use amplicon_start as the index
# of the variant inside amplicon_seq; the meaning of the -4 shift is not
# visible here -- confirm against how Varpos_fw is defined.
amplicon_start = amplicon_info.loc[var_id, "Varpos_fw"] - 4
amplicon_end = amplicon_start + amplicon_len

In [9]:
# Read the phased VCF body (header lines starting with "#" are skipped) and
# name its ten columns; the last column holds the HepG2 sample field.
vcf_columns = ["CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", "FORMAT", "HepG2"]
phased_snps = pd.read_table(f"../../results/atac_seq/{var_id}_phasedSNPs.vcf", comment="#", header=None)
phased_snps.columns = vcf_columns

# The genotype is the first ":"-separated entry of the sample field.
phased_snps["GT"] = [sample.split(":")[0] for sample in phased_snps.HepG2]

# Keep only phased heterozygous calls that passed the VCF filter.
is_phased_het = phased_snps.GT.isin(["1|0", "0|1"])
is_pass = phased_snps.FILTER == "PASS"
phased_snps = phased_snps.loc[is_phased_het & is_pass]

In [10]:
def get_allele(row):
    """Return the bases carried by each haplotype of a phased VCF record.

    ``row.GT`` is split on "|"; every "0" is replaced by ``row["REF"]`` and
    every "1" by ``row["ALT"]``, preserving the order of the genotype string.
    """
    column_for_code = {"0": "REF", "1": "ALT"}
    alleles = []
    for code in row.GT.split("|"):
        alleles.append(row[column_for_code[code]])
    return alleles

# NOTE(review): this definition rebinds the name `_get_allele_from_alignment`
# that an earlier cell imports from bean.mapping._supporting_fn, so every later
# call in this notebook uses the version below.
def _get_allele_from_alignment(
    ref_aligned: str,
    query_aligned: str,
    offset: int,
    strand: int,
    start_pos: int,
    end_pos: int,
    positionwise_quality: np.ndarray = None,
    quality_thres: float = -1,
):
    """Collect every mismatching alignment column into an ``Allele``.

    Args:
        ref_aligned: Aligned reference string; "-" marks a gap.
        query_aligned: Aligned read string of the same length.
        offset: Forwarded unchanged to each ``Edit``.
        strand: Forwarded unchanged to each ``Edit``.
        start_pos: Accepted but not used by this implementation.
        end_pos: Accepted but not used by this implementation.
        positionwise_quality: Optional per-base qualities of the ungapped
            query; only its length is checked. The derived quality flag is
            never read, so it does not affect the result.
        quality_thres: Threshold compared against ``positionwise_quality``;
            has no effect on the returned allele.

    Returns:
        An ``Allele`` holding one ``Edit`` per column where the two strings
        differ, including gap columns and "N" bases.

    Raises:
        AssertionError: If the aligned strings differ in length, or if
            ``positionwise_quality`` is given with a length other than the
            ungapped query length.
    """
    # Include N, no quality filter
    assert len(ref_aligned) == len(query_aligned)
    allele = Allele()
    # Running counts of gap columns met so far on each side of the alignment.
    ref_gaps = 0
    alt_gaps = 0
    alt_seq_len = len(query_aligned) - query_aligned.count("-")
    if positionwise_quality is None:
        # alt_position_is_good_quality = np.ones(alt_seq_len, dtype=bool)
        alt_position_is_good_quality = np.array(
            [c != "N" for c in query_aligned.replace("-", "")]
        )
    else:
        assert len(positionwise_quality) == alt_seq_len
        alt_position_is_good_quality = positionwise_quality > quality_thres
    for i in range(len(ref_aligned)):
        if ref_aligned[i] == query_aligned[i]:
            continue
        ref_base = ref_aligned[i]
        alt_base = query_aligned[i]
        if alt_base != "-":
            # Index into the ungapped query: alignment column minus query gaps so far.
            # The flag is computed but not consulted below (no quality filtering).
            alt_base_is_good_quality = alt_position_is_good_quality[i - alt_gaps]
        else:
            alt_base_is_good_quality = True
        if ref_base == "-":
            ref_gaps += 1
        elif alt_base == "-":
            alt_gaps += 1
        # Position on the ungapped reference; for a reference gap the counter
        # was already incremented above, so the current column is excluded.
        ref_pos = i - ref_gaps
        allele.add(
            Edit(
                rel_pos=ref_pos,
                ref_base=ref_base,
                alt_base=alt_base,
                offset=offset,
                strand=strand,
            )
        )
    return allele

In [11]:
# Phasing needs the variant of interest plus at least one other heterozygous
# SNP, so zero or one phased record is unusable (the original check only
# caught exactly one record).
if len(phased_snps) < 2:
    raise ValueError("Cannot phase variant of interest.")

# Work on an independent copy: phased_snps was produced by a .loc filter, and
# adding columns to such a slice triggers pandas' SettingWithCopyWarning.
phased_snps = phased_snps.copy()

# One list of bases per record, ordered by haplotype.
phased_snps['alleles'] = phased_snps.apply(get_allele, axis=1)

# Translate genomic coordinates into indices of amplicon_seq; the variant of
# interest itself lands on amplicon_start.
phased_snps['amplicon_pos'] = phased_snps.POS - var_pos + amplicon_start
if not (phased_snps.amplicon_pos == amplicon_start).any():
    # Fail here with a clear message instead of later at the `.item()` lookup.
    raise ValueError("Cannot phase variant of interest.")

# Base of the reference amplicon at each SNP position.
phased_snps["amplicon_base"] = phased_snps.amplicon_pos.map(
    lambda i: amplicon_seq[i]
)

In [12]:
# Phased SNPs other than the variant of interest (which sits at amplicon_start);
# these are the markers used to tell the two haplotypes apart.
phased_snps_nonvar = phased_snps.loc[phased_snps.amplicon_pos != amplicon_start]

In [13]:
# Haplotype indices (positions within the variant's `alleles` list) whose base
# at the variant of interest is a key of edit_dict, i.e. can be edited.
target_alleles = phased_snps.loc[phased_snps.amplicon_pos == amplicon_start, "alleles"].item()
editable_phase = np.flatnonzero(np.array([base in edit_dict for base in target_alleles]))

In [14]:
phased_snps

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,HepG2,GT,alleles,amplicon_pos,amplicon_base
0,chr19,58662212,rs78560062,C,T,736.77,PASS,AN=2;DB;MUMAP_REF=59.6667;MUMAP_ALT=60;AO=18;R...,GT:BX:PS:PQ:JQ,1|0:ACAAGGGGTGTAAGTA-1_69;ACGATCATCGGGTCCA-1_7...,1|0,"[T, C]",105,C
1,chr19,58662235,rs35081008,C,T,920.77,PASS,AN=2;DB;MUMAP_REF=60;MUMAP_ALT=59.619;AO=21;RO...,GT:BX:PS:PQ:JQ,0|1:TATGCCCTCTGTTGTT-1_69;ATTGTTCTCGTAGGAG-1_6...,0|1,"[C, T]",128,C


In [15]:
# Build one string per haplotype by concatenating, across all non-target
# phased SNPs, the base that haplotype carries. With two linked SNPs this
# gives e.g. phase0 = "TT" and phase1 = "CC".
phase_columns = [f'phase{i}' for i in range(len(phased_snps_nonvar.alleles.tolist()[0]))]
phases = pd.DataFrame(
    phased_snps_nonvar.alleles.tolist(),
    columns=phase_columns,
).sum(axis=0)

'\nphase0    TT\nphase1    CC\n'

In [16]:
phases

phase0    T
phase1    C
dtype: object

In [17]:
phased_snps

Unnamed: 0,CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,HepG2,GT,alleles,amplicon_pos,amplicon_base
0,chr19,58662212,rs78560062,C,T,736.77,PASS,AN=2;DB;MUMAP_REF=59.6667;MUMAP_ALT=60;AO=18;R...,GT:BX:PS:PQ:JQ,1|0:ACAAGGGGTGTAAGTA-1_69;ACGATCATCGGGTCCA-1_7...,1|0,"[T, C]",105,C
1,chr19,58662235,rs35081008,C,T,920.77,PASS,AN=2;DB;MUMAP_REF=60;MUMAP_ALT=59.619;AO=21;RO...,GT:BX:PS:PQ:JQ,0|1:TATGCCCTCTGTTGTT-1_69;ATTGTTCTCGTAGGAG-1_6...,0|1,"[C, T]",128,C


## Phase reads

In [18]:
def get_edit(row, target_pos):
    """Call edits for one allele-table row, passing a +/-20 window around ``target_pos``.

    ``row`` must expose ``Reference_Sequence`` and ``Aligned_Sequence``.
    """
    window = {"start_pos": target_pos - 20, "end_pos": target_pos + 20}
    return _get_allele_from_alignment(
        row.Reference_Sequence,
        row.Aligned_Sequence,
        offset=0,
        strand=1,
        **window,
    )

def get_all_edits(row):
    """Call edits for one allele-table row, passing a window spanning all phased SNPs.

    The window runs from one before the smallest to one after the largest
    ``amplicon_pos`` in the notebook-level ``phased_snps`` table.
    """
    snp_positions = phased_snps.amplicon_pos
    return _get_allele_from_alignment(
        row.Reference_Sequence,
        row.Aligned_Sequence,
        offset=0,
        strand=1,
        start_pos=snp_positions.min() - 1,
        end_pos=snp_positions.max() + 1,
    )
    

def filter_allele_by_pos_qual(allele, target_pos):
    """Summarise the edits of an allele string relative to ``target_pos``.

    ``allele`` is a comma-separated list of edit strings. For each edit, the
    integer before the first ":" is its position and the text after the last
    ">" is its alternative base. An edit is "near" the target when its
    position is fewer than 5 away from ``target_pos``.

    Returns:
        ``(n_out_edits, n_lq, n_crit_lq, kept)`` where ``n_out_edits`` counts
        non-"N" edits away from the target, ``n_lq`` counts edits whose
        alternative base is "N", ``n_crit_lq`` counts those "N" edits that are
        near the target, and ``kept`` is the sorted, comma-joined list of
        non-"N" edits near the target.
    """
    kept = []
    tally = {"out": 0, "lq": 0, "crit_lq": 0}
    for token in filter(None, allele.split(",")):
        alt_base = token.split(">")[-1]
        position = int(token.split(":")[0])
        near_target = abs(position - target_pos) < 5
        if alt_base == "N":
            tally["lq"] += 1
            if near_target:
                tally["crit_lq"] += 1
            continue
        if near_target:
            kept.append(token)
        else:
            tally["out"] += 1
    kept.sort()
    return tally["out"], tally["lq"], tally["crit_lq"], ",".join(kept)

def filter_allele_by_pos(list_pos, allele, target_pos):
    """Keep the edits of ``allele`` that sit on a listed position or near the target.

    An edit (one comma-separated item of ``allele``) is kept when the integer
    before its first ":" is in ``list_pos`` or is fewer than 5 away from
    ``target_pos``. The kept edits are returned sorted and comma-joined.
    """
    def _is_kept(token):
        position = int(token.split(":")[0])
        return position in list_pos or abs(position - target_pos) < 5

    return ",".join(
        sorted(token for token in allele.split(",") if token != "" and _is_kept(token))
    )

def filter_out_indels(allele):
    """Return the substitution edits of ``allele`` as a sorted, comma-joined string.

    Edits whose ``ref_base`` or ``alt_base`` is "-" (insertions/deletions) are
    dropped; the remaining edits are converted with ``str`` before sorting.
    """
    substitutions = [
        str(edit)
        for edit in allele.edits
        if not (edit.ref_base == "-" or edit.alt_base == "-")
    ]
    substitutions.sort()
    return ",".join(substitutions)



def get_phase(allele, phased_snps, var_pos):
    """Return the base a read carries at every phased SNP except the target.

    Args:
        allele: Comma-separated edit strings; in each, the integer before the
            first ":" is the position and the text after the last ">" is the
            observed base. May be the empty string (no edits).
        phased_snps: Table with ``amplicon_pos`` and ``amplicon_base`` columns.
        var_pos: Amplicon position of the variant of interest; the SNP at this
            position is skipped.

    Returns:
        A list with one base per remaining SNP, in table order: the observed
        base when the read has an edit at that position, otherwise the
        reference amplicon base.
    """
    # Map position -> "REF>ALT" part for every edit on the read.
    allele_edits = {}
    if allele != "":
        for edit in allele.split(","):
            try:
                allele_edits[int(edit.split(":")[0])] = edit.split(":")[-1]
            except ValueError:
                # Best effort: report an edit whose position is not an integer
                # and carry on without it.
                print(edit)

    # Pair each SNP position with its reference base once, instead of building
    # a position-indexed copy of the table for every SNP of every read.
    snp_bases = zip(
        phased_snps.amplicon_pos.tolist(),
        phased_snps.amplicon_base.tolist(),
    )
    phase = []
    for pos, amplicon_base in snp_bases:
        if pos == var_pos:
            continue
        if pos in allele_edits:
            phase.append(allele_edits[pos].split(">")[-1])
        else:
            phase.append(amplicon_base)
    return phase


def mask_seq(seq, mask_pos):
    """Return ``seq`` with the characters selected by ``mask_pos`` replaced by "N".

    ``mask_pos`` is used as a NumPy index into the per-character array of
    ``seq``.
    """
    chars = np.array(list(seq))
    chars[mask_pos] = "N"
    return "".join(chars.tolist())
    
def assign_phase(phases, allele_phase):
    """Return the index of the haplotype matching a read's SNP bases, or -1.

    Args:
        phases: One string per haplotype, each the concatenation of that
            haplotype's bases over all non-target phased SNPs (e.g. "TT").
        allele_phase: List of the bases the read carries at those SNPs, in the
            same order.

    Returns:
        The positional index into ``phases`` of the unique matching haplotype,
        or -1 when no haplotype or more than one haplotype matches (including
        when ``allele_phase`` is empty).
    """
    # Compare the read's full multi-SNP string. Using only the first base can
    # never equal a concatenated phase string when several SNPs are linked,
    # and indexing [0] fails on an empty list.
    observed = "".join(allele_phase)
    match = np.where(phases == observed)[0]
    if len(match) != 1:
        return -1
    return match[0]
# NOTE(review): second, identical definition of filter_out_indels in this cell;
# it rebinds the name defined a few functions earlier. One of the two copies
# can be removed.
def filter_out_indels(allele):
    """Return the non-indel edits of ``allele`` as a sorted, comma-joined string."""
    edits = []
    for edit in allele.edits:
        # Skip insertions and deletions, which carry "-" on one side.
        if edit.ref_base != "-" and edit.alt_base != "-":
            edits.append(str(edit))
    return ",".join(sorted(edits))

def get_pos_edit_rate(aftbl):
    """Return the positions edited in more than half of all reads.

    Args:
        aftbl: Allele table with a ``#Reads`` count column and a ``be_allele``
            column of comma-separated edit strings (empty string = no edits).
            The integer before the first ":" of each edit is its position.

    Returns:
        A NumPy array of the positions whose summed read count, divided by the
        total read count of ``aftbl``, exceeds 0.5.
    """
    # Denominator is every read in the table, not only the edited ones.
    total_counts = aftbl['#Reads'].sum()
    edited_tbl = aftbl.loc[aftbl.be_allele != "", :].copy()
    edited_tbl['edits'] = edited_tbl.be_allele.map(lambda a: a.split(","))
    # One row per (read group, edit) so counts can be summed per position.
    edit_tbl = edited_tbl.explode("edits")
    edit_tbl['pos'] = edit_tbl.edits.map(lambda e: int(e.split(":")[0]))
    per_pos_edits = edit_tbl.groupby("pos")["#Reads"].sum()
    per_pos_edit_rates = per_pos_edits / total_counts
    # Return the position labels themselves rather than their row numbers in
    # the grouped result.
    return per_pos_edit_rates.index[per_pos_edit_rates > 0.5].to_numpy()

def format_allele_freq_table(path, varpos_amplicon):
    """Load an allele frequency table and annotate each row with its phase.

    Args:
        path: Tab-separated allele frequency table; rows must provide the
            ``Reference_Sequence`` and ``Aligned_Sequence`` columns.
        varpos_amplicon: Amplicon position of the variant of interest.

    Returns:
        The table restricted to rows with at most one off-target edit, no "N"
        call near the target and at most two "N" calls overall, with these
        added columns: ``allele``, ``allele_around_target``, ``be_allele``,
        ``be_allele_around_target``, ``err``, ``N``, ``target_proximal_N``,
        ``phased_allele``, ``phase`` and ``assigned_phase``.

    Reads the notebook-level ``phased_snps`` and ``phases`` tables.
    """
    aftbl = pd.read_csv(path, sep='\t')
    aftbl["allele"] = aftbl.apply(get_all_edits, axis=1)
    aftbl["allele_around_target"] = aftbl.apply(lambda row: get_edit(row, varpos_amplicon), axis=1)
    aftbl['be_allele'] = aftbl.allele.map(filter_out_indels)
    aftbl['be_allele_around_target'] = aftbl.allele_around_target.map(filter_out_indels)

    # Run the quality summary once per row and unpack its fields, instead of
    # recomputing the same tuple for each derived column.
    qual_summary = aftbl.be_allele_around_target.map(
        lambda a: filter_allele_by_pos_qual(a, varpos_amplicon)
    )
    aftbl['err'] = qual_summary.map(lambda summary: summary[0])
    aftbl['N'] = qual_summary.map(lambda summary: summary[1])
    aftbl['target_proximal_N'] = qual_summary.map(lambda summary: summary[2])

    snp_positions = phased_snps.amplicon_pos.tolist()
    aftbl['phased_allele'] = aftbl.be_allele.map(
        lambda a: filter_allele_by_pos(snp_positions, a, varpos_amplicon)
    )

    # Copy after filtering so the column assignments below act on an
    # independent frame rather than on a slice of the unfiltered table.
    is_clean = (aftbl.err <= 1) & (aftbl.target_proximal_N == 0) & (aftbl.N <= 2)
    aftbl = aftbl.loc[is_clean].copy()

    # Use the function argument for the variant position rather than the
    # notebook-level amplicon_start, so the function honours its own input.
    aftbl['phase'] = aftbl.phased_allele.map(lambda a: get_phase(a, phased_snps, varpos_amplicon))
    aftbl["assigned_phase"] = aftbl.phase.map(lambda ap: assign_phase(phases, ap))
    return aftbl

def get_phased_edit_rate(aftbl):
    """Count reads per assigned phase and editing status at the target variant.

    Args:
        aftbl: Table with ``assigned_phase``, ``phased_allele`` and ``#Reads``
            columns. It is modified in place: ``edited`` and ``edited_clean``
            columns are added.

    Returns:
        A Series of summed ``#Reads`` indexed by
        ``(assigned_phase, edited_clean)``.

    Reads the notebook-level ``phased_snps``, ``amplicon_start``,
    ``editable_phase`` and ``var_alleles``. Rows outside the editable phase
    get ``edited`` = NaN, reported as ``edited_clean`` = False.
    """
    var_amplicon_base = phased_snps.loc[phased_snps.amplicon_pos == amplicon_start, "amplicon_base"].item()
    ref_base = var_alleles.ref.item()
    edited_base = var_alleles.edited.item()

    is_editable = aftbl.assigned_phase.isin(editable_phase)
    aftbl.loc[~is_editable, "edited"] = np.nan
    if var_amplicon_base == ref_base:
        # Amplicon shows the unedited base: a read is edited when it carries
        # the ref>edited substitution. Interpolate the scalar base, not the
        # Series, otherwise the string can never match.
        edit_str = f"{amplicon_start}:{amplicon_start}:+:{ref_base}>{edited_base}"
        aftbl.loc[is_editable, 'edited'] = aftbl.loc[is_editable, :].phased_allele.map(lambda s: edit_str in s)
    elif var_amplicon_base == edited_base:
        # Amplicon already shows the edited base: a read is edited unless it
        # carries the substitution back to the unedited base.
        revert_str = f"{amplicon_start}:{amplicon_start}:+:{var_amplicon_base}>{ref_base}"
        aftbl.loc[is_editable, 'edited'] = aftbl.loc[is_editable, :].phased_allele.map(lambda s: revert_str not in s)
    aftbl['edited_clean'] = aftbl['edited'].map(lambda s: False if pd.isna(s) else s)
    return aftbl.groupby(['assigned_phase', 'edited_clean'])['#Reads'].sum()

In [19]:
CRISPRESSO_DIR = "../../results/atac_seq/crispresso_runs_indiv_demuxed"


def _load_allele_table(run_dir):
    """Return the formatted allele frequency table of one CRISPResso run.

    If ``Alleles_frequency_table.txt`` is missing from ``run_dir`` it is first
    extracted from ``Alleles_frequency_table.zip`` in the same directory. A
    missing or unreadable archive raises instead of being silently ignored,
    which is what the unchecked shell `unzip` call did.
    """
    table_path = os.path.join(run_dir, "Alleles_frequency_table.txt")
    if not os.path.exists(table_path):
        with zipfile.ZipFile(os.path.join(run_dir, "Alleles_frequency_table.zip")) as archive:
            archive.extractall(run_dir)
    return format_allele_freq_table(table_path, amplicon_start)


# For every replicate ('C' is the fourth entry) and condition, collect the
# phased read counts of the ATAC-seq and gDNA libraries, in that order.
results = {}
for rep in tqdm([1, 2, 3, 'C']):
    for cond in tqdm(["Ser", "SS"]):
        exp_id = f"{rep}-{cond}_20-locus"
        aftbl_atac = _load_allele_table(f"{CRISPRESSO_DIR}/CRISPResso_on_{exp_id}_ATAC_Seq.{var_id}")
        aftbl_gdna = _load_allele_table(f"{CRISPRESSO_DIR}/CRISPResso_on_{exp_id}_GDNA.{var_id}")
        results[exp_id] = [get_phased_edit_rate(aftbl_atac), get_phased_edit_rate(aftbl_gdna)]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

In [20]:
# Reshape the per-experiment read counts into one wide table: two rows
# ("atac", "gdna") per experiment and one column per phase/edit category.
res = []
for k, dfs in results.items():
    # dfs[0] is the ATAC-seq count Series, dfs[1] the gDNA one; categories
    # missing from either side are filled with 0 reads.
    catdf = dfs[0].to_frame().join(dfs[1].to_frame(), how='outer', rsuffix='gdna').fillna(0)
    catdf.columns = ["atac", "gdna"]
    print(catdf)
    # NOTE(review): the checks below run in sequence and each can change
    # len(catdf), so a table padded by one branch may also enter a later one.
    if len(catdf) == 1:
        catdf.loc[(0,False),:] = (0,0)
    if len(catdf) == 2: 
        # Columns are named after the raw assigned_phase values in this case.
        cols= catdf.index.get_level_values(0)
    if len(catdf) == 3:
        # Assumes the missing category is the unassigned one (-1, False);
        # the reordering raises KeyError if a different row is absent.
        catdf.loc[(-1,False),:] = (0,0)
        catdf = catdf.loc[[(-1,False),(0,False),(1,False),(1,True)],:]
        print(catdf)
    if len(catdf) == 4:
        # NOTE(review): labels assume phase 1 is the editable phase -- confirm
        # against editable_phase when reusing this for another variant.
        cols = ["Unassigned", "Phase0", "Phase1_unedited", "Phase1_edited"]
    # NOTE(review): for any other row count `cols` keeps its value from the
    # previous iteration (or is undefined on the first one).
    catdf = catdf.T
    catdf.columns=cols
    catdf['exp'] = k
    res.append(catdf)
results_df = pd.concat(res)

                               atac    gdna
assigned_phase edited_clean                
-1             False            202     247
 0             False         185752  235731
 1             False           2328   19584
               True             112     309
                               atac    gdna
assigned_phase edited_clean                
-1             False            117     320
 0             False         110293  306440
 1             False           1392   20428
               True              92     407
                               atac    gdna
assigned_phase edited_clean                
-1             False            171     269
 0             False         164381  256479
 1             False           3220   17066
               True             216     796
                              atac    gdna
assigned_phase edited_clean               
-1             False            42     328
 0             False         40050  279510
 1             False          2188  

In [21]:
results_df

Unnamed: 0,Unassigned,Phase0,Phase1_unedited,Phase1_edited,exp
atac,202,185752,2328,112,1-Ser_20-locus
gdna,247,235731,19584,309,1-Ser_20-locus
atac,117,110293,1392,92,1-SS_20-locus
gdna,320,306440,20428,407,1-SS_20-locus
atac,171,164381,3220,216,2-Ser_20-locus
gdna,269,256479,17066,796,2-Ser_20-locus
atac,42,40050,2188,91,2-SS_20-locus
gdna,328,279510,19387,817,2-SS_20-locus
atac,123,105297,1072,172,3-Ser_20-locus
gdna,268,228521,10911,2499,3-Ser_20-locus


In [22]:
results_df

Unnamed: 0,Unassigned,Phase0,Phase1_unedited,Phase1_edited,exp
atac,202,185752,2328,112,1-Ser_20-locus
gdna,247,235731,19584,309,1-Ser_20-locus
atac,117,110293,1392,92,1-SS_20-locus
gdna,320,306440,20428,407,1-SS_20-locus
atac,171,164381,3220,216,2-Ser_20-locus
gdna,269,256479,17066,796,2-Ser_20-locus
atac,42,40050,2188,91,2-SS_20-locus
gdna,328,279510,19387,817,2-SS_20-locus
atac,123,105297,1072,172,3-Ser_20-locus
gdna,268,228521,10911,2499,3-Ser_20-locus


In [23]:
# Total reads per library: sum of the first four columns, which are the four
# read-count categories at this point.
read_totals = results_df.iloc[:, :4].sum(axis=1)
phase1_reads = results_df["Phase1_unedited"] + results_df["Phase1_edited"]

# Rows indexed "atac" are the ATAC-seq libraries.
results_df['is_atac'] = results_df.index == 'atac'
results_df['phase0_frac'] = results_df["Phase0"] / read_totals
results_df['phase1_frac'] = phase1_reads / read_totals

In [24]:
# Total phase-1 reads per library, edited and unedited combined.
results_df["Phase1"] = results_df[["Phase1_edited", "Phase1_unedited"]].sum(axis=1)

In [25]:
# Drop the "_20-locus" suffix so `exp` reads "<rep>-<cond>", then split it
# into its replicate and condition parts.
results_df['exp'] = results_df.exp.str.split("_").str[0]
rep_and_cond = results_df.exp.str.split("-", expand=True)
results_df[['rep', 'cond']] = rep_and_cond
# Every replicate other than "C" counts as treated.
results_df['is_trt'] = results_df.rep.ne("C")

In [65]:
# Two-column response (Phase1_edited, Phase1_unedited) against treatment status.
# NOTE(review): this cell's execution count (65) is out of sequence with its
# position in the notebook, and the same model is built again further down.
y, X = dmatrices('Phase1_edited + Phase1_unedited ~ is_trt', data = results_df, return_type='dataframe')

In [66]:
glm_binom = sm.GLM(y, X, family=sm.families.Binomial())

In [67]:
glm_res = glm_binom.fit()

In [68]:
# Turn the coefficient table of the summary into a DataFrame by rendering it
# to HTML and parsing it back; values keep the rounding of the printed table.
result_html = glm_res.summary().tables[1].as_html()
glm_res_tbl = pd.read_html(result_html, header=0, index_col=0)[0]

In [69]:
glm_res_tbl

Unnamed: 0,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-4.0925,0.037,-109.285,0.0,-4.166,-4.019
is_trt[T.True],1.5883,0.039,40.751,0.0,1.512,1.665


In [70]:
# Name the output after var_id so the file follows the variant being analysed
# instead of a hard-coded copy of its identifier.
glm_res_tbl.to_csv(f"{var_id}_ctrl_result.csv")

In [51]:
# Two-column response (Phase0, Phase1) against replicate, condition, library
# type and the condition x library-type interaction.
# NOTE(review): execution count (51) is out of sequence with the cell's position.
y, X = dmatrices('Phase0 + Phase1 ~ rep + cond + is_atac + cond*is_atac', data = results_df, return_type='dataframe')

In [52]:
glm_binom = sm.GLM(y, X, family=sm.families.Binomial())

In [53]:
glm_res = glm_binom.fit()

In [54]:
# Coefficient table of the summary as a DataFrame, obtained via an HTML round trip.
result_html = glm_res.summary().tables[1].as_html()
glm_res_tbl = pd.read_html(result_html, header=0, index_col=0)[0]

In [55]:
glm_res_tbl

Unnamed: 0,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,2.5674,0.005,468.633,0.0,2.557,2.578
rep[T.2],-0.0331,0.007,-4.755,0.0,-0.047,-0.019
rep[T.3],0.1765,0.007,23.754,0.0,0.162,0.191
rep[T.C],-0.1695,0.007,-24.17,0.0,-0.183,-0.156
cond[T.Ser],0.1108,0.005,20.483,0.0,0.1,0.121
is_atac[T.True],1.5553,0.012,125.836,0.0,1.531,1.58
cond[T.Ser]:is_atac[T.True],-0.1493,0.017,-8.955,0.0,-0.182,-0.117


In [56]:
# Name the output after var_id so the file follows the variant being analysed
# instead of a hard-coded copy of its identifier.
glm_res_tbl.to_csv(f"{var_id}_caqtl_result.csv")

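Phase 0 is enriched in ATAC-seq relative to gDNA (`is_atac[T.True]` coefficient 1.56, p < 0.001 in the `Phase0 + Phase1` binomial GLM above).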

In [32]:
results_df

Unnamed: 0,Unassigned,Phase0,Phase1_unedited,Phase1_edited,exp,is_atac,phase0_frac,phase1_frac,Phase1,rep,cond,is_trt
atac,202,185752,2328,112,1-Ser,True,0.985976,0.012952,2440,1,Ser,True
gdna,247,235731,19584,309,1-Ser,False,0.921288,0.077746,19893,1,Ser,True
atac,117,110293,1392,92,1-SS,True,0.985692,0.013263,1484,1,SS,True
gdna,320,306440,20428,407,1-SS,False,0.935423,0.0636,20835,1,SS,True
atac,171,164381,3220,216,2-Ser,True,0.978528,0.020454,3436,2,Ser,True
gdna,269,256479,17066,796,2-Ser,False,0.933975,0.065045,17862,2,Ser,True
atac,42,40050,2188,91,2-SS,True,0.945222,0.053787,2279,2,SS,True
gdna,328,279510,19387,817,2-SS,False,0.93157,0.067337,20204,2,SS,True
atac,123,105297,1072,172,3-Ser,True,0.987184,0.011663,1244,3,Ser,True
gdna,268,228521,10911,2499,3-Ser,False,0.943526,0.055368,13410,3,Ser,True


In [33]:
# Replace spaces in column names with underscores so every column can be
# referenced by name inside a model formula.
results_df.columns = results_df.columns.str.replace(" ", "_")

### Edits

In [34]:
# Two-column response (Phase1_edited, Phase1_unedited) against treatment
# status, over all libraries including the control replicate.
y, X = dmatrices('Phase1_edited + Phase1_unedited ~ is_trt', data = results_df, return_type='dataframe')

In [35]:
glm_binom = sm.GLM(y, X, family=sm.families.Binomial())

In [36]:
glm_res = glm_binom.fit()

In [37]:
glm_res.summary()

0,1,2,3
Dep. Variable:,"['Phase1_edited', 'Phase1_unedited']",No. Observations:,16.0
Model:,GLM,Df Residuals:,14.0
Model Family:,Binomial,Df Model:,1.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-4131.8
Date:,"Wed, 14 Jun 2023",Deviance:,8145.9
Time:,13:49:49,Pearson chi2:,8900.0
No. Iterations:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-4.0925,0.037,-109.285,0.000,-4.166,-4.019
is_trt[T.True],1.5883,0.039,40.751,0.000,1.512,1.665


In [38]:
# Same two-column response, restricted to the treated replicates (rep != "C"),
# against replicate, condition, library type and their interaction.
y, X = dmatrices('Phase1_edited + Phase1_unedited ~ rep + cond + is_atac + cond*is_atac', data = results_df.loc[results_df.rep != "C",:], return_type='dataframe')

In [39]:
glm_binom = sm.GLM(y, X, family=sm.families.Binomial())

In [40]:
glm_res = glm_binom.fit()

In [41]:
glm_res.summary()

0,1,2,3
Dep. Variable:,"['Phase1_edited', 'Phase1_unedited']",No. Observations:,12.0
Model:,GLM,Df Residuals:,6.0
Model Family:,Binomial,Df Model:,5.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-150.59
Date:,"Wed, 14 Jun 2023",Deviance:,209.45
Time:,13:49:49,Pearson chi2:,244.0
No. Iterations:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.8585,0.035,-108.984,0.000,-3.928,-3.789
rep[T.2],0.7717,0.041,18.952,0.000,0.692,0.852
rep[T.3],2.4007,0.036,66.459,0.000,2.330,2.472
cond[T.Ser],-0.0376,0.024,-1.577,0.115,-0.084,0.009
is_atac[T.True],0.1174,0.056,2.079,0.038,0.007,0.228
cond[T.Ser]:is_atac[T.True],0.0790,0.076,1.036,0.300,-0.070,0.228


In [42]:
glm_res.pvalues

Intercept                      0.000000e+00
rep[T.2]                       4.240342e-80
rep[T.3]                       0.000000e+00
cond[T.Ser]                    1.147559e-01
is_atac[T.True]                3.759523e-02
cond[T.Ser]:is_atac[T.True]    3.003828e-01
dtype: float64

In [44]:
# Coefficient table of the summary as a DataFrame, obtained via an HTML round trip.
result_html = glm_res.summary().tables[1].as_html()
glm_res_tbl = pd.read_html(result_html, header=0, index_col=0)[0]

In [45]:
glm_res_tbl

Unnamed: 0,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.8585,0.035,-108.984,0.0,-3.928,-3.789
rep[T.2],0.7717,0.041,18.952,0.0,0.692,0.852
rep[T.3],2.4007,0.036,66.459,0.0,2.33,2.472
cond[T.Ser],-0.0376,0.024,-1.577,0.115,-0.084,0.009
is_atac[T.True],0.1174,0.056,2.079,0.038,0.007,0.228
cond[T.Ser]:is_atac[T.True],0.079,0.076,1.036,0.3,-0.07,0.228


In [46]:
# Name the output after var_id so the file follows the variant being analysed
# instead of a hard-coded copy of its identifier.
glm_res_tbl.to_csv(f"{var_id}_result.csv")

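Edited Phase 1 is enriched in ATAC-seq relative to gDNA (`is_atac[T.True]` coefficient 0.117, p = 0.038 in the GLM above); the `cond`:`is_atac` interaction is not significant (p = 0.30).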