# DESeq2_processing_for_intersect.ipynb
## This is intended to process DESeq2 results from <br> comparisons of non-gene genomic regions, like <br> those generated by MACS2, fStitch, or Tfit
### Parsing the chromosome position boundaries depends on region names of the <br> form `chrom:start-end`, as carried over from the original region bed files

### Import libraries

In [1]:
import numpy as np
import pandas as pd
import math
import matplotlib
import matplotlib.pyplot as plt

### Open the tab-delimited file, split each line into fields, and store the rows in the variable 'regions'

In [2]:
# Location of the DESeq2 results table. Kept as a one-element list, so the
# path string itself is file1[0].
file1 = ["/home/lsanford/Documents/data/nascent_ATAC_comparison/"
         "Sasse/ATAC/tfea/broad/output_p1e-6/"
         "B2B30ATACseq-TE_v_B2B30ATACseq-TD/temp_files/DESeq.res.txt"]

# Read every line (header row included) as a tuple of its tab-separated fields.
with open(file1[0]) as handle:
    regions = [tuple(row.strip().split("\t")) for row in handle]

### Create lists for the relevant variables; the column positions depend on <br> whether the DESeq2 header row starts with an "id" column

In [5]:
# Parallel output columns: one entry per retained region.
chroms = []
begs = []
ends = []
means = []
L2FC = []
padj = []

# Column positions of (mean, log2 fold change, adjusted p-value) in each row.
# They differ depending on whether the header row begins with a quoted "id"
# field. The emptiness guard keeps an empty `regions` a no-op.
if regions and regions[0][0] == "\"id\"":
    mean_col, lfc_col, padj_col = 2, 6, 8
else:
    mean_col, lfc_col, padj_col = 1, 2, 7

# Skip the header row and drop rows whose adjusted p-value is 'NA'.
for row in regions[1:]:
    if row[padj_col] == 'NA':
        continue
    # The first field is split as chrom:start-end after stripping quotes.
    locus = row[0].strip("\"").split(":")
    span = locus[1].split("-")
    chroms.append(locus[0])
    begs.append(span[0])
    ends.append(span[1])
    means.append(float(row[mean_col]))
    L2FC.append(float(row[lfc_col]))
    padj.append(float(row[padj_col]))

### Create a dataframe that puts the relevant variables in bed format <br> and keeps only regions with FDR < 0.05

In [288]:
# Assemble the parsed lists into a bed-like table: chromosome, start, end,
# followed by the DESeq2 statistics, in this column order.
bed_columns = {
    'Chromosome': chroms,
    'coord1': begs,
    'coord2': ends,
    'Base_mean': means,
    'Log2FC': L2FC,
    'FDR': padj,
}
de_regions = pd.DataFrame(data=bed_columns)

# Keep only the regions whose adjusted p-value is below 0.05.
is_significant = de_regions['FDR'] < 0.05
de_regions = de_regions.loc[is_significant]

### Make new filename and export data in approximately bed format

In [289]:
# Write the filtered table into the same directory as the input file.
# file1 is a one-element list, so the path string is file1[0]; calling
# .split() on the list itself raises AttributeError.
indir = file1[0].split('/')
outdir = '/'.join(indir[0:-1])
outfile = ''.join([outdir, '/', 'processed_DESeq2_results.txt'])
# Tab-separated output with no header row and no index column.
de_regions.to_csv(outfile, sep="\t", index=False, index_label=False, header=False)