In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
chip_path = "source/ahr_chip.bed"
atac_path = "ahr_atac_chip_intersect.sorted.bed"

ahr_motif_path = "motif_ahr_atac_intersect.bed"
tead1_motif_path = "motif_tead1_atac_intersect.bed"
tead2_motif_path = "motif_tead2_atac_intersect.bed"
tead3_motif_path = "motif_tead3_atac_intersect.bed"
tead4_motif_path = "motif_tead4_atac_intersect.bed"

In [3]:
output_folder = "_output_ucsc"

In [4]:
df_ahr_chip = pd.read_csv(chip_path, sep='\t', header=None, names=['chrom','start','end','name','score','strand'])
df_ahr_chip

Unnamed: 0,chrom,start,end,name,score,strand
0,chr1,3050022,3050437,ALL_peak_1,9.30394,+
1,chr1,3065136,3065388,ALL_peak_2,5.31654,+
2,chr1,3091303,3091689,ALL_peak_3,3.22546,+
3,chr1,3096524,3097046,ALL_peak_4,5.64882,+
4,chr1,3099479,3099675,ALL_peak_5,4.33843,+
...,...,...,...,...,...,...
121760,chrY,90832135,90832441,ALL_peak_123648,7.97481,+
121761,chrY,90836613,90836799,ALL_peak_123649,6.31339,+
121762,chrY_JH584300_random,1276,1571,ALL_peak_123650,4.80298,+
121763,chrY_JH584300_random,22769,22991,ALL_peak_123651,4.83819,+


In [5]:
ahr_chip_mean = df_ahr_chip['score'].mean()
ahr_chip_sd = df_ahr_chip['score'].std()
ahr_chip_norm = ahr_chip_mean + (ahr_chip_sd * 3)

print('Mean: ' + str(ahr_chip_mean))
print('SD: ' + str(ahr_chip_sd))
print('Norm: ' + str(ahr_chip_norm))

Mean: 5.413080697491069
SD: 1.4520722536001818
Norm: 9.769297458291614


In [6]:
df_ahr_chip['score_norm'] = (df_ahr_chip['score'] / ahr_chip_norm) * 1000
df_ahr_chip

Unnamed: 0,chrom,start,end,name,score,strand,score_norm
0,chr1,3050022,3050437,ALL_peak_1,9.30394,+,952.365310
1,chr1,3065136,3065388,ALL_peak_2,5.31654,+,544.209041
2,chr1,3091303,3091689,ALL_peak_3,3.22546,+,330.162943
3,chr1,3096524,3097046,ALL_peak_4,5.64882,+,578.221722
4,chr1,3099479,3099675,ALL_peak_5,4.33843,+,444.088228
...,...,...,...,...,...,...,...
121760,chrY,90832135,90832441,ALL_peak_123648,7.97481,+,816.313561
121761,chrY,90836613,90836799,ALL_peak_123649,6.31339,+,646.248108
121762,chrY_JH584300_random,1276,1571,ALL_peak_123650,4.80298,+,491.640266
121763,chrY_JH584300_random,22769,22991,ALL_peak_123651,4.83819,+,495.244415


In [7]:
df_ahr_chip_sub = df_ahr_chip[['chrom','start','end','name','score_norm']]
df_ahr_chip_sub

Unnamed: 0,chrom,start,end,name,score_norm
0,chr1,3050022,3050437,ALL_peak_1,952.365310
1,chr1,3065136,3065388,ALL_peak_2,544.209041
2,chr1,3091303,3091689,ALL_peak_3,330.162943
3,chr1,3096524,3097046,ALL_peak_4,578.221722
4,chr1,3099479,3099675,ALL_peak_5,444.088228
...,...,...,...,...,...
121760,chrY,90832135,90832441,ALL_peak_123648,816.313561
121761,chrY,90836613,90836799,ALL_peak_123649,646.248108
121762,chrY_JH584300_random,1276,1571,ALL_peak_123650,491.640266
121763,chrY_JH584300_random,22769,22991,ALL_peak_123651,495.244415


In [8]:
with open(os.path.join(output_folder, "ahr_chip.bed"), 'w') as file:
    file.write("track name=ahr_chip description=\"Ahr ChIP-Seq\" useScore=1\n")
    df_ahr_chip_sub.to_csv(file, sep="\t", index=False, header=False)

In [9]:
df_ahr_atac = pd.read_csv(atac_path, sep='\t', header=None, names=['chrom','start','end','name','score','chrom_chip','start_chip','end_chip','name_chip','score_chip','strand_chip','overlap'])
df_ahr_atac

Unnamed: 0,chrom,start,end,name,score,chrom_chip,start_chip,end_chip,name_chip,score_chip,strand_chip,overlap
0,chr1,9630062,9631842,Interval_158,1.173096,chr1,9630849,9631125,ALL_peak_277,3.80946,+,276
1,chr1,40830253,40831798,Interval_984,0.707852,chr1,40829923,40830305,ALL_peak_1751,6.19776,+,52
2,chr1,44551390,44554287,Interval_1096,0.449203,chr1,44552591,44552998,ALL_peak_1942,19.45451,+,407
3,chr1,54584565,54586395,Interval_1363,0.720508,chr1,54585873,54586115,ALL_peak_2368,5.26075,+,242
4,chr1,54584565,54586395,Interval_1363,0.720508,chr1,54584645,54584907,ALL_peak_2367,11.29765,+,262
...,...,...,...,...,...,...,...,...,...,...,...,...
280,chrX,169973050,169976568,Interval_85659,-1.793529,chrX,169974528,169974765,ALL_peak_122942,7.89112,+,237
281,chrX,169993210,169995244,Interval_85663,-2.445382,chrX,169994725,169995022,ALL_peak_122944,2.66392,+,297
282,chrX,170004014,170006039,Interval_85664,-1.988200,chrX,170004057,170004246,ALL_peak_122950,2.91074,+,189
283,chrX,170004014,170006039,Interval_85664,-1.988200,chrX,170004648,170004862,ALL_peak_122951,2.32970,+,214


In [10]:
df_ahr_atac_sub = df_ahr_atac[['chrom','start','end','name']]
df_ahr_atac_sub

Unnamed: 0,chrom,start,end,name
0,chr1,9630062,9631842,Interval_158
1,chr1,40830253,40831798,Interval_984
2,chr1,44551390,44554287,Interval_1096
3,chr1,54584565,54586395,Interval_1363
4,chr1,54584565,54586395,Interval_1363
...,...,...,...,...
280,chrX,169973050,169976568,Interval_85659
281,chrX,169993210,169995244,Interval_85663
282,chrX,170004014,170006039,Interval_85664
283,chrX,170004014,170006039,Interval_85664


In [11]:
with open(os.path.join(output_folder, "ahr_atac.bed"), 'w') as file:
    file.write("track name=ahr_atac description=\"Ahr ATAC Overlap\"\n")
    df_ahr_atac_sub.to_csv(file, sep="\t", index=False, header=False)

In [12]:
df_ahr_motif = pd.read_csv(ahr_motif_path, sep='\t', header=None, names=['chrom','start','end','name','score','chrom_2','start_2','end_2','name_2','score_2','chrom_3','start_3','end_3','name_3','score_3','strand_3','overlap'])
df_ahr_motif

Unnamed: 0,chrom,start,end,name,score,chrom_2,start_2,end_2,name_2,score_2,chrom_3,start_3,end_3,name_3,score_3,strand_3,overlap
0,chr1,44552810,44552816,chr1:44552810-44552816,0.07256,chr1,44551390,44554287,Interval_1096,0.449203,chr1,44552591,44552998,ALL_peak_1942,19.45451,+,407
1,chr1,44552816,44552822,chr1:44552816-44552822,0.07256,chr1,44551390,44554287,Interval_1096,0.449203,chr1,44552591,44552998,ALL_peak_1942,19.45451,+,407
2,chr1,44553674,44553680,chr1:44553674-44553680,0.35039,chr1,44551390,44554287,Interval_1096,0.449203,chr1,44552591,44552998,ALL_peak_1942,19.45451,+,407
3,chr1,54585546,54585552,chr1:54585546-54585552,-0.40494,chr1,54584565,54586395,Interval_1363,0.720508,chr1,54585873,54586115,ALL_peak_2368,5.26075,+,242
4,chr1,54585546,54585552,chr1:54585546-54585552,-0.40494,chr1,54584565,54586395,Interval_1363,0.720508,chr1,54584645,54584907,ALL_peak_2367,11.29765,+,262
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289,chr9,99414736,99414742,chr9:99414736-99414742,-0.99902,chr9,99414258,99416167,Interval_82474,0.854551,chr9,99414142,99414474,ALL_peak_119602,9.96851,+,216
290,chr9,116190169,116190175,chr9:116190169-116190175,-0.69566,chr9,116186831,116191248,Interval_83191,0.330017,chr9,116187996,116188199,ALL_peak_120583,6.15282,+,203
291,chr9,122824114,122824120,chr9:122824114-122824120,0.80745,chr9,122823194,122824139,Interval_83553,0.597318,chr9,122823002,122823261,ALL_peak_121026,5.98111,+,67
292,chrX,169974662,169974668,chrX:169974662-169974668,0.00000,chrX,169973050,169976568,Interval_85659,-1.793529,chrX,169974528,169974765,ALL_peak_122942,7.89112,+,237


In [13]:
motif_mean = df_ahr_motif['score'].mean()
motif_sd = df_ahr_motif['score'].std()
motif_norm = motif_mean + (motif_sd * 3)

print('Ahr')
print('Mean: ' + str(motif_mean))
print('SD: ' + str(motif_sd))
print('Norm: ' + str(motif_norm))

Ahr
Mean: 0.04576078231292517
SD: 0.4799856340978774
Norm: 1.4857176846065574


In [14]:
df_ahr_motif['score_norm'] = (df_ahr_motif['score'] / motif_norm) * 1000
df_motif_sub = df_ahr_motif[['chrom','start','end','name','score_norm']]

with open(os.path.join(output_folder, "ahr_motif.bed"), 'w') as file:
    file.write("track name=ahr_motifs useScore=1\n")
    df_motif_sub.to_csv(file, sep="\t", index=False, header=False)

In [15]:
df_tead1_motif = pd.read_csv(tead1_motif_path, sep='\t', header=None, names=['chrom','start','end','name','score','chrom_2','start_2','end_2','name_2','score_2','chrom_3','start_3','end_3','name_3','score_3','strand_3','overlap'])

motif_mean = df_tead1_motif['score'].mean()
motif_sd = df_tead1_motif['score'].std()
motif_norm = motif_mean + (motif_sd * 3)

print('TEAD1')
print('Mean: ' + str(motif_mean))
print('SD: ' + str(motif_sd))
print('Norm: ' + str(motif_norm))

df_tead1_motif['score_norm'] = (df_tead1_motif['score'] / motif_norm) * 1000
df_motif_sub = df_tead1_motif[['chrom','start','end','name','score_norm']]

with open(os.path.join(output_folder, "tead1_motif.bed"), 'w') as file:
    file.write("track name=tead1_motifs useScore=1\n")
    df_motif_sub.to_csv(file, sep="\t", index=False, header=False)

TEAD1
Mean: -0.04489666666666666
SD: 0.5970886166055506
Norm: 1.7463691831499852


In [16]:
df_tead2_motif = pd.read_csv(tead2_motif_path, sep='\t', header=None, names=['chrom','start','end','name','score','chrom_2','start_2','end_2','name_2','score_2','chrom_3','start_3','end_3','name_3','score_3','strand_3','overlap'])

motif_mean = df_tead2_motif['score'].mean()
motif_sd = df_tead2_motif['score'].std()
motif_norm = motif_mean + (motif_sd * 3)

print('TEAD2')
print('Mean: ' + str(motif_mean))
print('SD: ' + str(motif_sd))
print('Norm: ' + str(motif_norm))

df_tead2_motif['score_norm'] = (df_tead2_motif['score'] / motif_norm) * 1000
df_motif_sub = df_tead2_motif[['chrom','start','end','name','score_norm']]

with open(os.path.join(output_folder, "tead2_motif.bed"), 'w') as file:
    file.write("track name=tead2_motifs useScore=1\n")
    df_motif_sub.to_csv(file, sep="\t", index=False, header=False)

TEAD2
Mean: -0.10812110497237569
SD: 0.5940401800008472
Norm: 1.6739994350301661


In [17]:
df_tead3_motif = pd.read_csv(tead3_motif_path, sep='\t', header=None, names=['chrom','start','end','name','score','chrom_2','start_2','end_2','name_2','score_2','chrom_3','start_3','end_3','name_3','score_3','strand_3','overlap'])

motif_mean = df_tead3_motif['score'].mean()
motif_sd = df_tead3_motif['score'].std()
motif_norm = motif_mean + (motif_sd * 3)

print('TEAD3')
print('Mean: ' + str(motif_mean))
print('SD: ' + str(motif_sd))
print('Norm: ' + str(motif_norm))

df_tead3_motif['score_norm'] = (df_tead3_motif['score'] / motif_norm) * 1000
df_motif_sub = df_tead3_motif[['chrom','start','end','name','score_norm']]

with open(os.path.join(output_folder, "tead3_motif.bed"), 'w') as file:
    file.write("track name=tead3_motifs useScore=1\n")
    df_motif_sub.to_csv(file, sep="\t", index=False, header=False)

TEAD3
Mean: -0.1254169333333333
SD: 0.6031569793173724
Norm: 1.6840540046187842


In [18]:
df_tead4_motif = pd.read_csv(tead4_motif_path, sep='\t', header=None, names=['chrom','start','end','name','score','chrom_2','start_2','end_2','name_2','score_2','chrom_3','start_3','end_3','name_3','score_3','strand_3','overlap'])

motif_mean = df_tead4_motif['score'].mean()
motif_sd = df_tead4_motif['score'].std()
motif_norm = motif_mean + (motif_sd * 3)

print('TEAD4')
print('Mean: ' + str(motif_mean))
print('SD: ' + str(motif_sd))
print('Norm: ' + str(motif_norm))

df_tead4_motif['score_norm'] = (df_tead4_motif['score'] / motif_norm) * 1000
df_motif_sub = df_tead4_motif[['chrom','start','end','name','score_norm']]

with open(os.path.join(output_folder, "tead4_motif.bed"), 'w') as file:
    file.write("track name=tead4_motifs useScore=1\n")
    df_motif_sub.to_csv(file, sep="\t", index=False, header=False)

TEAD4
Mean: -0.09246995
SD: 0.60539008272585
Norm: 1.7237002981775502
