### Step 1: Download BigWig Files

In [None]:
# Download the E116 fold-change signal tracks (one bigWig per feature).
# They are saved into data/input/epigenomic_feature/ because that is the
# directory the processing step below opens them from; -nc skips any file
# that has already been downloaded, so re-running this cell is cheap.
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-DNase.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H2A.Z.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H3K4me1.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H3K4me2.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H3K4me3.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H3K9ac.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H3K9me3.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H3K27ac.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H3K27me3.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H3K36me3.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H3K79me2.fc.signal.bigwig
!wget -nc -P ./data/input/epigenomic_feature/ https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/foldChange/E116-H4K20me1.fc.signal.bigwig

### Step 2: Process BigWig Files to Generate Epigenomic Signals

In [3]:
import os
import pyBigWig
import pandas as pd
import numpy as np
from tqdm import tqdm


# bigWig signal tracks to summarise, one file per epigenomic feature.
# The order of this list fixes the order of the last axis of the saved array.
input_file_list = ['data/input/epigenomic_feature/E116-DNase.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H3K36me3.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H3K27me3.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H3K9me3.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H4K20me1.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H2A.Z.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H3K27ac.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H3K4me1.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H3K4me2.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H3K4me3.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H3K79me2.fc.signal.bigwig',
                   'data/input/epigenomic_feature/E116-H3K9ac.fc.signal.bigwig']

# Table of bins to process. Every row must unpack into exactly (chrom, start).
# NOTE(review): read_csv runs with its defaults here (comma separator, first
# line treated as the header) -- confirm that matches the layout of this file.
bins_file = 'data/input/epigenomic_feature/GM12878_100000_bins.txt'
bins = pd.read_csv(bins_file)
bins = np.array(bins)

# Resolution settings, kept in one place because they depend on each other.
BIN_SIZE = 100000                    # positions read per bin
WINDOW_SIZE = 1000                   # positions averaged into one output value
N_WINDOWS = BIN_SIZE // WINDOW_SIZE  # output values per bin (100)
assert BIN_SIZE % WINDOW_SIZE == 0, "BIN_SIZE must be a multiple of WINDOW_SIZE"

# Filled as (n_files, n_bins, N_WINDOWS); transposed once after the loop.
results = np.zeros((len(input_file_list), len(bins), N_WINDOWS), dtype=np.float32)

for i, file_path in enumerate(tqdm(input_file_list)):
    bw = pyBigWig.open(file_path)
    try:
        for j, (chrom, start) in enumerate(bins):
            # Clip the bin at the chromosome end so the query stays in range.
            end = min(start + BIN_SIZE, bw.chroms(chrom))
            # One value per position; NaN entries (pyBigWig reports positions
            # without data as NaN) are turned into 0.
            values = np.nan_to_num(bw.values(chrom, start, end))
            # A bin clipped at the chromosome end is shorter than BIN_SIZE:
            # zero-pad on the right so every bin has the same length.
            values = np.pad(values, (0, BIN_SIZE - len(values)), 'constant', constant_values=0)
            # Mean of each consecutive WINDOW_SIZE-long window, computed in a
            # single vectorised step instead of one np.mean call per window.
            windowed_means = values.reshape(N_WINDOWS, WINDOW_SIZE).mean(axis=1)
            results[i, j] = windowed_means
    finally:
        # Release the file handle even if a bin fails part-way through.
        bw.close()

# Reorder the axes from (n_files, n_bins, N_WINDOWS) to (n_bins, N_WINDOWS, n_files).
results = np.transpose(results, (1, 2, 0))

# Save the signal array next to the input files.
np.save('data/input/epigenomic_feature/signals_1kb.npy', results)

100%|██████████| 12/12 [24:10<00:00, 120.90s/it]
