As of 2023-12-12, the default genome assembly served by Ensembl BioMart is GRCh38 (hg38), so I don't have to worry too much about version incompatibility.
If you are using this code in the future, make sure that's still the case.

In [57]:
from biomart import BiomartServer
import pandas as pd
import glob
import os

In [58]:
# Directory holding the DESeq result CSVs; the trailing slash matters because
# file names are appended to this string directly.
basepath = "/home/mcn26/palmer_scratch/joe/data/deseq/"

In [59]:
def fetch_gene_coordinates(ensembl_ids, batch_size=25):
    """Yield raw BioMart result lines describing the given Ensembl gene IDs.

    The IDs are queried against the ``hsapiens_gene_ensembl`` dataset in
    batches of ``batch_size``. Every line of every batch response is yielded
    exactly as produced by ``response.iter_lines()`` (not decoded, not split).
    The requested attributes, in order, are: ``ensembl_gene_id``,
    ``external_gene_name``, ``chromosome_name``, ``start_position``,
    ``end_position``, ``strand``.

    This is a generator: nothing (including argument validation) happens
    until the result is iterated.

    Args:
        ensembl_ids: Sliceable sequence of Ensembl gene IDs supporting ``len()``.
        batch_size: Maximum number of IDs sent per BioMart query.

    Yields:
        One response line per iteration, batch after batch.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A zero step makes range() fail with an unrelated-looking message and a
    # negative step silently produces no batches at all, so reject both early.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Nothing to look up: do not open a connection to BioMart at all.
    if len(ensembl_ids) == 0:
        return

    server = BiomartServer("http://www.ensembl.org/biomart")
    db = server.datasets['hsapiens_gene_ensembl']

    for i in range(0, len(ensembl_ids), batch_size):
        batch_ids = ensembl_ids[i:i + batch_size]

        response = db.search({
            'filters': {
                'ensembl_gene_id': batch_ids
            },
            'attributes': [
                'ensembl_gene_id',
                'external_gene_name',
                'chromosome_name',
                'start_position',
                'end_position',
                'strand'
            ]
        })

        yield from response.iter_lines()

In [None]:
def biomart_line_to_bed(line):
    """Convert one BioMart result line into a BED6 line.

    ``line`` is a UTF-8 encoded, tab-separated record whose columns are
    Ensembl gene ID, gene name, chromosome, start, end, strand (the attribute
    order requested by ``fetch_gene_coordinates``).

    Returns the BED line (terminated by a newline), or ``None`` for a blank
    input line. Raises ``ValueError`` if a non-blank line has fewer than six
    columns, so that an error message from the server is not written out as
    if it were a gene.
    """
    text = line.decode("utf-8").rstrip("\r\n")
    if not text.strip():
        return None

    fields = text.split("\t")
    if len(fields) < 6:
        raise ValueError(f"unexpected BioMart response line: {text!r}")

    gene_id, gene_name, chrom, start, end, strand_code = fields[:6]

    # Ensembl coordinates are 1-based and inclusive; BED is 0-based and
    # half-open, so the start shifts down by one while the end is unchanged.
    bed_start = int(start) - 1

    # Some Ensembl genes have no external name; fall back to the Ensembl ID so
    # the BED name column is never empty.
    label = gene_name or gene_id

    score = '0'
    strand = '+' if strand_code.strip() == '1' else '-'

    # NOTE(review): chromosome names are written as returned by Ensembl
    # (presumably without a "chr" prefix) - confirm this matches what the
    # downstream tools expect.
    return f'{chrom}\t{bed_start}\t{end}\t{label}\t{score}\t{strand}\n'


bed_dir = "/home/mcn26/palmer_scratch/joe/data/de_beds/"
os.makedirs(bed_dir, exist_ok=True)

# Sorted so that the processing order is the same on every run.
csv_files = sorted(glob.glob(basepath + "*.csv"))

for currentpath in csv_files:

    fullname = os.path.basename(currentpath)
    name = os.path.splitext(fullname)[0]

    print("processing " + fullname)
    df = pd.read_csv(currentpath)

    # The first column holds the Ensembl gene IDs; drop missing values so they
    # are not sent to BioMart as IDs.
    ids = df.iloc[:, 0].dropna().to_list()

    print("retrieving data from biomart...")
    # Convert everything before opening the output file, so a failed download
    # or a malformed response does not leave a truncated BED file behind.
    bed_lines = []
    for line in fetch_gene_coordinates(ids):
        bed_line = biomart_line_to_bed(line)
        if bed_line is not None:
            bed_lines.append(bed_line)

    output_file = name + ".bed"
    print("writing " + output_file)
    with open(os.path.join(bed_dir, output_file), 'w') as file:
        file.writelines(bed_lines)
        

processingup_in_ko_deseq_ectoderm_condition_wt_vs_alkbh1ko_abs_fc_gt_2_and_p_lt_10e-16.csv
retrieving data from biomart...
writing up_in_ko_deseq_ectoderm_condition_wt_vs_alkbh1ko_abs_fc_gt_2_and_p_lt_10e-16.bed
processingup_in_ko_deseq_esc_condition_wt_vs_alkbh1ko_abs_fc_gt_2_and_p_lt_10e-16.csv
retrieving data from biomart...
writing up_in_ko_deseq_esc_condition_wt_vs_alkbh1ko_abs_fc_gt_2_and_p_lt_10e-16.bed
processingup_in_ko_endoderm_condition_wt_vs_alkbh1ko_abs_fc_gt_2_and_p_lt_10e-16.csv
retrieving data from biomart...
writing up_in_ko_endoderm_condition_wt_vs_alkbh1ko_abs_fc_gt_2_and_p_lt_10e-16.bed
processingup_in_ko_mesoderm_condition_wt_vs_alkbh1ko_abs_fc_gt_2_and_p_lt_10e-16.csv
retrieving data from biomart...
