# In [3]:
import pandas as pd

# Summarise a BED-like table: for every chromosome, find the longest interval
# (end - start) and write one row per chromosome, spanning 0..max_length, to a
# new tab-separated file.


def _chrom_sort_key(chrom_index):
    """Return integer ranks that put chromosome names in natural order.

    Numbered chromosomes come first in numeric order ('chr2' before 'chr10'),
    then X, Y and M/MT, then any remaining names in alphabetical order. An
    optional leading 'chr' is ignored. Unlike a plain ``astype(int)`` on the
    stripped names, this does not raise on non-numeric chromosomes.

    Args:
        chrom_index: the pandas Index of chromosome names handed over by
            ``sort_index(key=...)``.

    Returns:
        A pandas Index of the same length holding one integer rank per name.
    """
    special_order = {'X': 0, 'Y': 1, 'M': 2, 'MT': 2}

    def rank(name):
        label = name[3:] if name.startswith('chr') else name
        if label.isdecimal():
            return (0, int(label), '')
        # Non-numeric names sort after every numbered chromosome.
        return (1, special_order.get(label.upper(), len(special_order)), label)

    keys = [rank(str(name)) for name in chrom_index]
    # Collapse the tuple keys into plain integers so the result is a flat Index.
    position = {key: i for i, key in enumerate(sorted(set(keys)))}
    return pd.Index([position[key] for key in keys])


# Load the BED file (tab-separated, no header row; four columns are named here)
bed_file_path = './amp.bed'
bed_data = pd.read_csv(bed_file_path, sep='\t', header=None, names=['chrom', 'start', 'end', 'q_value'])

# Coerce 'start' and 'end' to numbers; values that cannot be parsed become NaN
bed_data['start'] = pd.to_numeric(bed_data['start'], errors='coerce')
bed_data['end'] = pd.to_numeric(bed_data['end'], errors='coerce')

# Drop any rows that couldn't be converted to numeric values
bed_data = bed_data.dropna(subset=['start', 'end'])

# Calculate the maximum of end - start for each chromosome.
# The subtraction is vectorised and grouped directly, which avoids a Python-level
# lambda per group and still yields a Series when no rows are left.
interval_length = bed_data['end'] - bed_data['start']
max_length_per_chrom = interval_length.groupby(bed_data['chrom']).max()

# Sort chromosomes in natural order (1, 2, ..., 10, ..., X, Y, M, others)
max_length_per_chrom_sorted = max_length_per_chrom.sort_index(key=_chrom_sort_key)

# Bare expression: shows the sorted Series only when it is the last line of a notebook cell
max_length_per_chrom_sorted

# One output row per chromosome: chrom, 0, longest interval length, chrom.
# The column labels are for readability only; they are not written (header=False below).
new_bed_data = pd.DataFrame({
    '#chrom': max_length_per_chrom_sorted.index,
    'chromStart': 0,
    'chromEnd': max_length_per_chrom_sorted.values.astype(int),
    'name': max_length_per_chrom_sorted.index
})

# Save the data to a new BED file
new_bed_file_path = './dist_amp.bed'
new_bed_data.to_csv(new_bed_file_path, sep='\t', index=False, header=False)

new_bed_file_path



'./dist_amp.bed'