# In [None]:
import xml.etree.ElementTree as ET

def read_drugbank_ids_from_file(file_path):
    """Read DrugBank IDs from a text file containing one ID per line.

    Surrounding whitespace is removed from every line, and lines that are
    empty after stripping are skipped so that blank or trailing lines do not
    end up as empty-string IDs.

    Args:
        file_path: Path to the text file to read.

    Returns:
        A list of non-empty ID strings, in file order.
    """
    # "utf-8-sig" transparently drops a leading byte-order mark (commonly
    # added by Windows editors); otherwise it would stay glued to the first
    # ID, which would then never match anything.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [drugbank_id for drugbank_id in stripped_lines if drugbank_id]

def extract_text_from_element(element):
    """Return the stripped text of an XML element, or "" if there is none.

    Args:
        element: An element object exposing a ``text`` attribute, or None.

    Returns:
        ``element.text`` without leading/trailing whitespace; the empty
        string when ``element`` is None or its ``text`` is None.
    """
    # Compare against None explicitly rather than relying on truthiness.
    if element is None:
        return ""
    raw_text = element.text
    if raw_text is None:
        return ""
    return raw_text.strip()

def truncate_text(text, max_length):
    """Limit ``text`` to at most ``max_length`` leading characters.

    Args:
        text: The value to shorten.
        max_length: Maximum number of characters to keep.

    Returns:
        ``text`` unchanged when it already fits, otherwise its first
        ``max_length`` characters.
    """
    if len(text) <= max_length:
        return text
    return text[:max_length]

def extract_drug_info(xml_file, drugbank_ids, max_description_length=75):
    """Collect basic information for selected drugs from a DrugBank XML file.

    The whole document is parsed, then every ``drug`` element whose primary
    ``drugbank-id`` is one of ``drugbank_ids`` contributes one record.

    Args:
        xml_file: Path (or file object) of the XML document to parse.
        drugbank_ids: Iterable of DrugBank ID strings to extract.
        max_description_length: Maximum number of characters kept from each
            drug's description.

    Returns:
        A list of dicts, in document order, with the keys ``"DrugBankID"``,
        ``"Name"``, ``"Mass"``, ``"SMILES"`` and ``"Description"``. A field
        that is missing or empty in the XML is returned as ``""``.
    """
    ns = "{http://www.drugbank.ca}"
    # Build the lookup set once so the per-drug membership test is O(1)
    # instead of scanning a list for every drug in the document.
    wanted_ids = frozenset(drugbank_ids)

    root = ET.parse(xml_file).getroot()
    drugs_info = []

    for drug in root.findall(f".//{ns}drug"):
        # Fields are read from the drug's *direct* children so that
        # same-named elements nested deeper in the record are never picked
        # up by mistake when the direct child is absent.
        drugbank_id = extract_text_from_element(
            drug.find(f"{ns}drugbank-id[@primary='true']")
        )
        # Entries without a (non-empty) primary ID cannot have been requested;
        # this also covers nested <drug> references lacking a primary ID.
        if not drugbank_id or drugbank_id not in wanted_ids:
            continue

        name = extract_text_from_element(drug.find(f"{ns}name"))
        mass = extract_text_from_element(drug.find(f"{ns}average-mass"))
        description = truncate_text(
            extract_text_from_element(drug.find(f"{ns}description")),
            max_description_length,
        )

        # The SMILES string is the <value> of the calculated property whose
        # <kind> is "SMILES"; look the property up once and reuse it.
        smiles_property = drug.find(
            f"{ns}calculated-properties/{ns}property[{ns}kind='SMILES']"
        )
        smiles_value = (
            smiles_property.find(f"{ns}value")
            if smiles_property is not None
            else None
        )
        smiles = extract_text_from_element(smiles_value)

        drugs_info.append({
            "DrugBankID": drugbank_id,
            "Name": name,
            "Mass": mass,
            "SMILES": smiles,
            "Description": description,
        })

    return drugs_info

def save_to_tsv(data, tsv_file):
    """Write drug records to a tab-separated file with a header row.

    Args:
        data: Iterable of mappings providing the keys ``"DrugBankID"``,
            ``"Name"``, ``"Mass"``, ``"SMILES"`` and ``"Description"``.
        tsv_file: Path of the file to create or overwrite (UTF-8 encoded).

    Raises:
        KeyError: If an entry lacks one of the expected keys.
    """
    columns = ("DrugBankID", "Name", "Mass", "SMILES", "Description")
    # Plain TSV has no quoting, so a tab or line break inside a value would
    # shift columns or split the row; replace each one with a space.
    separators_to_spaces = str.maketrans({"\t": " ", "\r": " ", "\n": " "})

    # newline='' keeps the "\n" row terminators exactly as written.
    with open(tsv_file, 'w', newline='', encoding='utf-8') as tsvfile:
        tsvfile.write("\t".join(columns) + "\n")
        for entry in data:
            fields = [
                str(entry[column]).translate(separators_to_spaces)
                for column in columns
            ]
            tsvfile.write("\t".join(fields) + "\n")

# Path of the DrugBank XML file to read; fixed here rather than prompted for.
xml_file_path = "drugbank_short.xml"

# Only prompt and touch the filesystem when run as a script (or notebook
# cell), so the functions above can be imported without side effects.
if __name__ == "__main__":
    # Ask for the text file listing the DrugBank IDs (one per line);
    # surrounding whitespace from typing or pasting is discarded.
    drugbank_ids_file_path = input("Enter the path to the input TXT file: ").strip()

    # Ask where the resulting TSV file should be written.
    tsv_file_path = input("Enter the path to the output TSV file: ").strip()

    # Read DrugBank IDs from the file
    drugbank_ids_to_extract = read_drugbank_ids_from_file(drugbank_ids_file_path)

    # Extract drug information for the specified IDs
    drugs_info_list = extract_drug_info(xml_file_path, drugbank_ids_to_extract)

    # Save the information to a TSV file
    save_to_tsv(drugs_info_list, tsv_file_path)

    print(f"Data saved to {tsv_file_path}")