<a href="https://colab.research.google.com/github/kalyani234/Drug_Project/blob/main/drug_interaction_description.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs
import time

# Root of every DrugBank drug URL; a drug ID is appended to it per request.
BASIC_URL = "https://go.drugbank.com/drugs/"

# Spreadsheet listing the drug IDs to process (make sure this path exists).
input_file = '/content/drug_list_full.xlsx'
drug_df = pd.read_excel(input_file)

# Only the "Drug_ID" column is read; flatten it into a plain Python list.
drug_id_column = drug_df['Drug_ID']
drug_ids = drug_id_column.tolist()

def fetch_interactions(drug_id):
    """Fetch and format the interaction records returned for one drug ID.

    Requests ``{BASIC_URL}{drug_id}/drug_interactions.json`` and converts each
    entry of the payload's ``data`` list into a tuple.

    Args:
        drug_id: Identifier appended to ``BASIC_URL`` to build the request URL.

    Returns:
        A list of ``(drug_id, interaction_id, interaction_name,
        interaction_description)`` tuples. The list is empty when the request
        fails, the body is not valid JSON, or the payload holds no records.
        Records that cannot be parsed are reported and skipped; they do not
        discard the remaining records of the same drug.
    """
    interaction_url = f"{BASIC_URL}{drug_id}/drug_interactions.json"
    interactions = []

    # Network / HTTP-status / JSON-decoding failures: report and give up on
    # this drug only, so the caller's loop can continue with the next ID.
    try:
        response = requests.get(interaction_url, timeout=10)  # 10 s timeout
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"An error occurred with {drug_id}: {e}")
        return interactions

    if not isinstance(data, dict):
        print(
            f"An error occurred with {drug_id}: "
            f"unexpected payload type {type(data).__name__}"
        )
        return interactions

    records_total = data.get('recordsTotal', 0)
    print(f"{drug_id}: Total records = {records_total}")

    # NOTE(review): only the records present in this single response are read;
    # 'recordsTotal' is whatever the server reports. Confirm the endpoint does
    # not paginate, otherwise fewer rows than the printed total are collected.
    for position, record in enumerate(data.get('data') or []):
        try:
            # record[0] is an HTML fragment: the link target ends with the
            # interacting drug's ID and the visible text is its name.
            interaction_html = bs(record[0], 'lxml')
            link = interaction_html.find('a')
            interaction_id = link['href'].rstrip('/').split('/')[-1]
            interaction_name = interaction_html.text.strip()
            interaction_description = record[1]
        except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
            # One malformed record must not cost us the rest of the list.
            print(f"An error occurred with {drug_id} (record {position}): {e}")
            continue

        interactions.append(
            (drug_id, interaction_id, interaction_name, interaction_description)
        )

    return interactions

# Collect the interactions of every drug ID into one flat list of tuples.
all_interactions = []
for index, drug_id in enumerate(drug_ids):
    all_interactions.extend(fetch_interactions(drug_id))

    # Fixed throttling: pause 1 second after each request, and 5 seconds
    # instead after every 100th request.
    if (index + 1) % 100 == 0:
        print(f"Processed {index + 1} IDs. Sleeping for 5 seconds...")
        time.sleep(5)
    else:
        time.sleep(1)

# Convert the list of interactions to a DataFrame
columns = ["Drug_ID", "Interaction_ID", "Interaction_Name", "Interaction_Description"]
df = pd.DataFrame(all_interactions, columns=columns)

# An .xlsx worksheet holds at most 1,048,576 rows and one of them is the
# header. Writing a larger frame with to_excel raises, which would only
# surface after the whole scrape has finished, so fall back to CSV (no row
# limit) when the result does not fit.
EXCEL_MAX_DATA_ROWS = 1_048_576 - 1

output_file = "/content/drug_interactions_output.xlsx"
if len(df) > EXCEL_MAX_DATA_ROWS:
    output_file = "/content/drug_interactions_output.csv"
    print(
        f"{len(df)} rows exceed the Excel sheet limit of "
        f"{EXCEL_MAX_DATA_ROWS} data rows; writing CSV instead."
    )
    df.to_csv(output_file, index=False)
else:
    df.to_excel(output_file, index=False)

print(f"Data saved to {output_file}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
DB13656: Total records = 0
DB13657: Total records = 1386
DB13658: Total records = 0
DB13659: Total records = 0
DB13660: Total records = 192
DB13661: Total records = 0
DB13662: Total records = 665
DB13663: Total records = 716
DB13664: Total records = 137
DB13665: Total records = 715
DB13666: Total records = 516
DB13667: Total records = 796
DB13668: Total records = 292
DB13669: Total records = 0
DB13670: Total records = 0
DB13671: Total records = 0
DB13672: Total records = 0
DB13673: Total records = 46
DB13674: Total records = 0
DB13675: Total records = 1035
DB13676: Total records = 714
DB13677: Total records = 39
DB13678: Total records = 516
DB13679: Total records = 705
DB13680: Total records = 0
DB13681: Total records = 3
DB13682: Total records = 796
DB13683: Total records = 1043
DB13684: Total records = 0
DB13685: Total records = 538
DB13686: Total records = 192
DB13687: Total records = 665
DB13688: Total records = 0
DB1