<a href="https://colab.research.google.com/github/kalyani234/Drug_Project/blob/main/NLP_Process.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install spaCy into the notebook environment (shell escape, runs outside the kernel).
!pip install spacy
# Download the small English pipeline that is later loaded with
# spacy.load('en_core_web_sm'). The download output advises restarting the
# runtime afterwards so the newly installed package can be loaded.
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m54.1 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [3]:
import spacy
import pandas as pd

# Bring up the small English spaCy pipeline used to parse each description
nlp = spacy.load('en_core_web_sm')

# Read the two spreadsheets: the known drug names and the interaction texts
drug_list_df = pd.read_excel("/content/drugbank_names.xlsx")
drug_interactions_df = pd.read_excel("/content/drug_interactions.xlsx")

# Flatten the 'drugname' column into a plain Python list
druglist = drug_list_df['drugname'].tolist()

# Keep only the description text, exposed under the shorter column name
# 'interaction' (selection + rename yields a new frame, the source is untouched)
df_interaction = drug_interactions_df[['Interaction_Description']].rename(
    columns={'Interaction_Description': 'interaction'}
)

# Define NLP processing function using SpaCy
def NLPProcess(druglist, df_interaction, nlp_model=None):
    """Extract mechanism, action and up to two drug names from each interaction text.

    Every row of ``df_interaction['interaction']`` is parsed and scanned once:

    * ``action``    - lemma of the token whose dependency label is ``'ROOT'``
      (if several tokens carry that label, the last one wins).
    * ``drugA`` / ``drugB`` - the first and second tokens whose text appears
      in ``druglist``.
    * ``mechanism`` - text of the first direct child of the root whose
      dependency label is ``'dobj'`` or ``'nsubjpass'``.

    Args:
        druglist: Iterable of known drug names (matched against single tokens).
        df_interaction: DataFrame with an ``'interaction'`` column of texts.
            Rows are processed by position, so any index is accepted.
        nlp_model: Optional callable turning a text into an iterable of tokens
            exposing ``i``, ``text``, ``dep_``, ``lemma_`` and ``head``.
            Defaults to the notebook-level ``nlp`` pipeline.

    Returns:
        Tuple ``(mechanism, action, drugA, drugB)`` of four lists, each with
        one string per input row; fields that could not be extracted are ``''``.

    Rows whose processing raises are reported with ``print`` and keep whatever
    fields were filled before the failure (best-effort, never aborts the run).
    """
    # Fall back to the module-level pipeline only when no parser is injected.
    parser = nlp if nlp_model is None else nlp_model

    # Set membership keeps the per-token drug lookup O(1) instead of
    # scanning the whole name list for every token.
    known_drugs = set(druglist)

    # Iterate by position: label-based access (`series[i]`) breaks on frames
    # whose index is not 0..n-1, e.g. after filtering.
    texts = df_interaction['interaction'].tolist()

    # Pre-fill so every output has exactly one slot per input row.
    n_rows = len(texts)
    mechanism = [''] * n_rows
    action = [''] * n_rows
    drugA = [''] * n_rows
    drugB = [''] * n_rows

    for i, text in enumerate(texts):
        try:
            tokens = list(parser(text))

            root = None        # reset per row so a previous row's root never leaks in
            children = {}      # head index -> dependent tokens, in document order
            drugs_found = 0

            for token in tokens:
                if token.dep_ == 'ROOT':
                    root = token.i
                    action[i] = token.lemma_
                if drugs_found < 2 and token.text in known_drugs:
                    # First hit is drug A, second hit is drug B, later hits are ignored.
                    if drugs_found == 0:
                        drugA[i] = token.text
                    else:
                        drugB[i] = token.text
                    drugs_found += 1
                children.setdefault(token.head.i, []).append(token)

            if root is None:
                # Nothing was labelled ROOT (e.g. empty text): leave mechanism blank.
                continue

            # Only the first matching direct child of the root is used.
            for child in children.get(root, []):
                if child.dep_ in ('dobj', 'nsubjpass'):  # object / passive subject
                    mechanism[i] = child.text
                    break

            # Handle special case corrections.
            # NOTE(review): mechanism is a single token's text here, so these
            # multi-word phrases look unreachable - confirm whether the whole
            # dependent phrase was meant to be collected.
            if mechanism[i] == "the fluid retaining activities":
                mechanism[i] = "the fluid"
            if mechanism[i] == "atrioventricular blocking ( AV block )":
                mechanism[i] = 'the atrioventricular blocking ( AV block ) activities increase'

        except Exception as e:
            # Deliberate best-effort: report the row and carry on with the next one.
            print(f"Error processing interaction {i}: {e}")

    return mechanism, action, drugA, drugB

# Parse every interaction description into its four extracted fields
mechanism, action, drugA, drugB = NLPProcess(druglist, df_interaction)

# Put the original text next to the extracted fields, one row per interaction
result_df = (
    df_interaction[['interaction']]
    .rename(columns={'interaction': 'Interaction_Description'})
    .assign(
        Mechanism=mechanism,
        Action=action,
        DrugA=drugA,
        DrugB=drugB,
    )
)

# Persist the table as an Excel workbook (row index left out)
result_df.to_excel("/content/drug_interaction_analysis_results.xlsx", index=False)

# Show the first few rows as a quick sanity check
print(result_df.head())



                             Interaction_Description   Mechanism    Action  \
0  (1,2,6,7-3H)Testosterone may increase the anti...  activities  increase   
1  The risk or severity of bleeding can be increa...        risk  increase   
2  The risk or severity of bleeding can be increa...        risk  increase   
3  1-Testosterone may increase the anticoagulant ...  activities  increase   
4  18-methyl-19-nortestosterone may increase the ...  activities  increase   

          DrugA      DrugB  
0     Lepirudin             
1     Lepirudin             
2     Lepirudin             
3  Testosterone  Lepirudin  
4     Lepirudin             
