# Word Sense Disambiguation


In [1]:
import pandas as pd
import math
from collections import defaultdict

In [9]:
df=pd.read_csv('/content/Bank - Sheet1.csv')
#df.columns=df.columns.str.strip()
df.head()

Unnamed: 0.1,Unnamed: 0,Sentences for Training,"Sense of the word ""Bank"""
0,,I deposited my paycheck at the bank.,Financial Institution
1,,The bank of the river was eroded by the heavy ...,River Border
2,,She works at the bank as a teller.,Financial Institution
3,,Let's have a picnic by the bank of the river.,River Border
4,,The bank approved my loan application.,Financial Institution


In [10]:
df=df.dropna(subset=['Sense of the word "Bank"'])
test_df=df[df['Sense of the word "Bank"']=='?']
df=df[df['Sense of the word "Bank"']!='?']

In [11]:
test_df

Unnamed: 0.1,Unnamed: 0,Sentences for Training,"Sense of the word ""Bank"""
94,,The children built a dam on the bank of the ri...,?
95,,We need to withdraw some cash from the bank fo...,?
96,,I need to update my contact information with t...,?
97,,The bank provides online banking services for ...,?
98,,The beavers constructed a dam along the bank o...,?
99,,I need to check my transaction history at the ...,?
100,,She works as a financial consultant at the bank.,?


In [22]:
training_data=list(zip(df['Sentences for Training '],df['Sense of the word "Bank"']))
word_counts={
    'Financial Institution':defaultdict(int),
    'River Border':defaultdict(int)
}
sense_counts=defaultdict(int)
total_words=0

In [33]:
for sentence, sense in training_data:
    words = sentence.lower().split()
    sense_counts[sense] += 1
    for word in words:
        word_counts[sense][word] += 1
        total_words += 1

In [34]:
for sentence,sense in training_data:
  words=sentence.lower().split()
  sense_counts[sense]+=1
  for word in words:
    word_counts[sense][word]+=1
    total_words+=1
word_counts

{'Financial Institution': defaultdict(int,
             {' ': 1209,
              'I': 51,
              'd': 180,
              'e': 750,
              'p': 108,
              'o': 354,
              's': 330,
              'i': 408,
              't': 549,
              'm': 120,
              'y': 75,
              'a': 627,
              'c': 249,
              'h': 273,
              'k': 192,
              'b': 168,
              'n': 510,
              '.': 135,
              'S': 18,
              'w': 60,
              'r': 324,
              'l': 108,
              'T': 63,
              'v': 75,
              'W': 9,
              'f': 132,
              'u': 102,
              '9': 9,
              '5': 9,
              'g': 75,
              'H': 3,
              'A': 6,
              'M': 6,
              "'": 21,
              'q': 9,
              '(': 6,
              'F': 6,
              ')': 6,
              'x': 6,
              '-': 3,
              'B': 0,
      

In [35]:
total_sentences=len(training_data)
prior_probabilities={
    sense:count/total_sentences for sense,count in sense_counts.items()
}
prior_probabilities

{'Financial Institution': 4.354838709677419, 'River Border': 4.645161290322581}

In [36]:
vocabulary=set(word for sense in word_counts for word in word_counts[sense])
vocab_size=len(vocabulary)

conditional_probabilities={
    sense:{word:(word_counts[sense][word]+1)/(sum(word_counts[sense].values())+vocab_size) for word in vocabulary} for sense in word_counts
}
conditional_probabilities

{'Financial Institution': {'W': 0.000994233446013124,
  'painted': 9.942334460131238e-05,
  'p': 0.01083714456154305,
  '(river': 9.942334460131238e-05,
  'erosion.': 9.942334460131238e-05,
  'password.': 0.0006959634122091867,
  'flooding.': 9.942334460131238e-05,
  'drink': 9.942334460131238e-05,
  'cards': 0.0006959634122091867,
  'navigated': 9.942334460131238e-05,
  'raced': 9.942334460131238e-05,
  'blanket': 9.942334460131238e-05,
  'atm': 0.001292503479817061,
  'the': 0.030522966792602902,
  'new': 0.001292503479817061,
  'years': 9.942334460131238e-05,
  'dam': 9.942334460131238e-05,
  'frogs': 9.942334460131238e-05,
  'calm': 9.942334460131238e-05,
  "'": 0.0021873135812288725,
  'trees': 9.942334460131238e-05,
  'small': 0.0006959634122091867,
  'customers.': 0.0006959634122091867,
  'x': 0.0006959634122091867,
  'prevented': 0.0006959634122091867,
  'located': 0.001292503479817061,
  'playfully': 9.942334460131238e-05,
  'softly': 9.942334460131238e-05,
  'interest': 0.001

In [38]:
def classify_sense(sentence):
  words=sentence.lower().split()
  scores={}
  for sense in prior_probabilities:
    score=math.log(prior_probabilities[sense])
    for word in words:
      if word in conditional_probabilities[sense]:
        score += math.log(conditional_probabilities[sense][word])
      else:
        score+=1/(sum(word_counts[sense].values())+vocab_size)

    scores[sense]=score
  return max(scores,key=scores.get)

In [39]:
test_sentences=test_df['Sentences for Training ']

In [47]:
result={}
for sentence in test_sentences:
  result[sentence]=classify_sense(sentence)

for x,y in result.items():
  test_df['Sense of the word "Bank"']=y

test_df

Unnamed: 0.1,Unnamed: 0,Sentences for Training,"Sense of the word ""Bank"""
94,,The children built a dam on the bank of the ri...,Financial Institution
95,,We need to withdraw some cash from the bank fo...,Financial Institution
96,,I need to update my contact information with t...,Financial Institution
97,,The bank provides online banking services for ...,Financial Institution
98,,The beavers constructed a dam along the bank o...,Financial Institution
99,,I need to check my transaction history at the ...,Financial Institution
100,,She works as a financial consultant at the bank.,Financial Institution


In [49]:
test_sentences = [
    "The children built a dam on the bank of the river using rocks and sticks.",
    "We need to withdraw some cash from the bank for groceries.",
    "I need to update my contact information with the bank.",
    "The bank provides online banking services for convenience.",
    "The beavers constructed a dam along the bank of the river.",
    "I need to check my transaction history at the bank.",
    "she works as a financial consultant at the bank."
]

results={}
for sentence in test_sentences:
    results[sentence]=classify_sense(sentence)
print(f"Sentence\t\t\t\t\t\t\t\t\t\tClassified")
for sentence, result in results.items():
    print(f"{sentence}\t\t{result}\n")

Sentence										Classified
The children built a dam on the bank of the river using rocks and sticks.		River Border

We need to withdraw some cash from the bank for groceries.		Financial Institution

I need to update my contact information with the bank.		Financial Institution

The bank provides online banking services for convenience.		Financial Institution

The beavers constructed a dam along the bank of the river.		River Border

I need to check my transaction history at the bank.		Financial Institution

she works as a financial consultant at the bank.		Financial Institution

