<a href="https://colab.research.google.com/github/mohammedterry/NLP_Lab/blob/master/Entity_Linking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DBpedia

In [0]:
# Turn the typed name into an underscore-joined title: the first letter of
# every whitespace-separated word is upper-cased, the rest is left untouched.
example_name = '_'.join(word[:1].upper() + word[1:] for word in input().split())
example_name

In [0]:
import requests

# Download the JSON description of the entity from DBpedia's data endpoint.
dbpedia_json = f"http://dbpedia.org/data/{example_name}.json"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status reports HTTP errors directly instead of letting an error
# page surface later as an opaque JSON decoding failure.
dbpedia_response = requests.get(dbpedia_json, timeout=30)
dbpedia_response.raise_for_status()
data = dbpedia_response.json()

In [0]:
# Build a mapping of relation name -> set of entity names from the DBpedia payload.
related_entities = {}

# Pass 1: every top-level resource other than the example itself is filed
# under the short name of each predicate of its that mentions "ontology".
for resource_uri, predicates in data.items():
  subject = resource_uri.split('/')[-1]
  for predicate in predicates:
    if "ontology" not in predicate:
      continue
    relation_name = predicate.split('/ontology/')[-1]
    if subject != example_name:
      related_entities.setdefault(relation_name, set()).add(subject)

# Pass 2: the example's own record lives under its /resource/ URI, which is the
# data URL with '/data/' swapped for '/resource/' and the '.json' suffix dropped.
person = data[dbpedia_json.replace('/data/','/resource/')[:-5]]
for predicate, values in person.items():
  relation_name = predicate.split('/')[-1]
  # Only purely alphabetic predicate names are kept.
  if not relation_name.isalpha():
    continue
  for value in values:
    # Keep the last path segment of the value and drop any 'Category:' prefix.
    label = str(value["value"]).split('/')[-1].split('Category:')[-1]
    related_entities.setdefault(relation_name, set()).add(label)
related_entities

In [0]:
from random import choice
# Print one randomly chosen relation together with its entities, with
# underscores shown as spaces for readability.
relation = choice(list(related_entities))
readable_name = example_name.replace('_',' ')
readable_entities = [entity.replace('_',' ') for entity in related_entities[relation]]
print(f"{readable_name} - {relation} - {readable_entities}")

# Wikipedia

In [0]:
import requests

# Fetch the English Wikipedia article for the example entity.
wikipedia_url = "https://en.wikipedia.org/wiki/" + example_name
# Without a timeout a stalled connection would block the notebook indefinitely.
result = requests.get(wikipedia_url, timeout=30)

In [0]:
from bs4 import BeautifulSoup as bs

# Collect the visible text of every wiki link found in the downloaded page.
soup = bs(result.text, "html.parser")
related_entities = set()
for anchor in soup.find_all("a"):
  target = anchor.get("href")
  if target is None:
    continue
  # Keep only hrefs that contain '/wiki/' and contain neither ':' nor 'Main_Page'.
  if "/wiki/" not in target or ":" in target or "Main_Page" in target:
    continue
  label = anchor.text.strip()
  # Skip empty link texts and the two excluded labels.
  if label and label not in ("read","article"):
    related_entities.add(label)

print(related_entities)

In [0]:
from matplotlib import pyplot as plt
from wordcloud import WordCloud

# Draw the collected entity names as a word cloud on a wide, axis-free figure.
cloud_text = ' '.join(related_entities)
wordcloud = WordCloud(
    width=800,
    height=800,
    background_color='white',
    min_font_size=10,
).generate(cloud_text)

plt.figure(figsize=[20,10])
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")

# Expanding Date Entities

In [0]:
# Read a free-form date phrase from the user.
example_date = input()

In [0]:
from dateutil import parser
# Parse the free-form phrase with dateutil; on failure keep a readable message
# in place of the parsed value.
try:
  datetime = parser.parse(example_date)
except (ValueError, OverflowError):
  # Only the errors dateutil raises for unparseable or out-of-range input are
  # handled; a bare except would also swallow KeyboardInterrupt and real bugs.
  datetime = "unrecognised date!"
datetime

In [0]:
# Building blocks for the ordinal-suffix table used below.
stndrd,th = ["st","nd","rd"], ["th"]

import pandas as pd
dt = pd.to_datetime(datetime)

# Suffix for each day of the month, indexed by day - 1:
# 1st 2nd 3rd, 4th-20th (17 entries), 21st 22nd 23rd, 24th-30th (7 entries), 31st.
day_suffixes = stndrd + th*17 + stndrd + th * 7 + stndrd

# Expand the parsed timestamp into a dictionary of descriptive features.
date_time = {
                    "phrase":example_date,
                    "date": f"{dt.day}/{dt.month}/{dt.year}",
                    "time": f"{dt.hour}:{dt.minute}:{dt.second}",
                    "month":("January","February","March","April","May","June","July","August","September","October","November","December")[dt.month -1],
                    "day":("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday")[dt.dayofweek],
                    "day of year":dt.dayofyear,
                    # The suffix belongs to the day of the month, so the table
                    # must be indexed by the day, not by the month.
                    "suffix":f"{dt.day}{day_suffixes[dt.day - 1]}",
                    "is month end":dt.is_month_end,
                    "is month start":dt.is_month_start,
                    "is quarter end":dt.is_quarter_end,
                    "is quarter start":dt.is_quarter_start,
                    "is year start":dt.is_year_start,
                    "is year end":dt.is_year_end,
                    # pandas numbers weekdays Monday=0 .. Sunday=6, so the
                    # weekend is 5 (Saturday) and 6 (Sunday).
                    "is weekend":dt.dayofweek in (5,6),
    }

date_time

In [0]:
# Rebuild the phrase as "<day with ordinal suffix> <month name>".
example_date = ' '.join((date_time['suffix'], date_time['month']))
example_date

# Google Correlate

In [0]:
from random import choice

# Randomly pick the time frame first and then the country code for the query.
time_frame, country_code = choice(("weekly","monthly")), choice(("uk","us"))

print(time_frame, country_code)

In [0]:
import requests
# Ask Google Correlate for results matching the date phrase.
gcorrelate_url = "https://www.google.com/trends/correlate/search"
# The phrase contains spaces (and may contain other reserved characters), so
# the query string is built by requests rather than by raw interpolation.
# The timeout keeps the cell from hanging on a stalled connection.
result = requests.get(
    gcorrelate_url,
    params={"e": example_date, "t": time_frame, "p": country_code},
    timeout=30,
)

from bs4 import BeautifulSoup as bs
soup = bs(result.text, "html.parser") 

# Collect every non-numeric line of text found inside the result list items.
correlated = set()
for entities in soup.find_all("li",attrs={"class":"result"}):
  for entity in entities:
    entity = entity.text.strip()
    for ent in entity.split('\n'):
      try:
        float(ent)
      except ValueError:
        # Not a number, so keep it (numeric lines are presumably the
        # correlation scores - confirm against the page markup).
        correlated.add(ent)

print(correlated)

In [0]:
from matplotlib import pyplot as plt
from wordcloud import WordCloud

# Draw the correlated terms as a word cloud on a wide, axis-free figure.
cloud_text = ' '.join(correlated)
wordcloud = WordCloud(
    width=800,
    height=800,
    background_color='white',
    min_font_size=10,
).generate(cloud_text)

plt.figure(figsize=[20,10])
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")