In [8]:
# Import libraries from NLTK to Tokenize a sentence
import nltk 
from nltk.tokenize import word_tokenize

# Import libraries and punctuation data
import re
from string import punctuation

# Import libraries for Lemmatization
from nltk.stem import WordNetLemmatizer

# POS Tagging
from nltk.tag import pos_tag
import stanza
from pattern.text.en import singularize
from SPARQLWrapper import SPARQLWrapper2
from tabulate import tabulate

In [2]:
# Method to normalise

def text_normalization(text):
    """Strip ASCII punctuation from ``text`` and collapse whitespace runs.

    Every character listed in ``string.punctuation`` is deleted (not replaced
    by a space), then the remaining words are re-joined with single spaces,
    which also trims leading and trailing whitespace.
    """
    punctuation_class = re.compile("[" + re.escape(punctuation) + "]")
    without_punctuation = punctuation_class.sub("", text)
    return " ".join(without_punctuation.split())

# Method to lemmatize

def text_lemmatization(processed_text):
    """Lemmatize every token of ``processed_text`` as a verb.

    The text is split with NLTK's ``word_tokenize``; each token is passed to
    ``WordNetLemmatizer.lemmatize`` with part-of-speech ``'v'`` and the
    results are joined back into one space-separated string.
    """
    lemmatizer = WordNetLemmatizer()
    return " ".join(
        lemmatizer.lemmatize(token, 'v')
        for token in word_tokenize(processed_text)
    )

# Method to process text

def text_processing(text):
    """Run the full preprocessing chain: normalization, then lemmatization."""
    return text_lemmatization(text_normalization(text))

# Method for POS tagging

def pos_tags_creation(sent):
    """Tokenize ``sent`` with NLTK and return its ``(token, POS tag)`` pairs."""
    tokens = nltk.word_tokenize(sent)
    return nltk.pos_tag(tokens)

In [3]:
# Use Stanza to recognise named entities

def fetch_named_entities(text):
    """Return the named entities Stanza finds in ``text``.

    The English pipeline is built on the first call and cached on the
    function object. Building it loads every processor model from disk,
    so rebuilding it on each call makes repeated queries needlessly slow.

    Parameters
    ----------
    text : str
        Raw question text to run through the pipeline.

    Returns
    -------
    The ``entities`` attribute of the processed Stanza document.
    """
    nlp = getattr(fetch_named_entities, "_pipeline", None)
    if nlp is None:
        # DownloadMethod.NONE: use the models already on disk, never download.
        nlp = stanza.Pipeline('en', download_method=stanza.DownloadMethod.NONE)
        fetch_named_entities._pipeline = nlp
    results = nlp(text)
    return results.entities

In [4]:
# Bag of keywords

# Each inner list is one keyword group. The FIRST entry is the canonical name
# returned by property_mappings(); the remaining entries are surface forms
# that map onto it.
properties_list = [['has_genre','has genres', 'with genre','genre'], 
             ['written_by','written by', 'by author', 'author'], 
             ['receive_award', 'receive award', 'received', 'receive', 'awarded', 'award'], 
             ['first_published_country', 'first published country', 'first', 'published', 'country'], 
             ['has_category', 'has category', 'category']]

# Same layout for classes: the first entry of each group is the canonical
# class name returned by classes_mappings(); the rest are accepted synonyms.
# Some words ('author', 'award', 'country', 'category') appear in both tables.
classes_list = [['book', 'books'], 
              ['author', 'authors', 'writer'], 
              ['person', 'people'], 
              ['award', 'awards'], 
              ['country', 'countries'], 
              ['category', 'categories']]

In [5]:
# Mapping the bag of keywords

def property_mappings(word, properties_list):
    """Map a keyword to its canonical property name.

    ``properties_list`` is a list of keyword groups. The first element of
    the first group containing ``word`` is returned; when no group contains
    it, the empty string is returned.
    """
    matching_groups = [group for group in properties_list if word in group]
    if not matching_groups:
        return ""
    return matching_groups[0][0]

def classes_mappings(word, classes_list):
    """Map a keyword to its canonical class name.

    ``classes_list`` is a list of synonym groups. The first element of the
    first group containing ``word`` is returned; when no group contains it,
    the empty string is returned.
    """
    matching_groups = [group for group in classes_list if word in group]
    if not matching_groups:
        return ""
    return matching_groups[0][0]

In [38]:
# Queries

# Natural-language question to translate. Keep exactly one `text` assignment
# active; the cells below read this global.
text = "all books with Thriller genre"
# text = "all books by author Chetan Bhagat"
# text = "all books awarded with Goodreads Choice Awards"
# text = "all books with published country as India"
# text = "all books Top Rated category"

In [39]:
# Normalise and lemmatise the question, then POS-tag the resulting tokens.
# `tags` is consumed by the entity-extraction cell that follows.
processed_text = text_processing(text)
tags = pos_tags_creation(processed_text)
tags

[('all', 'DT'),
 ('book', 'NN'),
 ('Top', 'NNP'),
 ('Rated', 'NNP'),
 ('category', 'NN')]

In [40]:
# Walk the NLTK chunk tree built from the POS tags. Chunked named-entity
# subtrees are only printed; plain (word, tag) leaves are matched against the
# keyword tables to collect ontology classes and properties.

tagged_chuncks = nltk.ne_chunk(tags)
classes, properties, individuals = [], [], {}

for entity in tagged_chuncks:
    # Subtrees (named-entity chunks) expose .label; leaves are plain tuples.
    if hasattr(entity, "label"):
        print("check entity: ",entity)
        continue

    word, tag = entity[0], entity[1]

    if tag == "NNS":
        # Plural noun: can only name a class, looked up by its singular form.
        mapped_class = classes_mappings(singularize(word), classes_list)
        if mapped_class != "":
            classes.append(mapped_class.capitalize())
    elif tag == "VBP":
        # Verb: can only name a property.
        mapped_property = property_mappings(word, properties_list)
        if mapped_property != "":
            properties.append(mapped_property)
    elif tag == "NN":
        # Singular noun: a property match takes precedence over a class match.
        mapped_property = property_mappings(word, properties_list)
        mapped_class = classes_mappings(singularize(word), classes_list)
        if mapped_property != "":
            properties.append(mapped_property)
        elif mapped_class != "":
            classes.append(mapped_class.capitalize())

# CREATE INDIVIDUALS
# Stanza runs on the raw question (`text`), not on the normalised text.
named_entities = fetch_named_entities(text)

for e in named_entities:
    print(e)
    mapped_ind_class = classes_mappings(singularize(e.type.lower()), classes_list)
    # Entity types with no matching class are kept under a sentinel value.
    individuals[e.text] = (
        mapped_ind_class.capitalize() if mapped_ind_class != "" else "not_mapped"
    )

print("Classes: ",classes)
print("Properties: ",properties)
print("Individuals: ",individuals)

2023-11-26 20:09:12 INFO: Loading these models for language: en (English):
| Processor    | Package             |
--------------------------------------
| tokenize     | combined            |
| pos          | combined_charlm     |
| lemma        | combined_nocharlm   |
| constituency | ptb3-revised_charlm |
| depparse     | combined_charlm     |
| sentiment    | sstplus             |
| ner          | ontonotes_charlm    |

2023-11-26 20:09:12 INFO: Using device: cpu
2023-11-26 20:09:12 INFO: Loading: tokenize
2023-11-26 20:09:12 INFO: Loading: pos


check entity:  (PERSON Top/NNP Rated/NNP)


2023-11-26 20:09:13 INFO: Loading: lemma
2023-11-26 20:09:13 INFO: Loading: constituency
2023-11-26 20:09:13 INFO: Loading: depparse
2023-11-26 20:09:14 INFO: Loading: sentiment
2023-11-26 20:09:15 INFO: Loading: ner
2023-11-26 20:09:16 INFO: Done loading processors!


{
  "text": "Top Rated",
  "type": "WORK_OF_ART",
  "start_char": 10,
  "end_char": 19
}
Classes:  ['Book']
Properties:  ['has_category']
Individuals:  {'Top Rated': 'not_mapped'}


In [41]:
def _sparql_string_literal(value):
    """Render ``value`` as a single-quoted SPARQL string literal.

    Backslashes, single quotes and control whitespace are escaped so that
    text taken from the user's question (e.g. a name containing an
    apostrophe) cannot end the literal early and break or alter the query.
    """
    escaped = (
        value.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
        .replace("\t", "\\t")
    )
    return "'" + escaped + "'"


sparql = SPARQLWrapper2("http://localhost:3030/BookDiscoveryAppQueries/query")
c_triple = ""
p_triple = ""
i_triple = ""

# Only the first detected class, property and individual are used.

# Class pattern: ?y must be an instance of the detected class. When an
# individual was found too, ?x is linked to ?y through a property path.
if len(classes)>0:
    c_triple = "{?y a book:"+classes[0]+"}"
    if len(individuals)>0:
        c_triple = c_triple + "{?x (book:|!book:)|^(book:|!book:)* ?y}"

# Property pattern: ?x and ?y are connected by the property in either direction.
if len(properties)>0:
    p_triple = "{?x book:"+properties[0]+"|^book:"+properties[0]+" ?y}"

# Individual pattern: ?x is the resource whose book:name equals the entity text.
if len(individuals)>0:
    first_key = next(iter(individuals))
    # The entity text comes from the user's question, so it is escaped
    # before being placed inside the query string.
    name_filter = "FILTER(?name="+_sparql_string_literal(first_key)+")"
    if individuals[first_key]=="not_mapped":
        if len(properties)==0:
            i_triple = "{?x a ?y} {?y book:name ?bname} {?x book:name ?name} "+name_filter
        else:
            i_triple = "{?y book:name ?bname} {?x book:name ?name} "+name_filter
    else:
        i_triple = "{?x a book:"+individuals[first_key]+"} {?y book:name ?bname} {?x book:name ?name} "+name_filter

query_start = """
                PREFIX book:<http://www.book-discovery.com/ontologies#>
                SELECT *
                WHERE{
                """
query_end = "}"

query = query_start+c_triple+p_triple+i_triple+query_end
print(query)
sparql.setQuery(query)
results = sparql.query().bindings


                PREFIX book:<http://www.book-discovery.com/ontologies#>
                SELECT *
                WHERE{
                {?y a book:Book}{?x (book:|!book:)|^(book:|!book:)* ?y}{?x book:has_category|^book:has_category ?y}{?y book:name ?bname} {?x book:name ?name} FILTER(?name='Top Rated')}


In [42]:
def get_query_results(table_columns, results):
    """Build a header-first table from SPARQL result bindings.

    The returned list starts with ``table_columns`` itself, followed by one
    ``[bname, name, y]`` row per binding, each taken from the ``.value`` of
    the corresponding bound variable.
    """
    rows = [
        [binding["bname"].value, binding["name"].value, binding["y"].value]
        for binding in results
    ]
    return [table_columns] + rows

In [19]:
# Query1: All books with Thriller genre

# `results` holds the bindings assigned by the SPARQL cell; run that cell for
# this question first. Columns map to ?bname, ?name and ?y respectively.
table_columns = ['Book', 'Genre', 'Book URI']
table_values = get_query_results(table_columns, results)
print(tabulate(table_values, headers="firstrow"))

Book          Genre     Book URI
------------  --------  -----------------------------------------------------
Life Of Pi    Thriller  http://www.book-discovery.com/ontologies#life_of_pi
Sacred Games  Thriller  http://www.book-discovery.com/ontologies#sacred_games
400 days      Thriller  http://www.book-discovery.com/ontologies#400_days


In [25]:
# Query2: All books by author Chetan Bhagat

# `results` holds the bindings assigned by the SPARQL cell; run that cell for
# this question first. Columns map to ?bname, ?name and ?y respectively.
table_columns = ['Book', 'Author', 'Book URI']
table_values = get_query_results(table_columns, results)
print(tabulate(table_values, headers="firstrow"))

Book             Author         Book URI
---------------  -------------  --------------------------------------------------------
Revolution 2020  Chetan Bhagat  http://www.book-discovery.com/ontologies#revolution_2020
400 days         Chetan Bhagat  http://www.book-discovery.com/ontologies#400_days


In [31]:
# Query3: All books awarded with Goodreads Choice Awards

# `results` holds the bindings assigned by the SPARQL cell; run that cell for
# this question first. The second column is ?name, which for this question is
# the award name, so it is labelled 'Award' rather than 'Author'.
table_columns = ['Book', 'Award', 'Book URI']
table_values = get_query_results(table_columns, results)
print(tabulate(table_values, headers="firstrow"))

Book             Author                   Book URI
---------------  -----------------------  --------------------------------------------------------
A Promised Land  Goodreads Choice Awards  http://www.book-discovery.com/ontologies#a_promised_land
Revolution 2020  Goodreads Choice Awards  http://www.book-discovery.com/ontologies#revolution_2020
400 days         Goodreads Choice Awards  http://www.book-discovery.com/ontologies#400_days


In [37]:
# Query4: All books with published country as India

# `results` holds the bindings assigned by the SPARQL cell; run that cell for
# this question first. The second column is ?name, which for this question is
# the country name, so it is labelled 'Country' rather than 'Author'.
table_columns = ['Book', 'Country', 'Book URI']
table_values = get_query_results(table_columns, results)
print(tabulate(table_values, headers="firstrow"))

Book             Author    Book URI
---------------  --------  --------------------------------------------------------
Revolution 2020  India     http://www.book-discovery.com/ontologies#revolution_2020
Sacred Games     India     http://www.book-discovery.com/ontologies#sacred_games
400 days         India     http://www.book-discovery.com/ontologies#400_days


In [43]:
# Query5: All books Top Rated category

# `results` holds the bindings assigned by the SPARQL cell; run that cell for
# this question first. The second column is ?name, which for this question is
# the category name, so it is labelled 'Category' rather than 'Author'.
table_columns = ['Book', 'Category', 'Book URI']
table_values = get_query_results(table_columns, results)
print(tabulate(table_values, headers="firstrow"))

Book             Author     Book URI
---------------  ---------  --------------------------------------------------------
Harry Potter     Top Rated  http://www.book-discovery.com/ontologies#harry_potter
Revolution 2020  Top Rated  http://www.book-discovery.com/ontologies#revolution_2020
400 days         Top Rated  http://www.book-discovery.com/ontologies#400_days
