# Import REGEX library and punctuation data

In [27]:
import re
from string import punctuation

In [28]:
# PREFIX books: <http://www.book-discovery.com/ontologies#>

#Query1: Thriller books with at least 4 star ratings.
#Query2: Medium or large sized books authored by Chetan Bhagat and published in India.
#Query3: Books that were published after 2010 and have been reviewed as best book ever. 
#Query4: Show books whose writers received an International Author Nomination and have a price less than £60.
#Query5: Books that received National Book Award grouped by their Publisher.

# Query 1: =========================

#SELECT ?ISBN ?Book_Name ?Genre ?Average_Rating ?Author ?Description
#WHERE{
#   {?book a books:Book}
#   {?book books:has_genre ?genre}
#   {?book books:name ?Book_Name}
#   {?book books:has_average_rating ?review}
#   {?review books:average_rating ?Avg_Rating}
#   {?genre books:name ?Genre}
#   FILTER (?Genre = "Thriller" && ?Avg_Rating >= 4)
#   BIND(CONCAT(STR(?Avg_Rating), " stars") AS ?Average_Rating)
#   {?book books:isbn ?ISBN}
#   {?book books:description ?Description}
#   {?book books:written_by ?AuthorObj}
#   {?AuthorObj books:name ?Author}
# }
#ORDER BY ?Book_Name

# Query 2: =========================

#SELECT ?ISBN ?Book_Name ?Country_Name ?Author_Name (?Size_Name AS ?Size) ?Book_Price ?Genre
#WHERE {
#  {?book a books:Book}
#  {?book books:name ?Book_Name}
#  {?book books:first_published_country ?country}
#  {?book books:written_by ?author}
#  {?author books:name ?Author_Name}
#  {?book books:has_size ?size}
#  {?size books:name ?Size_Name}
#  FILTER (?Author_Name = "Chetan Bhagat" && (?Size_Name = "Medium" || ?Size_Name = "Large"))
#  {
#    SELECT *
#    WHERE {
#      {?country books:name ?Country_Name}
#      FILTER (?Country_Name = "India")
#    }
#  }
#  {?book books:has_genre ?genre}
#  {?genre books:name ?Genre}
#  {?book books:isbn ?ISBN}
#  {?book books:price ?Price}
#  BIND(CONCAT("£",STR(?Price)) AS ?Book_Price)
#}

# Query 3: =========================

#SELECT ?ISBN ?Book_Name ?Review ?Publish_Year ?Author_Name ?Average_Rating ?Genre
#WHERE{
#   {?book a books:Book}
#   {?book books:written_by ?author}
#   {?author books:name ?Author_Name}
#   {?book books:name ?Book_Name}
#   {?book books:publish_year ?Publish_Year}
#   {?book books:has_review ?ReviewObj}
#   {?ReviewObj books:name ?Review}
#   FILTER (?Review = "Best Book Ever" && ?Publish_Year>2010)
#   {?book books:has_average_rating ?review}
#   {?review books:average_rating ?Avg_Rating}
#   BIND(CONCAT(STR(?Avg_Rating), " stars") AS ?Average_Rating)
#   {?book books:isbn ?ISBN}
#   {?book books:has_genre ?genreObj}
#   {?genreObj books:name ?Genre}
# }
#ORDER BY ?Publish_Year


# Query 4: =========================

#SELECT ?ISBN ?Book_Name ?Author_Name ?Book_Price ?Nomination ?Genre
#WHERE{
#   {?book a books:Book}
#   {?book books:written_by ?author}
#   {?book books:name ?Book_Name}
#   {?book books:price ?Price}
#   BIND(CONCAT("£",STR(?Price)) AS ?Book_Price)
#   {?author books:name ?Author_Name}
#   {?author books:nominated_for ?nominationObj}
#   {?nominationObj books:name ?Nomination}
#   FILTER (?Nomination = "International Author Nomination" && ?Price<60)
#   {?book books:has_average_rating ?review}
#   {?review books:average_rating ?Avg_Rating}
#   BIND(CONCAT(STR(?Avg_Rating), " stars") AS ?Average_Rating)
#   {?book books:isbn ?ISBN}
#   {?book books:has_genre ?genreObj}
#   {?genreObj books:name ?Genre}
# }
#ORDER BY ?Price


# Query 5: =========================

#SELECT ?Publisher_Name (GROUP_CONCAT(?ISBN; SEPARATOR=", ") AS ?ISBNs) (GROUP_CONCAT(?Book_Name; SEPARATOR=", ") AS ?Book_Names) (GROUP_CONCAT(?Author_Name; SEPARATOR=", ") AS ?Author_Names) (GROUP_CONCAT(?Award_Name; SEPARATOR=", ") AS ?Awards) (GROUP_CONCAT(?Genre; SEPARATOR=", ") AS ?Genres)
#WHERE{
#   {?book a books:Book}
#   {?book books:written_by ?author}
#   {?author books:name ?Author_Name}
#   {?book books:name ?Book_Name}
#   {?book books:published_by ?publisher}
#   {?publisher books:name ?Publisher_Name}
#   {?book books:receive_award ?award}
#   {?award books:name ?Award_Name}
#   FILTER (?Award_Name = "National Book Award")
#   {?book books:isbn ?ISBN}
#   {?book books:has_genre ?genreObj}
#   {?genreObj books:name ?Genre}
# }
#GROUP BY ?Publisher_Name
#ORDER BY ?Author_Names


# Import libraries from NLTK to Tokenize a sentence

In [29]:
import nltk 
from nltk.tokenize import word_tokenize
#nltk.download('punkt') # commented since already downloaded

# Import libraries for Lemmatization

In [30]:
from nltk.stem import WordNetLemmatizer
#nltk.download('wordnet')

# Write functions to normalise and lemmatize

In [31]:
def normalize(text):
    """Strip ASCII punctuation from `text` and collapse whitespace.

    Every character listed in `string.punctuation` is deleted (not replaced
    by a space), then runs of whitespace are reduced to single spaces and
    leading/trailing whitespace is dropped.

    Returns the cleaned string.
    """
    # Deletion table: maps every punctuation character to "nothing".
    drop_punctuation = str.maketrans("", "", punctuation)
    without_punctuation = text.translate(drop_punctuation)
    # split() with no argument splits on any whitespace run and discards
    # empty pieces, so joining with one space normalises the spacing.
    words = without_punctuation.split()
    return " ".join(words)

def lemmatize(processed_text):
    """Lemmatise every token of `processed_text` and re-join them.

    The text is tokenised with NLTK's `word_tokenize`; each token is passed
    to `WordNetLemmatizer.lemmatize` with part of speech 'v' (verb), and the
    results are joined back together with single spaces.

    Returns the lemmatised sentence as one string.
    """
    lemmatizer = WordNetLemmatizer()
    lemmas = []
    for token in word_tokenize(processed_text):
        # Every token is lemmatised as a verb, whatever its real role is.
        lemmas.append(lemmatizer.lemmatize(token, 'v'))
    return " ".join(lemmas)

def process_text(text):
    """Run the full text clean-up: `normalize` first, then `lemmatize`.

    Returns the string produced by lemmatising the normalised input.
    """
    return lemmatize(normalize(text))

# POS Tagging 

In [32]:
# nltk.download('averaged_perceptron_tagger')

In [33]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

def create_pos_tags(sent):
    """Tokenise the sentence `sent` and tag each token with its part of speech.

    Returns whatever `nltk.pos_tag` produces for the token list; in this
    notebook that is a list of `(token, tag)` tuples such as
    `('book', 'NN')`.
    """
    tokens = nltk.word_tokenize(sent)
    return nltk.pos_tag(tokens)




# Import Stanza to recognise Entities

If Stanza is not already installed, install it first with `pip install stanza`.

In [34]:
import stanza
# stanza.download('en')

# Stanza pipelines keyed by language code. Building a pipeline loads every
# model from disk, so it is done once and reused on later calls.
_STANZA_PIPELINES = {}


def _get_stanza_pipeline(lang='en'):
    """Return the cached Stanza pipeline for `lang`, creating it on first use."""
    if lang not in _STANZA_PIPELINES:
        # DownloadMethod.NONE: use the models already on disk rather than
        # fetching them (see the commented-out `stanza.download('en')`).
        _STANZA_PIPELINES[lang] = stanza.Pipeline(
            lang, download_method=stanza.DownloadMethod.NONE
        )
    return _STANZA_PIPELINES[lang]


def get_named_entities(text):
    """Run the English Stanza pipeline on `text` and return its named entities.

    The pipeline is built lazily and cached, so only the first call pays the
    model-loading cost; the pipeline configuration itself is unchanged.

    Returns the `entities` attribute of the processed document. Each entity
    exposes at least `.text` and `.type`, which the calling cell reads.
    """
    results = _get_stanza_pipeline('en')(text)
    return results.entities

# Bag of Keywords Mapping

In [35]:
# Bag of Words
# Each inner list is one keyword group: element 0 is the ontology name that
# the mapping helpers return, the remaining elements are phrases that map to it.
prop_list = [
    ["has_genre", "has genres", "with genre", "genres", "genre"],
    # NOTE(review): every phrase below already appears in the group above, and
    # lookup stops at the first matching group, so this group is never selected.
    ["has_genre", "with genre", "genres"],
]

class_list = [
    ["book", "books"],
    ["person", "people"],
    ["language"],
]

In [36]:
def map_property(word, prop_list):
    """Map `word` to the canonical property name of its keyword group.

    `prop_list` is a list of keyword groups; the first element of each group
    is the canonical name. The first group (in list order) that contains
    `word` wins. Matching is exact and case-sensitive.

    Returns the canonical name, or "" when no group contains `word`.
    """
    for keywords in prop_list:
        if word in keywords:
            return keywords[0]
    return ""

def map_class(word, class_list):
    """Map `word` to the canonical class name of its keyword group.

    `class_list` is a list of keyword groups; the first element of each group
    is the canonical name. The first group (in list order) that contains
    `word` wins. Matching is exact and case-sensitive.

    Returns the canonical name, or "" when no group contains `word`.
    """
    for keywords in class_list:
        if word in keywords:
            return keywords[0]
    return ""

# Code

In [37]:
#text = "all of the movies     by Greg Nicotero"
#text = "all of the movies directed    by Greg Nicotero"
#text = "list cast of The Walking Dead"
#text = "movies in English"
#text = "what is duration of Titanic?"


# Question to translate into a SPARQL query; the cells below read it from `text`.
text = "all books with Thriller genre"


In [38]:
# Strip punctuation and lemmatise the question, then POS-tag the resulting tokens.
processed_text = process_text(text)
tags = create_pos_tags(processed_text)
# Bare last expression so the notebook displays the (token, tag) pairs.
tags

[('all', 'DT'),
 ('book', 'NN'),
 ('with', 'IN'),
 ('Thriller', 'NNP'),
 ('genre', 'NN')]

In [39]:
#nltk.download('maxent_ne_chunker')
#nltk.download('words')
#!pip install pattern
from pattern.text.en import singularize

# Chunk the POS-tagged tokens: items that NLTK recognises as named entities
# come back as objects with a `label`, everything else stays a (token, tag) pair.
tagged_chuncks = nltk.ne_chunk(tags)

classes = []      # canonical class names found in the question, capitalised (e.g. "Book")
properties = []   # canonical property names found in the question (e.g. "has_genre")
individuals = {}  # entity text -> capitalised class name, or "not_mapped"

for entity in tagged_chuncks:
    if hasattr(entity, "label"):
        # NLTK named-entity chunk: only reported here. Individuals are
        # collected from the Stanza entities further down.
        print("check entity: ",entity)
        continue

    # The keyword bags contain lower-case entries only, so lower-case the token
    # before looking it up. Without this a capitalised "Books" or "Genre" is
    # silently dropped, while the Stanza branch below already lower-cases.
    token = entity[0].lower()
    pos = entity[1]

    if pos == "NNS":
        # Plural noun: can only name a class, looked up by its singular form.
        mapped_class = map_class(singularize(token), class_list)
        if mapped_class != "":
            classes.append(mapped_class.capitalize())
    elif pos == "VBP":
        # Present-tense verb: can only name a property.
        mapped_property = map_property(token, prop_list)
        if mapped_property != "":
            properties.append(mapped_property)
    elif pos == "NN":
        # Singular noun: a property match takes precedence over a class match.
        mapped_property = map_property(token, prop_list)
        mapped_class = map_class(singularize(token), class_list)
        if mapped_property != "":
            properties.append(mapped_property)
        elif mapped_class != "":
            classes.append(mapped_class.capitalize())

######################## CREATE INDIVIDUALS ##############################
# Stanza is run on the original question (`text`), not on the processed text.
named_entities = get_named_entities(text)

for e in named_entities:
    print(e)
    # Try to interpret the entity type (e.g. "PERSON") as one of the known classes.
    mapped_ind_class = map_class(singularize(e.type.lower()), class_list)
    if mapped_ind_class != "":
        individuals[e.text] = mapped_ind_class.capitalize()
    else:
        # Entity type has no counterpart in class_list; keep the entity anyway.
        individuals[e.text] = "not_mapped"

print("Classes: ",classes)
print("Properties: ",properties)
print("Individuals: ",individuals)

check entity:  (PERSON Thriller/NNP)


2023-11-26 12:49:34 INFO: Loading these models for language: en (English):
| Processor    | Package             |
--------------------------------------
| tokenize     | combined            |
| pos          | combined_charlm     |
| lemma        | combined_nocharlm   |
| constituency | ptb3-revised_charlm |
| depparse     | combined_charlm     |
| sentiment    | sstplus             |
| ner          | ontonotes_charlm    |

2023-11-26 12:49:34 INFO: Using device: cpu
2023-11-26 12:49:34 INFO: Loading: tokenize
2023-11-26 12:49:34 INFO: Loading: pos
2023-11-26 12:49:35 INFO: Loading: lemma
2023-11-26 12:49:35 INFO: Loading: constituency
2023-11-26 12:49:36 INFO: Loading: depparse
2023-11-26 12:49:36 INFO: Loading: sentiment
2023-11-26 12:49:37 INFO: Loading: ner
2023-11-26 12:49:38 INFO: Done loading processors!


{
  "text": "Thriller",
  "type": "WORK_OF_ART",
  "start_char": 15,
  "end_char": 23
}
Classes:  ['Book']
Properties:  ['has_genre']
Individuals:  {'Thriller': 'not_mapped'}


In [40]:
from SPARQLWrapper import SPARQLWrapper2

In [41]:
def _sparql_string_literal(value):
    """Return `value` as a single-quoted SPARQL string literal.

    Backslashes, single quotes, newlines and carriage returns are escaped so
    that text such as "Philosopher's Stone" cannot terminate the literal early
    and break (or alter) the query. Plain text is returned unchanged apart
    from the surrounding quotes.
    """
    escaped = (
        value.replace("\\", "\\\\")  # must be first, or later escapes get doubled
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return "'" + escaped + "'"


sparql = SPARQLWrapper2("http://localhost:3030/BookDiscoveryAppQueries/query")
c_triple = ""  # pattern constraining ?y to the first detected class
p_triple = ""  # pattern linking ?x and ?y through the first detected property
i_triple = ""  # pattern selecting ?x by the name of the first detected individual

# Only the first class, property and individual are used to build the query.
if len(classes) > 0:
    c_triple = "{?y a book:"+classes[0]+"}"
    if len(individuals) > 0:
        # Also require a path between the individual (?x) and the class instance (?y).
        c_triple = c_triple + "{?x (book:|!book:)|^(book:|!book:)* ?y}"

if len(properties) > 0:
    # The property may point from ?x to ?y or the other way round.
    p_triple = "{?x book:"+properties[0]+"|^book:"+properties[0]+" ?y}"

if len(individuals) > 0:
    first_key = next(iter(individuals))
    # The entity text comes from the user's question, so it is escaped before
    # being placed inside the FILTER literal.
    name_filter = " {?x book:name ?name} FILTER(?name="+_sparql_string_literal(first_key)+")"
    if individuals[first_key] == "not_mapped":
        if len(properties) == 0:
            # Nothing else constrains ?y, so ask for the type of the named individual.
            i_triple = "{?x a ?y}" + name_filter
        else:
            i_triple = name_filter
    else:
        i_triple = "{?x a book:"+individuals[first_key]+"}" + name_filter

query_start =      """
                PREFIX book:<http://www.book-discovery.com/ontologies#>
                SELECT  ?y
                WHERE{
                """
query_end =    "}"

query = query_start+c_triple+p_triple+i_triple+query_end
print(query)
sparql.setQuery(query)
# Rows of variable bindings; the next cell reads row["y"].value from each.
result = sparql.query().bindings


                PREFIX book:<http://www.book-discovery.com/ontologies#>
                SELECT  ?y
                WHERE{
                {?y a book:Book}{?x (book:|!book:)|^(book:|!book:)* ?y}{?x book:has_genre|^book:has_genre ?y} {?x book:name ?name} FILTER(?name='Thriller')}


In [42]:
# Print the value bound to ?y in every result row, one per line.
for row in result:
    y_binding = row["y"]
    print(y_binding.value)

http://www.book-discovery.com/ontologies#life_of_pi
http://www.book-discovery.com/ontologies#sacred_games
http://www.book-discovery.com/ontologies#400_days
