In [None]:
!pip install requests
!pip install spacy
!pip install SPARQLWrapper
!python -m spacy download en_core_web_sm

In [None]:
import requests
import spacy
import re
from SPARQLWrapper import SPARQLWrapper, JSON
import requests
import json
import SPARQLWrapper
from sentence_preprocessing import *
import time
from itertools import combinations

In [None]:
# Load the spaCy 'en_core_web_sm' pipeline once at module level;
# sentence_preprocessing() reuses this object on every call.
nlp = spacy.load('en_core_web_sm')

def sentence_preprocessing(sentence):
    """Run the spaCy pipeline over ``sentence`` and extract three views of it.

    Returns:
        dict with the keys
          * ``lemmatized_words`` - lemmas of alphabetic tokens that are not stop words
          * ``named_entities``   - mapping of entity text to its entity label
          * ``keywords``         - text of NOUN / VERB / PROPN tokens that are not stop words
    """
    parsed = nlp(sentence)
    keyword_tags = ("NOUN", "VERB", "PROPN")

    lemmas = []
    keyword_tokens = []
    for tok in parsed:
        # Stop words are excluded from both the lemma list and the keyword list.
        if tok.is_stop:
            continue
        if tok.is_alpha:
            lemmas.append(tok.lemma_)
        if tok.pos_ in keyword_tags:
            keyword_tokens.append(tok.text)

    # A repeated entity text keeps the label of its last occurrence.
    entity_labels = {}
    for span in parsed.ents:
        entity_labels[span.text] = span.label_

    return dict(
        lemmatized_words=lemmas,
        named_entities=entity_labels,
        keywords=keyword_tokens,
    )

In [None]:
# ConceptNet API
def query_conceptnet_api(term, timeout=10):
    """Look up ``term`` as an English concept in the ConceptNet REST API.

    ConceptNet concept URIs are lower-case with underscores instead of spaces,
    so the term is normalised to that form and percent-encoded before it is
    placed in the URL path (e.g. "Albert Einstein" -> /c/en/albert_einstein).

    Args:
        term: Word or phrase to look up.
        timeout: Seconds to wait for the HTTP request before requests raises.

    Returns:
        The decoded JSON document as a dict when the server answers 200,
        otherwise an empty dict.
    """
    from urllib.parse import quote  # local import: only this function needs it

    # Collapse runs of whitespace into single underscores and lower-case the term.
    concept = "_".join(term.lower().split())
    # safe="" also encodes "/", so a term can never add extra path segments.
    url = f'http://api.conceptnet.io/c/en/{quote(concept, safe="")}'
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return {}
    return json.loads(response.text)

# Wikidata API
def query_wikidata_api(term):
    """Find Wikidata items whose English label is exactly ``term``.

    The term is embedded in a SPARQL string literal, so backslashes, double
    quotes and line breaks are escaped first; otherwise a term containing one
    of them would produce a malformed (or altered) query.

    Args:
        term: Exact English label to search for.

    Returns:
        The converted JSON result of the SPARQL query (item, label, description).
    """
    # Escape the backslash first so the escapes added afterwards are not doubled.
    label_literal = (
        term.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    sparql = SPARQLWrapper.SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setQuery(f"""
        SELECT ?item ?itemLabel ?itemDescription
        WHERE
        {{
            ?item rdfs:label "{label_literal}"@en.
            SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
        }}
    """)
    sparql.setReturnFormat(SPARQLWrapper.JSON)
    return sparql.query().convert()

# DBpedia API
# Characters that may not appear inside a SPARQL IRI reference (<...>):
# control characters, space, and < > " { } | ^ ` \
_IRI_UNSAFE_CHARS = re.compile(r'[\x00-\x20<>"{}|^`\\]')


def _dbpedia_resource_iri(term):
    """Build the full DBpedia resource IRI for ``term`` (spaces become underscores)."""
    local_name = term.strip().replace(" ", "_")
    # Percent-encode only what would break the <...> syntax; everything else is kept.
    local_name = _IRI_UNSAFE_CHARS.sub(lambda m: f"%{ord(m.group()):02X}", local_name)
    return f"http://dbpedia.org/resource/{local_name}"


def query_dbpedia_api(terms):
    """Fetch up to 10 English-tagged literal properties per term from DBpedia.

    Each term is addressed by its full resource IRI rather than a ``dbr:``
    prefixed name, because prefixed names are not valid SPARQL once the term
    contains punctuation such as apostrophes, commas or parentheses.

    Args:
        terms: Iterable of entity names (e.g. "Albert Einstein").

    Returns:
        A list with one single-key dict per input term, in input order:
        ``{term: bindings}``, where ``bindings`` is the list of SPARQL result
        bindings (``relation`` / ``relatedEntity``), or ``[]`` when nothing was
        found or the query failed.
    """
    sparql = SPARQLWrapper.SPARQLWrapper("http://dbpedia.org/sparql")
    # The return format does not depend on the term, so set it once.
    sparql.setReturnFormat(SPARQLWrapper.JSON)

    results = []
    for term in terms:
        resource_iri = _dbpedia_resource_iri(term)
        # lang(...) = 'en' keeps only English-tagged literal values.
        sparql.setQuery(f"""
            SELECT ?relation ?relatedEntity WHERE {{
                <{resource_iri}> ?relation ?relatedEntity.
                FILTER (lang(?relatedEntity) = 'en')
            }}
            LIMIT 10
        """)
        try:
            response = sparql.query().convert()
            bindings = response.get('results', {}).get('bindings', [])
        except Exception as e:
            # Best effort: one failing term must not abort the remaining lookups.
            print(f'Error querying DBpedia for {term}: {e}')
            bindings = []
        else:
            if not bindings:
                print(f'No results found for {term}')
        results.append({term: bindings})
    return results

# Google Knowledge Graph API
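# An API key is required; pass it in at call time (for example, read it from an
# environment variable) and never commit it to the notebook. Any key that was
# previously committed here should be treated as compromised and rotated.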
def query_google_knowledge_graph_api(term, api_key, timeout=10):
    """Search the Google Knowledge Graph for the single best match for ``term``.

    Args:
        term: Free-text query.
        api_key: Google API key, sent as the ``key`` query parameter.
        timeout: Seconds to wait for the HTTP request before requests raises;
            without it a stalled connection would block indefinitely.

    Returns:
        The decoded JSON response as a dict when the server answers 200,
        otherwise an empty dict.
    """
    response = requests.get(
        "https://kgsearch.googleapis.com/v1/entities:search",
        params={
            "query": term,
            "limit": 1,
            "indent": True,
            "key": api_key,
        },
        timeout=timeout,
    )
    if response.status_code != 200:
        return {}
    return json.loads(response.text)

# Semantic Scholar API
def query_semantic_scholar_api(term, timeout=10):
    """Search Semantic Scholar for papers matching ``term``.

    Uses the Academic Graph API search endpoint (``/graph/v1/paper/search``).
    The legacy ``/v1/paper/search`` path is not a search endpoint, so the
    function could only ever return ``{}``. The Graph API has no ``doi``
    field; DOIs are delivered inside ``externalIds``.

    Args:
        term: Free-text query.
        timeout: Seconds to wait for the HTTP request before requests raises.

    Returns:
        The decoded JSON response as a dict when the server answers 200,
        otherwise an empty dict.
    """
    response = requests.get(
        'https://api.semanticscholar.org/graph/v1/paper/search',
        params={
            'query': term,
            'fields': 'title,abstract,authors,year,externalIds,url',
        },
        timeout=timeout,
    )
    if response.status_code != 200:
        return {}
    return json.loads(response.text)

def _pick_related_text(dbpedia_results):
    """Return one related literal for the first queried entity, or '' if there is none."""
    if not dbpedia_results:
        return ""
    # Each entry is a single-key dict {entity: bindings}; take the first entity's bindings.
    bindings = next(iter(dbpedia_results[0].values()), [])
    if not bindings:
        return ""
    # Keep the historical choice (the second binding) when it exists, else use the only one.
    chosen = bindings[1] if len(bindings) > 1 else bindings[0]
    return chosen.get('relatedEntity', {}).get('value', '')


def query_knowledge_base(sentence):
    """Check which named entities of ``sentence`` are mentioned in DBpedia text.

    The sentence is preprocessed, every named entity is looked up on DBpedia,
    and one literal value related to the first entity is selected. Each named
    entity is then reported as ``'<entity>: True|False'`` depending on whether
    it occurs (case-insensitively) in that text.

    The lookup no longer assumes that the first entity is 'Albert Einstein' or
    that it has at least two bindings; missing data yields an empty text
    instead of a KeyError/IndexError.

    Only DBpedia is queried; the other query_* helpers are not called here.

    Args:
        sentence: Natural-language sentence to analyse.

    Returns:
        ``{"dbpedia": <selected text>}``, where the text is ``''`` when DBpedia
        returned nothing for the first entity.
    """
    preprocessing_start = time.time()
    processed_sentence = sentence_preprocessing(sentence)

    lemmatized_words = processed_sentence['lemmatized_words']
    named_entities = processed_sentence['named_entities']
    keywords = processed_sentence['keywords']
    print(f'lemmatized_words: {lemmatized_words}')
    print(f'named_entities: {named_entities}')
    print(f'keywords: {keywords}')

    print(f"Time taken for preprocessing: {time.time() - preprocessing_start} seconds")

    # Time the lookups on their own so the figure below excludes preprocessing.
    query_start = time.time()
    entity_names = list(named_entities)
    dbpedia_results = _pick_related_text(query_dbpedia_api(entity_names))

    searchable_text = dbpedia_results.lower()
    vals = [f'{word}: {word.lower() in searchable_text}' for word in entity_names]
    print(vals)

    print(f"Time taken for querying APIs: {time.time() - query_start} seconds")

    return {
        "dbpedia": dbpedia_results
    }

def main():
    """Run the knowledge-base lookup on a sample sentence and print each source's result."""
    sample = "Albert Einstein developed the Theory of Relativity in 1905"
    for source, payload in query_knowledge_base(sample).items():
        # Every result line is followed by two blank lines.
        print(f"Results from {source}: {payload}", end="\n\n\n")

# Entry point: run the demo only when this code executes as the top-level
# module, not when it is imported from elsewhere.
if __name__ == "__main__":
    main()