In [None]:
import requests
from sketch.examples.prompt_machine import *
import json
import os
import sqlite3
import random

# Set the "VERBOSE" entry of PM_SETTINGS to False for the rest of the notebook.
# NOTE(review): PM_SETTINGS is presumably provided by the prompt_machine star import above — confirm.
PM_SETTINGS["VERBOSE"] = False

In [None]:
import pandas as pd
import io
import urllib.parse

async def get_sparql_wikidata_result(sparql):
    """Run a SPARQL query against the Wikidata query endpoint and return the rows.

    Args:
        sparql: SPARQL query text; sent as the ``query`` URL parameter.

    Returns:
        pandas.DataFrame parsed from the CSV body of the response.

    Raises:
        aiohttp.ClientResponseError: if the endpoint answers with an HTTP
            status of 400 or higher (e.g. the query was rejected).
    """
    url = "https://query.wikidata.org/sparql"
    # Ask for CSV so the body can be loaded straight into pandas.
    headers = {"Accept": "text/csv"}
    async with aiohttp.ClientSession() as session:
        async with session.get(
            url, params={"query": sparql}, headers=headers
        ) as response:
            # Fail loudly on an error status instead of handing the server's
            # error page to read_csv, which would yield a garbage frame or an
            # unrelated parser error.
            response.raise_for_status()
            text = await response.text()
            return pd.read_csv(io.StringIO(text), sep=",")


In [None]:
async def search_wikidata(topic):
    """Search Wikidata items whose label or alias matches ``topic``.

    NOTE: a later cell in this notebook defines ``search_wikidata`` again with
    an extra ``property`` flag; when the cells run in order that definition
    replaces this one.

    Args:
        topic: free-text search string.

    Returns:
        pandas.DataFrame built from the ``search`` list of the JSON response
        (empty when the response has no ``search`` key).
    """
    url = "https://www.wikidata.org/w/api.php"
    # No "continue" parameter: it is the result offset, and a non-zero value
    # would skip the best-ranked matches.
    params = {
        "action": "wbsearchentities",
        "search": topic,
        "language": "en",
        "uselang": "en",
        "type": "item",
        "limit": "30",
        "format": "json",
        "origin": "*",
    }
    async with aiohttp.ClientSession() as session:
        # Let the HTTP client encode the query string, as the SPARQL helper does.
        async with session.get(url, params=params) as response:
            data = await response.json()
            return pd.DataFrame(data.get("search", []))

In [None]:
async def search_wikidata(topic, property=False):
    """Search Wikidata for entities whose label or alias matches ``topic``.

    Args:
        topic: free-text search string.
        property: when true, search Wikidata properties; otherwise search items.

    Returns:
        pandas.DataFrame built from the ``search`` list of the JSON response
        (empty when the response has no ``search`` key).
    """
    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbsearchentities",
        "search": topic,
        "language": "en",
        "uselang": "en",
        # Send exactly one entity type; repeating the parameter with two
        # different values leaves the outcome to the server's handling of
        # duplicate query parameters.
        "type": "property" if property else "item",
        # No "continue" parameter: it is the result offset, and a non-zero
        # value would skip the best-ranked matches.
        "limit": "30",
        "format": "json",
        "origin": "*",
    }
    async with aiohttp.ClientSession() as session:
        # Let the HTTP client encode the query string.
        async with session.get(url, params=params) as response:
            data = await response.json()
            return pd.DataFrame(data.get("search", []))

In [None]:
# test = await get_sparql_wikidata_result("""
# #Pokémon!
# # Updated 2020-06-17

# # Gotta catch 'em all
# SELECT DISTINCT ?pokemon ?pokemonLabel ?pokedexNumber
# WHERE
# {
#     ?pokemon wdt:P31/wdt:P279* wd:Q3966183 .
#     ?pokemon p:P1685 ?statement.
#     ?statement ps:P1685 ?pokedexNumber;
#               pq:P972 wd:Q20005020.
#     FILTER (! wikibase:isSomeValue(?pokedexNumber) )
#     SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
# }
# ORDER BY (?pokedexNumber)
# """)

In [None]:
# Zero-shot SPARQL prompt: the template is filled with the Wikidata search
# context and the question, and ends on an opening code fence; the same fence
# string is passed as `stop`.
gpt3_zeroshot_sparql = asyncGPT3Prompt(
    "gpt3_zeroshot_sparql",
    """
Related entities (search result) for topics and properties
{{ context }}
----
SPARQL (to be run on wikidata) for question [{{ question }}]:
```""",
    stop="```",
    temperature=0.4,
    model_name="code-davinci-002",
)

In [None]:
# Prompt that asks for the entities/topics mentioned in a question. The
# template ends with "1." so the completion continues a numbered list.
gpt3_get_topics_from_question = asyncGPT3Prompt(
    "gpt3_get_topics_from_question",
    """
What are the top few (up to 5) entities (and topics) in the question, that need to be found in the wikidata database as entities: [{{ question }}]?
1.""",
)

# Prompt that asks for the properties mentioned in a question. The template
# ends with "1." so the completion continues a numbered list.
gpt3_get_properties_from_question = asyncGPT3Prompt(
    "gpt3_get_properties_from_question",
    """
What are the top few (up to 5) properties in the question, that need to be found in the wikidata database as properties of entities: [{{ question }}]?
1.""",
)
import re

async def get_topics_from_question(question):
    """Ask GPT-3 for the topics in ``question`` and return them as a clean list.

    The completion is read as a numbered list, one item per line. Leading
    numbering ("2." or "2)") is removed and blank lines are dropped, so the
    result never contains empty strings.

    Args:
        question: the natural-language question.

    Returns:
        list[str]: topic names in the order the model produced them.
    """
    results = await gpt3_get_topics_from_question(question=question)
    topics = []
    for line in results.splitlines():
        # Strip before matching so an indented item such as " 3. foo" still
        # loses its number; require at least one digit so a topic that starts
        # with a dot is left intact.
        topic = re.sub(r"^\d+[.)]\s*", "", line.strip()).strip()
        if topic:
            topics.append(topic)
    return topics

# Wrap the topic parser with asyncPrompt under its own name.
# NOTE(review): asyncPrompt is presumably provided by prompt_machine — confirm what the wrapper adds.
get_topics_from_question_prompt = asyncPrompt(
    "get_topics_from_question_prompt", get_topics_from_question
)

async def get_properties_from_question(question):
    """Ask GPT-3 for the properties in ``question`` and return them as a clean list.

    The completion is read as a numbered list, one item per line. Leading
    numbering ("2." or "2)") is removed and blank lines are dropped, so the
    result never contains empty strings.

    Args:
        question: the natural-language question.

    Returns:
        list[str]: property names in the order the model produced them.
    """
    results = await gpt3_get_properties_from_question(question=question)
    props = []
    for line in results.splitlines():
        # Strip before matching so an indented item such as " 3. foo" still
        # loses its number; require at least one digit so a name that starts
        # with a dot is left intact.
        prop = re.sub(r"^\d+[.)]\s*", "", line.strip()).strip()
        if prop:
            props.append(prop)
    return props

# Wrap the property parser with asyncPrompt under its own name.
# NOTE(review): asyncPrompt is presumably provided by prompt_machine — confirm what the wrapper adds.
get_properties_from_question_prompt = asyncPrompt(
    "get_properties_from_question_prompt", get_properties_from_question
)

def _format_search_results(search_result, first_n=5):
    """Render the first ``first_n`` search hits as CSV text, or "No results" when empty."""
    if len(search_result) == 0:
        return "No results"
    # reindex instead of [[...]]: when no hit carries a "description" the
    # column is absent from the frame, and plain column selection would raise
    # KeyError. reindex adds the missing column, which to_csv writes as blank.
    columns = ["id", "label", "description"]
    return search_result.reindex(columns=columns).iloc[:first_n].to_csv(index=False)


async def get_wikidata_entities_for_topics(topics, properties):
    """Look up every topic and property on Wikidata and format the hits as text.

    Args:
        topics: search strings looked up with ``search_wikidata(topic)``.
        properties: search strings looked up with
            ``search_wikidata(name, property=True)``.

    Returns:
        str: one "Topic [..]" section per topic (up to 5 hits each) followed
        by one "Property [..]" section per property (up to 3 hits each); each
        section is a CSV of id, label and description, or "No results".
    """
    topic_results = await asyncio.gather(*[search_wikidata(topic) for topic in topics])
    topic_part = "\n".join(
        f"Topic [{topic}]\n{_format_search_results(result)}"
        for topic, result in zip(topics, topic_results)
    )

    # The loop variable is called `name` so it does not shadow the builtin `property`.
    property_results = await asyncio.gather(
        *[search_wikidata(name, property=True) for name in properties]
    )
    property_part = "\n".join(
        f"Property [{name}]\n{_format_search_results(result, first_n=3)}"
        for name, result in zip(properties, property_results)
    )
    return topic_part + "\n" + property_part

async def get_context_for_question(question):
    """Build the Wikidata search context for ``question``.

    Extracts topic names, then property names, from the question and returns
    the formatted Wikidata search results for both.
    """
    # The two extraction steps are awaited one after the other, topics first.
    topic_names = await get_topics_from_question_prompt(question=question)
    property_names = await get_properties_from_question_prompt(question=question)
    context = await get_wikidata_entities_for_topics(topic_names, property_names)
    return context

# Wrap the context builder with asyncPrompt under its own name.
# NOTE(review): asyncPrompt is presumably provided by prompt_machine — confirm what the wrapper adds.
get_context_for_question_prompt = asyncPrompt(
    "get_context_for_question_prompt", get_context_for_question
)

In [None]:
# Ad-hoc check: call the raw topic-extraction prompt directly and show its completion.
await gpt3_get_topics_from_question("What are the slowest moving animals?")

In [None]:
async def get_data_for_question(question):
    """Answer ``question`` with data from Wikidata.

    Steps:
      1. Build the Wikidata search context for the question.
      2. Ask GPT-3 to complete a SPARQL query from that context.
      3. Print the generated query, run it against Wikidata and return the result.

    Args:
        question: the natural-language question.

    Returns:
        Whatever ``get_sparql_wikidata_result`` returns for the generated query.
    """
    wikidata_context = await get_context_for_question_prompt(question)
    sparql = await gpt3_zeroshot_sparql(context=wikidata_context, question=question)
    # Print the query text itself; wrapping it in braces built a one-element
    # set, so the output was a set repr with escaped newlines.
    print("Sparql result...", sparql)
    return await get_sparql_wikidata_result(sparql)

In [None]:
# Sample questions run through get_data_for_question, one per cell; each call generates a SPARQL query and executes it against Wikidata.
await get_data_for_question("What is the capital of france?")

In [None]:
await get_data_for_question("How many people over the age of 30 live in Japan?")

In [None]:
await get_data_for_question("What are the 5 tallest buildings in Japan, and what are their heights (in meters)?")

In [None]:
await get_data_for_question("In the country Japan, what are the 10 tallest buildings, their heights (in meters), and construction date?")

In [None]:
await get_data_for_question("What is the best Japanese food?")

In [None]:
await get_data_for_question("How many skyscrapers are there in tokyo?")

In [None]:
await get_data_for_question("What is the most visited train station in Tokyo?")

In [None]:
await get_data_for_question("What are the largest economies in the world and what do they trade?")

In [None]:
await get_data_for_question("What are the most popular human eye colors")

In [None]:
await get_data_for_question("What color eyes do most people have?")

In [None]:
await get_data_for_question("What are the locations that are best to travel to in Japan?")

In [None]:
await get_data_for_question("How many border crossing checkpoints are there between Russia and Ukraine?")