In [23]:
import requests
from SPARQLWrapper import SPARQLWrapper, JSON
import csv
import random

# DBpedia endpoint URL
endpoint = "https://dbpedia.org/sparql"

# SPARQL query to retrieve data for countries with population greater than 30,000,000
query = """
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>

SELECT DISTINCT ?country ?capital ?city1 ?city2 ?city3
WHERE {
  ?country a dbo:Country ;
           dbp:populationTotal ?pop ;
           dbo:capital ?capital ;
           dbo:largestCity ?city1 ;
           dbo:largestCity ?city2 ;
           dbo:largestCity ?city3 .
  FILTER (?pop > 30000000 && ?city1 != ?capital && ?city2 != ?capital && ?city3 != ?capital)
}
ORDER BY RAND()
LIMIT 5
"""

# Set up the SPARQL connection
sparql = SPARQLWrapper(endpoint)
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Send the SPARQL query and handle the response
results = sparql.query().convert()

# Randomize the order of the results
random.shuffle(results["results"]["bindings"])

# Write the result data to a CSV file
with open("country_data.csv", "w", newline="") as f:
    writer = csv.writer(f)
    
    # Write the header row
    writer.writerow(["Country", "Capital", "Largest City 1", "Largest City 2", "Largest City 3"])
    
    # Write data for each country
    for result in results["results"]["bindings"]:
        country = result["country"]["value"].split("/")[-1]
        capital = result["capital"]["value"].split("/")[-1]
        city1 = result["city1"]["value"].split("/")[-1]
        city2 = result["city2"]["value"].split("/")[-1]
        city3 = result["city3"]["value"].split("/")[-1]
        writer.writerow([country, capital, city1, city2, city3])


In [32]:
from SPARQLWrapper import SPARQLWrapper, JSON
import random

# 设置SPARQL endpoint
sparql = SPARQLWrapper("https://dbpedia.org/sparql")

# 编写SPARQL查询语句
query = """
SELECT ?countryName
WHERE {
  ?country a dbo:Country ;
           rdfs:label ?countryName ;
           dbp:populationTotal ?population .
  FILTER (langMatches(lang(?countryName), "en") && ?population > 30000000)
}
ORDER BY RAND()
LIMIT 5
"""

# 执行SPARQL查询
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# 输出结果
print("随机5个人口大于30000000的国家名字：")
for result in results["results"]["bindings"]:
    print(result["countryName"]["value"])


随机5个人口大于30000000的国家名字：
South India
Southeastern United States
Appalachia
Northeast India
Southern United States


In [52]:
import requests
import random

# 从DBpedia SPARQL端点获取人口大于30000000的国家名字
query = '''
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>

SELECT ?country
WHERE {
  ?country a dbo:Country ;
           dbp:populationTotal ?pop .
  FILTER(?pop > 70000000) .
}
'''

endpoint = 'https://dbpedia.org/sparql'
params = {'query': query, 'format': 'json'}
response = requests.get(endpoint, params=params)
results = response.json()['results']['bindings']

# 随机选择10个结果并提取国家名字
countries = [result['country']['value'].split('/')[-1] for result in random.sample(results, 5)]

# 输出结果
print('随机选择的10个国家：')
for i, country in enumerate(countries):
    print(f'{i+1}. {country}')


随机选择的10个国家：
1. Appalachia
2. Southern_United_States
3. Silesia
4. Southeastern_United_States
5. Western_United_States


In [87]:
from SPARQLWrapper import SPARQLWrapper, JSON
import random


class QuestionGenerator:
    def __init__(self):
        self.sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
        self.sparql.setReturnFormat(JSON)

    def new_question(self, country_name):
        # 获取人口大于 30000000 的国家实体及其人口信息
        # 构建查询，使用变量传递实体名称
        query = f"""
            SELECT DISTINCT ?capital ?capitalLabel WHERE {{
              ?country rdfs:label "{country_name}"@en ;
                       wdt:P36 ?capital .
              SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
            }}
            """
        self.sparql.setQuery(query)
        results = self.sparql.query().convert()

        # 检查结果是否为空
        if len(results["results"]["bindings"]) == 0:
            error_message = f"Sorry, we could not find enough information about {country_name}. Please try another question."
            return {"description": error_message, "options": "", "correct_answer": None}

        # 随机选择一个国家实体并获取其首都和其他城市
        selected_country = random.choice(results["results"]["bindings"])
        country_id = selected_country["country"]["value"].split("/")[-1]  # 获取实体 ID

        # 检查字典中是否包含所需的键
        if "capitalLabel" not in selected_country or "countryLabel" not in selected_country:
            error_message = f"Sorry, we could not find enough information about {country_name}. Please try another question."
            return {"description": error_message, "options": "", "correct_answer": None}

        # 从选定的国家实体中提取首都名称和其他城市列表
        country_name = selected_country["countryLabel"]["value"]
        query = """
            SELECT DISTINCT ?capital ?capitalLabel ?city ?cityLabel WHERE {
              VALUES ?country { wd:%s }
              ?country wdt:P36 ?capital ;
                       wdt:P17 ?place .
              ?city wdt:P131+ ?place ;
                    wdt:P31/wdt:P279* wd:Q515 .
              FILTER (?capital != ?city)
              SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
            }
            """ % (country_id)  # 在查询中使用正确的实体 ID
        self.sparql.setQuery(query)
        results = self.sparql.query().convert()

        # 处理结果为空的情况
        if len(results["results"]["bindings"]) == 0:
            error_message = f"Sorry, we could not find enough information about {country_name}. Please try another question."
            return {"description": error_message, "options": "", "correct_answer": None}

        # 如果实体数量不足 3 个，则返回全部实体；否则随机排序首都和其他城市，并记录正确选项的位置
        # 如果实体数量不足 3 个，则返回全部实体
        options = []
        correct_answer = None
        if len(results["results"]["bindings"]) < 3:      
            options_set = set()
            for option in results["results"]["bindings"]:
                option_name = option["capitalLabel"]["value"]
                if option_name not in options_set:
                    options_set.add(option_name)
                    options.append(f"{len(options) + 1}. {option_name}")
                    if option == selected_country:
                        correct_answer = len(options) - 1
            while len(options) < 3:
                random_option = random.choice(results["results"]["bindings"])["capitalLabel"]["value"]
                if random_option not in options_set:
                    options_set.add(random_option)
                    options.append(f"{len(options) + 1}. {random_option}")
        else:   # 实体数量足够 3 个
            options_list = random.sample(results["results"]["bindings"], 3)
            for index, option in enumerate(options_list):
                option_name = option["capitalLabel"]["value"]
                options.append(f"{index + 1}. {option_name}")
                if option == selected_country:
                    correct_answer = index
            if correct_answer is None:
                correct_answer = random.randint(0, 2)

        # 生成题目及选项并返回
        options_text = "\n".join(options)
        question = {
            "description": f"What is the capital city of {country_name}?",
            "options": options_text,
            "correct_answer": correct_answer
        }
        return question


qg = QuestionGenerator()
country_name = "China"
for i in range(3):
    question = qg.new_question(country_name)
    print(question["description"])
    print(question["options"])
    print("-" * 10)


KeyError: 'country'

In [2]:
import requests
def _query(q):
    """
    Queriaes the dbpedia sparql endpoint.
    :param q: Query string.
    :type q: str
    :return: Query answer.
    :rtype: dict
    """
    try:
        params = {'query': q}
        resp = requests.get(db_url, params=params, headers={'Accept': 'application/json'})
        return resp.text
    except Exception as e:
        print(e)
        raise


In [3]:
db_url = "http://dbpedia.org/sparql"

In [7]:
query_string = f"""PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?label_subject ?object ?label_object ?label_predicate ?range
    WHERE {{
        ?subject dbo:wikiPageLength ?wikipagelength.
        ?subject rdfs:label ?label_subject.
        OPTIONAL {{?object rdfs:label ?label_object.}}

    }}
    ORDER BY ?wikipagelength
    LIMIT 200
    """

In [8]:
rdfs  = _query(query_string)

In [10]:
with open('output.txt', 'w') as f:
    f.write(rdfs)