# Acquisition

Acquisition de 60 images de races de chats et de 60 images de races de chiens depuis le service de requêtes de Wikidata (« Wikidata Query Service »), interrogé en SPARQL.

In [3]:
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

# URL of the SPARQL endpoint that the query below is sent to (via get_results).
endpoint_url = "https://query.wikidata.org/sparql"

In [4]:
# SPARQL query sent to the endpoint. It is the UNION of two sub-selects with the
# same shape:
#   * items that are an instance (wdt:P31) of wd:Q43577 (annotated "Chats" in the query),
#   * items that are an instance (wdt:P31) of wd:Q39367 (annotated "Chiens" in the query).
# Each sub-select requires an origin (wdt:P495) and an image (wdt:P18), groups by
# (?animal, ?paysdOrigine), keeps one sampled image per group, and is capped at
# 60 rows. The label service then resolves ?animalLabel / ?paysdOrigineLabel in
# English.
# NOTE(review): the sub-selects use LIMIT without ORDER BY, so which 60 rows come
# back is presumably left to the endpoint and may vary between runs -- confirm if
# a reproducible image set is needed.
query = """SELECT DISTINCT ?animal ?animalLabel ?paysdOrigine ?paysdOrigineLabel ?image WHERE {
  { 
    SELECT ?animal ?paysdOrigine (SAMPLE(?img) AS ?image) WHERE {
      ?animal wdt:P31 wd:Q43577;  # Chats
              wdt:P495 ?paysdOrigine;
              wdt:P18 ?img.
    } GROUP BY ?animal ?paysdOrigine
    LIMIT 60
  }
  UNION
  { 
    SELECT ?animal ?paysdOrigine (SAMPLE(?img) AS ?image) WHERE {
      ?animal wdt:P31 wd:Q39367;  # Chiens
              wdt:P495 ?paysdOrigine;
              wdt:P18 ?img.
    } GROUP BY ?animal ?paysdOrigine
    LIMIT 60
  }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}"""


def get_results(endpoint_url, query):
    """Run ``query`` against the SPARQL endpoint at ``endpoint_url``.

    The request is sent with a user agent of the form
    ``WDQS-example Python/<major>.<minor>`` built from the running
    interpreter's version, and the response is requested in JSON format.

    Parameters
    ----------
    endpoint_url : str
        Address of the SPARQL endpoint.
    query : str
        SPARQL query text.

    Returns
    -------
    The value produced by ``convert()`` on the query response.
    """
    # Only the major and minor version numbers go into the user agent.
    python_version = tuple(sys.version_info[:2])
    client = SPARQLWrapper(
        endpoint_url,
        agent="WDQS-example Python/%s.%s" % python_version,
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()

# Run the query, then flatten every result binding into an
# (animal label, origin label, image URL) tuple.
results = get_results(endpoint_url, query)

array = [
    (
        binding["animalLabel"]["value"],
        binding["paysdOrigineLabel"]["value"],
        binding["image"]["value"],
    )
    for binding in results["results"]["bindings"]
]

In [5]:
# Build the table from the collected tuples and request the "<U200" dtype for
# each of its three columns in one chained expression.
column_names = ["animal", "pays d'origine", "image"]
dataframe = pd.DataFrame(array, columns=column_names).astype(
    dtype={name: "<U200" for name in column_names}
)
dataframe

Unnamed: 0,animal,pays d'origine,image
0,Cymric,Canada,http://commons.wikimedia.org/wiki/Special:File...
1,LaPerm,United States,http://commons.wikimedia.org/wiki/Special:File...
2,Abyssinian,United Kingdom,http://commons.wikimedia.org/wiki/Special:File...
3,Kinkalow,United States,http://commons.wikimedia.org/wiki/Special:File...
4,Oriental Shorthair,United States,http://commons.wikimedia.org/wiki/Special:File...
...,...,...,...
115,Alaskan Klee Kai,United States,http://commons.wikimedia.org/wiki/Special:File...
116,Picardy Spaniel,France,http://commons.wikimedia.org/wiki/Special:File...
117,Bergamasco Shepherd,Italy,http://commons.wikimedia.org/wiki/Special:File...
118,Löwchen,Europe,http://commons.wikimedia.org/wiki/Special:File...


In [6]:
import requests
import shutil
import os

def download_image(url, dest_dir="images", timeout=30):
    """Download the file at ``url`` into ``dest_dir`` and return the HTTP status.

    The file is stored under the last path component of ``url``
    (``os.path.basename(url)``). Nothing is written unless the server
    answers with status code 200.

    Parameters
    ----------
    url : str
        Address of the file to fetch; redirects are followed.
    dest_dir : str, optional
        Directory receiving the file. It is created when missing, so the
        function also works on a fresh checkout. Defaults to ``"images"``.
    timeout : float, optional
        Timeout in seconds forwarded to ``requests.get`` so that a stalled
        server cannot block the caller forever. Defaults to 30.

    Returns
    -------
    int
        The HTTP status code of the response.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    # The context manager guarantees the streamed connection is released,
    # even when the status is not 200 or writing the file fails.
    with requests.get(
        url,
        allow_redirects=True,
        headers=headers,
        stream=True,
        timeout=timeout,
    ) as response:
        if response.status_code == 200:
            os.makedirs(dest_dir, exist_ok=True)
            target = os.path.join(dest_dir, os.path.basename(url))
            # Have the raw stream undo any transfer compression (gzip/deflate)
            # so the bytes written to disk are the actual file content.
            response.raw.decode_content = True
            with open(target, "wb") as image:
                shutil.copyfileobj(response.raw, image)
        return response.status_code

In [7]:
# Download the image behind every URL of the "image" column; the displayed
# Series holds the HTTP status code returned for each row.
dataframe["image"].apply(download_image)

0      200
1      200
2      200
3      200
4      200
      ... 
115    200
116    200
117    200
118    200
119    200
Name: image, Length: 120, dtype: int64