TÂCHE 1

In [1]:
!pip -q install sparqlwrapper

In [2]:
import sys
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "https://query.wikidata.org/sparql"

# Sujet: montagnes (Q8502)
query = """SELECT DISTINCT ?itemLabel ?paysLabel ?image WHERE {
  ?item wdt:P31/wdt:P279* wd:Q8502 .
  OPTIONAL { ?item wdt:P17 ?pays . }
  ?item wdt:P18 ?image .
  
  FILTER(CONTAINS(STR(?image), "commons.wikimedia.org"))

  SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
}
LIMIT 300
"""

def get_results(endpoint_url, query):
    user_agent = "WDQS-example Python/%s.%s" % (
        sys.version_info[0], 
        sys.version_info[1],
    )
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

array = []
results = get_results(endpoint_url, query)

for result in results["results"]["bindings"]:
    array.append((
        result["itemLabel"]["value"],
        result.get("paysLabel", {}).get("value"),
        result["image"]["value"]
    ))

dataframe = pd.DataFrame(array, columns=["label", "pays", "image"])
dataframe = dataframe.drop_duplicates(subset=["image"]).reset_index(drop=True)

dataframe.head(10), len(dataframe)

(           label                       pays  \
 0         Ulawun  Papouasie-Nouvelle-Guinée   
 1      mont Bona                 États-Unis   
 2       Tandikat                  Indonésie   
 3          Alutu                   Éthiopie   
 4   Popocatepetl                    Mexique   
 5       Antisana                   Équateur   
 6     Chimborazo                   Équateur   
 7     Tungurahua                   Équateur   
 8  mont Damavand                       Iran   
 9        Q576542                Royaume-Uni   
 
                                                image  
 0  http://commons.wikimedia.org/wiki/Special:File...  
 1  http://commons.wikimedia.org/wiki/Special:File...  
 2  http://commons.wikimedia.org/wiki/Special:File...  
 3  http://commons.wikimedia.org/wiki/Special:File...  
 4  http://commons.wikimedia.org/wiki/Special:File...  
 5  http://commons.wikimedia.org/wiki/Special:File...  
 6  http://commons.wikimedia.org/wiki/Special:File...  
 7  http://commons.wik

In [3]:
import os

os.makedirs("images", exist_ok=True)
os.makedirs("data", exist_ok=True)

In [4]:
import requests
import shutil
from urllib.parse import urlparse
import time

def download_image(url):
    headers = {"User-Agent": "Mozilla/5.0"}
    r = requests.get(url, allow_redirects=True, headers=headers, stream=True)

    if r.status_code == 200:
        filename = os.path.basename(urlparse(url).path)
        path = os.path.join("images", filename)

        with open(path, "wb") as img:
            r.raw.decode_content = True
            shutil.copyfileobj(r.raw, img)
    time.sleep(2)
    return r.status_code

In [5]:
# Téléchargement des 100 premières images
dataframe = dataframe.head(100)
dataframe["status"] = dataframe["image"].apply(download_image)

dataframe["status"].value_counts()

status
429    85
200    15
Name: count, dtype: int64

In [6]:
import json
from PIL import Image

metadata = []

for _, row in dataframe.iterrows():
    filename = os.path.basename(urlparse(row["image"]).path)
    filepath = os.path.join("images", filename)

    try:
        with Image.open(filepath) as img:
            width, height = img.size
            format_img = img.format

        size_kb = os.path.getsize(filepath) / 1024

        metadata.append({
            "file_name": filename,
            "width": width,
            "height": height,
            "format": format_img,
            "file_size_kb": round(size_kb, 2),
            "source_url": row["image"],
            "label": row["label"],
            "pays": row["pays"]
        })

    except Exception:
        continue

In [7]:
with open("data/images_metadata.json", "w", encoding="utf-8") as f:
    json.dump(metadata, f, indent=2, ensure_ascii=False)

len(metadata)

25

In [8]:
dataframe

Unnamed: 0,label,pays,image,status
0,Ulawun,Papouasie-Nouvelle-Guinée,http://commons.wikimedia.org/wiki/Special:File...,200
1,mont Bona,États-Unis,http://commons.wikimedia.org/wiki/Special:File...,200
2,Tandikat,Indonésie,http://commons.wikimedia.org/wiki/Special:File...,200
3,Alutu,Éthiopie,http://commons.wikimedia.org/wiki/Special:File...,200
4,Popocatepetl,Mexique,http://commons.wikimedia.org/wiki/Special:File...,200
...,...,...,...,...
95,mont Akagi,Japon,http://commons.wikimedia.org/wiki/Special:File...,429
96,puy Griou,France,http://commons.wikimedia.org/wiki/Special:File...,429
97,Sierra Nevada,Chili,http://commons.wikimedia.org/wiki/Special:File...,429
98,Doña Juana,Colombie,http://commons.wikimedia.org/wiki/Special:File...,429


TÂCHE 2