# Wikipedia Edits

## Setup and Imports

In [46]:
import requests
import json
import datetime
import boto3
import pandas as pd

# USER CONFIGURATION
USERNAME = "luciana" 
BUCKET_NAME = "luciana-wikidata"

QUERY_DATE = "2025-11-29" 

In [47]:

url_date = QUERY_DATE.replace("-", "/")
url = f"https://wikimedia.org/api/rest_v1/metrics/pageviews/top/en.wikipedia/all-access/{url_date}"
headers = {"User-Agent": f"WikiViewsPipeline/1.0 ({USERNAME}@example.com)"}

wiki_server_response = requests.get(url, headers=headers)
wiki_response_parsed = wiki_server_response.json()

top_articles = wiki_response_parsed["items"][0]["articles"]
current_time = datetime.datetime.now(datetime.timezone.utc)
json_lines = ""

for page in top_articles:
    record = {
        "title": page["article"], # Map API 'article' to 'title' per requirement
        "views": page["views"],   # Map API 'views' to 'views'
        "rank": page["rank"],
        "date": QUERY_DATE,
        "retrieved_at": current_time.replace(tzinfo=None).isoformat(),
    }
    json_lines += json.dumps(record) + "\n"

# Upload to S3
s3 = boto3.client('s3')
s3_key = f"raw-views/raw-views-{QUERY_DATE}.json"

s3.put_object(
    Bucket=BUCKET_NAME,
    Key=s3_key,
    Body=json_lines
)

print(f"Successfully transformed {len(top_articles)} records")
print(f"Uploaded to: s3://{BUCKET_NAME}/{s3_key}")

Successfully transformed 999 records
Uploaded to: s3://luciana-wikidata/raw-views/raw-views-2025-11-29.json


In [48]:
S3_WIKI_BUCKET = "luciana-wikidata"
s3 = boto3.client("s3")

bucket_names = [bucket["Name"] for bucket in s3.list_buckets()["Buckets"]]
s3_key = f"raw-views/raw-views-{date}.json"
if S3_WIKI_BUCKET not in bucket_names:
    s3.create_bucket(
        Bucket=S3_WIKI_BUCKET,
        CreateBucketConfiguration={"LocationConstraint": "eu-west-1"},
    )
    print(f"Created new bucket: {S3_WIKI_BUCKET}")
else:
    print(f"Using existing bucket: {S3_WIKI_BUCKET}")


Using existing bucket: luciana-wikidata


In [49]:
# Test Lab 1
assert USERNAME != "<username>", "Please set your USERNAME at the top of the notebook"
assert S3_WIKI_BUCKET.endswith("-wikidata"), "Bucket name must end with '-wikidata'"

try:
    s3.head_bucket(Bucket=S3_WIKI_BUCKET)
    print(f"Bucket {S3_WIKI_BUCKET} exists!")
except Exception as e:
    print(f"Bucket {S3_WIKI_BUCKET} not found: {e}")
    raise

Bucket luciana-wikidata exists!


In [50]:
# LAB 2: Upload json_lines directly to S3
# YOUR SOLUTION COMES HERE =========================
s3_key = f"raw-views/raw-views-{date.strftime('%Y-%m-%d')}.json"
s3.put_object(
    Bucket=S3_WIKI_BUCKET,
    Key=s3_key,
    Body=json_lines,
)
print(f"Uploaded {len(top_articles)} records to s3://{S3_WIKI_BUCKET}/{s3_key}")

Uploaded 999 records to s3://luciana-wikidata/raw-views/raw-views-2025-11-28.json


In [51]:
# Test Lab 2
expected_key = f"raw-views/raw-views-{date.strftime('%Y-%m-%d')}.json"
try:
    s3.head_object(Bucket=S3_WIKI_BUCKET, Key=expected_key)
    print(f"File uploaded successfully to s3://{S3_WIKI_BUCKET}/{expected_key}")
except Exception as e:
    print(f"File not found at s3://{S3_WIKI_BUCKET}/{expected_key}")
    raise

File uploaded successfully to s3://luciana-wikidata/raw-views/raw-views-2025-11-28.json
