In [1]:
import json
import requests


def get_article_revision_json_from_wikipedia(title):
    """Download the full revision history of one English Wikipedia article.

    Queries the MediaWiki API (``action=query``, ``prop=revisions``) asking for
    up to 500 revisions per request, and keeps following the ``continue`` block
    of each response until the API no longer returns one.

    Args:
        title: Title of the article to look up, e.g. ``'Avatar (2009 film)'``.

    Returns:
        A list with one decoded JSON response (a dict) per API request, in the
        order the responses were received.

    Raises:
        requests.HTTPError: If a request comes back with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        RuntimeError: If the API answers with an ``error`` payload.
    """
    api_answers = []
    # base URL of the English Wikipedia API (HTTPS avoids a redirect from HTTP)
    wp_api_url = "https://en.wikipedia.org/w/api.php"
    # API parameters to get revision data
    parameters = {
        'action': 'query',
        'titles': title,
        'prop': 'revisions',
        'rvprop': 'flags|timestamp|user|size|ids',
        'rvlimit': 500,
        'format': 'json'
    }
    while True:
        # requests has no default timeout; without one a stalled connection
        # would block the notebook indefinitely
        call = requests.get(wp_api_url, params=parameters, timeout=30)
        # fail loudly on HTTP errors instead of trying to parse an error page
        call.raise_for_status()
        api_answer = call.json()
        # the API reports request-level problems inside the JSON body, so
        # surface them here rather than storing an answer without 'query'
        if 'error' in api_answer:
            raise RuntimeError(
                f"Wikipedia API error for {title!r}: {api_answer['error']}"
            )
        api_answers.append(api_answer)
        # no 'continue' block means the last batch of revisions was received
        if 'continue' not in api_answer:
            break
        # carry the continuation tokens into the next request
        parameters.update(api_answer['continue'])
    return api_answers


# Avatar-related Wikipedia articles whose revision histories are analysed below
article_titles = [
    'Avatar (2009 film)',
    'Avatar: The Last Airbender',
    'Avatar: The Last Airbender (season 1)',
    'Avatar: The Last Airbender (season 2)',
    'Avatar: The Last Airbender (season 3)'
]

# Maps each article title to the list of API responses returned for it
article_revisions_from_wikipedia = {}
for title in article_titles:
    article_revisions_from_wikipedia.update(
        {title: get_article_revision_json_from_wikipedia(title)}
    )

# Keep a copy of the downloaded data on disk as a single JSON document
with open("article_revisions_from_wikipedia.json", mode="w") as file_out:
    file_out.write(json.dumps(article_revisions_from_wikipedia))

In [2]:
# 1.1 What proportion of those edits were made by users without accounts ("anon")?
for title in article_titles:
    # Flatten the revisions of this article across every stored API response.
    # A page entry without a 'revisions' key contributes nothing instead of
    # raising KeyError (same handling as the 1.3 cell).
    revisions = [
        rev
        for api_answer in article_revisions_from_wikipedia[title]
        for page in api_answer['query']['pages'].values()
        for rev in page.get('revisions', [])
    ]
    total_edits = len(revisions)
    # a revision counts as anonymous when it carries the 'anon' key
    anon_edits = sum('anon' in rev for rev in revisions)
    # avoid dividing by zero when no revisions were found for the article
    anon_prop = anon_edits / total_edits if total_edits > 0 else 0
    # print results for the current article
    print(f"{title}: Proportion of edits made by anonymous users: {anon_prop:.2f}")


Avatar (2009 film): Proportion of edits made by anonymous users: 0.12
Avatar: The Last Airbender: Proportion of edits made by anonymous users: 0.49
Avatar: The Last Airbender (season 1): Proportion of edits made by anonymous users: 0.35
Avatar: The Last Airbender (season 2): Proportion of edits made by anonymous users: 0.33
Avatar: The Last Airbender (season 3): Proportion of edits made by anonymous users: 0.40


In [8]:
# 1.2 What proportion of those edits were marked as "minor"?
for title in article_titles:
    # Flatten the revisions of this article across every stored API response.
    # A page entry without a 'revisions' key contributes nothing instead of
    # raising KeyError (same handling as the 1.3 cell).
    revisions = [
        rev
        for api_answer in article_revisions_from_wikipedia[title]
        for page in api_answer['query']['pages'].values()
        for rev in page.get('revisions', [])
    ]
    total_edits = len(revisions)
    # a revision counts as minor when it carries the 'minor' key
    minor_edits = sum('minor' in rev for rev in revisions)

    # avoid dividing by zero when no revisions were found for the article
    minor_prop = minor_edits / total_edits if total_edits > 0 else 0

    # print results for the current article
    print(f"{title}: Proportion of edits marked as minor: {minor_prop:.2f}")


Avatar (2009 film): Proportion of edits marked as minor: 0.25
Avatar: The Last Airbender: Proportion of edits marked as minor: 0.16
Avatar: The Last Airbender (season 1): Proportion of edits marked as minor: 0.15
Avatar: The Last Airbender (season 2): Proportion of edits marked as minor: 0.16
Avatar: The Last Airbender (season 3): Proportion of edits marked as minor: 0.15


In [10]:
# 1.3 make and share a visualization of the total number of edits across those 5 articles over time
# For every article, tally how many revisions share each exact timestamp string.
edits_by_datetime = {}
for title in article_titles:
    counts = edits_by_datetime.setdefault(title, {})
    for api_answer in article_revisions_from_wikipedia[title]:
        for page_data in api_answer['query']['pages'].values():
            for rev in page_data.get('revisions', []):
                stamp = rev['timestamp']
                if stamp in counts:
                    counts[stamp] += 1
                else:
                    counts[stamp] = 1

# Every distinct timestamp seen in any article, in ascending order
unique_stamps = set()
for counts in edits_by_datetime.values():
    unique_stamps.update(counts)
all_dates = sorted(unique_stamps)

# Write a tab-separated table: one row per timestamp, one column per article
with open('avatar_edits_from_wikipedia.tsv', 'w') as f:
    print("date", "\t".join(article_titles), sep="\t", file=f)
    for date in all_dates:
        cells = [str(edits_by_datetime[title].get(date, 0)) for title in article_titles]
        print(date, "\t".join(cells), sep="\t", file=f)