2.0

In [2]:
import requests
import json

def get_article_revision_json(title):
    api_answers = []

    # create a base url for the api and then a normal url which is initially
    # just a copy of it
    # The following line is what the requests call is doing, basically.
    # f"http://en.wikipedia.org/w/api.php/?action=query&titles={title}&prop=revisions&rvprop=flags|timestamp|user|size|ids&rvlimit=500&format=json&continue="
    # e.g.: http://en.wikipedia.org/w/api.php/?action=query&titles=Soundgarden&prop=revisions&rvprop=flags|timestamp|user|size|ids&rvlimit=500&format=json
    wp_api_url = "https://lotr.fandom.com/api.php"

    parameters = {'action' : 'query',
                  'titles' : title,
                  'prop' : 'revisions',
                  'rvprop' : 'flags|timestamp|user|size|ids',
                  'rvlimit' : 500,
                  'format' : 'json',
                   }

    # we'll repeat this forever (i.e., we'll only stop when we find
    # the "break" command)
    while True:
        # this will wait for one second
        # time.sleep(1)
        
        # the first line open the urls but also handles unicode urls
        call = requests.get(wp_api_url, params=parameters)
        api_answer = call.json()
        
        # now we'll add this to whatever we are tracking
        api_answers.append(api_answer)
        
        # 'continue' tells us there's more revisions to add
        if 'continue' in api_answer.keys():
            # replace the 'continue' parameter with the contents of the
            # api_answer dictionary.
            parameters.update(api_answer['continue'])
        else:
            break
        
    return(api_answers)

In [4]:
page_titles = ['Gandalf', 'The_Council_of_Elrond', 'Sauron', 'Mordor', 'Aragorn']

lotr_fandom_file = r"C:\Users\Nicke\OneDrive\Desktop\CommunityDataScience\LOTRFandom.jsonl"

with open(lotr_fandom_file, 'w') as output_file:

    for page_title in page_titles:
        api_answers = get_article_revision_json(page_title)
        for api_answer in api_answers:
        
            print(json.dumps(api_answer), file=output_file)
            
print("done")

done


In [5]:
revisions = []

with open(lotr_fandom_file, 'r') as input_file:
    for line in input_file.readlines():
        api_answer = json.loads(line)
        
        # get the list of pages from the json object
        pages = api_answer["query"]["pages"]

        # for every page, (there should always be only one) get its revisions:
        for page_id in pages.keys():
            query_revisions = pages[page_id]["revisions"]
            title = pages[page_id]['title']
            
            for rev in query_revisions:
                #print(rev)
                # let's continue/skip this revision if the user is hidden
                if "userhidden" in rev.keys():
                    continue
                
                # 1: add a title field for the article because we're going to mix them together
                rev["title"] = title

                # 2: let's "recode" anon so it's true or false instead of present/missing
                if "anon" in rev.keys():
                    rev["anon"] = True
                else:
                    rev["anon"] = False

                # 3: let's recode "minor" in the same way
                if "minor" in rev.keys():
                    rev["minor"] = True
                else:
                    rev["minor"] = False

                # we're going to change the timestamp to make it work a little better in excel/spreadsheets
                rev["timestamp"] = rev["timestamp"].replace("T", " ")
                rev["timestamp"] = rev["timestamp"].replace("Z", "")

                # finally, save the revisions we've seen to a varaible
                revisions.append(rev)

2.1

In [7]:
Gandalf_revision_count = 0
The_Council_revision_count = 0
Sauron_revison_count = 0
Mordor_revision_count = 0
Aragorn_revision_count = 0

Gandalf_anon_count = 0
The_Council_anon_count = 0
Sauron_anon_count = 0
Mordor_anon_count = 0
Aragorn_anon_count = 0


for revision in revisions:
    if revision['title'] == "Gandalf":
        Gandalf_revision_count += 1
        if revision['anon'] == True:
            Gandalf_anon_count += 1
        
    if revision['title'] == "The Council of Elrond":
        The_Council_revision_count += 1
        if revision['anon'] == True:
            The_Council_anon_count += 1

    if revision['title'] == "Sauron":
            Sauron_revison_count += 1
            if revision['anon'] == True:
                Sauron_anon_count += 1
                
    if revision['title'] == "Mordor":
            Mordor_revision_count += 1
            if revision['anon'] == True:
                Mordor_anon_count += 1
                
    if revision['title'] == "Aragorn":
            Aragorn_revision_count += 1
            if revision['anon'] == True:
                Aragorn_anon_count += 1
                
Gandalf_percent = (Gandalf_anon_count / Gandalf_revision_count) * 100
Council_percent = (The_Council_anon_count / The_Council_revision_count) * 100
Sauron_percent = (Sauron_anon_count / Sauron_revison_count) * 100
Mordor_percent = (Mordor_anon_count / Mordor_revision_count) * 100
Aragorn_percent = (Aragorn_anon_count / Aragorn_revision_count) * 100
          
print("Gandalf Revisions", Gandalf_revision_count, "- Gandalf Anon Count", Gandalf_anon_count, "- Gandalf % Anon", "%.2f" % Gandalf_percent)
print("The Council of ELrond Revisions", The_Council_revision_count, "- The Council of Elrond Anon Count", The_Council_anon_count, "- The Council of Elrond % Anon", "%.2f" % Council_percent)
print("Sauron Revisions", Sauron_revison_count, "- Sauron Anon Count", Sauron_anon_count, "- Sauron % Anon", "%.2f" % Sauron_percent)
print("Mordor Revisions", Gandalf_revision_count, "- Mordor_revision_count", Mordor_anon_count, "- Mordor % Anon", "%.2f" % Mordor_percent)
print("Aragorn Revisions", Aragorn_revision_count, "- Aragorn Anon Count", Aragorn_anon_count, "- Aragorn % Anon", "%.2f" % Aragorn_percent)

Gandalf Revisions 859 - Gandalf Anon Count 137 - Gandalf % Anon 15.95
The Council of ELrond Revisions 1 - The Council of Elrond Anon Count 0 - The Council of Elrond % Anon 0.00
Sauron Revisions 1659 - Sauron Anon Count 513 - Sauron % Anon 30.92
Mordor Revisions 859 - Mordor_revision_count 155 - Mordor % Anon 39.24
Aragorn Revisions 10 - Aragorn Anon Count 3 - Aragorn % Anon 30.00


2.2

In [8]:
Gandalf_revision_count = 0
The_Council_revision_count = 0
Sauron_revison_count = 0
Mordor_revision_count = 0
Aragorn_revision_count = 0

Gandalf_minor_count = 0
The_Council_minor_count = 0
Sauron_minor_count = 0
Mordor_minor_count = 0
Aragorn_minor_count = 0


for revision in revisions:
    if revision['title'] == "Gandalf":
        Gandalf_revision_count += 1
        if revision['minor'] == True:
            Gandalf_minor_count += 1
        
    if revision['title'] == "The Council of Elrond":
        The_Council_revision_count += 1
        if revision['minor'] == True:
            The_Council_minor_count += 1

    if revision['title'] == "Sauron":
            Sauron_revison_count += 1
            if revision['minor'] == True:
                Sauron_minor_count += 1
                
    if revision['title'] == "Mordor":
            Mordor_revision_count += 1
            if revision['minor'] == True:
                Mordor_minor_count += 1
                
    if revision['title'] == "Aragorn":
            Aragorn_revision_count += 1
            if revision['minor'] == True:
                Aragorn_minor_count += 1
                
Gandalf_percent = (Gandalf_minor_count / Gandalf_revision_count) * 100
Council_percent = (The_Council_minor_count / The_Council_revision_count) * 100
Sauron_percent = (Sauron_minor_count / Sauron_revison_count) * 100
Mordor_percent = (Mordor_minor_count / Mordor_revision_count) * 100
Aragorn_percent = (Aragorn_minor_count / Aragorn_revision_count) * 100
          
print("Gandalf Revisions", Gandalf_revision_count, "- Gandalf Minor Count", Gandalf_minor_count, "- Gandalf % Minor", "%.2f" % Gandalf_percent)
print("The Council of ELrond Revisions", The_Council_revision_count, "- The Council of Elrond Minor Count", The_Council_minor_count, "- The Council of Elrond % Minor", "%.2f" % Council_percent)
print("Sauron Revisions", Sauron_revison_count, "- Sauron Minor Count", Sauron_minor_count, "- Sauron % Minor", "%.2f" % Sauron_percent)
print("Mordor Revisions", Gandalf_revision_count, "- Mordor_revision_count", Mordor_minor_count, "- Mordor % Minor", "%.2f" % Mordor_percent)
print("Aragorn Revisions", Aragorn_revision_count, "- Aragorn Minor Count", Aragorn_minor_count, "- Aragorn % Minor", "%.2f" % Aragorn_percent)

Gandalf Revisions 859 - Gandalf Minor Count 131 - Gandalf % Minor 15.25
The Council of ELrond Revisions 1 - The Council of Elrond Minor Count 0 - The Council of Elrond % Minor 0.00
Sauron Revisions 1659 - Sauron Minor Count 213 - Sauron % Minor 12.84
Mordor Revisions 859 - Mordor_revision_count 67 - Mordor % Minor 16.96
Aragorn Revisions 10 - Aragorn Minor Count 2 - Aragorn % Minor 20.00


2.3

In [9]:
edits_by_month = {}
for rev in revisions:
    month_string = rev['timestamp'][0:7]

    if month_string in edits_by_month.keys():
        edits_by_month[month_string] = edits_by_month[month_string] + 1
    else:
        edits_by_month[month_string] = 1
        
# write out a TSV file we could analyze in google docs
with open(r"C:\Users\Nicke\OneDrive\Desktop\CommunityDataScience\lotr_fandom_monthly_summary_data.tsv", "w", encoding='utf-8') as lotr_file:
    
    print("date\trevisions", file=lotr_file)

    # iterate through every day and print out data into the file
    for month_string in edits_by_month.keys():
        print(f"{month_string}-01\t{edits_by_month[month_string]}", file=lotr_file)

Excel Worksheet https://1drv.ms/x/s!Ag-V2XcGZQo8h03jvFZLm9YA3O3k?e=Pe6AxV