### Step 1: Getting the Article and Population Data

In [37]:
import pandas as pd
import numpy as np
import time
# These are standard python modules
import json, time, urllib.parse
#
# The 'requests' module is not a standard Python module. You will need to install this with pip/pip3 if you do not already have it
import requests

In [2]:
DATA_PATH = '/Users/qwert/Documents/UW_Data_Science/Human_Centered_Data_Science/Homeworks/data-512-homework_2/data/excel_files/' 

In [3]:
df_politicians = pd.read_excel(DATA_PATH + 'politicians_by_country_SEPT_2022.xlsx')
print("The length of the politician excel file is {}".format(len(df_politicians)))

The length of the politician excel file is 7584


In [4]:
df_politicians.head(2)

Unnamed: 0,name,url,country
0,Shahjahan Noori,https://en.wikipedia.org/wiki/Shahjahan_Noori,Afghanistan
1,Abdul Ghafar Lakanwal,https://en.wikipedia.org/wiki/Abdul_Ghafar_Lak...,Afghanistan


In [5]:
df_politicians.drop_duplicates(inplace=True)
print("The length of the politician excel file after removing duplicates is {}".format(len(df_politicians)))

The length of the politician excel file after removing duplicates is 7582


In [6]:
df_population = pd.read_excel(DATA_PATH + 'population_by_country_2022.xlsx')

In [7]:
print("The length of the population excel file is {}".format(len(df_population)))
df_population.head(5)

The length of the population excel file is 233


Unnamed: 0,Geography,Population (millions)
0,WORLD,7963.0
1,AFRICA,1419.0
2,NORTHERN AFRICA,251.0
3,Algeria,44.9
4,Egypt,103.5


In [8]:
df_population.drop_duplicates(inplace=True)
print("The length of the population excel file is {}".format(len(df_population)))

The length of the population excel file is 233


In [14]:
df_population['Continent'] = df_population['Geography']

In [15]:
df_population

Unnamed: 0,Geography,Population (millions),Region,Continent
0,WORLD,7963.0,WORLD,WORLD
1,AFRICA,1419.0,AFRICA,AFRICA
2,NORTHERN AFRICA,251.0,NORTHERN AFRICA,NORTHERN AFRICA
3,Algeria,44.9,Algeria,Algeria
4,Egypt,103.5,Egypt,Egypt
...,...,...,...,...
228,Samoa,0.2,Samoa,Samoa
229,Solomon Islands,0.7,Solomon Islands,Solomon Islands
230,Tonga,0.1,Tonga,Tonga
231,Tuvalu,0.0,Tuvalu,Tuvalu


In [16]:
def check_upper(x):
    if x.isupper():
        return x
    else:
        return None

In [17]:
df_population['Continent'] = df_population['Continent'].apply(check_upper)

In [22]:
df_population['Continent'] = df_population['Continent'].fillna(method='ffill')
df_population.head(3)

Unnamed: 0,country,Population (millions),Region,Continent
0,WORLD,7963.0,WORLD,WORLD
1,AFRICA,1419.0,AFRICA,AFRICA
2,NORTHERN AFRICA,251.0,NORTHERN AFRICA,NORTHERN AFRICA


In [23]:
df_population.columns = df_population.columns.str.replace('Geography', 'country')

In [24]:
df_population

Unnamed: 0,country,Population (millions),Region,Continent
0,WORLD,7963.0,WORLD,WORLD
1,AFRICA,1419.0,AFRICA,AFRICA
2,NORTHERN AFRICA,251.0,NORTHERN AFRICA,NORTHERN AFRICA
3,Algeria,44.9,NORTHERN AFRICA,NORTHERN AFRICA
4,Egypt,103.5,NORTHERN AFRICA,NORTHERN AFRICA
...,...,...,...,...
228,Samoa,0.2,OCEANIA,OCEANIA
229,Solomon Islands,0.7,OCEANIA,OCEANIA
230,Tonga,0.1,OCEANIA,OCEANIA
231,Tuvalu,0.0,OCEANIA,OCEANIA


In [25]:
pd.merge(df_politicians, df_population, on='country', how='left')

Unnamed: 0,name,url,country,Population (millions),Region,Continent
0,Shahjahan Noori,https://en.wikipedia.org/wiki/Shahjahan_Noori,Afghanistan,41.1,SOUTH ASIA,SOUTH ASIA
1,Abdul Ghafar Lakanwal,https://en.wikipedia.org/wiki/Abdul_Ghafar_Lak...,Afghanistan,41.1,SOUTH ASIA,SOUTH ASIA
2,Majah Ha Adrif,https://en.wikipedia.org/wiki/Majah_Ha_Adrif,Afghanistan,41.1,SOUTH ASIA,SOUTH ASIA
3,Haroon al-Afghani,https://en.wikipedia.org/wiki/Haroon_al-Afghani,Afghanistan,41.1,SOUTH ASIA,SOUTH ASIA
4,Tayyab Agha,https://en.wikipedia.org/wiki/Tayyab_Agha,Afghanistan,41.1,SOUTH ASIA,SOUTH ASIA
...,...,...,...,...,...,...
7577,Rekayi Tangwena,https://en.wikipedia.org/wiki/Rekayi_Tangwena,Zimbabwe,16.3,EASTERN AFRICA,EASTERN AFRICA
7578,Josiah Tongogara,https://en.wikipedia.org/wiki/Josiah_Tongogara,Zimbabwe,16.3,EASTERN AFRICA,EASTERN AFRICA
7579,Langton Towungana,https://en.wikipedia.org/wiki/Langton_Towungana,Zimbabwe,16.3,EASTERN AFRICA,EASTERN AFRICA
7580,Herbert Ushewokunze,https://en.wikipedia.org/wiki/Herbert_Ushewokunze,Zimbabwe,16.3,EASTERN AFRICA,EASTERN AFRICA


### Population File

In [26]:
df_population_2 = pd.read_csv('/Users/qwert/Documents/UW_Data_Science/Human_Centered_Data_Science/Homeworks/data-512-homework_2/data/Cleaned_data/population_by_country_2022_cleaned.csv')

In [27]:
df_population_2

Unnamed: 0,continent,region,country,population
0,AFRICA,NORTHERN AFRICA,Algeria,44.9
1,AFRICA,NORTHERN AFRICA,Egypt,103.5
2,AFRICA,NORTHERN AFRICA,Libya,6.8
3,AFRICA,NORTHERN AFRICA,Morocco,36.7
4,AFRICA,NORTHERN AFRICA,Sudan,46.9
...,...,...,...,...
204,OCEANIA,OCEANIA,Samoa,0.2
205,OCEANIA,OCEANIA,Solomon Islands,0.7
206,OCEANIA,OCEANIA,Tonga,0.1
207,OCEANIA,OCEANIA,Tuvalu,0.0


## Step 2: Getting Article Quality Predictions

In [28]:
# The basic English Wikipedia API endpoint
API_ENWIKIPEDIA_ENDPOINT = "https://en.wikipedia.org/w/api.php"

# We'll assume that there needs to be some throttling for these requests - we should always be nice to a free data resource
API_LATENCY_ASSUMED = 0.002       # Assuming roughly 2ms latency on the API and network
API_THROTTLE_WAIT = (1.0/100.0)-API_LATENCY_ASSUMED

# When making automated requests we should include something that is unique to the person making the request
# This should include an email - your UW email would be good to put in there
REQUEST_HEADERS = {
    'User-Agent': '<sdawark@uw.edu>, University of Washington, MSDS DATA 512 - AUTUMN 2022',
}


# This is a string of additional page properties that can be returned see the Info documentation for
# what can be included. If you don't want any this can simply be the empty string
PAGEINFO_EXTENDED_PROPERTIES = "talkid|url|watched|watchers"
#PAGEINFO_EXTENDED_PROPERTIES = ""

# This template lists the basic parameters for making this
PAGEINFO_PARAMS_TEMPLATE = {
    "action": "query",
    "format": "json",
    "titles": "",           # to simplify this should be a single page title at a time
    "prop": "info",
    "inprop": PAGEINFO_EXTENDED_PROPERTIES
}

In [29]:
def request_pageinfo_per_article(article_title = None, 
                                 endpoint_url = API_ENWIKIPEDIA_ENDPOINT, 
                                 request_template = PAGEINFO_PARAMS_TEMPLATE,
                                 headers = REQUEST_HEADERS):
    # Make sure we have an article title
    if not article_title: return None
    
    request_template['titles'] = article_title
        
    # make the request
    try:
        # we'll wait first, to make sure we don't exceed the limit in the situation where an exception
        # occurs during the request processing - throttling is always a good practice with a free
        # data source like Wikipedia - or any other community sources
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        response = requests.get(endpoint_url, headers=headers, params=request_template)
        json_response = response.json()
    except Exception as e:
        print(e)
        json_response = None
    return json_response

In [30]:
# The current ORES API endpoint
API_ORES_SCORE_ENDPOINT = "https://ores.wikimedia.org/v3"
# A template for mapping to the URL
API_ORES_SCORE_PARAMS = "/scores/{context}/{revid}/{model}"

# Use some delays so that we do not hammer the API with our requests
API_LATENCY_ASSUMED = 0.002       # Assuming roughly 2ms latency on the API and network
API_THROTTLE_WAIT = (1.0/100.0)-API_LATENCY_ASSUMED

# When making automated requests we should include something that is unique to the person making the request
# This should include an email - your UW email would be good to put in there
REQUEST_HEADERS = {
    'User-Agent': '<uwnetid@uw.edu>, University of Washington, MSDS DATA 512 - AUTUMN 2022'
}

# A dictionary of English Wikipedia article titles (keys) and sample revision IDs that can be used for this ORES scoring example
ARTICLE_REVISIONS = { 'Bison':1085687913 , 'Northern flicker':1086582504 , 'Red squirrel':1083787665 , 'Chinook salmon':1085406228 , 'Horseshoe bat':1060601936 }

# This template lists the basic parameters for making an ORES request
ORES_PARAMS_TEMPLATE = {
    "context": "enwiki",        # which WMF project for the specified revid
    "revid" : "",               # the revision to be scored - this will probably change each call
    "model": "articlequality"   # the AI/ML scoring model to apply to the reviewion
}
#
# The current ML models for English wikipedia are:
#   "articlequality"
#   "articletopic"
#   "damaging"
#   "version"
#   "draftquality"
#   "drafttopic"
#   "goodfaith"
#   "wp10"
#
# The specific documentation on these is scattered so if you want to use one you'll have to look around.
#

In [31]:
def request_ores_score_per_article(article_revid = None, 
                                   endpoint_url = API_ORES_SCORE_ENDPOINT, 
                                   endpoint_params = API_ORES_SCORE_PARAMS, 
                                   request_template = ORES_PARAMS_TEMPLATE,
                                   headers = REQUEST_HEADERS,
                                   features=False):
    # Make sure we have an article revision id
    if not article_revid: return None
    
    # set the revision id into the template
    request_template['revid'] = article_revid
    
    # now, create a request URL by combining the endpoint_url with the parameters for the request
    request_url = endpoint_url+endpoint_params.format(**request_template)
    
    # the features used by the ML model can sometimes be returned as well as scores
    if features:
        request_url = request_url+"?features=true"
    
    # make the request
    try:
        # we'll wait first, to make sure we don't exceed the limit in the situation where an exception
        # occurs during the request processing - throttling is always a good practice with a free
        # data source like ORES - or other community sources
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        response = requests.get(request_url, headers=headers)
        json_response = response.json()
    except Exception as e:
        print(e)
        json_response = None
    return json_response


In [32]:
def outoutToJson(filename, data):
    out_path = filename +'.json'
    with open(out_path, 'w') as f:
        json.dump(data, f)

In [38]:
ARTICLE_NO_REVISION = []
ARTICLE_REVISIONS = {}



ARTICLE_TITLES = df_politicians['name']



for i in range(0, len(ARTICLE_TITLES)):
    info = request_pageinfo_per_article(ARTICLE_TITLES[i])
    obj = info['query']['pages']
    info_key = list(obj.keys())[0]
    revision_id = 0
    if 'lastrevid' in info['query']['pages'][info_key]:
        revision_id = info['query']['pages'][info_key]['lastrevid']
    # Check if article as a revision
    if revision_id and revision_id>0:
        # Update ARTICLE_REVISIONS dict with the article and last revision number
        ARTICLE_REVISIONS.update({ARTICLE_TITLES[i]:revision_id})
    else:
        # update the list of articles with no revision
        ARTICLE_NO_REVISION.append(ARTICLE_TITLES[i])
    print("Processed article:", ARTICLE_TITLES[i], revision_id)
outputToJson("/Users/qwert/Documents/UW_Data_Science/Human_Centered_Data_Science/Homeworks/data-512-homework_2/data/json/article_revisions", ARTICLE_REVISIONS)
print('Completed')

Processed article: Shahjahan Noori 1099689043
Processed article: Abdul Ghafar Lakanwal 943562276
Processed article: Majah Ha Adrif 852404094
Processed article: Haroon al-Afghani 1095102390
Processed article: Tayyab Agha 1104998382
Processed article: Ahmadullah Wasiq 1109361754
Processed article: Aziza Ahmadyar 1087211008
Processed article: Muqadasa Ahmadzai 1082489593
Processed article: Mohammad Sarwar Ahmedzai 1038918070
Processed article: Amir Muhammad Akhundzada 1069322182
Processed article: Nasrullah Baryalai Arsalai 1095526840
Processed article: Mohammad Asim Asim 1013838830
Processed article: Atiqullah Atifmal 1112407669
Processed article: Abdul Rahim Ayoubi 1108886061
Processed article: Alhaj Mutalib Baig 1111494041
Processed article: Ismael Balkhi 1112534409
Processed article: Abdul Baqi Turkistani 889226470
Processed article: Mohammad Ghous Bashiri 1102150221
Processed article: Abas Basir 1098419766
Processed article: Jan Baz 997027082
Processed article: Ahmad Behzad 110394829

Processed article: Behxhet Nepravishta 1114419169
Processed article: Adrian Neritani 1112281129
Processed article: Sali Nivica 1074384873
Processed article: Thoma Orologa 1114419178
Processed article: Bedri Pejani 1080373501
Processed article: Pertef Pogoni 1071576384
Processed article: Kadri Prishtina 1114907355
Processed article: Ymer Prizreni 1114419180
Processed article: Avni Rustemi 1113688253
Processed article: Chatin Sarachi 1089622596
Processed article: Nebi Sefa 978173646
Processed article: Eduard Shalsi 1090651886
Processed article: Ylli Vesel Shehu 1002857261
Processed article: Athanas Shundi 1090001664
Processed article: Xhelal Sveçla 1110419834
Processed article: Mehmet Tahsini 1089358942
Processed article: Ibrahim Temo 1114899959
Processed article: Pavli Terka 1107079359
Processed article: Afrim Tole 1091240960
Processed article: Sejfi Vllamasi 1073823015
Processed article: Syrja Vlora 1109383587
Processed article: Visarion Xhuvani 1115321286
Processed article: Visar Ymer

Processed article: Miguel Ángel Pesce 1096459313
Processed article: Carlos Petroni 1100374393
Processed article: Alejo Peyret 1091481103
Processed article: Aldo Pignanelli 1106856167
Processed article: Enrique S. Quintana 1016343362
Processed article: Amán Rawson 1096678788
Processed article: Martín Sabbatella 1073202733
Processed article: Juan Pablo Schiavi 1069152180
Processed article: Héctor Tizón 1096176230
Processed article: Claudio Uberti 1032047313
Processed article: Arístides Villanueva 994807413
Processed article: Artashes Abeghyan 1088583026
Processed article: Aram Achekbashian 1046857776
Processed article: Hovsep Arghutian 1095822812
Processed article: Mikael Atabekyan 1008108682
Processed article: Tigran Bekzadyan 1027604306
Processed article: Vigen Chitechyan 1099308421
Processed article: Hayk Chobanyan 1093711595
Processed article: Daniel-Bek of Sassun 1114419837
Processed article: Gurgen Egiazaryan 1103745376
Processed article: Parunak Ferukhan 1114420012
Processed artic

Processed article: Ali Huseynli 1090157253
Processed article: Alisahib Huseynov 1080007304
Processed article: Ali Ibrahimov 1067793317
Processed article: Mammad Isgandarov 1029087103
Processed article: Vidadi Isgandarov 1092547948
Processed article: Ajdar Ismailov 1096490381
Processed article: Saftar Jafarov 1045812028
Processed article: Rovshan Javadov 1108873335
Processed article: Anar Karimov 1109685880
Processed article: Boris Kevorkov 1072772142
Processed article: Gurban Khalilov 1094981152
Processed article: Ahmad Kordary 1093750341
Processed article: Ilham Madatov 1011331249
Processed article: Musa Mammadov 1084134498
Processed article: Ramin Mammadov 1020887005
Processed article: Elshad Mirbashir oghlu 1084157904
Processed article: Fuad Naghiyev 1081854289
Processed article: Eldar Namazov 1087483504
Processed article: Siyavush Novruzov 1114475460
Processed article: Sadig Rahimov 1074822020
Processed article: Suleyman Rahimov 1096039626
Processed article: Huseyn Rahmanov 1106104

Processed article: Vyacheslav Nikolayevich Kuznetsov 1059194181
Processed article: Valery Levaneuski 1103967180
Processed article: Pavel Liohki 1112377212
Processed article: Uladzimir Mackievič 1073431420
Processed article: Vladimir Parfenovich 1070648178
Processed article: Jan Pazniak 1109961050
Processed article: Aleksandr Potupa 1114686271
Processed article: Pyotr Prakapovich 1105481408
Processed article: Alexander Radkov 1091859585
Processed article: Symon Rak-Michajłoŭski 1108740902
Processed article: Vladimir Vasilievich Rusakevich 1100971465
Processed article: Ihar Rynkevich 1064451133
Processed article: Aliaksandr Shakutsin 1107954358
Processed article: Nikolai Shelyagovich 1092527212
Processed article: Jan Stankievič 1089350562
Processed article: Mikola Statkevich 1105528576
Processed article: Sergei Tikhanovsky 1110696783
Processed article: Alaksandar Ćvikievič 1113785358
Processed article: Aleksandr Turchin 1086196169
Processed article: Franak Viačorka 1110821628
Processed a

Processed article: Jaime de Zudáñez 1057374473
Processed article: Elvira Abdić-Jelenović 1092410267
Processed article: Mehmed Alajbegović 1090097632
Processed article: Boško Baškot 945729760
Processed article: Hasan Brkić 1108766075
Processed article: Salko Bukvarević 1103476585
Processed article: Mirsad Ćeman 1059620444
Processed article: Selmo Cikotić 1109891116
Processed article: Sabina Ćudić 1109170821
Processed article: Milorad Dodik 1115261935
Processed article: Dženan Đonlagić 1023971751
Processed article: Nijaz Duraković 1065317570
Processed article: Edin Forto 1114128635
Processed article: Dževad Galijašević 1080524051
Processed article: Josip Grubeša 1078976634
Processed article: Ankica Gudeljević 1109892248
Processed article: Muhamed Hadžijamaković 1069745508
Processed article: Mirsad Hadžikadić 1114039434
Processed article: Sanjin Halimović 1098646648
Processed article: Husein Miljković 1098413355
Processed article: Jasmin Imamović 1060821223
Processed article: Alija Izetbe

Processed article: Aleksi Ivanov 1105069303
Processed article: Georgi Ivanov (mayor) 937372283
Processed article: Ljubica Ivošević Dimitrov 1070575609
Processed article: Grozdan Karadzhov 1066070185
Processed article: Vladimir Karolev 1099282655
Processed article: Georgi Kirkov 1005695384
Processed article: Spiro Kitinchev 1063738922
Processed article: Kalina Konstantinova 1104793980
Processed article: Kostadin Kostadinov (professor) 1110428529
Processed article: Gavril Krastevich 1081400175
Processed article: Nikola Lazarov 1055871409
Processed article: Ivan Marazov 917237234
Processed article: Venko Markovski 1114988749
Processed article: Sebihan Mehmed 1092778647
Processed article: Ilian Mihov 1114357808
Processed article: Nikola Minchev 1114820940
Processed article: Plamen Panayotov 1096862893
Processed article: Vladislav Panev 1054474125
Processed article: Asparuh Panov 1022365931
Processed article: Daniel Panov 1077856970
Processed article: Aleksandar Paunov 1003790540
Processed 

Processed article: Laoukein Kourayo Médard 872513852
Processed article: Abbo Nassour 904253122
Processed article: Tahir Hamid Nguilin 1085015143
Processed article: Gontchomé Sahoulba 1100566547
Processed article: Quatre Sou Quatre 1010289028
Processed article: Djimadoum Tiraina 1112094593
Processed article: Saadie Goukouni Weddeye 1114946220
Processed article: Patricio Achurra 1063051282
Processed article: Hermes Ahumada 1061888618
Processed article: Felipe Alessandri 1115021687
Processed article: Lidia Amarales 1109042122
Processed article: Ximena Anza 1107215667
Processed article: Aurora Argomedo 1060990009
Processed article: Claudio Arredondo 1098030047
Processed article: Francisco Astaburuaga Cienfuegos 1097538961
Processed article: Camilo Ballesteros 1097131738
Processed article: Jaime Bassa 1097539409
Processed article: Alexandra Benado 1096936718
Processed article: Diego José Benavente 1069288880
Processed article: Harald Beyer (politician) 1094177814
Processed article: José Lui

Processed article: Ibrahim Aboubacar 1104075788
Processed article: Mohamed Ahmed-Chamanga 962865723
Processed article: Saïd Ibrahim Ben Ali 1051416555
Processed article: Mohamed Bacar 1045488214
Processed article: Chamina Ben Mohamed 1091671184
Processed article: Saïd Mohamed Cheikh 1036446434
Processed article: Mohamed Dahalani 1070905174
Processed article: Dawiat Mohamed 1112068629
Processed article: Ahmed Ben Said Djaffar 1103330010
Processed article: Abdou Soulé Elbak 1039771007
Processed article: Fahmi Said Ibrahim 936630092
Processed article: Gaston Feuillard 906903655
Processed article: Siti Kassim 1112069104
Processed article: Said Ali Kemal 1054672013
Processed article: Sittou Raghadat Mohamed 1069963767
Processed article: Djaffar Ahmed Said 1112420866
Processed article: Abdallah Said Sarouma 1043132737
Processed article: Moussa Toybou 790006774
Processed article: Léon Angor 1077197673
Processed article: Joseph Badiabio 1034243441
Processed article: Frédéric Bintsamou 10586677

Processed article: William Forbes (Talamancan king) 953571529
Processed article: Jean-Jacques Béchio 1097445644
Processed article: Kandia Camara 1103563722
Processed article: Charles Koffi Diby 1108506945
Processed article: Étienne Djaument 1102789374
Processed article: Loubo Augustin Djessou 1055941458
Processed article: Laurent Dona Fologo 1114825081
Processed article: Michel Gueu 1107654367
Processed article: Marcel Zadi Kessy 1101937624
Processed article: Kaba Nialé 1096406056
Processed article: Sékou Sanogo 994546487
Processed article: Sidi Tiémoko Touré 1091776866
Processed article: Mato Arlović 995029235
Processed article: Antun Augustinčić 1107956669
Processed article: Jakov Blažević 1107642810
Processed article: Mirko Bogović 1111957117
Processed article: Zvonko Brkić 1079673510
Processed article: Andrija Torkvat Brlić 1115206658
Processed article: Josip Broz Tito 1112945222
Processed article: Nadežda Čačinovič 1112933531
Processed article: Melko Čingrija 1078887578
Processed 

Processed article: Richard Sacher 1092501154
Processed article: Tomáš Eduard Šilinger 1078089986
Processed article: Karel Štogl 1098811438
Processed article: Antonín Cyril Stojan 1081455346
Processed article: Matěj Stropnický 1079854598
Processed article: Jiří Svoboda (director) 1049133241
Processed article: Miroslav Tetter 1098599146
Processed article: Leopold, Count von Thun und Hohenstein 1096753974
Processed article: Tomáš Vandas 1110388860
Processed article: Jiří Ventruba 1106895401
Processed article: Jan Vrba (politician) 1104640063
Processed article: Christian August Broberg 1106575496
Processed article: Peter Vogelius Deinboll 1086892227
Processed article: Niko Grünfeld 1076895271
Processed article: Petra Petersen 956506009
Processed article: Henning Podebusk 1065426004
Processed article: Mads Rørvig 1011343841
Processed article: Grethe Rostbøll 1098592394
Processed article: Theresa Scavenius 1053774991
Processed article: Erna Sørensen 923585617
Processed article: Elisa Ussing 

Processed article: Abraham Kidane 1007942515
Processed article: Mohammed Saeed Nawed 723260380
Processed article: Ibrahim Omer 1106896920
Processed article: Romodan Mohammed Nur 1104147526
Processed article: Lehte Hainsalu 1063515288
Processed article: Carl Robert Jakobson 1109789577
Processed article: August Janson 1053019919
Processed article: Elmar Järvesoo 1038769527
Processed article: Sergei Jürgens 1028657999
Processed article: Karl Kark 1098677715
Processed article: Aleksander Kask 1048418741
Processed article: Harald Laksberg 1092962963
Processed article: Jüri Liim 1093563524
Processed article: Jaan Lippmaa 1108623962
Processed article: Aino-Eevi Lukas 1112291314
Processed article: Tiit Matsulevitš 1083062314
Processed article: Friedrich Niggol 1098878036
Processed article: Arvo Niitenberg 1091114797
Processed article: Eha Pärn 1058679187
Processed article: Verner Hans Puurand 1082103275
Processed article: Theodor Rõuk 1114964021
Processed article: Arved Ruusa 1098879983
Proces

Processed article: Charles Cassal 924509169
Processed article: Jacques la Caze 1109449561
Processed article: Max-Théodore Cerfberr 1039643142
Processed article: Robert Chapuis 1107371389
Processed article: Jean Charbonnel 969097582
Processed article: Jean Auguste de Chastenet de Puységur 1098455433
Processed article: Pierre-Jean-Baptiste Chaussard 982872895
Processed article: Henri Philippe de Chauvelin 1067874732
Processed article: Armand Chouffet 1085636232
Processed article: Pierre-Suzanne-Augustin Cochin 1026749529
Processed article: Colette Codaccioni 1014506054
Processed article: Édouard Colbert de Villacerf 1101166764
Processed article: Maurice Colrat 1035898437
Processed article: Honoré Commeurec 1082414528
Processed article: Corinne Bouchoux 1096232063
Processed article: Jean Cotereau 1010731404
Processed article: Paul Cotte 1050652500
Processed article: Georges Coulon 1097830070
Processed article: Michel Craplet 1087935075
Processed article: Jacques Antoine Creuzé-Latouche 10

Processed article: Jean-Marie Rausch 1112230445
Processed article: Jean Reboul 1078446564
Processed article: Marie Roch Louis Reybaud 1033977038
Processed article: René Ribière 1041685164
Processed article: Antoine Richard du Cantal 1014225041
Processed article: Marthe Richard 1099050150
Processed article: Philippe Rio 1104409292
Processed article: Jean Gabriel Maurice Rocques 1084299616
Processed article: Joseph Romain-Desfossés 1104367647
Processed article: José Rossi 1057534232
Processed article: Sandrine Rousseau 1114415628
Processed article: Alexandre Rousselin de Saint-Albin 1084300380
Processed article: Ernest de Royer 1097929651
Processed article: Armand Charles Emmanuel Guignard, comte de Saint-Priest 1100727957
Processed article: Jean Sauvage 1026702472
Processed article: Jean-Baptiste Say 1112994737
Processed article: Lucile Schmid 1114853737
Processed article: Michel Souplet 1102390019
Processed article: Anton Carl Ludwig von Tabouillot 968436799
Processed article: Claude T

Processed article: Eduard von Schele zu Schelenburg 785399343
Processed article: Marco Scherbaum 1035698461
Processed article: Paul Schiemann 1088399730
Processed article: Ernst Wilhelm von Schlabrendorf 1001280832
Processed article: Friedrich Gustav Carl Ulrich Franz von Schnehen 971361950
Processed article: Andreas von Schoeler 1100044716
Processed article: Joachim, Count of Schönburg-Glauchau 1107354164
Processed article: Peter Schott 1083058544
Processed article: Günther Schwarz (politician) 1096148976
Processed article: Adam, Count of Schwarzenberg 1081128413
Processed article: Mignon Schwenke 1078055144
Processed article: Veit Ludwig von Seckendorff 1060960076
Processed article: Joachim Siegerist 1114262519
Processed article: Paul Spiegel 1081789630
Processed article: Baldur Springmann 1109697793
Processed article: Ashok-Alexander Sridharan 1093387162
Processed article: Gerhard Stahl 1106125356
Processed article: Karl von Starck 1093387150
Processed article: Gustav Adolf Steengra

Processed article: Otto Pérez Molina 1109584660
Processed article: Samuel Pérez Álvarez 1053436126
Processed article: Guillermo Pineda 1113000081
Processed article: Ramiro Ponce Monroy 1070637606
Processed article: Alfonso Portillo 1108568192
Processed article: Mariano Rivera Paz 1111971102
Processed article: Rafael Spinola 1110719920
Processed article: Eduardo Stein 1106098519
Processed article: Manuel Villacorta 1099232298
Processed article: Najib Roger Accar 1112725055
Processed article: Thierno Aliou 1025501033
Processed article: Mahmoud Bah 1049369237
Processed article: Mamadou Boye Bah 1008440784
Processed article: Fodé Bangoura 1009816783
Processed article: Karim Bangoura 1073546958
Processed article: Mafory Bangoura 1113979706
Processed article: Barry III 1037294196
Processed article: Alpha Oumar Barry 1100306971
Processed article: Diawadou Barry 1112725061
Processed article: Ibrahima Sory Barry 1112725057
Processed article: Édouard Benjamin 1098226724
Processed article: Balla 

Processed article: Lajos Fodor 1064277334
Processed article: Péter Fülöp (diplomat) 1009317517
Processed article: János Fuzik 1112077989
Processed article: József Gera 1074393115
Processed article: János Ghyczy 1051406176
Processed article: István Győrkös 1105290217
Processed article: Miksa Hadik 1017820647
Processed article: Franz Haller 1094144448
Processed article: Anita Heringes 1095011115
Processed article: Ottó Herman 1041794927
Processed article: Sándor Hollán Jr. 1005483763
Processed article: Sándor Hollán Sr. 1002589509
Processed article: István Horthy 1110654449
Processed article: Miklós Horthy Jr. 1112338065
Processed article: Gábor Hraskó 1096973019
Processed article: László Hunyady de Kéthely 1111996748
Processed article: János Imreffy 785980963
Processed article: Győző Istóczy 1105102717
Processed article: István Bajkai 1085761831
Processed article: Miklós Istvánffy 1088210316
Processed article: Oszkár Jászi 1097299407
Processed article: János Jeszenák 1012213335
Processe

Processed article: Girindra Mallik 1111276196
Processed article: Jyotsna Mandi 1098477649
Processed article: Mange Ram Gupta 1103929496
Processed article: K. Manickam 1115077542
Processed article: D. Manjunath 1105767572
Processed article: Jignesh Mevani 1107663464
Processed article: Suresh Chandra Mishra 1111419852
Processed article: Mohammad Anzar Nayeemi 1111786757
Processed article: Karnail Singh Peer Mohammad 1109685474
Processed article: Mohit Mitra 1052818947
Processed article: Ahmed Mohiuddin (politician) 1111421631
Processed article: Anubrata Mondal 1112464248
Processed article: Bina Mondal 1098477898
Processed article: Hucheshwar Gurusidha Mudgal 1034063701
Processed article: Venkatreddy Mudnal 1096051788
Processed article: Mukunda Behari Mullick 1112199012
Processed article: Durga Murmu 1096045507
Processed article: L. Murugan 1115206055
Processed article: Ezhilan Naganathan 1100361526
Processed article: H. Nagesh 1091434535
Processed article: Jamyang Tsering Namgyal 1113284

Processed article: Ngitung 901722698
Processed article: Ramses Ohee 1096782591
Processed article: Panusunan Pasaribu 1107533136
Processed article: Julian Aldrin Pasha 1025723291
Processed article: Poedjono Pranyoto 1101575086
Processed article: Prijono 1085683318
Processed article: Anthony Winza Probowo 1104560182
Processed article: I Gusti Ketut Pudja 1108931157
Processed article: Tedjo Edhy Purdijatno 1113174460
Processed article: Ridwan Suwidi 1073107544
Processed article: Sa'duddin 1098426619
Processed article: Ahmad Ridha Sabana 1108576169
Processed article: Hari Sabarno 1106838713
Processed article: Bambang Santoso 1089472187
Processed article: Sidik Djojosukarto 1096392078
Processed article: Sudi Silalahi 1103540330
Processed article: W. T. P. Simarmata 1100676941
Processed article: Mahadi Sinambela 1106829598
Processed article: Mooryati Soedibyo 1085297344
Processed article: Soerjadi Soedirdja 1077290298
Processed article: Soegiono 1015211738
Processed article: Tjokorda Gde Rak

Processed article: Huda Sajjad Mahmoud Shaker 1089249414
Processed article: Mohammed Haji Mahmoud 1030279163
Processed article: George Mansour 1042992539
Processed article: Tayseer al-Mashhadani 1089275720
Processed article: Fuad Masum 1104749153
Processed article: Muhammad Jamil Al-Mayahi 994019770
Processed article: Wijdan Michael 1089276041
Processed article: Siham Mousa Hamoud Jabr Al Moussawi 1089249339
Processed article: Muhammed Ali Mahmud 1059539650
Processed article: Mawlud Mukhlis 1091139346
Processed article: Mishkat al-Mumin 1099447794
Processed article: Tariq Najm 1102749100
Processed article: Majid al-Nasrawi 1085126794
Processed article: Yahia Nasseri 1107365337
Processed article: Atheel al-Nujaifi 1093222115
Processed article: Saleh al-Ogaili 995654848
Processed article: Ali al-Qaradaghi 1112620841
Processed article: Suhaib al-Rawi 1033783078
Processed article: Ahmed Abu Risha 1115071112
Processed article: Mowaffak al-Rubaie 1110391679
Processed article: Maruf al Rusafi

Processed article: Giovanni Puoti 1085943395
Processed article: Renzo Rabellino 1115307413
Processed article: Ercole Ricotti 1089343827
Processed article: Roberto di Ridolfo 1068433922
Processed article: Italo Righi 1041401611
Processed article: Arnaldo Rivera 1101656316
Processed article: Pellegrino Rossi 1079173570
Processed article: Luca Rovinalti 1068280342
Processed article: Roberto Sajeva 1114890475
Processed article: Francesco Saverio Salfi 1105226555
Processed article: Aurelio Saliceti 1097154675
Processed article: Coluccio Salutati 1067664623
Processed article: Riccardo Sarfatti 1112451184
Processed article: Pandolfo Savelli 934179191
Processed article: Bartolomeo Scala 1086210884
Processed article: Giovanni Scuderi 1113362916
Processed article: Federico Seismit-Doda 1102489715
Processed article: Salvatore Senese 1108537931
Processed article: Piero Soderini 1114531110
Processed article: Giovanni Battista Sommariva 1113076270
Processed article: Giorgio Sonnino 1114835603
Proces

Processed article: Haifa Najjar 1112863302
Processed article: Jumah Hammad 1109066062
Processed article: Mahmoud Hanandeh 1104352074
Processed article: Ibrahim Jazi 1112864912
Processed article: Jamal Muhammad Abidat 1065825467
Processed article: Fakhri Kawar 1072944828
Processed article: Awn Al-Khasawneh 1081795927
Processed article: Bisher Al-Khasawneh 1112804963
Processed article: Rashed Al-Khuzai 1114418968
Processed article: Samih al-Maaytah 1095322195
Processed article: Kamel Mahadin 1104331361
Processed article: Ahmad Masa'deh 1049251051
Processed article: Mazin Abdellah Hilal Al Farrayeh 1112864717
Processed article: Mohammad Salameh Al Nabulsi 1111980637
Processed article: Samir Murad 1061306891
Processed article: Nayef Steitieh 1112864606
Processed article: Samir Rifai 1090813858
Processed article: Sharaf bin Rajeh 1114705527
Processed article: Laith Shubeilat 1017482477
Processed article: Muhammad Suheimat 1043197188
Processed article: Alia Abu Tayeh 1112274605
Processed art

Processed article: Kang Nung-su 987463816
Processed article: Kang Phyo-yong 1106858161
Processed article: Kang Tong-yun 1111471471
Processed article: Kang Yong-sop 1111471503
Processed article: Kim Chon-hae 1011882896
Processed article: Kim Il-chol 1110841986
Processed article: Kim Pong-chol 987289456
Processed article: Kim Rak-gyom 1078662180
Processed article: Kim Ung 1111191028
Processed article: Kim Yong-ju 1100241392
Processed article: O Sang-hon 901843863
Processed article: Pak Chun-hwa 994400474
Processed article: Pak Myong-chol 1070146029
Processed article: Pak Yong-il 1111339203
Processed article: Pak Yong-sun (politician) 1067086108
Processed article: Ri Hi-yong 1092539237
Processed article: Ri Man-gon 1070145508
Processed article: Ri Pyong-chol 1098010868
Processed article: Ri Yong-mu 1091894749
Processed article: Kang Sun-nam 0
Processed article: Sung Jusik 1028731483
Processed article: Yang Bin (businessman) 1061877239
Processed article: Yun Jong-rin 1001811053
Processed a

Processed article: Bajram Hasani 1068907924
Processed article: Adem Hodža 1089913595
Processed article: Adrijana Hodžić 1059619919
Processed article: Oliver Ivanović 1110781072
Processed article: Ragip Jashari 933663830
Processed article: Elbert Krasniqi 1100006829
Processed article: Rrahman Morina 1047859449
Processed article: Hasan Prishtina 1114419215
Processed article: Gazmend Pula 1113326325
Processed article: Goran Rakić 1105188963
Processed article: Nenad Rašić 1041150203
Processed article: Selim Selimi 1102497560
Processed article: Igor Simić 1110535502
Processed article: Rexhai Surroi 1034004343
Processed article: Rada Trajković 1109175174
Processed article: Visar Ymeri 1036757024
Processed article: Abdulghaphor Hajjieh 1097091048
Processed article: Alaa Hussein Ali 1110198052
Processed article: Ali Hussain Al-Awadhi 994095734
Processed article: Saad Bin Tefla 1098295980
Processed article: Haitham al-Ghais 1104873540
Processed article: Ali Mohammed Thunayan Al-Ghanim 111440513

Processed article: Abdullah Naker 1064641845
Processed article: Aref Ali Nayed 1099167616
Processed article: Abdel Wahab Qaid 1077575299
Processed article: Ali Sahli 997716381
Processed article: Bashir Saadawi 1097982743
Processed article: Fayez al-Sarraj 1113620301
Processed article: Mohammed El Senussi 1112955623
Processed article: Abdulrahman Sewehli 1041934642
Processed article: Ashour Suleiman Shuwail 1062396177
Processed article: Mohamed Taha Siala 1040410236
Processed article: Alfons Goop 1030085950
Processed article: Peter Kaiser (historian) 1086304418
Processed article: Aleksandras Ambrazevičius 1055137919
Processed article: Leonas Apšega 1055138262
Processed article: Mykolas Arlauskas 1102542977
Processed article: Rimantas Astrauskas 1055138314
Processed article: Adomas Ąžuolas Audickas 1034971366
Processed article: Edminas Bagdonas 1107087749
Processed article: Zbignev Balcevič 1067686942
Processed article: Vilius Baldišis 1055138352
Processed article: Vladimiras Beriozovas 

Processed article: Assaffal P. Alian 1099124313
Processed article: Awang Tengah Ali Hasan 1113182818
Processed article: Aznel Ibrahim 1110533050
Processed article: R. G. Balan 1110533095
Processed article: Abdul Aziz bin Husain 971530690
Processed article: Chai Kim Sen 1106368685
Processed article: Chen Tien 1031735962
Processed article: Chin Peng 1107270533
Processed article: Chong Ted Tsiung 1108544226
Processed article: Chong Zhemin 1095389334
Processed article: Lily Eberwein 1108044673
Processed article: Ellron Alfred Angin 1113511050
Processed article: Eu Chooi Yip 1070490957
Processed article: Shamsiah Fakeh 1101748227
Processed article: Fong Chong Pik 1001009224
Processed article: Ghulamhaidar Khan 1075437508
Processed article: Gopalakrishnan Subramaniam 1110053029
Processed article: Guan Dee Koh Hoi 1100039608
('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))


TypeError: 'NoneType' object is not subscriptable

In [39]:
def read_json(filename):
    data = {}
    with open(filename, "r") as f:
        data = json.loads(f.read())
    
    return data

In [None]:
ARTICLE_REVISIONS = read_json("../data/output/article_revisions.json")
ARTICLE_QUALITY = {}
for ARTICLE in ARTICLE_REVISIONS:
    score = request_ores_score_per_article(ARTICLE_REVISIONS[ARTICLE])
    obj = score['enwiki']['scores']
    info_key = list(obj.keys())[0]
    quality = score['enwiki']['scores'][info_key]['articlequality']['score']['prediction']
    ARTICLE_QUALITY.update({ARTICLE:quality})
    print("Processed article:", ARTICLE, "rating:", quality)
outputToJson("../data/output/article_quality", ARTICLE_QUALITY)
print('Completed')

In [None]:
[3:50 PM] Harry McNinson
ARTICLE_QUALITY = read_json("../data/output/article_quality.json")
DF_ARTICLE_QUALITY = pd.DataFrame(ARTICLE_QUALITY.items(), columns=['article_title', 'article_quality'])

ARTICLE_REVISIONS = read_json("../data/output/article_revisions.json")
DF_ARTICLE_REVISIONS = pd.DataFrame(ARTICLE_REVISIONS.items(), columns=['article_title', 'revision_Id'])


In [None]:
df_revision_quality = pd.merge(DF_ARTICLE_REVISIONS,DF_ARTICLE_QUALITY, how='left', on='article_title')

In [None]:
politicians_by_country.rename(columns = {'name':'article_title'}, inplace = True)
politicians_by_country

In [None]:
df = df_revision_quality.merge(politicians_by_country[['article_title', 'country']], how='left', on="article_title")

In [None]:
df = df_mergedwith_country.merge(population_by_country[['country', 'region', 'population']], how='left', on="country")

In [None]:
df.to_csv("../data/output/wp_politicians_by_country.csv", encoding='utf-8', index=False)