In [1]:
import spacy
import pandas as pd
import time
import numpy as np
import re
import json
import sys
import requests

In [2]:
# Display the installed spaCy version (the recorded run reported '2.3.5').
spacy.__version__

'2.3.5'

In [3]:
import sqlite3
from typing import List, Optional

class WikiMapper:
    """ Uses a precomputed database created by `create_wikipedia_wikidata_mapping_db`.

    Every lookup opens its own SQLite connection to `path_to_db`, runs a single
    parameterized query against the `mapping` table and closes the connection again.
    """

    def __init__(self, path_to_db: str):
        """ Remember the path of the SQLite index; no connection is opened here.
        Args:
            path_to_db: Path to the SQLite file containing the `mapping` table.
        """
        self._path_to_db = path_to_db

    def _query(self, sql: str, params: tuple) -> list:
        """ Run one read-only query and return all rows, always closing the connection. """
        conn = sqlite3.connect(self._path_to_db)
        try:
            return conn.execute(sql, params).fetchall()
        finally:
            # `with sqlite3.connect(...)` only commits/rolls back the transaction;
            # it does not close the connection, so close it explicitly.
            conn.close()

    def title_to_id(self, page_title: str) -> Optional[str]:
        """ Given a Wikipedia page title, returns the corresponding Wikidata ID.
        The page title is the last part of a Wikipedia url **unescaped** and spaces
        replaced by underscores , e.g. for `https://en.wikipedia.org/wiki/Fermat%27s_Last_Theorem`,
        the title would be `Fermat's_Last_Theorem`.
        Args:
            page_title: The page title of the Wikipedia entry, e.g. `Manatee`.
        Returns:
            Optional[str]: If a mapping could be found for `wiki_page_title`, then return
                           it, else return `None`.
        Raises:
            sqlite3.OperationalError: If the database has no `mapping` table.
        """

        rows = self._query(
            "SELECT wikidata_id FROM mapping WHERE wikipedia_title=? LIMIT 1", (page_title,)
        )

        # A row whose wikidata_id is NULL counts as "no mapping".
        if rows and rows[0][0] is not None:
            return rows[0][0]
        return None

    def url_to_id(self, wiki_url: str) -> Optional[str]:
        """ Given an URL to a Wikipedia page, returns the corresponding Wikidata ID.
        This is just a convenience function. It is not checked whether the index and
        URL are from the same dump.
        The last path segment is looked up as-is first; if that finds nothing and the
        segment contains percent-escapes (e.g. `%27`), the unescaped form is tried too,
        because `title_to_id` expects unescaped titles.
        Args:
            wiki_url: The URL to a Wikipedia entry.
        Returns:
            Optional[str]: If a mapping could be found for `wiki_url`, then return
                           it, else return `None`.
        """
        from urllib.parse import unquote  # local import keeps the class self-contained

        title = wiki_url.rsplit("/", 1)[-1]
        wikidata_id = self.title_to_id(title)
        if wikidata_id is None:
            unescaped = unquote(title)
            if unescaped != title:
                wikidata_id = self.title_to_id(unescaped)
        return wikidata_id

    def id_to_titles(self, wikidata_id: str) -> List[str]:
        """ Given a Wikidata ID, return a list of corresponding pages that are linked to it.
        Due to redirects, the mapping from Wikidata ID to Wikipedia title is not unique.
        Args:
            wikidata_id (str): The Wikidata ID to map, e.g. `Q42797`.
        Returns:
            List[str]: A list of Wikipedia pages that are linked to this Wikidata ID.
        Raises:
            sqlite3.OperationalError: If the database has no `mapping` table.
        """

        rows = self._query(
            "SELECT DISTINCT wikipedia_title FROM mapping WHERE wikidata_id =?", (wikidata_id,)
        )
        return [row[0] for row in rows]

In [4]:
# Load the `en_core_web_md` spaCy pipeline.
# Requires spacy==2.3.5 (the version reported by the cell above).
# NOTE(review): `nlp` is not referenced by any other visible cell — confirm it is still needed.
nlp = spacy.load('en_core_web_md')

In [5]:
# Read the politician records from disk; each record is expected to carry an 'id' key.
with open('politician_data.json') as f:
    wikidata = json.load(f)

# Unique ids of the politicians covered; later cells use this set for membership tests.
IDs = {entry['id'] for entry in wikidata}

In [6]:
# Load the comments table, using its 'id' column as the index.
df = pd.read_csv("processed2.csv", index_col='id')
# Title -> Wikidata-id lookup backed by the SQLite index file named below.
mapper = WikiMapper("index_enwiki-20190420.db")

In [7]:
def return_NEL(text, api_url="https://rel.cs.ru.nl/api", timeout=60):
    """Return the ids of known politicians that the entity-linking API finds in `text`.

    The text is posted to `api_url`; every returned result whose last field is
    'PER' is resolved to an id through the global `mapper` and kept only if that
    id is in the global `IDs` set.

    Args:
        text: The document to annotate. Non-string values (e.g. NaN cells coming
            from pandas) and blank strings yield an empty list without a request.
        api_url: Endpoint that accepts ``{"text": ..., "spans": []}`` as JSON.
        timeout: Seconds to wait for the server before `requests` raises a
            timeout error, so one stalled call cannot hang a whole `Series.map`.

    Returns:
        list: Unique matching ids, in order of first appearance.

    Raises:
        requests.RequestException: On connection problems, timeouts or an HTTP
            error status.
    """
    if not isinstance(text, str) or not text.strip():
        return []

    response = requests.post(api_url, json={"text": text, "spans": []}, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    named = []
    for result in response.json():
        if result[-1] != 'PER':
            continue
        # Fields 4 and 5 look like two confidence scores and fields 2/3 like the
        # mention text and the linked page title — TODO confirm against the API docs.
        # When field 5 is the larger score, field 3 is looked up, otherwise field 2.
        title = result[3] if result[5] > result[4] else result[2]
        q_id = mapper.title_to_id(title)
        if q_id in IDs:
            named.append(q_id)

    # dict.fromkeys de-duplicates while keeping a deterministic order.
    return list(dict.fromkeys(named))

In [10]:
# Inspect the available columns (the recorded run showed a single 'test1' column).
df.columns

Index(['test1'], dtype='object')

In [15]:

# URL of the entity-linking API endpoint (holds a URL, despite the variable's name).
# NOTE(review): `return_NEL` defines its own local copy, and a later cell rebinds this global.
IP_ADDRESS = "https://rel.cs.ru.nl/api"


In [11]:
# Annotate every comment with the ids returned by return_NEL (one API call per row).
df['NELs'] = df['test1'].map(return_NEL)
# Drop rows whose id list is empty, then expand to one row per (comment, id) pair.
df = df[df['NELs'].str.len().ne(0)].explode('NELs')

KeyboardInterrupt: 

In [11]:
# Debug pass: print the first comments together with the politician ids that the
# entity-linking API plus the title -> id mapper find in each of them.
mapper = WikiMapper("index_enwiki-20190420.db")
IP_ADDRESS = "https://rel.cs.ru.nl/api"

# Slicing with .iloc[:100] is bounds-safe, so frames shorter than 100 rows
# no longer raise IndexError.
for text in df.test1.iloc[:100]:
    print(text)
    document = {"text": text, "spans": []}
    # A timeout keeps one stalled request from blocking the whole loop.
    API_result = requests.post(IP_ADDRESS, json=document, timeout=60).json()
    named = []
    for result in API_result:
        if result[-1] != 'PER':
            continue
        # When field 5 exceeds field 4 the value in field 3 is looked up, otherwise
        # field 2 (presumably linked title vs. raw mention — TODO confirm).
        title = result[3] if result[5] > result[4] else result[2]
        q_id = mapper.title_to_id(title)
        if q_id in IDs:
            named.append(q_id)
            # Show each match; previously the ids were collected but never displayed.
            print(q_id)
    print("---------------------")

Are coal profits up, though? The industry is mortally wounded no matter what Trump does.
---------------------
I can already hear the phantom "looks."
---------------------
How is laying physical infrastructure, be it roads, telephone lines, or fiber - advocating for corporate welfare? It isn't like saying "Let's subsidize Comcast, lay the line, and beg for them to come." If you build it, various people will come.
---------------------
Ah the good times of our long forgotten history (2012) when the 2AM Phone Call was an actual campaign issue and people actually considered how a candidate might handle an actual campaign issue. 
---------------------
Oh come on, you exaggerate the all or nothing but. Warren doesn’t have a 13 minute video of Warren lying floating through the front page. Warren was a corrupt candidate plain and simple, we can’t blame the voters for the DNC’s fuck up. 

Warren actually has a bit of bark behind Warren. Warren shortcomings mean nothing in a race against Incum

Q22686
Q539493
Q715156
Q22686
Q1124
Q715156
Q22686
Q22686
---------------------
I'm almost with you. I'm close but not there yet. And I'm American where the patriots care more about private property than private individuals. Especially worrying since those same bootlicker's have all the guns
---------------------
Donald Trump lacks the skill set to do anything at all
Q22686
---------------------
Yes you should be writing I could have written it off off
---------------------
Are you disagreeing with my point?  Did you read what I said?

They need to remember this is a possible homicide situation, not a confirmed fact. When They arrive They need to take a moment to assess the situation and not shoot the first person They see.
---------------------
How are things going for those fellows now?
---------------------
No problem, hope No problem didn’t come across maliciously. 
---------------------
Planes don't crash in 2017

"I made it happen!"

Trains crash in 2017

"It was Obama!"

How pat

---------------------
This one?
URL
---------------------
Hypothetically, how do you think the word would look if The democratic party consistently refuses to admit fault, refuses to reach across the aisle, and refuses to listen to facts and evidence? Is it even possible in your mind that sometimes only one side is wrong?
---------------------
Though only because James Mattis for Secretary of Defense coincidentally had the coolest nickname.
---------------------
this
---------------------
Congrats, Senator Romney.
---------------------
I thought it was about double that. 
---------------------
That's incredibly naive
---------------------
Uhh Nationalism isn't the key ingredient to fascism, lol. Ghandi and Churchill were nationalists.

Fascism: 
 a political philosophy, movement, or regime (such as that of the Fascisti) that exalts nation and often race above the individual and that stands for a centralized autocratic government headed by a dictatorial leader, severe economic and socia

KeyboardInterrupt: 

In [8]:
# Peek at field 6 of `result` (the recorded run showed 'PER').
# NOTE(review): `result` is not defined in this cell — it is whatever the most recently
# executed loop left bound in the kernel, so this fails on a fresh kernel.
result[6]

'PER'

In [21]:
# Try a different index file.
# NOTE(review): this rebinds the global `mapper` that `return_NEL` relies on.
mapper = WikiMapper("index_enwiki-latest.db")

# In the recorded run this lookup raised `OperationalError: no such table: mapping`.
mapper.title_to_id("Python_(programming_language)")

OperationalError: no such table: mapping

In [40]:
# Falcon 2.0
# Debug pass over comments 6..99: print each comment and every Wikidata id returned
# by the Falcon API that belongs to the known politician set `IDs`.

IP_ADDRESS = "https://labs.tib.eu/falcon/falcon2/api?mode=long"

# .iloc[6:100] is bounds-safe, unlike indexing each position of range(6, 100).
for text in df.test1.iloc[6:100]:
    print(text)
    document = {"text": text}

    try:
        # A timeout keeps one stalled request from blocking the whole loop.
        API_result = requests.post(IP_ADDRESS, json=document, timeout=60).json()
        for result in API_result["entities_wikidata"]:
            # Take the last path segment of the entity URI and drop its final
            # character (presumably a closing '>' — TODO confirm).
            code = result[0].rsplit("/", 1)[-1][:-1]
            if code in IDs:
                print(code)
    except (ValueError, requests.RequestException):
        # ValueError covers undecodable JSON bodies; RequestException covers
        # timeouts and connection errors, so one bad call does not end the run.
        print("API Failure")
    print("---------------------------------------------")

I just don't like Clinton and I haven't liked Clinton for over 20 years. 

It didn't matter who Clinton went up against, they were still going to get my vote. I just didn't want to vote for Clinton. The republicans could have put anyone in trumps place and it wouldn't have changed my decision. Clinton is that unlikable with many people. That is part of the reason that the democrats lost. the democrats acted as if the democrats never even had to run and that my vote wasn't my vote. Clinton doesn't automatically get my vote and when Clinton supporters kick and scream about my vote, my vote only makes me want to vote for Clinton less
API Failure
---------------------------------------------
Maybe but it seems like a hassle. I don't really pay that much in taxes anyway since my only income is what I received for studying. 
---------------------------------------------
Next in line after Hatch is Thad Cochran of Mississippi. If you recognize that name, it's because he's been in thre news fo

---------------------------------------------
Donald Trump lacks the skill set to do anything at all
Q22686
---------------------------------------------
Yes you should be writing I could have written it off off
---------------------------------------------
Are you disagreeing with my point?  Did you read what I said?

They need to remember this is a possible homicide situation, not a confirmed fact. When They arrive They need to take a moment to assess the situation and not shoot the first person They see.
---------------------------------------------
How are things going for those fellows now?
---------------------------------------------
No problem, hope No problem didn’t come across maliciously. 
---------------------------------------------
Planes don't crash in 2017

"I made it happen!"

Trains crash in 2017

"It was Obama!"

How pathetically lacking in any kind of integrity to give themselves credit for shit themselves had nothing to do with and how pathetically desparate must s

---------------------------------------------
First our police didnt break into this guys home. 

Second, what the hell are you talking about? Lets say someone is breaking into your home, you call the police, but the police say "fuck off I think this is a prank." the police HAVE to respond, and the police HAVE to treat everything as a serious call because then if the police dont and something bad does happen because the police ignored it then what the point of police?
---------------------------------------------
This one?
URL
---------------------------------------------
Hypothetically, how do you think the word would look if The democratic party consistently refuses to admit fault, refuses to reach across the aisle, and refuses to listen to facts and evidence? Is it even possible in your mind that sometimes only one side is wrong?
---------------------------------------------
Though only because James Mattis for Secretary of Defense coincidentally had the coolest nickname.
----------

API Failure
---------------------------------------------
Ugh. Ok buddy, have a point and pretend you did something.
---------------------------------------------
Sikh decision! 
---------------------------------------------
As someone quite familiar with the industry and the process of H1-B, here's how to fix H1-B without hurting citizens, the job market, the applicants, or the companies who need expert candidates to grow:

1) Allow H1-B applicants to switch jobs after 3mo of start to any company who is willing to pay a higher salary without requiring any paperwork from the new employer. The applicant simply logs on to USCIS and changes The applicant employer name and new salary. This immediately gets rid of the IT sweatshops that force applicants to work 80-hour weeks under the table. This also removes the wage-depressing aspect of the visas. Companies that hire candidates for candidates genuine expertise and treat candidates well should not have any issue with this.

2) For each app

In [32]:
# Extract the id from the first Falcon entity: last path segment of its URI minus the
# final character (the recorded run gave 'Q4294777').
# NOTE(review): depends on `API_result` left over from the Falcon loop cell.
API_result["entities_wikidata"][0][0].rsplit("/",1)[-1][:-1]

'Q4294777'

In [41]:
# Print the first 100 comments (or fewer, if the frame is shorter) with a separator
# line after each; slicing avoids the IndexError that .iloc[i] raises on short frames.
for comment in df.test1.iloc[:100]:
    print(comment)
    print("------------------------------")

Are coal profits up, though? The industry is mortally wounded no matter what Trump does.
------------------------------
I can already hear the phantom "looks."
------------------------------
How is laying physical infrastructure, be it roads, telephone lines, or fiber - advocating for corporate welfare? It isn't like saying "Let's subsidize Comcast, lay the line, and beg for them to come." If you build it, various people will come.
------------------------------
Ah the good times of our long forgotten history (2012) when the 2AM Phone Call was an actual campaign issue and people actually considered how a candidate might handle an actual campaign issue. 
------------------------------
Oh come on, you exaggerate the all or nothing but. Warren doesn’t have a 13 minute video of Warren lying floating through the front page. Warren was a corrupt candidate plain and simple, we can’t blame the voters for the DNC’s fuck up. 

Warren actually has a bit of bark behind Warren. Warren shortcomings 