In [3]:
from pyvis import network as net
import networkx as nx 
from google.cloud import bigquery
import pandas as pd
from collections import namedtuple
import numpy as np

# Example of a graph

In [7]:
# Smoke test for pyvis: build a 5-node complete graph with networkx, load it
# into a notebook-mode pyvis Network and render it to example.html.
g = net.Network(notebook=True)
nxg = nx.complete_graph(5)
g.from_nx(nxg)
g.show("example.html")

# Query to the BQ

In [4]:
# BigQuery client built with no explicit arguments; the query cell below runs through it.
client = bigquery.Client()

In [9]:

# Collect every article whose lower-cased text_content matches 'santrich' from the
# three news_scrapping tables (semana, el_tiempo, el_espectador), stacked with UNION ALL.
# Each branch tags its rows with a literal `source` label and exposes the date column
# as `fecha`; only the el_tiempo branch casts `date` to DATE (presumably that column
# has a different type there -- confirm against the table schema).
# NOTE(review): DISTINCT applies to the whole selected row, not to item_id alone.
sql=""" 
(
SELECT
    distinct item_id,
    title,
    date as fecha,
    text_content,
    'Semana' AS source
  FROM
    `servisentimen-servipolitics.news_scrapping.semana`
  WHERE
    REGEXP_CONTAINS(LOWER(text_content),
      r'santrich') )
  UNION ALL (
  SELECT
    distinct item_id, 
    title,
    cast(date as DATE) as fecha,
    text_content,
    'El Tiempo' AS source
  FROM
    `servisentimen-servipolitics.news_scrapping.el_tiempo`
  WHERE
    REGEXP_CONTAINS(LOWER(text_content),
      r'santrich') )
  UNION ALL (
  SELECT
    distinct item_id, 
    title,
    date as fecha,
    text_content,
    'El Espectador' AS source
  FROM
    `servisentimen-servipolitics.news_scrapping.el_espectador`
  WHERE
    REGEXP_CONTAINS(LOWER(text_content),
      r'santrich') 
)
"""
# Run the query and load the result into a DataFrame; the last line previews it.
df = client.query(sql).to_dataframe()
df.head()

Unnamed: 0,item_id,title,fecha,text_content,source
0,873321,Iván Duque calificó de burrada la declaración ...,2019-07-29,""":""Es una burrada: Duque sobre declaración de ...",El Espectador
1,873299,"""Iván Márquez y Jesús Santrich son bienvenidos...",2019-07-28,""":""Iván Márquez y Jesús Santrich son bienvenid...",El Espectador
2,867099,"""Duque debe limitar su relaciÃ³n con Uribe"": T...",2019-06-21,""":""u0022Duque debe limitar su relación con Uri...",El Espectador
3,868066,Indagatoria contra JesÃºs Santrich por narcotr...,2019-06-27,""":""Indagatoria contra Jesús Santrich por narco...",El Espectador
4,868471,Otra versiÃ³n de la renuncia del exfiscal gene...,2019-06-29,""":""Otra versión de la renuncia del exfiscal ge...",El Espectador


# NLP

## 1. Defining the functions used for NER

### 1.1 Calling the API

In [10]:
import six
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types
import sys
# Sample article body used to try the entity-sentiment call on a single document.
# .iloc[0] takes the first row by position, so the lookup does not depend on the
# index happening to contain the label 0.
text = df["text_content"].iloc[0]

def getsentimental(text, client=None):
    """Run entity-sentiment analysis on ``text`` with the Cloud Natural Language API.

    Args:
        text: Document body. ``bytes`` input is decoded as UTF-8 first.
        client: Optional ``language.LanguageServiceClient`` to reuse. When omitted,
            a new client is created for this call (the previous behaviour). Passing
            one in avoids rebuilding the client for every document when the
            function is called in a loop.

    Returns:
        The ``entities`` field of the ``analyze_entity_sentiment`` response.
    """
    if client is None:
        client = language.LanguageServiceClient()

    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')

    # Reach `types` / `enums` through the `language` package rather than the bare
    # module-level names, so the function keeps working even if a notebook cell
    # rebinds the name `types` or `enums`.
    document = language.types.Document(
        content=text.encode('utf-8'),
        type=language.enums.Document.Type.PLAIN_TEXT)

    # Detect and send native Python encoding to receive correct word offsets.
    if sys.maxunicode == 65535:
        encoding = language.enums.EncodingType.UTF16
    else:
        encoding = language.enums.EncodingType.UTF32

    result = client.analyze_entity_sentiment(document, encoding)

    return result.entities


In [11]:
# Try the API on the single sample article; `s` holds the entities it returns.
s = getsentimental(text)

### 1.2 Creating the json

In [12]:
def jsonit(result):
    """Turn entity-sentiment results into plain, JSON-friendly dictionaries.

    Each entity in ``result`` becomes a dict holding its name, type, salience
    and a ``mentions`` list. Every mention dict carries the mention text, its
    sentiment magnitude and score, plus the salience of the owning entity.

    Returns:
        A list with one dict per entity, in the order they were given.
    """
    def _mention_record(entity, mention):
        # Salience is an entity-level value; it is copied onto each mention.
        return {
            "Content": mention.text.content,
            "Magnitude": mention.sentiment.magnitude,
            "Sentiment": mention.sentiment.score,
            "Salience": entity.salience,
        }

    return [
        {
            "name": entity.name,
            "type": entity.type,
            "Salience": entity.salience,
            "mentions": [_mention_record(entity, m) for m in entity.mentions],
        }
        for entity in result
    ]

#### Clean the dataframe

In [46]:
# Keep a single row per item_id so each article is sent to the API only once.
dc = df.drop_duplicates(subset="item_id")

### Apply the NLP and make it a JSON (why Google created these weird classes is beyond my comprehension)

In [76]:
# One record per article: identifying metadata plus the JSON-friendly NER result.
articlent = namedtuple('articlent', 'item_id title source fecha listNER')

# Built with a comprehension so the loop no longer rebinds the notebook-level name
# `text` (the sample article used by the single-call cell) or leaks per-iteration
# temporaries into the kernel namespace. Each element triggers one getsentimental()
# call, i.e. one API request per article.
listarticles = [
    articlent(
        item_id=article.item_id,
        title=article.title,
        source=article.source,
        fecha=article.fecha,
        listNER=jsonit(getsentimental(article.text_content)),
    )
    for article in dc.itertuples()
]

### list to create the dataframe

In [133]:
# Flatten the nested NER output into one row per (article, entity, mention).
pre_df = []
for art in listarticles:
    for entity in art.listNER:
        entity_name = entity["name"]
        # Deliberately not called `types`: that name is the module imported from
        # google.cloud.language, and overwriting it with an int here would clobber it.
        entity_type = entity["type"]
        entity_salience = entity["Salience"]
        for men in entity["mentions"]:
            # NOTE(review): `nameg` and `namei` both receive the entity name, exactly
            # as this cell produced before (the article title used to be read but was
            # overwritten before use). Confirm whether one of the two columns was
            # meant to carry the article title or the mention content instead.
            pre_df.append((
                entity_name,
                art.item_id,
                art.source,
                art.fecha,
                entity_salience,
                entity_type,
                entity_name,
                men["Magnitude"],
                men["Salience"],
                men["Sentiment"],
            ))

# Column names, in the same order as the tuple fields above.
col = [
    "nameg",
    "item_id",
    "source",
    "fecha",
    "Salienceg",
    "types",
    "namei",
    "Magnitude",
    "Saliencei",
    "Sentiment",
]
dfObj = pd.DataFrame(pre_df, columns=col)

### Delete sentiment = 0 

In [165]:
# Keep only the mention rows whose sentiment score is non-zero.
# NOTE(review): this rebinds `df`, which earlier in the notebook held the raw
# article query result; cells that expect the article frame must not be re-run after this.
df = dfObj[dfObj["Sentiment"] != 0.0]

### Upload to BQ

In [166]:
# Destination given as dataset.table, plus the GCP project that owns it.
full_table_id = 'NER.santrich'
project_id = 'servisentimen-servipolitics'
# Upload the filtered mention rows to BigQuery.
# NOTE(review): no `if_exists` argument is passed, so re-running this cell
# presumably fails once the table exists -- confirm the intended behaviour.
df.to_gbq(full_table_id, project_id=project_id)

1it [00:00,  6.11it/s]


In [9]:
# A BigQuery client is created again here for the aggregation query.
client = bigquery.Client()

# Average sentiment score and magnitude per entity name (`namei`) over the uploaded
# NER.santrich table, restricted to rows where types = 1.
# NOTE(review): 1 is presumably the PERSON value of the Natural Language
# entity-type enum -- confirm against the API reference.
sql=""" 
SELECT AVG(Sentiment) as Sentiment, AVG(Magnitude) as Magnitude, namei
FROM( 
SELECT *
FROM `servisentimen-servipolitics.NER.santrich`
where types = 1
)
GROUP BY namei 
"""

# Run the aggregation and preview the per-entity averages.
dfs = client.query(sql).to_dataframe()
dfs.head()

Unnamed: 0,Sentiment,Magnitude,namei
0,-1.490116e-09,0.2,jefe
1,-0.09818182,0.192727,presidente
2,0.2,0.325,presidenta
3,0.2333333,0.3,Max Flórez
4,0.4,0.4,Felipe Córdoba


### Testing if I can modify the size of the graph

In [36]:
# Node-size metric stored in a new lower-case "sentiment" column:
# |ln(Sentiment + 1)| * 100, computed from the averaged "Sentiment" score.
dfs["sentiment"] = (np.log(dfs["Sentiment"] + 1) * 100).abs()

In [37]:
# Only the first 50 rows of the per-entity aggregate are plotted.
df = dfs[:50]

# Star-shaped graph: every entity becomes a node whose `value` is its "sentiment"
# metric, and each one is linked to a central "Santrich" node. That hub is created
# implicitly by the edges, so it has no `value` unless "Santrich" is itself one of
# the entity names.
G = nx.Graph()
for entity_name, node_value in zip(df["namei"], df["sentiment"]):
    G.add_node(entity_name, value=node_value)
G.add_edges_from((entity_name, "Santrich") for entity_name in df["namei"])

# Render with pyvis; show() stays the last expression so the notebook displays it.
g = net.Network(notebook=True)
g.from_nx(G)
g.show("example.html")