In [5]:
from textblob import TextBlob
from textblob import translate
from textblob.exceptions import NotTranslated
import logging
import json
from utils.extract import db_connection, download_data
from tqdm import tqdm_notebook as tqdm
import multiprocessing as mp
from concurrent.futures import ThreadPoolExecutor
import threading

In [6]:
logging.getLogger().setLevel(logging.DEBUG)

In [7]:
def get_tweets_db(creds):
    """
    Saca los tweets de la base y guarda su sentiment

    Args:
        df(Dataframe): DataFrame con datos a subir a la base de datos
        creds(dict): Diccionario con las credenciales de la base de datos
    """
    logging.info('*** Descargando tweets ***')
    
    try:
        query = """SELECT id, tweet 
                   from tweets
                   where polarity is NULL
                   limit 100000;"""
        conn = db_connection(creds)
        data = download_data(conn, query)
    except Exception as e:
        logging.error(e)
    
    return data

In [8]:
def get_sentiment(dict_tweet):
    
    text = TextBlob(dict_tweet['text'])
    text_polarity = text.polarity
    text_subjec = text.subjectivity
    identif = dict_tweet['id']
    
    return identif, text_polarity, text_subjec

In [9]:
def save_sentiment(list_record, creds):
    query = """INSERT INTO temp_tweets (id, polarity, subjectivity) VALUES {};""".format(list(list_record)).replace('[','').replace(']','')
    try:
        conn = db_connection(creds)
        download_data(conn, query)    
    except Exception as error:
        logging.error(error)
    

In [None]:
def update_db(creds):
    query = """
            UPDATE tweets
            SET   (polarity,   subjectivity) = (a.polarity, a.subjectivity)
            FROM   temp_tweets as a
            WHERE  a.id = tweets.id;
            """
    try:
        conn = db_connection(creds)
        download_data(conn, query)    
    except Exception as error:
        logging.error(error)

In [10]:
creds_file = 'creds.txt'
with open(creds_file, encoding='utf-8') as data_file:
            creds = json.loads(data_file.read())

In [12]:
data_tweets = ['a']

while len(data_tweets)>0:
    sentiment_results = []
    data_tweets = get_tweets_db(creds)
    
    data_tweets.columns = ['id','text']
    dict_tweets = data_tweets.to_dict('records')
    logging.info('*** Obteniendo sentiment de {} tweets'.format(len(dict_tweets)))
    
    sentiment_results = list(map(get_sentiment, dict_tweets))
    save_sentiment(sentiment_results, creds)

INFO:root:*** Descargando tweets ***
INFO:root:Nueva conexión a base: investing_data


HBox(children=(IntProgress(value=0, max=100000), HTML(value='')))

INFO:root:*** Obteniendo sentiment de 100000 tweets
INFO:root:Nueva conexión a base: investing_data
INFO:root:*** Descargando tweets ***
INFO:root:Nueva conexión a base: investing_data


HBox(children=(IntProgress(value=0, max=100000), HTML(value='')))

INFO:root:*** Obteniendo sentiment de 100000 tweets
INFO:root:Nueva conexión a base: investing_data
INFO:root:*** Descargando tweets ***
INFO:root:Nueva conexión a base: investing_data


HBox(children=(IntProgress(value=0, max=100000), HTML(value='')))

INFO:root:*** Obteniendo sentiment de 100000 tweets
INFO:root:Nueva conexión a base: investing_data
INFO:root:*** Descargando tweets ***
INFO:root:Nueva conexión a base: investing_data


HBox(children=(IntProgress(value=0, max=100000), HTML(value='')))

INFO:root:*** Obteniendo sentiment de 100000 tweets


KeyboardInterrupt: 