# Sample Sentiment Analysis

In [1]:
!pip install nest_asyncio
from logging.handlers import TimedRotatingFileHandler
import logging.config
from psenti import SentimentAnalysis, SentimentConnection, Document
from sklearn import metrics
import socket

user_name = socket.gethostname()
host = 'sentiment2.wikiled.com'
port = 80

import nest_asyncio
import asyncio
nest_asyncio.apply(loop=asyncio.get_event_loop())

logger = logging.getLogger('JupyterUI')
logFormatter = logging.Formatter('%(asctime)s - [%(thread)s] [%(threadName)s]- %(name)s - %(levelname)s - %(message)s')
logger.setLevel(logging.DEBUG)

console = logging.StreamHandler()
console.setFormatter(logFormatter)
console.setLevel(logging.INFO)

logger.addHandler(console)



In [2]:
%%time

connection = SentimentConnection(host=host, port=port, client_id=user_name)
logger.info(f'Supported domains')
for domain in connection.supported_domains:    
    logger.info(f'Domain: [{domain}]')

2020-08-18 13:22:17,287 - [139992959448896] [MainThread]- JupyterUI - INFO - Supported domains
2020-08-18 13:22:17,289 - [139992959448896] [MainThread]- JupyterUI - INFO - Domain: [TwitterMarket]
2020-08-18 13:22:17,291 - [139992959448896] [MainThread]- JupyterUI - INFO - Domain: [TwitterTrump]
2020-08-18 13:22:17,293 - [139992959448896] [MainThread]- JupyterUI - INFO - Domain: [medical]
2020-08-18 13:22:17,296 - [139992959448896] [MainThread]- JupyterUI - INFO - Domain: [market]


CPU times: user 29.2 ms, sys: 8.62 ms, total: 37.8 ms
Wall time: 823 ms


# Training Model

## Define Test routine

In [3]:
def test_sentiment(test_doc, model=None):
    logger.info(f'Using {len(test_doc)} test documents...')
    
    analysis = SentimentAnalysis(connection, model=model, clean=True)
    
    results = []
    detected_document_class = {}
    analysis.on_message.subscribe(lambda result: results.append(result))
    analysis.detect_sentiment(test_doc)

    for result in results:
        stars = result['Stars']    
        id  = result['Id']
        detected_document_class[id] = stars is not None and stars > 3

    logger.info(f'Total processed documents: {len(detected_document_class)}')

    test_y = [document.IsPositive for document in test_doc]
    result_y = [detected_document_class[document.Id] for document in test_doc]
    vacc = metrics.accuracy_score(test_y, result_y)
    logger.info(f'Accuracy: {vacc:1.2f}')

## Load Amazon reviews

In [4]:
%%time
from sklearn.model_selection import train_test_split

id = 0 
all_amazon_documents = []

with open('../data/amazon/positive.txt', "r", encoding='utf8') as reader:
    for line in reader:
        doc = Document(line, id)
        doc.IsPositive = True
        all_amazon_documents.append(doc)        
        id += 1
    
with open('../data/amazon/negative.txt', "r", encoding='utf8') as reader:
    for line in reader:
        doc = Document(line, id)    
        doc.IsPositive = False
        all_amazon_documents.append(doc)
        id += 1
    
train_doc, test_doc = train_test_split(all_amazon_documents, test_size=0.3)

CPU times: user 23.3 ms, sys: 16.3 ms, total: 39.6 ms
Wall time: 210 ms


## Testing with default model

In [5]:
test_sentiment(test_doc)

2020-08-18 13:22:24,627 - [139992959448896] [MainThread]- JupyterUI - INFO - Using 600 test documents...
Processing failed
Traceback (most recent call last):
  File "/home/prachi/anaconda3/lib/python3.7/asyncio/tasks.py", line 251, in __step
    result = coro.throw(exc)
  File "/home/prachi/anaconda3/lib/python3.7/site-packages/websockets-8.1-py3.7-linux-x86_64.egg/websockets/protocol.py", line 827, in transfer_data
    message = await self.read_message()
  File "/home/prachi/anaconda3/lib/python3.7/site-packages/websockets-8.1-py3.7-linux-x86_64.egg/websockets/protocol.py", line 895, in read_message
    frame = await self.read_data_frame(max_size=self.max_size)
  File "/home/prachi/anaconda3/lib/python3.7/site-packages/websockets-8.1-py3.7-linux-x86_64.egg/websockets/protocol.py", line 971, in read_data_frame
    frame = await self.read_frame(max_size)
  File "/home/prachi/anaconda3/lib/python3.7/site-packages/websockets-8.1-py3.7-linux-x86_64.egg/websockets/protocol.py", line 1051, i

KeyError: '1561'

## Training Sentiment Analysis model

In [None]:
%%time

connection.delete_documents('Amazon2')
connection.save_documents('Amazon2', train_doc)
analysis = SentimentAnalysis(connection, clean=True)
analysis.train('Amazon2')

## Testing with trained model

In [None]:
%%time
test_sentiment(test_doc, 'Amazon2')