In [1]:
from joblib import Parallel, delayed
import multiprocessing as mp
import pandas as pd
import random
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer

In [2]:
sample = pd.read_csv('../data/hand_labeled_examples_short.csv', encoding='ISO-8859-1')

In [3]:
%%time
# Analyze sentiments in parallel
def process_sentiment(tweet):
    return TextBlob(tweet, analyzer=NaiveBayesAnalyzer()).sentiment[0]

n_cores = mp.cpu_count()
sentiments = Parallel(n_jobs=n_cores, verbose=2)(delayed(process_sentiment)(tweet) for tweet in sample['text'])

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:   35.2s


CPU times: user 246 ms, sys: 81.1 ms, total: 327 ms
Wall time: 1min 51s


[Parallel(n_jobs=8)]: Done 104 out of 104 | elapsed:  1.9min finished


In [4]:
sample['pred']=[1 if sent=='neg' else 3 if sent=='pos' else 2 for sent in sentiments]

In [5]:
accuracy_baseline = 1.0 * sum(sample['sentiment']==2) / len(sample)
accuracy_model = 1.0 * sum(sample['sentiment']==sample['pred']) / len(sample)

In [6]:
print('Accuracy of {:.2%} when predicting all positives.'.format(accuracy_baseline))
print('Model accuracy of {:.2%}'.format(accuracy_model))

Accuracy of 47.12% when predicting all positives.
Model accuracy of 26.92%


In [7]:
%%time
# Repeat for pattern analyzer
# Analyze sentiments in parallel
def process_sentiment(tweet):
    return TextBlob(tweet).sentiment.polarity

n_cores = mp.cpu_count()
sentiments = Parallel(n_jobs=n_cores, verbose=2)(delayed(process_sentiment)(tweet) for tweet in sample['text'])

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.


CPU times: user 137 ms, sys: 60.4 ms, total: 198 ms
Wall time: 2.04 s


[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    1.7s
[Parallel(n_jobs=8)]: Done 104 out of 104 | elapsed:    1.8s finished


In [8]:
sample['pred']=[0 if sent<0.5 else 2 if sent>=0.5 else 1 for sent in sentiments]

In [9]:
accuracy_baseline = 1.0 * sum(sample['sentiment']==2) / len(sample)
accuracy_model = 1.0 * sum(sample['sentiment']==sample['pred']) / len(sample)

In [10]:
print('Accuracy of {:.2%} when predicting all positives.'.format(accuracy_baseline))
print('Model accuracy of {:.2%}'.format(accuracy_model))

Accuracy of 47.12% when predicting all positives.
Model accuracy of 5.77%
