In [25]:
from joblib import Parallel, delayed
import multiprocessing as mp
import pandas as pd
import random
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer

In [None]:
sample = pd.read_csv('../data/hand_labeled_examples_short.csv', encoding='ISO-8859-1')

In [None]:
%%time
# Analyze sentiments in parallel
def process_sentiment(tweet):
    return TextBlob(tweet, analyzer=NaiveBayesAnalyzer()).sentiment[0]

n_cores = mp.cpu_count()
sentiments = Parallel(n_jobs=n_cores, verbose=2)(delayed(process_sentiment)(tweet) for tweet in sample['text'])

In [None]:
sample['pred']=[1 if sent=='neg' else 3 if sent=='pos' else 2 for sent in sentiments]

In [20]:
accuracy_baseline = 1.0 * sum(sample['sentiment']==2) / len(sample)
accuracy_model = 1.0 * sum(sample['sentiment']==sample['pred']) / len(sample)

In [21]:
print('Accuracy of {:.2%} when predicting all positives.'.format(accuracy_baseline))
print('Model accuracy of {:.2%}'.format(accuracy_model))

Accuracy of 48.00% when predicting all positives.
Model accuracy of 54.00%


In [17]:
%%time
# Repeat for pattern analyzer
# Analyze sentiments in parallel
def process_sentiment(tweet):
    return TextBlob(tweet).sentiment.polarity

n_cores = mp.cpu_count()
sentiments = Parallel(n_jobs=n_cores, verbose=2)(delayed(process_sentiment)(tweet) for tweet in sample['text'])

[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.


CPU times: user 243 ms, sys: 85.8 ms, total: 329 ms
Wall time: 1.88 s


[Parallel(n_jobs=8)]: Done  25 tasks      | elapsed:    1.4s
[Parallel(n_jobs=8)]: Done 442 tasks      | elapsed:    1.5s
[Parallel(n_jobs=8)]: Done 500 out of 500 | elapsed:    1.5s finished


In [14]:
sample['pred']=[0 if sent<0.5 else 2 if sent>=0.5 else 1 for sent in sentiments]

In [15]:
accuracy_baseline = 1.0 * sum(sample['sentiment']==2) / len(sample)
accuracy_model = 1.0 * sum(sample['sentiment']==sample['pred']) / len(sample)

In [16]:
print('Accuracy of {:.2%} when predicting all positives.'.format(accuracy_baseline))
print('Model accuracy of {:.2%}'.format(accuracy_model))

Accuracy of 48.00% when predicting all positives.
Model accuracy of 56.40%
