In [None]:
import csv
import os

import numpy as np
import pandas as pd
import requests
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

import get_sentiment

In [None]:
# Azure credentials are read from the environment so that a real key never has
# to be typed into (and saved with) the notebook. Both fall back to "" when the
# corresponding environment variable is not set.
AZURE_SUBSCRIPTION_KEY = os.environ.get("AZURE_SUBSCRIPTION_KEY", "")
# run_azure builds its URL as AZURE_ENDPOINT + "sentiment", so this value must
# already end with "/".
AZURE_ENDPOINT = os.environ.get("AZURE_ENDPOINT", "")

In [None]:
def run_azure(input_array):
    """Score every text in ``input_array`` with the Azure sentiment endpoint.

    The array is flattened, each element is sent as one English-language
    document (ids "1", "2", ...) in a single POST to
    ``AZURE_ENDPOINT + "sentiment"``, and the returned ``score`` values are
    arranged back into the shape of the input.

    Args:
        input_array: numpy array of texts (any shape) holding fewer than
            1000 elements in total.

    Returns:
        numpy array of scores with the same shape as ``input_array``.

    Raises:
        ValueError: if ``input_array`` holds 1000 or more texts.
        requests.HTTPError: if the service answers with an error status.
        RuntimeError: if the response has no score for a submitted text.
    """
    flat_texts = input_array.flatten()
    # Explicit check instead of `assert`, which disappears under `python -O`.
    if len(flat_texts) >= 1000:
        raise ValueError(
            "run_azure accepts fewer than 1000 texts per call, got %d"
            % len(flat_texts)
        )

    headers = {'Ocp-Apim-Subscription-Key': AZURE_SUBSCRIPTION_KEY}
    sentiment_api_url = AZURE_ENDPOINT + "sentiment"
    docs = [
        {'id': str(i + 1), 'language': 'en', 'text': input_text}
        for i, input_text in enumerate(flat_texts)
    ]

    response = requests.post(sentiment_api_url, headers=headers,
                             json={'documents': docs})
    # Surface authentication / quota / URL problems as an HTTP error instead of
    # a KeyError further down.
    response.raise_for_status()
    sentiments = response.json()
    print(sentiments)

    # Match scores to inputs by id rather than by position, so a document the
    # service rejected cannot silently shift every later score by one.
    score_by_id = {
        str(doc['id']): doc['score'] for doc in sentiments.get('documents', [])
    }
    missing_ids = [doc['id'] for doc in docs if doc['id'] not in score_by_id]
    if missing_ids:
        raise RuntimeError(
            "No score returned for document ids %s; reported errors: %s"
            % (missing_ids, sentiments.get('errors'))
        )

    scores = np.array([score_by_id[doc['id']] for doc in docs])
    return scores.reshape(input_array.shape)

In [None]:
# Smoke test: score two short sentences and print the returned array.
inputs = ["this is a text", "this is another text"]
input_array = np.array(inputs)
# The scoring function in this notebook is run_azure; the bare name `run` is
# never defined here and raised NameError on a fresh kernel.
result = run_azure(input_array)
print(result)

In [None]:
def write_results(tweet_list, label_list, api_func, save_file):
    """Score texts with ``api_func`` and save text, label and score as CSV.

    The file gets a header row ``text,label,<name of api_func>`` followed by
    one row per text.

    Args:
        tweet_list: sequence of texts to score.
        label_list: sequence of labels; entry ``i`` belongs to text ``i``.
        api_func: callable that receives ``np.array(tweet_list)`` and returns
            a sequence of scores in the same order.
        save_file: path of the CSV file to write (overwritten if it exists).

    Raises:
        ValueError: if there are fewer labels or scores than texts.
    """
    # Score before opening the file, so a failing API call does not leave an
    # empty or half-written results file behind.
    sentiment = api_func(np.array(tweet_list))

    n_texts = len(tweet_list)
    if len(label_list) < n_texts or len(sentiment) < n_texts:
        raise ValueError(
            "need a label and a score for each of the %d texts, got %d labels "
            "and %d scores" % (n_texts, len(label_list), len(sentiment))
        )

    # str(api_func) would put "<function ... at 0x...>" in the header; the
    # function's name is stable across runs and usable as a column name.
    score_column = getattr(api_func, '__name__', str(api_func))

    # newline='' lets the csv module control line endings itself (otherwise
    # platforms that translate "\n" end up with blank lines between rows).
    with open(save_file, 'w', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["text", "label", score_column])
        for twt, lbl, score in zip(tweet_list, label_list, sentiment):
            csv_writer.writerow([twt, lbl, score])

In [None]:
# Read the Twitter sample (column 0 = text, column 1 = label), score it with
# Azure and save the scores next to the labels.
tweets = []
labels = []

# newline='' is what the csv module expects for files it reads; without it,
# line breaks inside quoted fields are not parsed correctly.
with open('datasets/twitter_sentiment100.csv', 'r', newline='') as f:
    csv_reader = csv.reader(f)
    header = next(csv_reader)  # skip the header row
    for row in csv_reader:
        tweets.append(row[0])
        labels.append(row[1])

write_results(tweets, labels, run_azure, 'results/twitter_sentiment100_results.csv')

In [None]:
# Load a saved results file and print the mean and the standard error of the
# mean of its numeric columns.
df = pd.read_csv('results/sst_sentiment100_results.csv')
# The frame also has a string 'text' column. pandas >= 2.0 no longer drops
# non-numeric columns silently and raises TypeError, so restrict the
# reductions to numeric columns explicitly.
print(df.mean(numeric_only=True))
print(df.sem(numeric_only=True))

In [None]:
# Pull the 'label' column and the 'g_sentiment' score column out of df as
# plain Python lists.
label, pred = (df[column].tolist() for column in ('label', 'g_sentiment'))

In [None]:
# Turn the raw scores into 0/1 predictions (1 when the score is strictly
# greater than 0, else 0) and report the fraction that equals the labels.
label = np.asarray(label)
pred = np.around(np.asarray([1 if float(score) > 0 else 0 for score in pred]))
acc = np.sum(pred == label)
print("Accuracy", acc/len(label))

In [None]:
# Naive Bayes (NB) classifier for Amazon reviews


In [None]:
# Texts and their labels from df, as plain Python lists.
texts, labels = df['text'].tolist(), df['label'].tolist()

In [None]:
# Preview the first ten texts (displayed as the cell's output).
texts[:10]

In [None]:
# Fit a bag-of-words vectorizer (English stop words removed, at most 10000
# terms) and a multinomial Naive Bayes model on rows 1000..8999 of the data.
# NOTE(review): df was read from a file whose name ends in "100_results";
# confirm it really holds at least 9000 rows, otherwise this slice is empty.
train_rows = slice(1000, 9000)

vectorizer = CountVectorizer(max_features=10000, stop_words='english')
train_features = vectorizer.fit_transform(texts[train_rows])
print(train_features.shape)

nb_model = MultinomialNB()
nb_model.fit(train_features, labels[train_rows])

In [None]:
# Term -> column-index mapping learned by the fitted vectorizer.
vocab = vectorizer.vocabulary_

In [None]:
# Load the Amazon sample used as the test set and report how many rows it has.
test_df = pd.read_csv('datasets/amazon_sentiment100.csv')
test_texts, test_labels = test_df['text'].tolist(), test_df['label'].tolist()
print(len(test_labels))

In [None]:
# Vectorize the test texts with the vectorizer that was fitted on the training
# texts. transform() only maps text onto the already-learned vocabulary;
# nothing is (re)fitted on test data, and the feature columns are guaranteed
# to line up with the ones nb_model was trained on.
test_features = vectorizer.transform(test_texts)
print(test_features.shape)

# Hard class predictions and per-class probabilities for every test text.
predictions = nb_model.predict(test_features)
prob = nb_model.predict_proba(test_features)

In [None]:
print(test_labels)
# Keep column 1 of the predict_proba output for every test text, i.e. the
# probability of nb_model.classes_[1] (presumably the positive label; confirm).
class_prob = list(prob[:, 1])
# Mean accuracy on the test set, shown as the cell's output.
nb_model.score(test_features, test_labels)

In [None]:
# Mean of the class-1 probabilities and the standard error of that mean.
# NOTE(review): np.var defaults to ddof=0 while pandas' sem() uses ddof=1, so
# this is not directly comparable with df.sem() output. Confirm which is meant.
n_probs = len(class_prob)
print(np.mean(class_prob))
print(np.sqrt(np.var(class_prob) / n_probs))

In [None]:
# Save text, gold label and Naive Bayes class-1 probability for each test text.
# NOTE(review): the test texts were read from an "amazon" file but are saved
# under an "sst" name; confirm the intended output path.
save_file = 'results/sst_sentiment100_custom_results.csv'
# newline='' lets the csv module control line endings itself (otherwise
# platforms that translate "\n" end up with blank lines between rows).
with open(save_file, 'w', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(["text", "label", "NB_prob"])
    csv_writer.writerows(zip(test_texts, test_labels, class_prob))