# IMPORT

In [None]:
import tempfile
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from sklearn.pipeline import Pipeline

# LOAD DATA, TRAIN MODEL

In [None]:
%%bash
# Fetch the training/test archive into ./data (once) and unpack it.
# Abort on the first failing command so a failed download is not silently
# followed by an attempt to unzip a broken archive.
set -euo pipefail

mkdir -p ./data

archive=./data/trainingandtestdata.zip

if [ ! -f "$archive" ]; then
    # wget -O creates the output file even when the transfer fails, which would
    # make the existence check above skip the download on every later run.
    # Download under a temporary name and rename only after success.
    wget -q -O "$archive.part" http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip
    mv "$archive.part" "$archive"
fi

# -n: never overwrite files that were already extracted.
unzip -n "$archive" -d ./data

In [None]:
# The CSV files carry no header row; these are the column names, in file order.
columns = ['polarity', 'tweetid', 'date', 'query_name', 'user', 'text']

# Training split: location on disk and the name it is uploaded under later.
df_train_file_path = './data/training.1600000.processed.noemoticon.csv'
df_train_name = 'training.1600000.processed.noemoticon'

# Test split: same layout as the training file.
df_test_file_path = './data/testdata.manual.2009.06.14.csv'
df_test_name = 'testdata.manual.2009.06.14'

# Both files are read as ISO-8859-1 with no header, then labelled with `columns`.
df_train = pd.read_csv(df_train_file_path, header=None, encoding='ISO-8859-1')
df_train.columns = columns

df_test = pd.read_csv(df_test_file_path, header=None, encoding='ISO-8859-1')
df_test.columns = columns

In [None]:
# Bag-of-words features: unigrams and bigrams, English stop words removed,
# keeping only terms with a document frequency of at least 100.
vectorizer = CountVectorizer(stop_words='english', ngram_range=(1, 2), min_df=100)
classifier = LogisticRegression()

# Step names ('count_vect', 'lr') are kept so the fitted pipeline exposes the
# same named steps as before.
sentiment_lr = Pipeline(steps=[('count_vect', vectorizer), ('lr', classifier)])
sentiment_lr.fit(df_train.text, df_train.polarity)

In [None]:
# Score only the test rows whose polarity is not 2.
keep_rows = df_test.polarity != 2
x_test = df_test.text[keep_rows]
y_test = df_test.polarity[keep_rows]

y_pred = sentiment_lr.predict(x_test)
print(classification_report(y_test, y_pred))

In [None]:
# Predict for the first retained test tweet. `.iloc[0]` selects by position;
# a plain `x_test[0]` looks up index label 0 and raises KeyError whenever that
# row was removed by the polarity filter that built `x_test`.
sentiment_lr.predict([x_test.iloc[0]])

In [None]:
# Quick sanity check of the fitted pipeline on two one-word inputs.
probe_words = ['good', 'bad']
sentiment_lr.predict(probe_words)

# UNBOX

In [1]:
import getpass
import os

import unboxapi

# Credentials are read from the environment (UNBOX_EMAIL / UNBOX_PASSWORD),
# falling back to an interactive prompt, so that no secret is stored in the
# notebook or leaked when it is shared or committed.
unbox_email = os.environ.get('UNBOX_EMAIL') or input('Unbox email: ')
unbox_password = os.environ.get('UNBOX_PASSWORD') or getpass.getpass('Unbox password: ')

client = unboxapi.UnboxClient(email=unbox_email, password=unbox_password)

## Create the prediction function

In [None]:
# Polarity codes used by this notebook: 0 = negative, 2 = neutral, 4 = positive.
# Only the two classes listed below are exposed; the two lists are parallel
# (class_names[i] is the display name for dataset_labels[i]).
class_names = ['negative', 'positive']
dataset_labels = [0, 4]


def predict_function(model, text_list):
    """Run `model` on a batch of texts and attach the label metadata.

    Args:
        model: Any object exposing `predict_proba(text_list)`.
        text_list: The texts to score, passed to the model unchanged.

    Returns:
        A 3-tuple `(probabilities, class_names, dataset_labels)` where
        `probabilities` is whatever `model.predict_proba` returned and the
        other two items are the module-level lists defined above.
    """
    probabilities = model.predict_proba(text_list)
    return probabilities, class_names, dataset_labels

In [None]:
# A few hand-written inputs to try the prediction function on locally.
texts = [
    'some new text, sweet noodles',
    'happy time',
    'sad day',
]

predict_function(model=sentiment_lr, text_list=texts)

# Package (function, model) & Upload to Firebase

In [None]:
# Register the fitted pipeline together with its prediction function.
model_upload_args = dict(
    function=predict_function,
    model=sentiment_lr,
    name='03.11.2021.sentiment_analyzer',
    description='this is my sklearn sentiment model',
)

print('Uploading model...')
client.add_model(**model_upload_args)
print('Complete.')

In [None]:
print('\nUploading dataset (from file)...')
# Write the training frame to a CSV inside a temporary directory and upload
# that file; the directory is removed when the `with` block exits.
with tempfile.TemporaryDirectory() as scratch_dir:
    csv_path = f'{scratch_dir}/dataset.csv'
    # NOTE(review): to_csv writes the DataFrame index as an extra leading
    # column by default — confirm the upload endpoint expects that.
    df_train.to_csv(csv_path)
    response_i = client.add_dataset(
        file_path=csv_path,
        name=df_train_name,
        description='this is my sentiment train dataset',
        label_column_name='polarity',
        text_column_name='text',
    )
print(f'Complete. Response: {response_i}')

In [None]:
print('\nUploading dataset (from data frame)...')
# Upload the in-memory test frame directly, without an intermediate file.
dataframe_upload_args = dict(
    df=df_test,
    name=df_test_name,
    description='this is my sentiment test dataset',
    label_column_name='polarity',
    text_column_name='text',
)
response_j = client.add_dataframe(**dataframe_upload_args)
print(f'Complete. Response: {response_j}')

In [10]:
# Test error analysis / dataset-model association
# NOTE(review): these identifiers are hard-coded; presumably they come from an
# earlier upload session and must be replaced with your own — confirm.
association_ids = {
    'model_id': '6fd90796-82db-11eb-9127-1e008a21a079',
    'dataset_id': '89b0098a-82db-11eb-9127-1e008a21a079',
    'user_id': 'ytGD2XvoGPSaippqWhAmi5V8mHT2',
}
result = client._test_associate(**association_ids)
# Show the raw response body returned by the call.
print(result.content)

b'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\n<title>500 Internal Server Error</title>\n<h1>Internal Server Error</h1>\n<p>The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.</p>\n'
