In [None]:
import io, os, sys, types

import nbformat
import pandas as pd
from IPython import get_ipython
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display
from nbformat import read

from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB

In [None]:
def find_notebook(fullname, path=None):
    """Find a notebook file, given its fully qualified name and an optional path.

    Only the last dotted component of ``fullname`` is used as the file name,
    so "foo.bar" is looked up as "bar.ipynb" inside each directory of ``path``.
    If that file does not exist, "Foo_Bar" is also tried as "Foo Bar.ipynb".

    Parameters
    ----------
    fullname : str
        Dotted module name being imported.
    path : iterable of str, optional
        Directories to search. When empty or None, the current directory
        (``''``) is searched.

    Returns
    -------
    str or None
        Path of the first matching notebook, or None if nothing matched.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']
    # Candidate file names in lookup order. The underscore -> space fallback
    # is applied to the file name only, never to the directory part, so
    # notebooks stored in directories such as "my_notebooks/" are still found.
    candidates = (name + ".ipynb", name.replace("_", " ") + ".ipynb")
    for d in path:
        for filename in candidates:
            nb_path = os.path.join(d, filename)
            if os.path.isfile(nb_path):
                return nb_path
    return None

In [None]:
class NotebookLoader(object):
    """Module loader that imports an IPython notebook by running its code cells.

    ``path`` is the list of directories handed to ``find_notebook`` when the
    notebook file is looked up.
    """
    def __init__(self, path=None):
        self.shell = InteractiveShell.instance()
        self.path = path

    def load_module(self, fullname):
        """Import a notebook as a module.

        Every code cell of the notebook is transformed to plain Python by the
        shell's input transformer and executed in the new module's namespace.

        Parameters
        ----------
        fullname : str
            Dotted name of the module being imported.

        Returns
        -------
        module
            The populated module, also registered in ``sys.modules``.

        Raises
        ------
        ImportError
            If no notebook file can be found for ``fullname``.
        Exception
            Whatever a notebook cell raises is propagated; in that case the
            partially initialised module is removed from ``sys.modules``.
        """
        path = find_notebook(fullname, self.path)
        if path is None:
            # Fail with a clear import error instead of passing None on to
            # nbformat.read().
            raise ImportError("no notebook found for module %r" % fullname)

        print ("importing notebook from %s" % path)

        # load the notebook object
        nb = nbformat.read(path, as_version=4)

        # create the module and add it to sys.modules
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, mod.__dict__)
        except BaseException:
            # A loader must not leave a half-initialised module behind when
            # loading fails, otherwise a later import would silently reuse it.
            sys.modules.pop(fullname, None)
            raise
        finally:
            self.shell.user_ns = save_user_ns
        return mod

In [None]:
class NotebookFinder(object):
    """Module finder that locates IPython Notebooks.

    Loaders are cached per search path in ``self.loaders`` so each path gets
    a single ``NotebookLoader``.
    """
    def __init__(self):
        self.loaders = {}

    def find_module(self, fullname, path=None):
        """Legacy finder hook: return a loader for ``fullname`` or None.

        Returns None (implicitly) when ``find_notebook`` finds no matching
        notebook, which lets the remaining finders handle the import.
        """
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return

        key = path
        if path:
            # lists aren't hashable
            key = os.path.sep.join(path)

        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

    def find_spec(self, fullname, path=None, target=None):
        """Spec-based finder hook used by the Python 3 import system.

        Python 3.12 no longer calls ``find_module`` on meta path finders, so
        without this method notebook imports are silently skipped there.
        The lookup itself is delegated to ``find_module``.

        Returns a module spec wrapping the notebook loader, or None when no
        notebook matches ``fullname``.
        """
        loader = self.find_module(fullname, path)
        if loader is None:
            return None
        # Imported here because importlib.util does not exist on Python 2,
        # where this method is never called.
        from importlib.util import spec_from_loader
        return spec_from_loader(fullname, loader)

In [None]:
# Register the notebook finder. Any NotebookFinder left over from an earlier
# run of this cell is dropped first (matched by class name, since re-running
# the class definition creates a new class object), so re-executing the cell
# does not stack duplicate finders on sys.meta_path.
sys.meta_path[:] = [finder for finder in sys.meta_path
                    if type(finder).__name__ != 'NotebookFinder']
sys.meta_path.append(NotebookFinder())

In [None]:
from rating_prediction_lda import totalTopics,all_text_train, all_text_test,topic_dist_train_all_stars,topic_dist_test_all_stars
from rating_prediction_tfidf import tfidfvectorizer

In [None]:
def getSentiment(s, threshold=3.5):
    """Map a star rating to a binary sentiment label.

    Parameters
    ----------
    s : number
        Star rating.
    threshold : number, optional
        Ratings strictly below this value are labelled negative.
        Defaults to 3.5.

    Returns
    -------
    int
        0 when ``s < threshold``, otherwise 1.
    """
    # Kept as "below threshold -> 0, everything else -> 1" so values that do
    # not compare as smaller (including NaN) still map to 1, as before.
    return 0 if s < threshold else 1

In [None]:
# Derive the binary 'Sentiment' column from 'Star' on both the train and the
# test frame, using getSentiment for the mapping.
for stars_frame in (topic_dist_train_all_stars, topic_dist_test_all_stars):
    stars_frame['Sentiment'] = stars_frame['Star'].map(getSentiment)

In [None]:
# Targets for the sentiment classifier: the 'Sentiment' column of each split.
sentimentLabelTrain = topic_dist_train_all_stars['Sentiment']
sentimentLabelTest = topic_dist_test_all_stars['Sentiment']

# The vectorizer is fitted on the training texts only; the test texts are
# transformed with that same fitted vectorizer.
sentimentTextTrain = tfidfvectorizer.fit_transform(all_text_train)
sentimentTextTest = tfidfvectorizer.transform(all_text_test)

In [None]:
# Train a logistic regression on the vectorized training texts.
classifier = LogisticRegression()
classifier.fit(sentimentTextTrain, sentimentLabelTrain)

# Predict sentiment for each split and store it next to the topic
# distributions as 'Sentiment_Predicted'.
ySentimentTrain = classifier.predict(sentimentTextTrain)
topic_dist_train_all_stars['Sentiment_Predicted'] = ySentimentTrain

ySentimentTest = classifier.predict(sentimentTextTest)
topic_dist_test_all_stars['Sentiment_Predicted'] = ySentimentTest

In [None]:
# Model inputs: the first `totalTopics` columns plus the predicted sentiment.
features = list(topic_dist_train_all_stars.columns[:totalTopics])
# The extra feature is selected by name rather than by position
# (columns[totalTopics+2]), which silently depends on the order in which
# columns were added to the frame.
# NOTE(review): assumes the positional lookup was meant to pick
# 'Sentiment_Predicted' (the last column added above) - confirm against the
# frame built in rating_prediction_lda.
features.append('Sentiment_Predicted')

In [None]:
# Inputs are the selected feature columns; the target is the 'Star' column.
x_train, y_train = topic_dist_train_all_stars[features], topic_dist_train_all_stars['Star']
x_test, y_test = topic_dist_test_all_stars[features], topic_dist_test_all_stars['Star']

In [None]:
# Fixed seed so the randomized ensembles give the same scores on every run.
RANDOM_STATE = 42

classifiers = [
    MultinomialNB(),
    LogisticRegression(),
    RandomForestClassifier(n_estimators=100, n_jobs=2, random_state=RANDOM_STATE),
    AdaBoostClassifier(n_estimators=100, random_state=RANDOM_STATE),
]
classifiers_names = ['Multinomial Naive Bayes', 'Logistic Regression', 'Random Forest', 'AdaBoost']

# Row/column order of the confusion matrices: every star value present in
# either split, ascending. This replaces `starsGroup.groups.keys()`, which
# referenced a name that is never defined or imported in this notebook.
star_labels = sorted(set(y_train) | set(y_test))

# Fit each classifier on the training split, score it on the test split and
# collect the metrics keyed by the classifier's display name.
LdaSentimentResults = {}
for name, clf_ in zip(classifiers_names, classifiers):
    clf = clf_.fit(x_train, y_train)
    preds = clf.predict(x_test)

    # average='weighted' is passed explicitly: the default ('binary') raises
    # on targets with more than two classes.
    # NOTE(review): assumes 'Star' has more than two distinct values - confirm.
    LdaSentimentResults[name] = {
        'precision': metrics.precision_score(y_test, preds, average='weighted'),
        'recall': metrics.recall_score(y_test, preds, average='weighted'),
        'f1_score': metrics.f1_score(y_test, preds, average='weighted'),
        'accuracy': accuracy_score(y_test, preds),
        'clf_report': classification_report(y_test, preds),
        'clf_matrix': metrics.confusion_matrix(y_test, preds, labels=star_labels),
        'y_predicted': preds,
    }

In [None]:
cols = ['precision', 'recall', 'f1_score', 'accuracy']
# Summary table: one column per model, one row per metric. display() is used
# because a bare expression is only rendered when it is the last statement
# of a cell, so the table was previously computed and thrown away.
display(pd.DataFrame(LdaSentimentResults).T[cols].T)

# Per-model details. Each print() call takes a single string argument, so it
# behaves the same under Python 2 and Python 3; .items() likewise replaces
# the Python-2-only .iteritems().
for model, val in LdaSentimentResults.items():
    rule = '-------' + '-' * len(model)
    print(rule)
    print('MODEL: ' + model)
    print(rule)
    print('The precision for this classifier is ' + str(val['precision']))
    print('The recall for this classifier is    ' + str(val['recall']))
    print('The f1 for this classifier is        ' + str(val['f1_score']))
    print('The accuracy for this classifier is  ' + str(val['accuracy']))
    print('The confusion matrix for this classifier is  \n' + str(val['clf_matrix']))
    print('\nHere is the classification report:')
    print(val['clf_report'])