# Assess Model Accuracy for Logistic Regression, Decision Tree, and Random Forest

- Add a short discussion of what each metric measures
- Define Jaccard similarity and Hamming loss
- Interpret the classification report for each label
- Consider AdaBoost, gradient boosting, and CART

In [74]:
import numpy as np
import pandas as pd
import pickle
import os

from sklearn.metrics import f1_score
from sklearn.metrics import hamming_loss 
from sklearn.metrics import make_scorer
from sklearn.metrics import jaccard_similarity_score as jaccard_score
from sklearn.metrics import classification_report

In [75]:
# Directory where the pickled model results are kept. The trailing slash
# matters: file names are appended to this string directly.
data_wd = '/Users/AlexandraDing/Documents/cs109b-best-group/Model_Results/'

# Work from the project root directory.
os.chdir('/Users/AlexandraDing/Documents/cs109b-best-group')

In [76]:
### Load Dataset
def _load_pickle(path):
    """Return the object unpickled from `path`, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# X_data: unprocessed features
# X_data_std: standardized by Preprocessor
# y_data: MultiLabel Binarized targets
X_data, X_data_std, y_data = _load_pickle('continuous_features_targets.p')

# Single-argument print(...) with %-formatting produces the same text under
# the Python 2 print statement and the Python 3 print function.
print('X_data shape: %s' % (X_data.shape,))
print('X_data_std shape: %s' % (X_data_std.shape,))
print('y shape %s' % (y_data.shape,))

# Unpickle model results: each file holds a pair whose second element is the
# array of predictions for that model.
LogReg_cv, y_pred_LogReg = _load_pickle(data_wd + 'LogReg_grid_results.p')
print(y_pred_LogReg.shape)

DT_grid_cv, y_pred_Decision_Tree = _load_pickle(data_wd + 'DecisionTree_grid_results.p')
print(y_pred_Decision_Tree.shape)

rf_grid_cv, y_pred_RF = _load_pickle(
    data_wd + 'RandomForest_tune_maxdepth_hamming_grid_results.p')
print(y_pred_RF.shape)

# Create list with all model predictions
prediction_list = [y_pred_LogReg, y_pred_Decision_Tree, y_pred_RF]

X_data shape: (5996, 124)
X_data_std shape: (5996, 124)
y shape (5996, 20)
(5996, 20)
(5996, 20)
(5996, 20)


In [89]:
# Read column names to get genre labels for tables below
movie_data = pd.read_csv("add_imdb_utf8_fixruntime_cleaned.csv")

# Columns 14..32 are named by numeric genre id; genre_dict maps id -> name.
genre_numbers = movie_data.columns[14:33]

# NOTE: unpickling can execute arbitrary code; only load trusted files.
# The with-block closes the file handle instead of leaking it.
with open('/Users/AlexandraDing/Documents/cs109b-best-group/Milestone1/genre_dict_by_id.p', 'rb') as genre_file:
    genre_dict = pickle.load(genre_file)

genre_labels = [genre_dict[int(genre_id)] for genre_id in genre_numbers]

# Per-genre column sums, labelled by genre name. This must run before
# "Foreign" is inserted so the index length matches the sliced columns.
print(pd.Series(data=np.sum(movie_data[genre_numbers], axis=0).values, index=genre_labels))

# NOTE(review): "Foreign" has no column in the slice above; it is inserted at
# position 17, presumably so the label list lines up with the columns of the
# binarized target matrix - confirm against the binarizer's class order.
genre_labels.insert(17, "Foreign")
print(genre_labels)


                        poster_path                                title  \
0  /5E99Ucwf5JJ9iDzARNk0mizQ8bN.jpg  WWE: The True Story of WrestleMania   
1  /mQqPsqE1ZVfc2rgQ1grnPb1hI1E.jpg              The Music Never Stopped   
2  /78DFS4QcjCBi89NtiEqAROpSkIz.jpg        Roman Polanski: A Film Memoir   

  release_date                                           overview  popularity  \
0    3/13/2011  It is the most anticipated yearly event in Spo...    1.132434   
1    3/18/2011  Henry struggles to bond with his estranged son...    1.414362   
2    9/27/2011  An interview with film director Roman Polanski...    1.034842   

                        original_title                     backdrop_path  \
0  WWE: The True Story of WrestleMania  /yGUzOJqrHbiMl3KMxb7maD8Eat0.jpg   
1              The Music Never Stopped  /6fMQCfmsqdhhlcNx6FdleX6zTXG.jpg   
2        Roman Polanski: A Film Memoir  /y0wy89A4tl9Ldf7pGMcWkdZ9cMo.jpg   

   vote_count  video  adult      ...       10749 10751  10752 107

In [79]:
### SUMMARIZE MODEL ACCURACY
def summarize_model_accuracy(y_prediction, y_data, names):
    """Print and return evaluation metrics for MULTILABEL predictions.

    INPUTS:
        y_prediction: predicted y (same shape as y_data)
        y_data: ground truth y
        names: label names passed to the classification report as
            target_names
    OUTPUTS:
        Prints, in order: accuracy, Hamming loss, weighted F1 score,
        Jaccard similarity, and the per-class classification report.
        Returns a dict with the same values under the keys 'accuracy',
        'hamming_loss', 'f1_weighted', 'jaccard_similarity' and
        'classification_report', so callers that bind the result get
        something usable.
    RAISES:
        ValueError: if y_prediction and y_data do not have the same shape.
    """
    # An elementwise comparison of differently shaped inputs can broadcast or
    # collapse to a single boolean, which would print a meaningless accuracy.
    if np.shape(y_prediction) != np.shape(y_data):
        raise ValueError(
            'y_prediction and y_data must have the same shape, got %s and %s'
            % (np.shape(y_prediction), np.shape(y_data)))

    # Single-argument print(...) with %-formatting produces the same text
    # under the Python 2 print statement and the Python 3 print function.

    # Basic accuracy: proportion of individual label entries that are correct
    accuracy = np.mean(y_prediction == y_data)
    print('Accuracy: %s' % (accuracy,))

    # Hamming loss
    hamming = hamming_loss(y_data, y_prediction)
    print('Hamming Loss: %s' % (hamming,))

    # F1 score, averaged over labels weighted by support
    f1_weighted = f1_score(y_data, y_prediction, average='weighted')
    print('F1 Score: %s' % (f1_weighted,))

    # Jaccard similarity (jaccard_score is the file-level alias of sklearn's
    # jaccard_similarity_score)
    jaccard = jaccard_score(y_data, y_prediction)
    print('Jaccard Similarity: %s' % (jaccard,))

    # Classification report: recall, precision, f1 ON EACH CLASS
    # (can be used for the multilabel case)
    report = classification_report(y_data, y_prediction, target_names=names)
    print(report)

    return {
        'accuracy': accuracy,
        'hamming_loss': hamming,
        'f1_weighted': f1_weighted,
        'jaccard_similarity': jaccard,
        'classification_report': report,
    }

## Logistic Regression Model Performance

In [80]:
# Print the evaluation metrics for the logistic regression predictions
LogRegSummary = summarize_model_accuracy(
    y_prediction=y_pred_LogReg,
    y_data=y_data,
    names=genre_labels)

Accuracy: 0.595488659106
Hamming Loss: 0.404511340894
F1 Score: 0.363745775457
Jaccard Similarity: 0.151680275982
                 precision    recall  f1-score   support

      Adventure       0.10      0.61      0.18       367
        Fantasy       0.07      0.66      0.13       268
      Animation       0.10      0.80      0.18       339
          Drama       0.52      0.67      0.58      2179
         Horror       0.22      0.74      0.35       856
         Action       0.20      0.74      0.32       774
         Comedy       0.36      0.68      0.47      1496
        History       0.04      0.80      0.07       125
        Western       0.02      0.73      0.03        55
       Thriller       0.29      0.76      0.42      1157
          Crime       0.09      0.71      0.17       396
    Documentary       0.30      0.91      0.45       909
Science Fiction       0.11      0.77      0.20       422
        Mystery       0.06      0.67      0.11       269
          Music       0.08    

## Decision Tree Model Performance

In [81]:
# Print the evaluation metrics for the decision tree predictions
Decision_Tree = summarize_model_accuracy(
    y_prediction=y_pred_Decision_Tree,
    y_data=y_data,
    names=genre_labels)

Accuracy: 0.908522348232
Hamming Loss: 0.0914776517678
F1 Score: 0.168331087028
Jaccard Similarity: 0.13245377871
                 precision    recall  f1-score   support

      Adventure       0.59      0.04      0.08       367
        Fantasy       0.14      0.01      0.01       268
      Animation       0.45      0.05      0.09       339
          Drama       0.56      0.27      0.37      2179
         Horror       0.00      0.00      0.00       856
         Action       0.49      0.03      0.06       774
         Comedy       0.86      0.12      0.22      1496
        History       0.08      0.01      0.01       125
        Western       0.00      0.00      0.00        55
       Thriller       0.25      0.00      0.00      1157
          Crime       0.00      0.00      0.00       396
    Documentary       0.82      0.30      0.44       909
Science Fiction       0.30      0.03      0.06       422
        Mystery       0.00      0.00      0.00       269
          Music       0.65    

## Random Forest Model Performance

In [82]:
# Print the evaluation metrics for the random forest predictions
RF_Model_Summary = summarize_model_accuracy(
    y_prediction=y_pred_RF,
    y_data=y_data,
    names=genre_labels)

Accuracy: 0.909372915277
Hamming Loss: 0.0906270847231
F1 Score: 0.215170777762
Jaccard Similarity: 0.169916055148
                 precision    recall  f1-score   support

      Adventure       0.52      0.06      0.11       367
        Fantasy       0.27      0.01      0.02       268
      Animation       0.52      0.04      0.07       339
          Drama       0.60      0.34      0.43      2179
         Horror       0.62      0.09      0.16       856
         Action       0.44      0.05      0.09       774
         Comedy       0.64      0.17      0.27      1496
        History       0.00      0.00      0.00       125
        Western       0.00      0.00      0.00        55
       Thriller       0.40      0.10      0.16      1157
          Crime       0.14      0.00      0.00       396
    Documentary       0.76      0.35      0.48       909
Science Fiction       0.47      0.02      0.04       422
        Mystery       0.00      0.00      0.00       269
          Music       0.63   