In [426]:
from newsgac import database
from newsgac.ace.models import ACE
from newsgac.pipelines.models import Pipeline
from newsgac.genres import genre_labels
from scipy.sparse import csr_matrix
import ipywidgets as widgets
import numpy as np
import re
import pprint

def get_coef_map(num_classes):
    """Number every unordered class pair (i, j) with i < j.

    Pairs are enumerated in lexicographic order, i.e.
    (0, 1), (0, 2), ..., (0, n - 1), (1, 2), ..., (n - 2, n - 1),
    and each pair is assigned the next consecutive index starting at 0.

    :param num_classes: number of classes to pair up.
    :return: dict mapping the running index to its ``(i, j)`` tuple; the
        dict is empty when ``num_classes`` is smaller than 2.
    """
    # For the last value of i the inner range is empty, so iterating i over
    # the full range yields exactly the same pairs as stopping one short.
    pairs = (
        (i, j)
        for i in range(num_classes)
        for j in range(i + 1, num_classes)
    )
    return dict(enumerate(pairs))

# Offer only the pipelines whose display title mentions svc/SVC/svm/SVM.
svc_title_pattern = re.compile(r'svc|SVC|svm|SVM')
svc_pipelines = [
    p for p in Pipeline.objects.all()
    if svc_title_pattern.search(p.display_title)
]

# Dropdown entries are (label shown, value passed on) pairs.
pipeline_options = [(p.display_title, p._id) for p in svc_pipelines]
pipeline_id = widgets.Dropdown(
    description='pipeline:',
    options=pipeline_options,
    disabled=False,
)

# Each radio option shows the genre label and carries its position in
# genre_labels as the selected value.
genre_options = tuple(
    (label, position) for position, label in enumerate(genre_labels)
)
class_1 = widgets.RadioButtons(
    description='class 1:',
    options=genre_options,
    disabled=False,
)
class_2 = widgets.RadioButtons(
    description='class 2:',
    options=genre_options,
    disabled=False,
)

def out(pipeline_id, class_1, class_2):
    """Print the strongest feature weights separating two classes.

    Looks up the pipeline, takes the row of the classifier's ``coef_`` that
    belongs to the selected class pair and prints the ten largest and the
    ten smallest weights together with their feature names.

    :param pipeline_id: value matched against ``_id`` when fetching the
        Pipeline.
    :param class_1: index into ``genre_labels`` of the first class.
    :param class_2: index into ``genre_labels`` of the second class.
    :return: None; all results are printed.
    """
    if class_1 == class_2:
        print('Pick two different classes')
        return
    # The pair -> row lookup below is keyed by (lower, higher) index.
    if class_2 < class_1:
        class_1, class_2 = class_2, class_1
    p = Pipeline.objects.get({'_id': pipeline_id})
    print("Showing %s vs %s for %s " % (genre_labels[class_1], genre_labels[class_2], p.display_title))
    skp = p.sk_pipeline.get()
    classifier = skp.named_steps['Classifier']
    num_classes = len(classifier.classes_)
    feature_names = np.array(skp.get_feature_names())
    if num_classes != 16:
        print("WARNING: not 16 classes, names are probably mixed up")
    # get_coef_map only produces pairs of indices below num_classes, so a
    # larger selection would otherwise fail with a bare KeyError.
    if class_2 >= num_classes:
        print("Class index %d is out of range: classifier has only %d classes" % (class_2, num_classes))
        return

    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    coef_map_inverse = {pair: row for row, pair in get_coef_map(num_classes).items()}

    # NOTE(review): assumes the rows of coef_ follow the same pair ordering
    # as get_coef_map, i.e. (0, 1), (0, 2), ... - confirm against the
    # classifier actually stored in the pipeline.
    weights = classifier.coef_[coef_map_inverse[(class_1, class_2)]]

    if isinstance(weights, csr_matrix):
        # Flatten the sparse row into a plain 1-D array.
        weights = np.asarray(weights.todense())[0]

    # Sort once; the tail holds the largest weights, the head the smallest.
    order = np.argsort(weights)
    top_10_pos_weight_indices = order[-10:][::-1]
    top_10_neg_weight_indices = order[:10]
    weights_pos = weights[top_10_pos_weight_indices]
    weights_neg = weights[top_10_neg_weight_indices]
    print("Positive:")
    # list(...) keeps the output a printed list of pairs on Python 3, where
    # zip returns a lazy iterator.
    pprint.pprint(list(zip(
        ['%.2f' % w for w in weights_pos],
        feature_names[top_10_pos_weight_indices]
    )))
    print("Negative:")
    pprint.pprint(list(zip(
        ['%.2f' % w for w in weights_neg],
        feature_names[top_10_neg_weight_indices]
    )))
    
# Selection controls side by side, with the output of `out` beneath them.
controls_row = widgets.HBox([pipeline_id, class_1, class_2])
weights_output = widgets.interactive_output(
    out,
    {'pipeline_id': pipeline_id, 'class_1': class_1, 'class_2': class_2},
)
# Kept as the last expression of the cell so the notebook displays it.
widgets.VBox([controls_row, weights_output])

VkJveChjaGlsZHJlbj0oSEJveChjaGlsZHJlbj0oRHJvcGRvd24oZGVzY3JpcHRpb249dSdwaXBlbGluZTonLCBvcHRpb25zPSgodSdTVkMgRlJPRycsIE9iamVjdElkKCc1YmY0ODQxOGVkYjHigKY=
