In [None]:
import pandas as pd
import eli5

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23. Prefer the standalone package and fall back to the vendored copy
# on older installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

### Load Training Data (in case needed) and model

In [None]:
training_data = pd.read_csv('../data/lobsters_numeric.csv')
X = training_data.drop(['score'], axis=1)
y = training_data.score
X.head()

In [None]:
# 'Unnamed: 0' is the name pandas assigns to a header-less CSV column
# (presumably the row index written when the file was saved -- confirm).
# The guard makes re-running this cell a no-op: once the column has become
# the index it is no longer in X.columns, and an unguarded set_index would
# raise KeyError.
if 'Unnamed: 0' in X.columns:
    X = X.set_index('Unnamed: 0')

In [None]:
lr_model = joblib.load(
    '../data/lobsters_lr_score_model_overfit.pkl')

### Exploring weights

In [None]:
lr_explanation = eli5.explain_weights(lr_model)

In [None]:
lr_explanation

In [None]:
eli5.explain_weights_df?

In [None]:
lr_explanation = eli5.explain_weights_df(lr_model)

In [None]:
lr_explanation.head()

### Adding human-readable feature names

In [None]:
sorted(lr_explanation.feature.values)[-20:]

In [None]:
X.shape

In [None]:
def get_feature_name_from_eli5_name(eli5_name, 
                                    feature_names=None):
    """Map an eli5 positional feature label such as ``'x12'`` to a real name.

    Parameters
    ----------
    eli5_name : str
        Label taken from the ``feature`` column of an eli5 explanation.
    feature_names : sequence of str, optional
        Names indexed by feature position. When omitted, ``X.columns`` is
        looked up at call time (not at definition time), so re-assigning
        ``X`` after this cell has run cannot leave a stale default behind.

    Returns
    -------
    str
        ``feature_names[N]`` when ``eli5_name`` is exactly ``'x'`` followed
        by decimal digits and ``N`` is a valid position; otherwise the
        sentinel ``'not in features'``.
    """
    position = eli5_name[1:]
    # Require the strict "x<digits>" shape. The previous lstrip('x') + int()
    # raised ValueError on names like 'xml' and let 'x-1' silently resolve
    # to the last column.
    if not (eli5_name.startswith('x') and position.isdecimal()):
        return 'not in features'
    if feature_names is None:
        feature_names = X.columns
    index = int(position)
    if index >= len(feature_names):
        # A position beyond the known columns is, literally, not a feature.
        return 'not in features'
    return feature_names[index]

In [None]:
lr_explanation['feature_name'] = lr_explanation.feature.map(
    get_feature_name_from_eli5_name)

In [None]:
lr_explanation.head()

### Looking at an explanation

In [None]:
eli5.explain_prediction(lr_model,
    X.iloc[0])

In [None]:
y[0]

In [None]:
eli5.explain_prediction(lr_model,
    X.iloc[382])

In [None]:
y[382]

### Your Turn

- Investigate a few more predictions
- Investigate at least one story tagged with `interview`
- Report your findings to our Slack Chat! :)

### Investigating our Tag Classifier

In [None]:
# Load the tag-classification data and the serialized model inspected in the
# cells below (later cells access it through `.steps`, i.e. as a pipeline).
# NOTE(review): if the pickled pipeline references the notebook-level
# `tokenize` function, joblib.load needs that function defined *before* this
# cell runs -- confirm this order survives Restart Kernel -> Run All.
tag_data = pd.read_csv(
    '../data/lobsters_tag_training_data.csv')
svm_model = joblib.load(
    '../data/lobsters_tag_classification_svm.pkl')

In [None]:
import json
import re


from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()
# Read the tag list inside a context manager so the file handle is closed
# as soon as parsing finishes, instead of being left to the garbage collector.
with open('../data/lobsters_tag_list.json') as tag_file:
    all_tags = json.load(tag_file)

def clean_text(sentence):
    """Lower-case ``sentence`` and return its filtered word tokens.

    Tokens are maximal runs of word characters. A token is kept when it is
    not in ``ENGLISH_STOP_WORDS`` and is either longer than one character or
    listed in ``all_tags`` (so single-character tags survive the length cut).

    NOTE(review): presumably this mirrors the tokenisation used when the tag
    classifier was trained -- confirm before changing any filtering rule.
    """
    # Raw string: "\w" in a plain literal is an invalid escape sequence
    # (deprecated, and a SyntaxWarning on recent Python versions).
    words = re.findall(r"\w+", sentence.lower())
    return [word for word in words if
            word not in ENGLISH_STOP_WORDS
            and (len(word) > 1 or word in all_tags)]


def remove_urls(text):
    """Return ``text`` with every ``http`` + non-whitespace run removed.

    Only the matched characters are deleted; surrounding whitespace is left
    untouched, so a URL between two spaces leaves both spaces behind.
    """
    # Raw string avoids the invalid-escape warning; the pattern is unchanged.
    return re.sub(r'http\S+', '', text)


def tokenize(text):
    """Turn raw ``text`` into a list of stemmed tokens.

    Steps, in order: lower-case the text, strip URLs with ``remove_urls``,
    split and filter with ``clean_text``, then stem each surviving word with
    the module-level ``stemmer``.
    """
    without_urls = remove_urls(text.lower())
    stems = []
    for word in clean_text(without_urls):
        stems.append(stemmer.stem(word))
    return stems


In [None]:
explanation_df = eli5.explain_weights_df(svm_model)

In [None]:
explanation_df.head()

In [None]:
explanation_df.tail()

In [None]:
explanation_df[explanation_df['target'] == 0]

In [None]:
# Mapping used below with stringified class indices as keys.
# The context manager closes the file handle once the JSON is parsed,
# rather than leaving an open handle to the garbage collector.
with open('../data/lobsters_tag_classes.json') as classes_file:
    class_dict = json.load(classes_file)

In [None]:
class_dict

In [None]:
explanation_df[explanation_df['target'] == 3]

### Explaining a text prediction

In [None]:
tag_data.iloc[0]

In [None]:
eli5.explain_prediction(svm_model, 
                        tag_data['full_text'].iloc[0])

In [None]:
text = tag_data['full_text'].iloc[0]
vectorizer = svm_model.steps[0][1]
vectorized_text = vectorizer.transform([text])

In [None]:
text

In [None]:
vectorized_text

In [None]:
eli5.explain_prediction(svm_model.steps[1][1], 
                        vectorized_text)

In [None]:
vectorizer.get_feature_names()[77]

In [None]:
from eli5.lime import TextExplainer
# Display names for class indices 0..3, looked up by their string keys.
target_names = [class_dict[str(k)] for k in range(4)]

# random_state is pinned, presumably so repeated runs give the same explanation.
te = TextExplainer(random_state=42)
# NOTE(review): TextExplainer is handed a probability function here. If
# svm_model does not expose predict_proba this line raises; a separate
# "_proba" model is loaded in a following cell, so the failure may be the
# intended demonstration -- confirm.
te.fit(text, svm_model.predict_proba)
te.show_prediction(target_names=target_names)

In [None]:
svm_model_proba = joblib.load(
    '../data/lobsters_tag_classification_svm_proba.pkl')

In [None]:
te.fit(text, svm_model_proba.predict_proba)
te.show_prediction(target_names=target_names)

### Your Turn

- Investigate another text example from the dataset
- Investigate some text which you write
- Try to write text that is mainly about culture. What is the result?