General steps:

- Split into train and test set (random and stratified, using sklearn train_test_split)
- Learn transformations and word -> token mapping on train set 
- Train model on train set (potentially train several models on different subsets and choose the best one or ensemble their predictions; in the latter case, calculate the out-of-bag score as well)
- Calculate performance on train set (custom performance metric - choose threshold to get precision to >95% and evaluate recall based on that)
- Apply transformations on test set, make predictions and evaluate performance

### Imports

In [None]:
# debugging
from IPython.core.debugger import set_trace

# file system navigation
from pathlib import Path

# data transformation
import pandas as pd
import numpy as np
from numpy import ndarray

# plotting
import matplotlib.pyplot as plt
%matplotlib inline

# ml algorithms and evaluation metrics
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn import metrics
from sklearn.model_selection import cross_val_score, cross_validate, GridSearchCV, RandomizedSearchCV
from sklearn import model_selection
from sklearn.model_selection import StratifiedShuffleSplit
from scipy.stats.distributions import uniform, randint

# sklearn specific stuff
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# nlp
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import spacy
from spacy.pipeline import TextCategorizer
from spacy.util import minibatch, compounding
from spacy.util import decaying

# misc
import copy
import random
import warnings
from collections import namedtuple

### Custom functions and classes

In [None]:
class CustomEvaluator():
    """Evaluate binary classification scores against a target precision.

    The evaluator builds the precision-recall curve from predicted scores,
    reports the thresholds whose precision reaches ``target_precision`` and
    prints the ROC AUC alongside.

    Parameters
    ----------
    target_precision : float
        Precision a threshold has to reach in order to be reported.
    pos_label : int
        Label that marks the positive class in ``y_true``.
    """

    def __init__(self, target_precision:float=0.95, pos_label:int=1):
        self.target_precision = target_precision
        self.pos_label = pos_label

    def print_npos(self, y_true:ndarray):
        """Print count and share of positive examples; warn if there are none."""
        y_true = np.asarray(y_true)
        npos = np.sum(y_true == self.pos_label)
        if npos == 0:
            warnings.warn(f"Label '{self.pos_label}' is not in test set.")
        else:
            print(f"N POSITIVE CLASS: {npos} ({npos / len(y_true)*100:.2f}%)")

    def score(self, y_true:ndarray, probas_pred:ndarray, display_results:bool=True):
        """Print the AUC and the operating points that reach the target precision.

        If no threshold reaches the target, the three best rows (sorted by
        precision, then recall) are printed instead. Nothing is returned.
        """
        prs, rcs, ths = metrics.precision_recall_curve(y_true, probas_pred, pos_label=self.pos_label)

        if not display_results:
            return

        auc = metrics.roc_auc_score(y_true, probas_pred)
        print(f"AUC SCORE: {auc:.2f}")
        # The curve arrays hold one more point than there are thresholds, so the
        # trailing precision/recall entry is dropped to align the three columns.
        results = pd.DataFrame({"precision": prs[:-1], "recall": rcs[:-1], "threshold": ths})\
                    .sort_values(by=["precision", "recall"], ascending=[False, False])
        # Use the same ">=" for the decision and for the filter, so a precision
        # exactly equal to the target is reported as reaching it.
        on_target = results[results.precision >= self.target_precision]
        if not on_target.empty:
            print(on_target)
        else:
            print(results.head(3))

    def score_extensive(self, y_true:ndarray, probas_pred:ndarray, display_results:bool=False):
        """Print the lowest threshold reaching the target precision and its metrics.

        Falls back to the lowest available threshold when the target precision
        is not reached below a threshold of 1. With ``display_results`` the
        full precision/recall/threshold table is displayed as well
        (``display`` is expected to be provided by the IPython session).
        """
        self.print_npos(y_true) # Log number of positive examples in the current test set
        prs, rcs, ths = metrics.precision_recall_curve(y_true, probas_pred, pos_label=self.pos_label)

        # Pad the thresholds with 1.0 so they line up with the longer precision array.
        padded_ths = np.append(ths, 1.)
        tmp_min_th = np.min(padded_ths[prs >= self.target_precision])
        min_th = tmp_min_th if tmp_min_th < 1. else np.min(ths)
        at_min_th = np.where(ths == min_th)
        res_pr = np.max(prs[at_min_th])
        res_rc = np.max(rcs[at_min_th])
        auc = metrics.roc_auc_score(y_true, probas_pred)

        self.print_results(min_th, res_pr, res_rc, auc) # Log resulting threshold, precision, recall and auc

        if display_results:
            display(pd.DataFrame({"Precision": prs[:-1], "Recall": rcs[:-1], "Threshold": ths})\
                    .sort_values(by="Precision", ascending=False))

    def print_results(self, min_th, res_pr, res_rc, auc):
        """Print threshold, precision, recall and AUC in a fixed report layout."""
        print(f"MINIMAL THRESHOLD: {min_th:.2f}")
        print(f"RESULTING PRECISION: {res_pr:.2f} (Target: {self.target_precision})")
        print(f"RESULTING RECALL: {res_rc:.2f}")
        print(f"RESULTING AUC: {auc:.2f}")

In [None]:
ScoredClf = namedtuple("ScoredClf", ["clf", "train_auc", "oob_auc"])


def fit_ensemble(m, s:StratifiedShuffleSplit, X:ndarray, y:ndarray, **kwargs):
    """Fit one copy of ``m`` per split of ``s`` and score each copy.

    For every (train, held-out) index pair produced by ``s.split(X, y)`` the
    model is fitted on the train part, deep-copied, and scored by ROC AUC on
    both parts. Extra keyword arguments are forwarded to ``m.fit``.

    Progress and per-split results are printed; the printing relies on a
    module-level ``evaluator`` object exposing ``score(y_true, probas)``.

    Returns
    -------
    list of ScoredClf
        One ``(clf, train_auc, oob_auc)`` entry per split.
    """
    banner = "#######################################"
    fitted_clfs = []

    for number, (i_train, i_test) in enumerate(s.split(X, y), start=1):
        print(banner)
        print("Training model number  ", number)
        print(banner)
        print("")

        X_fit, y_fit = X[i_train], y[i_train]
        X_oob, y_oob = X[i_test], y[i_test]

        # Fit the shared model, then snapshot it so the next split cannot overwrite it.
        m.fit(X_fit, y_fit, **kwargs)
        fitted_clf = copy.deepcopy(m)

        p1_train = fitted_clf.predict_proba(X_fit)[:, 1]
        p1_oob = fitted_clf.predict_proba(X_oob)[:, 1]

        fitted_clfs.append(ScoredClf(
            clf=fitted_clf,
            train_auc=metrics.roc_auc_score(y_fit, p1_train),
            oob_auc=metrics.roc_auc_score(y_oob, p1_oob),
        ))

        reports = (
            ("PERFORMANCE ON TRAIN", y_fit, p1_train),
            ("OOB PERFORMANCE", y_oob, p1_oob),
        )
        for title, labels, probas in reports:
            print(title)
            print("")
            evaluator.score(labels, probas)
            print("")

    return fitted_clfs

In [None]:
def evaluate_ensemble(fitted:list, eval:"CustomEvaluator", X:ndarray, y:"ndarray | None"=None):
    """Summarise an ensemble and score its averaged prediction on hold-out data.

    Parameters
    ----------
    fitted : list
        Entries exposing ``clf`` (with ``predict_proba``), ``train_auc`` and
        ``oob_auc``, e.g. the output of ``fit_ensemble``.
    eval : CustomEvaluator
        Object whose ``score(y_true, probas)`` reports the hold-out performance.
    X : ndarray
        Hold-out features passed to every classifier's ``predict_proba``.
    y : ndarray, optional
        Hold-out labels. When omitted, the module-level ``y_test`` is used,
        which was the only option before this parameter existed.

    Raises
    ------
    ValueError
        If ``fitted`` is empty.
    """
    if not fitted:
        raise ValueError("`fitted` must contain at least one scored classifier.")

    labels = y_test if y is None else y

    train_scores = [m.train_auc for m in fitted]
    oob_scores = [m.oob_auc for m in fitted]
    # One row of positive-class probabilities per ensemble member.
    preds_test = np.array([m.clf.predict_proba(X)[:, 1] for m in fitted])

    print(f"Mean Train AUC: {np.mean(train_scores):.2f} (+/- {np.std(train_scores):.2f})")
    print(f"Mean OOB AUC: {np.mean(oob_scores):.2f} (+/- {np.std(oob_scores):.2f})")
    print("")
    print("Performance on hold out set:")
    # The ensemble prediction is the plain average over the members.
    eval.score(labels, preds_test.mean(axis=0))

#### Archive

In [None]:
def get_cv_scores(model, X, y, n_cv=10):
    """Print cross-validated precision and recall (mean and standard deviation).

    Both metrics are computed in a single cross-validation run, so every fold
    is fitted once and the two numbers refer to the same fitted models. The
    spread is reported as one standard deviation for both metrics.
    """
    cv_results = cross_validate(model, X, y, cv=n_cv, scoring=("precision", "recall"))
    for metric in ("precision", "recall"):
        scores = cv_results[f"test_{metric}"]
        print(f"Average {metric} score for {n_cv} splits: {scores.mean():.2f} (+/- {scores.std():.2f})")

In [None]:
def get_cv_score_auc(model, X, y, n_cv=10):
    """Print the cross-validated ROC AUC of ``model`` (mean and standard deviation)."""
    fold_aucs = cross_val_score(model, X, y, scoring="roc_auc", cv=n_cv)
    mean_auc, std_auc = fold_aucs.mean(), fold_aucs.std()
    print(f"Average auc score for {n_cv} splits: {mean_auc:.2f} (+/- {std_auc:.2f})")

In [None]:
def get_cv_auc(model, X, y, n_cv=10):
    """Cross-validate ``model`` on ROC AUC and return a representative fold estimator.

    The returned estimator is the fitted fold model whose test AUC lies
    closest to the mean AUC over all folds. The mean and standard deviation
    of the fold scores are printed.
    """
    cv_output = cross_validate(
        model, X, y,
        cv=n_cv,
        scoring="roc_auc",
        return_estimator=True,
        return_train_score=False,
        n_jobs=-1,
    )
    fold_aucs = cv_output["test_score"]
    mean_auc = fold_aucs.mean()

    # Pick the fold whose score deviates least from the average.
    closest_fold = np.argmin(np.abs(fold_aucs - mean_auc))
    representative_estimator = cv_output["estimator"][closest_fold]
    print(f"Average auc score for {n_cv} splits: {mean_auc:.2f} (+/- {fold_aucs.std():.2f})")

    return representative_estimator

In [None]:
def plot_precision_recall(model, X, y):
    """Draw the precision-recall curve of ``model`` on ``(X, y)`` as a filled step plot."""
    positive_scores = model.predict_proba(X)[:, 1]
    precision, recall, _ = metrics.precision_recall_curve(y, positive_scores)

    # Outline and shaded area use the same "post" step shape.
    plt.step(recall, precision, where="post", color='b', alpha=0.2)
    plt.fill_between(recall, precision, step="post", alpha=0.2, color='b')

    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title("Precision-Recall curve")
    plt.show()

### Load preprocessed data and make split

In [None]:
data = pd.read_parquet(Path.cwd() / "data" / "processed" / "train_data.parquet")

In [None]:
data.shape

In [None]:
X = data[["claps", "reading_time", "text"]]
y = np.array(data["interesting"])

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3, random_state=42,
                                                    stratify=y)

In [None]:
evaluator = CustomEvaluator(target_precision=0.8)
sss = model_selection.StratifiedShuffleSplit(n_splits=6, test_size=0.2, random_state=42)

### Baseline model

In [None]:
data_base = data[["claps", "reading_time", "interesting"]]

#### Visualize data distribution for numerical features

In [None]:
# Scatter the two numerical features against each other, one colour per class.
x_index = 0
y_index = 1
target_names = ["not interesting", "interesting"]

colors = ["red", "green"]

# Iterate over the classes themselves (label i <-> target_names[i] <-> colors[i])
# rather than over a range sized by the number of rows.
for label, (name, color) in enumerate(zip(target_names, colors)):
    class_rows = data_base[data_base["interesting"] == label]
    plt.scatter(np.array(class_rows.iloc[:, x_index]),
                np.array(class_rows.iloc[:, y_index]),
                label=name,
                c=color)

plt.xlabel(data_base.columns[x_index])
plt.ylabel(data_base.columns[y_index])
plt.legend(loc="upper left")
plt.show()

#### Save figure

In [None]:
# Build the target path from the current working directory with pathlib, as the
# data-loading cells do; neither `os` nor `wd` is defined in this notebook.
# NOTE(review): savefig writes the *current* figure. If the plot was already
# rendered by plt.show() in an earlier cell, this probably saves an empty
# canvas - confirm, and if so call savefig in the plotting cell before show().
plt.savefig(str(Path.cwd() / "output" / "base_classifier.png"))

#### Baseline classification model using author, claps and reading time

In [None]:
num_cols = ["claps", "reading_time"]
X_train_num, X_test_num = np.array(X_train[num_cols]), np.array(X_test[num_cols])

In [None]:
rf = RandomForestClassifier(min_samples_leaf=3)

In [None]:
fitted = fit_ensemble(rf, sss, X_train_num, y_train)

In [None]:
evaluate_ensemble(fitted, evaluator, X_test_num)

#### Archive

In [None]:
rf_model = get_cv_auc(RandomForestClassifier(), X_num, y, n_cv=20)

In [None]:
s = model_selection.StratifiedShuffleSplit(n_splits=4, test_size=0.3, random_state=42)

In [None]:
for i, split in enumerate(s.split(X_num, y_num)):
    i_train = split[0]
    i_test = split[1]
    print("Training model number  ", i)
    rf_model.fit(X_num.iloc[i_train, :], y_num[i_train])
    print("AUC on the test set:")
    print(metrics.roc_auc_score(y_num[i_test], rf_model.predict_proba(X_num.iloc[i_test, :])[:, 1]))

### Text based models

In [None]:
text_col = "text"
X_train_text, X_test_text = np.array(X_train[text_col]), np.array(X_test[text_col])

#### Archive

In [None]:
X = data["text"]
y = data["interesting"]

Create true hold out set to simulate future articles coming in

In [None]:
X_70 = X_text[0:70]
y_70 = y[0:70]

X_100 = X_text[70:]
y_100 = y[70:]

In [None]:
X_70 = X_70.reset_index().drop("index", axis=1)["text"]
X_100 = X_100.reset_index().drop("index", axis=1)["text"]

In [None]:
y_70 = y_70.reset_index().drop("index", axis=1)["interesting"]
y_100 = y_100.reset_index().drop("index", axis=1)["interesting"]

In [None]:
X_text_train, X_text_test, y_train, y_test = train_test_split(X_text,
                                                              y,
                                                              test_size=0.3,
                                                              random_state=42,
                                                              stratify=y)

#### CountVectorizer + random forest

##### Default values

In [None]:
count_vectorizer = CountVectorizer()

In [None]:
pipe_countvec = make_pipeline(count_vectorizer, RandomForestClassifier())

In [None]:
fitted_countvec = fit_ensemble(pipe_countvec, sss, X_train_text, y_train)

In [None]:
evaluate_ensemble(fitted_countvec, evaluator, X_test_text)

###### Archive

In [None]:
estimator = get_cv_auc(pipe, X_text_train, y_train, n_cv=20)

In [None]:
plot_precision_recall(estimator, X_text_test, y_test)

##### Some optimization, i.e. preprocessing and feature selection

In [None]:
pipe = Pipeline([
    ("vec", CountVectorizer()),
    ("rf", RandomForestClassifier())
    ])
params = {"vec__stop_words": ["english", None],
          "vec__ngram_range": [(1, 1), (1, 2), (1, 3)], 
          "vec__max_df": uniform(loc=0.8, scale=0.2),
          "vec__min_df": uniform(loc=0.0, scale=0.2),
          "vec__max_features": randint(low=1000, high=9000)}

grid = RandomizedSearchCV(pipe,
                          params,
                          n_iter=8,
                          scoring="roc_auc",
                          n_jobs=-1,
                          cv=10,
                          return_train_score=False)

In [None]:
_ = grid.fit(X_text_train, y_train)

In [None]:
avg_auc = grid.cv_results_["mean_test_score"].mean()
std_auc = grid.cv_results_["std_test_score"].mean()
print(f"Average auc score: {avg_auc:.2f} (+/- {std_auc:.2f})")
# pd.DataFrame.from_dict(grid.cv_results_).sort_values("rank_test_score")

In [None]:
plot_precision_recall(grid.best_estimator_, X_text_test, y_test)

#### TfidfVectorizer + random forest

##### Default values

In [None]:
tfidf_vectorizer = TfidfVectorizer()
tfidf_model = RandomForestClassifier(min_samples_leaf=5, n_jobs=-1, n_estimators=10, max_features=0.5)

In [None]:
pipe_tfidf = make_pipeline(tfidf_vectorizer, tfidf_model)

In [None]:
fitted_tfidf = fit_ensemble(pipe_tfidf, sss, X_train_text, y_train)

In [None]:
evaluate_ensemble(fitted_tfidf, evaluator, X_test_text)

###### Archive

In [None]:
estimator = get_cv_auc(pipe, X_text_train, y_train, n_cv=20)

In [None]:
plot_precision_recall(estimator, X_text_test, y_test)

In [None]:
vectorizer = TfidfVectorizer(stop_words="english", ngram_range=(1, 3), max_df=0.8, min_df=0.2, max_features=5000)

pipe = make_pipeline(vectorizer, RandomForestClassifier())

estimator = get_cv_auc(pipe, X_text_train, y_train, n_cv=20)

In [None]:
plot_precision_recall(estimator, X_text_test, y_test)

##### Grid

In [None]:
pipe = Pipeline([
    ("vec", TfidfVectorizer()),
    ("rf", RandomForestClassifier())
    ])
params = {"vec__stop_words": ["english", None],
          "vec__ngram_range": [(1, 1), (1, 2), (1, 3)], 
          "vec__max_df": uniform(loc=0.8, scale=0.2),
          "vec__min_df": uniform(loc=0.0, scale=0.2),
          "vec__max_features": randint(low=1000, high=9000)}

grid = RandomizedSearchCV(pipe,
                          params,
                          n_iter=8,
                          scoring="roc_auc",
                          n_jobs=-1,
                          cv=10,
                          return_train_score=False)

In [None]:
_ = grid.fit(X_text_train, y_train)

In [None]:
avg_auc = grid.cv_results_["mean_test_score"].mean()
std_auc = grid.cv_results_["std_test_score"].mean()
print(f"Average auc score: {avg_auc:.2f} (+/- {std_auc:.2f})")
# pd.DataFrame.from_dict(grid.cv_results_).sort_values("rank_test_score")

In [None]:
plot_precision_recall(grid.best_estimator_, X_text_test, y_test)

#### Advanced tokenization and lemmatization using spaCy

In [None]:
nlp = spacy.load('en_core_web_sm')

In [None]:
doc = nlp(X_text[0])

In [None]:
for token in doc:
    print(token.text, token.lemma_, token.is_stop)

In [None]:
nlp = spacy.load("en")

#### SpaCy language model

[Instructions from SpaCy documentation](https://spacy.io/usage/training#section-textcat)

In [None]:
class CustomSpacyClassifier():
    """ Wrapper for spaCy's text classification that enables integration with
    sklearn-style tooling such as sklearn.model_selection.cross_validate

    The wrapper exposes ``fit`` / ``predict_proba`` / ``get_params``. Each call
    to ``fit`` loads a fresh spaCy pipeline and trains only its ``textcat``
    component on the single label "interesting".
    """

    def __init__(self):
        self._estimator_type = "classifier"

        self.nlp = None         # spaCy pipeline, created in fit()
        self.label = None       # name of the text category being predicted
        self.train_data = None  # (text, {"cats": {label: bool}}) tuples used in fit()

    def get_params(self, deep=True):
        """Return the (empty) set of constructor parameters."""
        return dict()

    def add_textcat(self, label):
        """Make sure the pipeline has a ``textcat`` component and register ``label``."""
        self.label = label
        if "textcat" not in self.nlp.pipe_names:
            textcat = self.nlp.create_pipe("textcat")
            self.nlp.add_pipe(textcat, last=True)
        # otherwise, get it, so we can add labels to it
        else:
            textcat = self.nlp.get_pipe("textcat")
        textcat.add_label(label)

    def fit(self, X, y, n_iter=10, **kwargs):
        """Train the text categorizer on texts ``X`` with binary labels ``y``.

        Parameters
        ----------
        X : iterable of str
            Training texts.
        y : iterable
            Labels; each is converted with ``bool`` to mark the category.
        n_iter : int
            Number of passes over the training data.
        **kwargs
            ``drop_rate`` (float): dropout rate handed to ``nlp.update``.
            Defaults to 0.0 when omitted, so callers that only pass ``X`` and
            ``y`` (e.g. cross-validation helpers) no longer fail.

        Returns
        -------
        CustomSpacyClassifier
            The fitted instance itself.
        """
        self.nlp = spacy.load("en")
        self.add_textcat("interesting")
        self.train_data = [(e, {"cats": {self.label: bool(l)}}) for e, l in zip(X, y)]

        drop_rate = kwargs.get("drop_rate", 0.)

        other_pipes = [pipe for pipe in self.nlp.pipe_names if pipe != "textcat"]
        with self.nlp.disable_pipes(*other_pipes):  # only train textcat
            optimizer = self.nlp.begin_training()
            for i in range(n_iter):
                print(f"EPOCH {i+1}")
                losses = {}
                batches = minibatch(self.train_data, size=compounding(4., 16., 1.001))
                for batch in batches:
                    texts, annotations = zip(*batch)
                    self.nlp.update(texts, annotations, sgd=optimizer, drop=drop_rate,
                               losses=losses)
                loss = losses["textcat"]
                print(f"LOSS: {loss}")
                print("")

        return self

    def predict_proba(self, X):
        """Return an ``(n_samples, 2)`` array of [1 - score, score] per text.

        ``score`` is the pipeline's category score for ``self.label``.
        """
        p1_scores = [np.float64(self.nlp(sample_text).cats[self.label]) for sample_text in X]

        return np.array([[1. - score, score] for score in p1_scores])

In [None]:
clf_spacy = CustomSpacyClassifier()

In [None]:
fitted_spacy = fit_ensemble(clf_spacy, sss, X_train_text, y_train, n_iter=5, drop_rate=0.4)

In [None]:
evaluate_ensemble(fitted_spacy, evaluator, X_test_text)

##### Archive

Train several custom classifiers and evaluate their performance on the true hold out set

In [None]:
fitted_clfs = []

for i, split in enumerate(s.split(X_70, y_70)):
    i_train = split[0]
    i_test = split[1]
    
    print("Training model number  ", i)
    print("")
    print("Training IDs: ", i_train)
    print("Test IDs: ", i_test)
    
    clf.fit(X_70[i_train], y_70[i_train], n_iter=5, drop_rate=0.4)
    fitted_clf = copy.deepcopy(clf)
    test_auc = metrics.roc_auc_score(y_70[i_test], clf.predict_proba(X_70[i_test])[:, 1])
    fitted_clfs.append((fitted_clf, test_auc))

    print("AUC on the test set: ", test_auc)
    print("")

In [None]:
scores = [score for _, score in fitted_clfs]
print("Mean AUC: ", np.mean(scores))
print("Std deviation of AUC: ", np.std(scores))

In [None]:
preds = np.array([c.predict_proba(X_100)[:, 1] for c, _ in fitted_clfs])

In [None]:
print("AUC on the hold out set: ", metrics.roc_auc_score(y_100, preds.mean(axis=0)))

In [None]:
auc_cv = cross_validate(clf, X_text_train, y_train,
                            scoring="roc_auc",
                            cv=2,
                            n_jobs=1,
                            return_train_score=False,
                            return_estimator=True)

In [None]:
auc_cv

In [None]:
nlp = spacy.load("en")

In [None]:
if "textcat" not in nlp.pipe_names:
    textcat = nlp.create_pipe("textcat")
    nlp.add_pipe(textcat, last=True)
# otherwise, get it, so we can add labels to it
else:
    textcat = nlp.get_pipe("textcat")

In [None]:
textcat.add_label("interesting")

In [None]:
TRAIN_DATA = [(example, {"cats": {"interesting": bool(label)}}) for example, label in zip(X_text_train, y_train)]

In [None]:
n_iter = 10

In [None]:
# dropout = decaying(0.6, 0.2, 1e-4)

In [None]:
# Print the first 20 values of the decaying dropout schedule.
# The generator has to exist before it can be sampled; these are the arguments
# from the commented-out definition in the cell above.
dropout = decaying(0.6, 0.2, 1e-4)
for _ in range(20):
    print(next(dropout))

In [None]:
size=compounding(4., 16., 1.05)

In [None]:
i = 0
while i < 20:
    print(next(size))
    i += 1

In [None]:
for i in range(n_iter):
        print(f"EPOCH {i+1}")
        batches = minibatch(TRAIN_DATA, size=compounding(4., 16., 1.5))
        print(len(next(batches)))

In [None]:
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "textcat"]
with nlp.disable_pipes(*other_pipes):  # only train textcat
    optimizer = nlp.begin_training()
    for i in range(n_iter):
        print(f"EPOCH {i+1}")
        losses = {}
        batches = minibatch(TRAIN_DATA, size=compounding(4., 16., 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(texts, annotations, sgd=optimizer, drop=0.3,
                       losses=losses)
        loss = losses["textcat"]
        print(f"LOSS: {loss}")
        print("")
        

#### Evaluate

In [None]:
test_scores = [nlp(sample_text).cats["interesting"] for sample_text in X_text_test]

In [None]:
te = [np.float64(nlp(sample_text).cats["interesting"]) for sample_text in X_text_test[0:2]]

In [None]:
te2 = np.array([[1. - score, score] for score in te])

In [None]:
te2

In [None]:
np.float64(te[0])

In [None]:
metrics.roc_auc_score(y_test, test_scores)

In [None]:
precision, recall, _ = metrics.precision_recall_curve(y_test, test_scores)

step_kwargs = {"step": "post"}
plt.step(recall, precision, color='b', alpha=0.2,
         where="post")
plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)

plt.xlabel("Recall")
plt.ylabel("Precision")
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title("Precision-Recall curve")
plt.show()

#### Fastai

In [1]:
from fastai import *
from fastai.text import *

In [None]:
data = pd.read_parquet(Path.cwd() / "data" / "shared" / "train_data.parquet")

X = data[["claps", "reading_time", "text"]]
y = np.array(data["interesting"])

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3, random_state=42,
                                                    stratify=y)

In [None]:
train_fastai = pd.DataFrame({"label": y_train, "text": X_train["text"]})
test_fastai = pd.DataFrame({"label": y_test, "text": X_test["text"]})

In [3]:
path = Path.cwd() / "data" / "shared" / "fastai"

In [None]:
# Write each split to its own file. test.csv must hold the held-out rows,
# not a second copy of the training rows.
train_fastai.to_csv(path / "train.csv", sep=",", index=False, header=False)
test_fastai.to_csv(path / "test.csv", sep=",", index=False, header=False)

In [None]:
evaluator = CustomEvaluator(target_precision=0.8)
sss = model_selection.StratifiedShuffleSplit(n_splits=6, test_size=0.2, random_state=42)

In [35]:
# Language model data
data_lm = TextLMDataBunch.from_csv(path)
# Classifier model data
data_clas = TextClasDataBunch.from_csv(path, vocab=data_lm.train_ds.vocab, bs=8)

Numericalizing valid.


##### Download pre-trained model

The download helper below did not work; the pre-trained model had to be downloaded with curl from the command line instead.

In [None]:
# URLs.download_wt103_model()

##### Train language model

In [36]:
learn = RNNLearner.language_model(data_lm, pretrained_fnames=['lstm_wt103', 'itos_wt103'], drop_mult=0.5)
learn.fit_one_cycle(4, 1e-2)

Total time: 00:26
epoch  train_loss  valid_loss  accuracy
1      5.129876    4.502655    0.242058  (00:06)
2      4.669750    3.958226    0.271622  (00:06)
3      4.352579    3.780446    0.286204  (00:06)
4      4.158467    3.748044    0.289051  (00:06)



Unfreeze and fine-tune

In [37]:
learn.unfreeze()
learn.fit_one_cycle(4, 1e-3)

Total time: 00:31
epoch  train_loss  valid_loss  accuracy
1      3.991501    3.651473    0.300154  (00:07)
2      3.908763    3.523459    0.316158  (00:07)
3      3.810472    3.444433    0.324955  (00:07)
4      3.737535    3.413374    0.328164  (00:07)



Save encoder to use for classification

In [38]:
learn.save_encoder('ft_enc')

###### Train classifier

In [39]:
learn = RNNLearner.classifier(data_clas, drop_mult=0.5)
learn.load_encoder('ft_enc')
learn.fit_one_cycle(4, 1e-2)

Total time: 01:50
epoch  train_loss  valid_loss  accuracy
1      0.724800    0.636203    0.692308  (00:27)
2      0.635331    0.501309    0.780220  (00:27)
3      0.565197    0.541426    0.703297  (00:27)
4      0.546613    0.515166    0.703297  (00:28)



In [40]:
learn.freeze_to(-2)
learn.fit_one_cycle(4, slice(5e-3/2., 5e-3))

Total time: 01:52
epoch  train_loss  valid_loss  accuracy
1      0.534426    0.456059    0.802198  (00:29)
2      0.502023    0.393139    0.857143  (00:28)
3      0.453869    0.436747    0.846154  (00:27)
4      0.410717    0.399213    0.857143  (00:27)



In [41]:
learn.unfreeze()
learn.fit_one_cycle(4, slice(2e-3/100, 2e-3))

Total time: 02:34
epoch  train_loss  valid_loss  accuracy
1      0.426951    0.362562    0.879121  (00:39)
2      0.393601    0.347144    0.901099  (00:37)
3      0.372209    0.348093    0.868132  (00:40)
4      0.360141    0.329950    0.879121  (00:37)



In [43]:
dir()

['ActivationStats',
 'AdamW',
 'AdaptiveConcatPool2d',
 'AffineFunc',
 'AffineMatrix',
 'AnnealFunc',
 'Any',
 'AnyStr',
 'ArgStar',
 'AverageMetric',
 'BOS',
 'BaseTextDataset',
 'BaseTokenizer',
 'BatchSampler',
 'BatchSamples',
 'BnFreeze',
 'BoolOrTensor',
 'ByteTensor',
 'Callable',
 'Callback',
 'CallbackHandler',
 'CallbackList',
 'Classes',
 'Collection',
 'Counter',
 'CrossEntropyFlat',
 'DataBunch',
 'DataFrame',
 'DataFrameOrChunks',
 'DataLoader',
 'Dataset',
 'DatasetBase',
 'Debugger',
 'DeviceDataLoader',
 'Dict',
 'DoubleTensor',
 'EarlyStoppingCallback',
 'EmbeddingDropout',
 'Enum',
 'F',
 'FLD',
 'Fbeta',
 'FilePathList',
 'Flatten',
 'FloatOrTensor',
 'FloatTensor',
 'Floats',
 'GeneralScheduler',
 'GradientClipping',
 'HalfTensor',
 'Hashable',
 'Hook',
 'HookCallback',
 'HookFunc',
 'Hooks',
 'ImgLabel',
 'ImgLabels',
 'In',
 'IntEnum',
 'IntOrTensor',
 'ItemBase',
 'ItemsList',
 'Iterable',
 'Iterator',
 'KWArgs',
 'KeyFunc',
 'LRFinder',
 'LabelDataset',
 'Lambd

### Archive

In [None]:
class CustomEvaluator():
    """ Simple class holding data and functionality related to evaluating a classifier's performance

    NOTE(review): this archived class reuses the name of the precision-targeting
    evaluator defined near the top of the notebook; running this cell rebinds
    ``CustomEvaluator`` for every cell executed afterwards.

    Attributes
    ----------
    df : DataFrame with the columns "text", "label" and "score".
    group_means : DataFrame with the mean score per label.
    score_df : DataFrame of metrics per threshold, set by ``get_scores``.
    """

    def __init__(self, texts, labels, scores):
        self.df = pd.DataFrame({"text": texts, "label": labels, "score": scores})
        # Average the score column only; the free-text column cannot be averaged.
        self.group_means = self.df.groupby(by="label")[["score"]].mean()

    def get_scores(self, thresholds=(0.25, 0.5, 0.75)):
        """Print and store precision, recall, F-score and accuracy per threshold.

        Parameters
        ----------
        thresholds : number or iterable of numbers
            A sample counts as predicted positive when its score is strictly
            greater than the threshold. A single number is accepted as well.

        The result is printed and kept in ``self.score_df``; nothing is returned.
        """
        if np.ndim(thresholds) == 0:
            thresholds = [thresholds]
        thresholds = list(thresholds)

        # Tiny offset on every confusion-matrix cell keeps the ratios below defined
        # when a cell would otherwise be zero.
        eps = 1e-8
        truth = np.asarray(self.df["label"], dtype=bool)
        scores = np.asarray(self.df["score"])

        rows = []
        for t in thresholds:
            pred = scores > t
            tp = eps + np.sum(truth & pred)    # True positives
            fp = eps + np.sum(~truth & pred)   # False positives
            fn = eps + np.sum(truth & ~pred)   # False negatives
            tn = eps + np.sum(~truth & ~pred)  # True negatives

            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            rows.append({"threshold": t,
                         "precision": precision,
                         "recall": recall,
                         "f_score": 2 * (precision * recall) / (precision + recall),
                         "accuracy": (tp + tn) / (tp + fp + fn + tn)})

        score_df = pd.DataFrame(rows, columns=["threshold", "precision", "recall", "f_score", "accuracy"])

        print(score_df)
        self.score_df = score_df

        return

In [None]:
test_evaluator = CustomEvaluator(X_text_test, y_test, test_scores)

In [None]:
test_evaluator.group_means

In [None]:
test_evaluator.get_scores()