## Imports

In [2]:
import mltlk
print(mltlk.__version__)
from mltlk import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

0.1.33


## Load data
Load the data, clean the text, and preprocess it into Word2vec word vectors.

In [3]:
# Read the Wikipedia sample and convert every cleaned text into a Word2vec
# feature vector; the resulting session object is used by all later cells.
session = load_data(
    "data/wikipedia_300.csv.gz",
    preprocess="word2vec",
    w2v_vector_size=75,  # feature count; get_model() uses the same value as input_dim
    stopwords=["english", "stopwords/custom.csv"],
    clean_text="letters digits",
    encode_categories=True,
    w2v_rebuild=True,  # presumably forces regeneration of the stored Word2vec model -- confirm in mltlk docs
)

[1m[33mInfo: [0mClean texts keeping letters and digits
[1m[33mInfo: [0mCategories encoded
[1m[33mInfo: [0mLoad 180 stopwords from [36menglish, stopwords/custom.csv[0m
[1m[33mInfo: [0mWord2vec model generated in [34m3.04[0m sec
[1m[33mInfo: [0mWord2vec model stored to [36mword2vec/wikipedia_300_75.w2v[0m
[1m[33mInfo: [0mWord2vec embeddings generated in [34m9.86[0m sec
[1m[33mInfo: [0mLoaded [34m300[0m examples in [34m2[0m categories


#### Show data stats

In [4]:
# Display per-category example counts plus the example/feature/category totals.
data_stats(session)

0,1,2,3,4,5,6,7,8,9,10,11
Category,No,%,Σ%,Category,No,%,Σ%,Category,No,%,Σ%
Games (0),150,50.0%,50.0%,Programming (1),150,50.0%,100.0%,,,,
Examples:,300,,,Features:,75,,,Categories:,2,,


## Define Keras model
Define a function that builds the architecture of the Keras model to use.

In [5]:
def get_model(input_dim=75, hidden_units=256, dropout_rate=0.2, n_classes=2):
    """Build a fresh, uncompiled feed-forward classifier.

    The network is a single ReLU hidden layer followed by dropout and a
    softmax output layer. This function does not call ``compile``; callers
    are expected to supply the loss and optimizer when training.

    Called with no arguments it returns exactly the original architecture
    (75 inputs -> 256 ReLU units -> dropout 0.2 -> 2-way softmax).

    Args:
        input_dim: Number of input features. Must equal the length of the
            feature vectors fed to the model (75 for the Word2vec vectors
            produced with ``w2v_vector_size=75``).
        hidden_units: Width of the hidden ``Dense`` layer.
        dropout_rate: Fraction of hidden activations dropped during training.
        n_classes: Number of output categories (size of the softmax layer).

    Returns:
        A new ``Sequential`` model instance; each call creates new layers,
        so separate calls do not share weights.
    """
    return Sequential([
        Dense(hidden_units, input_dim=input_dim, activation="relu"),
        Dropout(dropout_rate),
        Dense(n_classes, activation="softmax"),
    ])

## Evaluate model using train-test split
Build a Keras model and evaluate results using train-test split.

In [6]:
# Stratified 85/15 train-test split of the session data, using a fixed seed of 4.
split_data(session, test_size=0.15, seed=4, stratify=True)

[1m[33mInfo: [0mSplit data using [34m85%[0m training data ([34m255[0m samples) and [34m15%[0m test data ([34m45[0m samples) with seed [34m4[0m and stratify


In [7]:
# Evaluate a freshly built model in train-test split mode.
evaluate_model(
    get_model(),
    session,
    reload=False,
    mode="split",
    categories=True,  # presumably enables the per-category results table -- confirm in mltlk docs
    epochs=8,
    batch_size=32,
    loss="categorical_crossentropy",
    optimizer="adam",
)

[1m[33mInfo: [0mBuilding and evaluating model using train-test split took [34m0.66[0m sec



0,1
Results,
Accuracy:,93.33%
F1-score:,93.29%
Precision:,94.10%
Recall:,93.33%





0,1,2,3
Category,Accuracy,Correct,n
Games (0),100.00%,23,23
Programming (1),86.36%,19,22
Games (0),15.79%,3,





## Build final model and predict example
Build final model using all data and predict an unknown example.

In [8]:
# Build the final model from a fresh network, with the same training settings
# as the evaluation run.
build_model(
    get_model(),
    session,
    epochs=8,
    batch_size=32,
    loss="categorical_crossentropy",
    optimizer="adam",
)

# Classify a single previously unseen sentence with the final model.
predict(
    "This is an article about gamers - people who love playing games",
    session,
)

[1m[33mInfo: [0mBuilding final model on all data took [34m0.64[0m sec (accuracy [34m93.00%[0m)
[1m[33mInfo: [0mExample is predicted as [32mGames (0)[0m
