# Training Pipeline

We use an LSTM network as the classification model for headline sentiment.


### Imports & Load data

In [36]:
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from tensorflow.keras.layers import Embedding, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
import torch
import joblib

import hopsworks
from hsml.schema import Schema
from hsml.model_schema import ModelSchema

In [3]:
# Hyperparameters shared by the model definition further down.
voc_size = 5000  # first argument (input dimension) of the Embedding layer
max_len = 60     # passed as `input_length` to the Embedding layer

### Connect to Hopsworks

In [4]:
# Log in to Hopsworks and grab a handle on the project's feature store.
# `hopsworks` is imported in the imports cell at the top of the notebook so
# that all dependencies are visible in one place.
# NOTE(review): judging by the captured output, `login()` may prompt for an
# API key when none is cached -- confirm before running unattended.
project = hopsworks.login()

fs = project.get_feature_store()

Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/5321




Connected. Call `.close()` to terminate connection gracefully.


In [5]:
# Reuse version 2 of the feature view when it can be fetched; otherwise build
# it from version 3 of the feature group, with `sentiment` as the label.
try:
    feature_view = fs.get_feature_view(
        name='headlines_sentiment_fg',
        version=2,
    )
except Exception:
    # Deliberately `except Exception` rather than a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not swallowed while the lookup runs.
    feature_group = fs.get_feature_group(name='headlines_sentiment_fg', version=3)
    query = feature_group.select_all()
    feature_view = fs.create_feature_view(
        name='headlines_sentiment_fg',
        version=2,
        labels=["sentiment"],
        query=query,
    )

Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/5321/fs/5241/fv/headlines_sentiment_fg/version/2


In [67]:
# Materialise the feature view in memory, holding out 20% of it as the test split.
X_train, X_test, y_train, y_test = feature_view.train_test_split(test_size=0.2)


2023-01-09 12:07:21,563 INFO: USE `torileatherman_featurestore`
2023-01-09 12:07:22,074 INFO: SELECT `fg0`.`sentiment` `sentiment`, `fg0`.`headline` `headline`
FROM `torileatherman_featurestore`.`headlines_sentiment_fg_3` `fg0`




In [39]:
def _parse_token_ids(serialized):
    """Turn a stringified integer list such as ``"[1,2,3]"`` into ``[1, 2, 3]``."""
    return np.fromstring(
        serialized.replace('[', '').replace(']', ''), dtype=int, sep=','
    ).tolist()


# Convert the split frames into plain Python lists for Keras / hsml.
# The parsed column is not written back into the DataFrames (they are
# discarded right after), which avoids in-place mutation of frames returned
# by the feature view.
# NOTE: this cell rebinds X_*/y_* from DataFrames to lists, so it must be run
# exactly once after the train/test split cell.
X_train = X_train['headline'].apply(_parse_token_ids).to_list()
X_test = X_test['headline'].apply(_parse_token_ids).to_list()
y_train = y_train['sentiment'].to_list()
y_test = y_test['sentiment'].to_list()

## LSTM Model

In [22]:
embedding_vector_features = 40

# Embedding -> Dropout -> LSTM -> Dropout -> single sigmoid output unit.
model = Sequential([
    Embedding(voc_size, embedding_vector_features, input_length=max_len),
    Dropout(0.5),
    LSTM(200),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [48]:
# Train for 10 epochs in batches of 64. The test split is passed as
# validation data, so `history` also records per-epoch validation metrics.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=64,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [68]:
history1 = history  # alias kept so any cell that refers to `history1` still works
# Report accuracy on the held-out split from the final epoch. The previous
# 'accuracy' key is the *training* accuracy, which overstates how well the
# model generalises; 'val_accuracy' is available because `validation_data`
# is passed to `model.fit`.
acc = history1.history['val_accuracy'][-1]

In [69]:
# Handle to the project's model registry; used further down to register the trained model.
mr = project.get_model_registry()

# Local staging directory for the serialised model.
model_dir = "headlines_sentiment_model"
# `exist_ok=True` makes the cell safe to re-run and replaces the separate
# isdir() check followed by mkdir().
os.makedirs(model_dir, exist_ok=True)

Connected. Call `.close()` to terminate connection gracefully.


In [70]:
# Pickle the fitted model into the staging directory; the cell's output is
# the list of file names joblib wrote.
joblib.dump(model, f"{model_dir}/headlines_sentiment_model.pkl")


Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......dense
.........vars
............0
............1
......dropout
.........vars
......dropout_1
.........vars
......embedding
.........vars
............0
......lstm
.........cell
............vars
...............0
...............1
...............2
.........vars
...metrics
......mean
.........vars
............0
............1
......mean_metric_wrapper
.........vars
............0
............1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........2
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2023-01-09 12:07:36         2503
metadata.json                                  2023-01-09 12:07:36           64
variables.h5                                   2023-01-09 12:07:37      4745328


['headlines_sentiment_model/headlines_sentiment_model.pkl']

In [71]:
# Build the registry schema from the training features (input) and labels (output).
input_schema = Schema(X_train)
output_schema = Schema(y_train)
model_schema = ModelSchema(
    input_schema=input_schema,
    output_schema=output_schema,
)

In [72]:
# Create the registry entry (name, description, metric and schema) for the
# trained model; the files themselves are uploaded by a later `.save(...)` call.
headlines_sentiment_model = mr.python.create_model(
    name="headlines_sentiment_model",
    description="Predicting Sentiment of Headlines",
    metrics={"accuracy": acc},
    model_schema=model_schema,
)


In [73]:
# Upload the contents of `model_dir` to the model registry under the entry created above.
headlines_sentiment_model.save(model_dir)


  0%|          | 0/6 [00:00<?, ?it/s]

Model created, explore it at https://c.app.hopsworks.ai:443/p/5321/models/headlines_sentiment_model/3


Model(name: 'headlines_sentiment_model', version: 3)