In [2]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import h5py
import numpy as np
from tensorflow.keras.models import load_model

ModuleNotFoundError: No module named 'pandas'

In [3]:
def remove_special_characters(text, remove_digit=False):
    """Strip every character that is not an ASCII letter, whitespace or digit.

    Args:
        text: Input string to clean.
        remove_digit: When truthy, digits are stripped as well.

    Returns:
        The cleaned string.
    """
    # The upper-case range must be `A-Z`, not `A-z`: the latter also spans the
    # ASCII punctuation between 'Z' and 'a' ([ \ ] ^ _ `), which would then
    # survive the clean-up.
    pattern = r'[^a-zA-Z\s]' if remove_digit else r'[^a-zA-Z0-9\s]'
    return re.sub(pattern, '', text)

In [3]:
def remove_stopwords(text):
    """Return `text` with English stopwords filtered out.

    The text is split with `nltk.word_tokenize`; tokens that are not in NLTK's
    English stopword list are kept and re-joined with single spaces. Tokens
    are compared as-is, so matching is case-sensitive.
    """
    english_stopwords = set(stopwords.words('english'))
    kept = []
    for word in nltk.word_tokenize(text):
        if word not in english_stopwords:
            kept.append(word)
    return " ".join(kept)

In [4]:
def snowball_stemmer(text):
    """Stem every whitespace-separated word of `text` with the English Snowball stemmer.

    Returns the stemmed words joined by single spaces.
    """
    english_stemmer = SnowballStemmer("english")
    stems = map(english_stemmer.stem, text.split())
    return ' '.join(stems)

In [5]:
def normalize_corpus(
    text,
    special_char_removal=True,
    remove_digit=False,
    text_lower_case=True,
    stopword_removal=True,
    text_stem=True,
):
    """Apply the enabled text-normalization steps to `text`, in a fixed order.

    Steps (each one can be switched off through its flag):
      1. special-character removal (digits too when `remove_digit` is truthy)
      2. lower-casing
      3. stopword removal
      4. Snowball stemming

    Returns:
        The normalized string.
    """
    cleaned = text
    if special_char_removal:
        # Hand the helper a strict bool so it sees exactly True or False.
        cleaned = remove_special_characters(cleaned, remove_digit=bool(remove_digit))
    if text_lower_case:
        cleaned = cleaned.lower()
    if stopword_removal:
        cleaned = remove_stopwords(cleaned)
    if text_stem:
        cleaned = snowball_stemmer(cleaned)
    return cleaned

In [6]:
# Vocabulary cap handed to the Keras Tokenizer (`num_words`).
max_words = 10_000
# Length every token sequence is padded / truncated to.
max_len = 500

In [7]:
def process(text, tokenizer=None):
    """Turn a raw string into a padded integer sequence ready for `model.predict`.

    Args:
        text: Raw input string.
        tokenizer: Optional, already-fitted Keras `Tokenizer`. Pass the one
            that was fitted on the training corpus so word indices agree with
            what the model was trained on.
            NOTE(review): this notebook does not load a training tokenizer
            anywhere; confirm where it is persisted and pass it in.
            When omitted, a fresh tokenizer is fitted on the normalized input
            (fallback kept for backward compatibility).

    Returns:
        Array of shape (1, max_len) holding the post-padded / post-truncated
        token ids.
    """
    processed_text = normalize_corpus(text, remove_digit=True)

    if tokenizer is None:
        tokenizer = Tokenizer(num_words=max_words)
        # fit_on_texts expects a list of texts; a bare string would be
        # iterated character by character and yield a character vocabulary.
        tokenizer.fit_on_texts([processed_text])

    # Tokenize the *normalized* text, not the raw input.
    sequences = tokenizer.texts_to_sequences([processed_text])

    # Pad the sequences
    padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')

    # One row per input text -> shape (1, max_len)
    return np.asarray(padded_sequences)

In [8]:
# Sample input sentence; the visible cells below call predict() with a literal rather than this variable.
text = "Baby I ain't smoking by the rules"

In [9]:
# Load the saved Keras model from 'model.h5' (path is relative to the working directory).
# predict() reads this module-level `model`.
model = load_model('model.h5')

2023-05-12 11:57:32.045552: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-05-12 11:57:32.046825: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-05-12 11:57:32.120483: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:1b:00.0 name: Quadro RTX 6000 computeCapability: 7.5
coreClock: 1.77GHz coreCount: 72 deviceMemorySize: 23.64GiB deviceMemoryBandwidth: 625.94GiB/s
2023-05-12 11:57:32.121121: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 1 with properties: 
pciBusID: 0000:1c:00.0 name: Quadro RTX 6000 computeCapability: 7.5
coreClock: 1.77GHz coreCount: 72 deviceMemorySize: 23.64GiB deviceMemoryBandwidth: 625.94GiB/s
2023-05-12 11:57:32.121730: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 2 with properties: 
pciBusID: 0000:1d:00.0 name: Quadro RTX 600

In [10]:
def decode_sentiment(score, include_neutral=True):
    """Map a model score to a sentiment label.

    Args:
        score: Model output; compared against a fixed 0.5 threshold.
        include_neutral: Accepted but not used by this implementation.

    Returns:
        "NEGATIVE" when `score` is strictly above 0.5, otherwise "POSITIVE".

    NOTE(review): scores above the threshold are labelled NEGATIVE. Confirm
    this matches the label encoding used at training time.
    """
    if score > 0.5:
        return "NEGATIVE"
    return "POSITIVE"

def predict(text):
    """Run the sentiment model on a single piece of text.

    Args:
        text: Raw input string.

    Returns:
        Dict with the decoded "label" and the model "score" as a Python float.
    """
    # Predict
    processed = process(text)
    raw_output = model.predict([processed])[0]

    # Reduce the single-element model output to a plain float explicitly:
    # calling float() on an ndarray with ndim > 0 is deprecated in recent
    # NumPy releases. `.item()` raises if the output holds more than one value.
    score = float(np.asarray(raw_output).item())

    # Decode sentiment
    label = decode_sentiment(score)

    return {"label": label, "score": score}

In [11]:
# Run the full pipeline on a short sample phrase; the returned dict is the cell's output.
predict("I hate it")



2023-05-12 11:57:37.795757: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2023-05-12 11:57:37.796226: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2200000000 Hz
2023-05-12 11:57:37.875859: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10


{'label': 'POSITIVE', 'score': 0.36076807975769043}