In [None]:
from flask import Flask, request, render_template
import joblib
import pandas as pd
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel ,TFAutoModel
import xgboost

# Model deserialized with joblib; get_rank() calls its .predict().
# Presumably an XGBoost estimator, going by the file name -- confirm.
xgb = joblib.load('model3/xgb_model.joblib')
# Checkpoint name shared by `tokenizer` and `model2` below (used by getSentiment()).
model_name = 'bert-large-cased'

# Scale and offset applied in get_laugh_Duration(): prediction * minMaxDiff + minVal.
# Presumably (max - min) and min of the training targets used for min-max
# scaling -- TODO confirm against the training code.
minMaxDiff = 33752
minVal = 708

# Index of the row currently being processed; overwritten with the value
# returned by addRowToDF() once a row has been appended.
currentIndex = 0

# Columns passed to get_rank() as features:
# ['laughingTime','jokeLength','sentiment','sentiment_prob']
# Running table of processed texts; starts empty and grows via addRowToDF().
df = pd.DataFrame(columns=['text','laughingTime','jokeLength','sentiment','sentiment_prob','rank'])

# Tokenizer and BERT model used together by getSentiment().
tokenizer = BertTokenizer.from_pretrained(model_name)

model2 = TFBertModel.from_pretrained(model_name)

# Backbone for the laugh-duration model. Note it is a different checkpoint
# ('bert-base-uncased') from `model_name`; dataPreProcess() loads the matching
# tokenizer. The backbone is frozen.
base_model = TFAutoModel.from_pretrained('bert-base-uncased')
base_model.trainable = False

# Fixed-length (512) int32 inputs, named to match the dict keys that
# get_laugh_Duration() passes to model.predict().
input_ids = tf.keras.layers.Input(shape=(512,), dtype=tf.int32, name='input_ids')
attention_mask = tf.keras.layers.Input(shape=(512,), dtype=tf.int32, name='attention_mask')

# Element [1] of the backbone output (treated as the pooled output elsewhere
# in this file) feeds a single sigmoid unit, so predictions lie in (0, 1).
x = base_model({"input_ids": input_ids, "attention_mask": attention_mask})[1]
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)
model = tf.keras.Model(inputs={"input_ids": input_ids, "attention_mask": attention_mask}, outputs=outputs)
# Restore weights from a saved checkpoint; the architecture built above must
# match the one the checkpoint was saved from.
model.load_weights('model/model20_minmax_extradata/my_model')




def get_rank(input):
    """Score feature data with the module-level ``xgb`` model.

    Args:
        input: Feature data handed straight to ``xgb.predict``. The notebook
            calls this with a one-row DataFrame holding the columns
            ``laughingTime``, ``jokeLength``, ``sentiment`` and
            ``sentiment_prob``.

    Returns:
        Whatever ``xgb.predict`` returns for ``input``; the notebook takes
        element ``[0]`` as the rank of the single row.
    """
    # ``xgb`` is only read, never rebound, so no ``global`` statement is needed.
    return xgb.predict(input)

def getSentiment(text):
    """Return a sentiment class index and its score for ``text``.

    The text is tokenized to a fixed length of 512 with the module-level
    ``tokenizer``, run through ``model2``, and element ``[1]`` of the model
    output (treated as the pooled output) is fed to a 3-unit softmax layer.

    NOTE(review): the ``Dense`` layer is constructed inside this function, so
    every call builds a brand-new layer and nothing visible here loads
    trained weights into it. The scores also go through softmax twice (the
    layer's activation, then ``tf.nn.softmax``). Both look unintended, but
    they are kept unchanged because the ranking model downstream may have
    been fitted on values produced this way -- confirm before changing.

    Args:
        text: The string to analyse.

    Returns:
        A two-item list ``[label, score]``: the index of the highest-scoring
        of the three classes, and that class's score.
    """
    encoded = tokenizer.encode_plus(
        text,
        max_length=512,
        truncation=True,
        padding='max_length',
        return_tensors='tf',
    )
    pooled = model2(encoded)[1]

    head = tf.keras.layers.Dense(3, activation='softmax')
    class_scores = tf.nn.softmax(head(pooled), axis=1).numpy().squeeze()
    best_class = tf.argmax(class_scores).numpy().item()

    # Label meaning noted by the original author (not applied here):
    # 0 = Negative, 1 = Positive, 2 = Neutral.
    return [best_class, class_scores[best_class]]

# {'laughingTime':,'jokeLength':,'sentiment':,'sentiment_prob':}
def addRowToDF(inp):
    """Append ``inp`` to the module-level ``df`` and return the last index label.

    Args:
        inp: Anything ``pd.DataFrame`` accepts. The notebook passes a list
            containing one dict of column values.

    Returns:
        The index label of the final row of ``df`` after the append.
    """
    global df  # rebound below, so the declaration is required
    # ignore_index=True renumbers the combined frame from 0.
    df = pd.concat([df, pd.DataFrame(inp)], ignore_index=True)
    return df.index[-1]



# Lazily-created tokenizer shared by all dataPreProcess() calls.
_BASE_UNCASED_TOKENIZER = None


def _get_base_uncased_tokenizer():
    """Return the 'bert-base-uncased' tokenizer, loading it on first use only."""
    global _BASE_UNCASED_TOKENIZER
    if _BASE_UNCASED_TOKENIZER is None:
        _BASE_UNCASED_TOKENIZER = BertTokenizer.from_pretrained('bert-base-uncased')
    return _BASE_UNCASED_TOKENIZER


def dataPreProcess(texts):
    """Tokenize ``texts`` into fixed-length BERT inputs.

    Each text is encoded with the 'bert-base-uncased' tokenizer, with the
    special tokens added, then truncated or padded to exactly 512 tokens.
    Token type IDs are not produced.

    Args:
        texts: An iterable of strings. Pass a list even for a single text;
            a bare string would be iterated character by character.

    Returns:
        A tuple ``(input_ids, attention_masks)`` of int32 tensors with one
        row per input text.
    """
    # Reuse one tokenizer instead of re-reading the vocabulary on every call.
    bert_tokenizer = _get_base_uncased_tokenizer()

    # Sequence length every text is padded/truncated to.
    max_length = 512

    input_ids = []
    attention_masks = []

    for text in texts:
        encoded_dict = bert_tokenizer.encode_plus(
            text,
            add_special_tokens=True,        # add [CLS] and [SEP]
            max_length=max_length,
            # Replaces the deprecated pad_to_max_length=True; same spelling
            # getSentiment() already uses.
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=False,
        )
        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])

    return (
        tf.convert_to_tensor(input_ids, dtype=tf.int32),
        tf.convert_to_tensor(attention_masks, dtype=tf.int32),
    )

def get_laugh_Duration(text):
    """Predict a laugh duration for a single text.

    The text is tokenized with ``dataPreProcess``, scored by the module-level
    ``model``, and each prediction is mapped back with
    ``prediction * minMaxDiff + minVal``.

    Args:
        text: The string to score.

    Returns:
        A list of rescaled predictions, one per model output row (a single
        element, since one text is passed in).
    """
    ids, mask = dataPreProcess([text])
    raw_scores = model.predict({'input_ids': ids, 'attention_mask': mask}).tolist()
    # Each row is a one-element list holding the sigmoid output.
    return [(row[0] * minMaxDiff) + minVal for row in raw_scores]



In [74]:
# Sample transcript to score.
text = " pick up the box get back on when you hit the box together and subtract your own weight I'm going slow down hold on professor I know this guy's never tried this because I tried it and you still can't see the numbers  one of my mr. Olympia"
# Append a row for it; jokeLength is the whitespace-separated word count and
# the remaining fields are empty-string placeholders filled in by later cells.
currentIndex = addRowToDF([{
    'text': text,
    'laughingTime': '',
    'jokeLength': len(text.split()),
    'sentiment': '',
    'sentiment_prob': '',
    'rank': '',
}])


In [75]:
# Predict the laugh duration for the text of the row just added.
duration = get_laugh_Duration(df.loc[currentIndex, 'text'])





In [77]:
currentIndex

1

In [76]:
duration

[5491.5219403505325]

In [78]:
# Write through .loc in a single step. The chained form df[col][idx] = value
# assigns into an intermediate Series, which raises SettingWithCopyWarning and
# does not update df at all under pandas Copy-on-Write.
df.loc[currentIndex, 'laughingTime'] = duration[0]

In [79]:
# Sentiment label and score for the text of the current row.
sentiment = getSentiment(df.loc[currentIndex, 'text'])

In [80]:
sentiment

[2, 0.37211284]

In [82]:
# sentiment is [label, score]. Write each through .loc rather than the chained
# df[col][idx] = value form, which assigns into an intermediate Series and
# does not update df under pandas Copy-on-Write.
df.loc[currentIndex, 'sentiment'] = sentiment[0]
df.loc[currentIndex, 'sentiment_prob'] = sentiment[1]

In [83]:
# Feature columns passed to the ranking model, in this order.
feature_columns = ['laughingTime', 'jokeLength', 'sentiment', 'sentiment_prob']
# Rebuild the current row as a fresh one-row DataFrame from plain values, as
# the original cell did (presumably so each column gets its own inferred
# dtype instead of the object dtype df's placeholder columns carry -- confirm).
features = pd.DataFrame(df.iloc[currentIndex].to_dict(), index=[0])[feature_columns]
# Write through .loc; the chained df['rank'][idx] = value form assigns into an
# intermediate Series and does not update df under pandas Copy-on-Write.
df.loc[currentIndex, 'rank'] = get_rank(features)[0]

In [84]:
df.head()

Unnamed: 0,text,laughingTime,jokeLength,sentiment,sentiment_prob,rank
0,pick up the box get back on when you hit the ...,5491.52194,48,2,0.372113,3.368503
1,pick up the box get back on when you hit the ...,5491.52194,48,2,0.372113,3.368503
