In [79]:
# Import necessary libraries
# Use the %pip magic rather than a shell `pip` so the package is installed into the
# environment of the kernel that is running this notebook.
get_ipython().run_line_magic('pip', 'install transformers')
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import tensorflow as tf
from transformers import BertTokenizer

# Read the combined dataset from the CSV file in the working directory
df = pd.read_csv('combineddata.csv')

# Pack every column after the first into one Python list per row, stored under 'list'
df['list'] = df.iloc[:, 1:].values.tolist()

# Keep only the text column and its packed list; .copy() detaches the result from the full frame
df = df.loc[:, ['phrase', 'list']].copy()
df.head()

In [None]:
# Initialize the BERT tokenizer from the 'bert-base-cased' checkpoint name.
# NOTE(review): the model cell loads TFBertModel from the same checkpoint name; keep the two in sync.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')


In [83]:
# Sanity check: encode only the first phrase with the same settings used for the full dataset
# (special tokens added, padded/truncated to 256 positions, TensorFlow tensors returned).
# NOTE(review): `token` is not read again anywhere in the visible notebook.
token = tokenizer.encode_plus(
    df['phrase'].iloc[0],
    add_special_tokens=True,
    padding='max_length',
    truncation=True,
    max_length=256,
    return_tensors='tf',
)


In [85]:
# Pre-allocate one row per phrase and 256 columns (the max_length used when tokenizing).
# The arrays are created as int32 because they hold token ids and 0/1 mask values and the
# model's Input layers are declared with dtype='int32'; the NumPy default would be float64.
X_input_ids = np.zeros((len(df), 256), dtype=np.int32)
X_attn_masks = np.zeros((len(df), 256), dtype=np.int32)


In [87]:
# Define a function to generate training data by tokenizing and encoding phrases
def generate_training_data(df, ids, masks, tokenizer):
    """Tokenize every phrase in ``df['phrase']`` and write the result into ``ids`` / ``masks``.

    Args:
        df: DataFrame with a 'phrase' column; row ``i`` of the outputs corresponds to the
            ``i``-th phrase in iteration order.
        ids: 2-D array that receives the token ids, filled in place row by row.
        masks: 2-D array that receives the attention masks, filled in place row by row.
        tokenizer: object exposing ``encode_plus(...)`` whose result has ``input_ids`` and
            ``attention_mask`` attributes.

    Returns:
        The same ``ids`` and ``masks`` objects that were passed in, after being filled.
    """
    # Follow the width of the pre-allocated array instead of a hard-coded 256 so the
    # padded sequence length and the destination row length can never disagree.
    max_length = ids.shape[1]
    phrases = df['phrase']
    # Wrap the column itself (not the enumerate object) so tqdm can read its length and
    # display a real progress fraction.
    for i, text in enumerate(tqdm(phrases, desc='Tokenizing')):
        tokenized_text = tokenizer.encode_plus(
            text,
            max_length=max_length,
            truncation=True,
            padding='max_length',
            add_special_tokens=True,
            return_tensors='tf'
        )
        ids[i, :] = tokenized_text.input_ids
        masks[i, :] = tokenized_text.attention_mask
    return ids, masks


In [88]:
# Fill the pre-allocated arrays; the function writes into them in place and hands the
# same objects back, so the names are simply re-bound to the filled arrays.
X_input_ids, X_attn_masks = generate_training_data(
    df=df,
    ids=X_input_ids,
    masks=X_attn_masks,
    tokenizer=tokenizer,
)


(4319, 256)

In [89]:
# Stack the per-row lists from the 'list' column into one float32 multi-label target array.
# `target_labels` and `labels` are two names for the same array object.
labels = np.array(list(df['list']), dtype=np.float32)
target_labels = labels


In [None]:
# Create a TensorFlow dataset from the input data and labels: each element is one
# (input_ids row, attention-mask row, label row) triple taken from the three arrays.
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks, labels))


In [91]:
# Define a mapping function for the dataset
def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    """Repackage one dataset element as a ``(features, labels)`` pair.

    The features dict uses the keys 'input_ids' and 'attention_mask'; ``labels`` is
    passed through unchanged.
    """
    features = dict(input_ids=input_ids, attention_mask=attn_masks)
    return features, labels


(4319, 10)

In [92]:
# Apply the mapping function so every element becomes a (feature dict, labels) pair
dataset = dataset.map(SentimentDatasetMapFunction)

In [None]:
# Shuffle once, then group into batches of 16 (an incomplete final batch is dropped).
# reshuffle_each_iteration=False keeps the shuffled order identical on every pass over the
# dataset; with the default (reshuffling on each iteration) the take()/skip() split that
# follows would pick different batches each epoch and validation data would leak into
# training.
dataset = dataset.shuffle(9000, reshuffle_each_iteration=False).batch(16, drop_remainder=True)

In [95]:
# Split the batched dataset: the first `train_size` batches train, the remainder validate.
p = 0.8  # fraction of batches used for training
train_size = int(p * (len(df) // 16))  # 16 is the batch size used when batching the dataset
val_dataset = dataset.skip(train_size)
train_dataset = dataset.take(train_size)

### **Model**

In [96]:
# Import the BERT model
from transformers import TFBertModel

In [98]:
# Initialize the BERT model with custom layers
model = TFBertModel.from_pretrained('bert-base-cased')

# Define input layers for input_ids and attention_masks.
# The layer names match the feature-dict keys produced elsewhere in this notebook
# ('input_ids', 'attention_mask'); 256 is the max_length used when tokenizing.
input_ids = tf.keras.layers.Input(shape=(256,), name='input_ids', dtype='int32')
attn_masks = tf.keras.layers.Input(shape=(256,), name='attention_mask', dtype='int32')

# Pass input through the BERT model and keep element [1] of its output
# (presumably the pooled sentence-level output -- confirm against the transformers docs).
bert_embds = model.bert(input_ids, attention_mask=attn_masks)[1]

# Add an intermediate dense layer and a 10-unit output layer (one unit per class name
# listed in make_prediction).
# NOTE(review): softmax forces the 10 outputs to sum to 1, yet the label-preparation cell
# describes the targets as multi-label. If several tags can be active at once, a sigmoid
# output with a binary cross-entropy loss is the usual choice -- confirm the intent.
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(bert_embds)
output_layer = tf.keras.layers.Dense(10, activation='softmax', name='output_layer')(intermediate_layer)

# Create the sentiment analysis model and print its layer summary
sentiment_model = tf.keras.Model(inputs=[input_ids, attn_masks], outputs=output_layer)
sentiment_model.summary()

In [106]:
# Optimizer, loss and metric handed to compile().
# NOTE(review): categorical cross-entropy / categorical accuracy are paired here with the
# softmax output layer; if the targets are multi-label (see the `multi_label_targets`
# naming in the training cell) confirm this is the intended loss and metric.
optim = tf.keras.optimizers.legacy.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')

In [107]:
# Attach the optimizer, loss and accuracy metric defined in the previous cell
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])

In [None]:
# Training settings.
# NOTE(review): num_tags, num_classes and multi_label_targets are assigned here but not
# read anywhere else in the visible notebook.
epochs = 6
num_classes = 10
num_tags = 10
multi_label_targets = np.zeros(num_classes, dtype=np.int32)

# Train directly on the full NumPy arrays.
# NOTE(review): train_dataset / val_dataset built earlier are not used here, so no
# validation data is passed to fit() -- confirm this is intentional.
hist = sentiment_model.fit(
    x=[X_input_ids, X_attn_masks],
    y=labels,
    batch_size=32,
    epochs=epochs,
)


In [None]:
# Persist the trained model to 'my_model.h5' in the working directory; the prediction
# section reloads it from the same path.
sentiment_model.save('my_model.h5')

### **Prediction**

In [None]:
# Reload the model saved by the training section.
# NOTE(review): the saved graph contains the BERT layer from transformers -- confirm that
# load_model can deserialize it here without passing custom_objects.
sentiment_model = tf.keras.models.load_model('my_model.h5')

# Re-create the tokenizer with the same checkpoint name used for training
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

def prepare_data(input_text, tokenizer):
    """Encode one piece of text into the feature dict used for prediction.

    Args:
        input_text: the text to encode.
        tokenizer: object exposing ``encode_plus(...)`` whose result has ``input_ids`` and
            ``attention_mask`` attributes.

    Returns:
        A dict with keys 'input_ids' and 'attention_mask', each value cast to ``tf.int32``.
    """
    # Same encoding settings as the training data: special tokens, 256 positions,
    # padding and truncation to that length, TensorFlow tensors.
    encoded = tokenizer.encode_plus(
        input_text,
        add_special_tokens=True,
        padding='max_length',
        truncation=True,
        max_length=256,
        return_tensors='tf',
    )
    feature_names = ('input_ids', 'attention_mask')
    return {name: tf.cast(getattr(encoded, name), tf.int32) for name in feature_names}

def make_prediction(sentiment_model, processed_data, classes=['Advanced','Bad','Bad Management','Beginner Friendly','Overall Good','Good Management',
                                                    'Good Resources','Informative and Knowledgeable','Lengthy','Short']):
    """Score ``processed_data`` with the model and pair each score with its class name.

    Args:
        sentiment_model: object exposing ``predict(processed_data)``; only element ``[0]``
            of its result is used (assumes a single-example batch, as built by
            ``prepare_data`` -- confirm for other callers).
        processed_data: the input handed to ``predict`` unchanged.
        classes: class names, matched to the scores by position. The default list is
            never mutated here.

    Returns:
        A list of ``[class_name, score]`` lists, in class order. Names and scores are
        paired positionally, so the result has as many entries as the shorter of the two
        instead of assuming exactly 10.
    """
    probs = sentiment_model.predict(processed_data)[0]
    # zip replaces the former hard-coded range(10), which raised IndexError for shorter
    # class lists and silently dropped entries for longer ones.
    return [[label, prob] for label, prob in zip(classes, probs)]

In [None]:
# Read a review from the user, encode it and score it with the reloaded model
input_text = input('Enter event review here: ')
processed_data = prepare_data(input_text, tokenizer)
tags = make_prediction(sentiment_model, processed_data=processed_data)

# Print every tag whose score exceeds 0.01. Iterating over `tags` itself (rather than a
# hard-coded range(10)) keeps this loop correct if the number of classes changes.
for label, prob in tags:
    if prob > 0.01:
        print(label, prob)