In [None]:
!pip install tensorflow numpy librosa

In [8]:
import zipfile
import os

def unzip_folder(zip_path, extract_to):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

# Specify the path to the zip file and the directory where you want to extract the contents
zip_file_path = '/content/drive/MyDrive/AudioWAV.zip'
extracted_folder_path = 'Audio'

# Create the destination folder if it doesn't exist
os.makedirs(extracted_folder_path, exist_ok=True)

# Call the function to unzip the folder
unzip_folder(zip_file_path, extracted_folder_path)

print(f"Folder '{zip_file_path}' has been successfully extracted to '{extracted_folder_path}'.")

Folder '/content/drive/MyDrive/AudioWAV.zip' has been successfully extracted to 'Audio'.


In [None]:
import os
# Specify the path for the new folder
folder_path = "/content/AudioWithCategorisedWAV/"

folders = ['Anger','Disgust','Fear','Happy','Neutral','Sad']

for i in folders:
  folder_path1 = folder_path + i
  if not os.path.exists(folder_path1):
    os.makedirs(folder_path1)

In [None]:
import os

import shutil
#sourcepath = '/content/AudioInput'
sourcepath = '/content/Audio/AudioWAV'
destinationpath = '/content/AudioWithCategorisedWAV'
files = os.listdir(sourcepath)

for file in files:
    source_file = os.path.join(sourcepath, file)
    if not '.wav' in file:
      continue
    if 'ANG' in file:
      destination_file = os.path.join(destinationpath+'/Anger', file)
      shutil.move(source_file, destination_file)
    elif 'DIS' in file:
      destination_file = os.path.join(destinationpath+'/Disgust', file)
      shutil.move(source_file, destination_file)
    elif 'FEA' in file:
      destination_file = os.path.join(destinationpath+'/Fear', file)
      shutil.move(source_file, destination_file)
    elif 'HAP' in file:
      destination_file = os.path.join(destinationpath+'/Happy', file)
      shutil.move(source_file, destination_file)
    elif 'NEU' in file:
      destination_file = os.path.join(destinationpath+'/Neutral', file)
      shutil.move(source_file, destination_file)
    elif 'SAD' in file:
      destination_file = os.path.join(destinationpath+'/Sad', file)
      shutil.move(source_file, destination_file)

In [None]:
def noise(data):
    noise_amp = 0.04*np.random.uniform()*np.amax(data)
    data = data + noise_amp*np.random.normal(size=data.shape[0])
    return data

def stretch(data, rate=0.70):
    return librosa.effects.time_stretch(data, rate)

def shift(data):
    shift_range = int(np.random.uniform(low=-5, high = 5)*1000)
    return np.roll(data, shift_range)

def pitch(data, sampling_rate, pitch_factor=0.8):
    return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)

def higher_speed(data, speed_factor = 1.25):
    return librosa.effects.time_stretch(data, rate = speed_factor)

def lower_speed(data, speed_factor = 0.75):
    return librosa.effects.time_stretch(data, rate = speed_factor)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import librosa
import numpy as np
import os

# Function to extract audio features using librosa
def extract_features(audio, sample_rate, mfcc=True, chroma=True, mel=True):
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13), axis=1)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate), axis=1)
        result = np.hstack((result, chroma))
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=audio, sr=sample_rate), axis=1)
        result = np.hstack((result, mel))
    return result

# Function to load audio data and labels
def load_data(data_path):
    features, labels = [], []
    for folder in os.listdir(data_path):
        label = folder
        for file_name in os.listdir(os.path.join(data_path, folder)):
            file_path = os.path.join(data_path, folder, file_name)
            audio, sample_rate = librosa.load(file_path)
            feature = extract_features(audio,sample_rate)
            features.append(feature)
            labels.append(label)
            #noised
            noise_data = noise(audio)
            feature = extract_features(noise_data,sample_rate)
            features.append(feature)
            labels.append(label)
            #stretched
            #stretch_data = stretch(audio)
            #feature = extract_features(stretch_data,sample_rate)
            #features.append(feature)
            #labels.append(label)
            #pitched
            #pitch_data = pitch(data = audio, sampling_rate = sample_rate)
            #feature = extract_features(pitch_data,sample_rate)
            #features.append(feature)
            #labels.append(label)

            #speed up
            higher_speed_data = higher_speed(audio)
            feature = extract_features(higher_speed_data,sample_rate)
            features.append(feature)
            labels.append(label)

            #speed down
            lower_speed_data = higher_speed(audio)
            feature = extract_features(lower_speed_data,sample_rate)
            features.append(feature)
            labels.append(label)
    return np.array(features), np.array(labels)

# Load data and preprocess
data_path = "/content/AudioWithCategorisedWAV"
features, labels = load_data(data_path)

# Encode labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Split the data into training and testing sets



In [None]:
X_train, X_test, y_train, y_test = train_test_split(features, encoded_labels, test_size=0.1, random_state=42)
model = models.Sequential()

# Convolutional layers
model.add(layers.Conv1D(64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling1D(pool_size=2))

model.add(layers.Conv1D(128, kernel_size=3, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling1D(pool_size=2))

model.add(layers.Conv1D(256, kernel_size=3, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling1D(pool_size=2))

# Recurrent layers
model.add(layers.Bidirectional(layers.LSTM(128, return_sequences=True)))
model.add(layers.Bidirectional(layers.LSTM(128)))

# Fully connected layers
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))

# Output layer
model.add(layers.Dense(6, activation='softmax'))

# Compile the model
optimiser = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimiser, loss='sparse_categorical_crossentropy', metrics=['accuracy'],run_eagerly=True)


In [None]:
from sklearn.utils.class_weight import compute_class_weight
class_labels = np.unique(labels)
class_indices = {label: index for index, label in enumerate(class_labels)}
Y = np.array([class_indices[label] for label in labels])

# Calculate class weights
class_weights = compute_class_weight(class_weight ='balanced',classes = np.unique(Y),y= Y)

# Convert class weights to a dictionary for class_weight parameter in model.fit
class_weights_dict = {class_index: weight for class_index, weight in zip(np.unique(Y), class_weights)}
# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), class_weight=class_weights_dict)

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

Epoch 1/50
Epoch 2/50
Epoch 3/50
 26/838 [..............................] - ETA: 10:10 - loss: 1.3474 - accuracy: 0.4615

KeyboardInterrupt: ignored

In [None]:
model.save("emotion_classification_Audio_model.h5")

# Optionally, you can also save the weights only
model.save_weights("emotion_classification_model_Audio_weights.h5")

  saving_api.save_model(


In [9]:
import zipfile
import os

def unzip_folder(zip_path, extract_to):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

# Specify the path to the zip file and the directory where you want to extract the contents
zip_file_path = '/content/drive/MyDrive/VideoFlash.zip'
extracted_folder_path = 'Videos'

# Create the destination folder if it doesn't exist
os.makedirs(extracted_folder_path, exist_ok=True)

# Call the function to unzip the folder
unzip_folder(zip_file_path, extracted_folder_path)

print(f"Folder '{zip_file_path}' has been successfully extracted to '{extracted_folder_path}'.")

Folder '/content/drive/MyDrive/VideoFlash.zip' has been successfully extracted to 'Videos'.


In [2]:
import os
import cv2
import numpy as np
from keras.applications.vgg19 import VGG19, preprocess_input
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, BatchNormalization
from keras.optimizers import Adam

import shutil
def preprocess_frame(frame):
    # Preprocess the frame: resize, normalize, etc.
    processed_frame = cv2.resize(frame, (224, 224))
    processed_frame = processed_frame / 255.0  # Normalize pixel values
    return processed_frame

def read_video(video_path):
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        processed_frame = preprocess_frame(frame)

    cap.release()
    return processed_frame

In [None]:
sourcepath = '/content/Videos/VideoFlash'
files = os.listdir(sourcepath)
xdata = []
ylabels = []

for file in files:
    source_file = os.path.join(sourcepath, file)
    xdata.append(read_video(source_file))
    if 'ANG' in file:
      ylabels.append('Ang')
    elif 'DIS' in file:
      ylabels.append('DIS')
    elif 'FEA' in file:
      ylabels.append('FEA')
    elif 'HAP' in file:
      ylabels.append('HAP')
    elif 'NEU' in file:
      ylabels.append('NEU')
    elif 'SAD' in file:
      ylabels.append('SAD')

xdata1 = np.array(xdata)

In [None]:
from sklearn.model_selection import train_test_split
X_train1, X_test1, y_train1, y_test1 = train_test_split(xdata1, ylabels, test_size=0.2, random_state=42)
from sklearn.preprocessing import OneHotEncoder
onehot_encoder = OneHotEncoder(sparse = False)
y_train_onehot = onehot_encoder.fit_transform((np.array(y_train1)).reshape(-1, 1))

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# Load the VGG19 model without the fully connected layers
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

for layer in base_model.layers:
    layer.trainable = False

# Add your own fully connected layers for emotion classification
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = Dense(1024, activation='relu')(x)
x = BatchNormalization()(x)
predictions = Dense(6, activation='softmax')(x)  # Add your number of emotion classes

model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'],run_eagerly=True)


In [None]:
class_labels = np.unique(y_train1)
class_indices = {label: index for index, label in enumerate(class_labels)}
Y = np.array([class_indices[label] for label in y_train1])

# Calculate class weights
class_weights = compute_class_weight(class_weight ='balanced',classes = np.unique(Y),y= Y)

# Convert class weights to a dictionary for class_weight parameter in model.fit
class_weights_dict = {class_index: weight for class_index, weight in zip(np.unique(Y), class_weights)}

model.fit(x=X_train1,y=y_train_onehot,batch_size=8,epochs=20, class_weight=class_weights_dict)

In [None]:
model.save("emotion_classification_Video_model.h5")

# Optionally, you can also save the weights only
model.save_weights("emotion_classification_model_Video_weights.h5")

  saving_api.save_model(


In [None]:
!pip install contractions
!pip install nltk
#!pip install transformers
#!pip install --upgrade protobuf
#!pip install --upgrade tensorflow --user

Collecting contractions
  Downloading contractions-0.1.73-py2.py3-none-any.whl (8.7 kB)
Collecting textsearch>=0.0.21 (from contractions)
  Downloading textsearch-0.0.24-py2.py3-none-any.whl (7.6 kB)
Collecting anyascii (from textsearch>=0.0.21->contractions)
  Downloading anyascii-0.3.2-py3-none-any.whl (289 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m289.9/289.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyahocorasick (from textsearch>=0.0.21->contractions)
  Downloading pyahocorasick-2.0.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.8/110.8 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyahocorasick, anyascii, textsearch, contractions
Successfully installed anyascii-0.3.2 contractions-0.1.73 pyahocorasick-2.0.0 textsearch-0.0.24


In [None]:
import warnings
warnings.filterwarnings('ignore')
import random
import re
import string
import contractions
import nltk
from nltk import pos_tag
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from transformers import TFRobertaModel, RobertaTokenizerFast
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline

from keras.models import Model
from keras.layers import Dense, Dropout, Input
from keras.optimizers import Adam
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy
from keras.callbacks import EarlyStopping, ModelCheckpoint

from tabulate import tabulate


In [None]:
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')
nltk.download('omw-1.4')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('words')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


True

In [None]:
df_clean = pd.read_csv('tweet_emotions.csv')
stop_words = set(stopwords.words('english'))

def expand_contractions(text):
    '''
    Function replaces abbreviations with full word versions
    '''
    return contractions.fix(text)

def clean_content(text):

    text = expand_contractions(text)
    # remove twitter handles
    clean_text = re.sub(r'@\w+\s?', '', text)

    # convert to lowercase
    clean_text = clean_text.lower()

    # remove links http:// or https://
    clean_text = re.sub(r'https?:\/\/\S+', '', clean_text)

    # remove links beginning with www. and ending with .com
    clean_text = re.sub(r'www\.[a-z]?\.?(com)+|[a-z]+\.(com)', '', clean_text)

    # remove html reference characters
    clean_text = re.sub(r'&[a-z]+;', '', clean_text)

    # remove non-letter characters besides spaces "/", ";" "[", "]" "=", "#"
    clean_text = re.sub(r"[^a-z\s\(\-:\)\\\/\];='#]", '', clean_text)
    clean_text = clean_text.split()

    # remove stop words
    clean_lst = []
    for word in clean_text:
      if word not in stop_words:
        clean_lst.append(word)


    lemmatized_words = []
    for word in clean_lst:
      lemmatized_word = WordNetLemmatizer().lemmatize(word)
      lemmatized_words.append(lemmatized_word)

    return ' '.join(lemmatized_words)

df_clean['content'] = df_clean['content'].apply(lambda x :  clean_content(x))

# delete duplicates
df_clean.drop_duplicates(subset='content', inplace=True)
#df_clean.reset_index(drop=True, inplace=True)

# delete small sentence
df_clean = df_clean.loc[df_clean['content'].apply(lambda x: len(x) >= 3)]

# splitting into tokens, features of the structure of the text used in Twitter
df_clean['content'] = df_clean['content'].apply(TweetTokenizer().tokenize)

# remove punctuation marks
PUNCUATION_LIST = list(string.punctuation)
def remove_punctuation(word_list):
    return [w for w in word_list if w not in PUNCUATION_LIST]
df_clean['content'] = df_clean['content'].apply(remove_punctuation)
df_clean['content'] = df_clean['content'].apply(lambda x: ' '.join(x))
df_clean['sentiment'] = df_clean['sentiment'].replace(['happiness', 'enthusiasm', 'surprise','love'], 'Happy')
df_clean['sentiment'] = df_clean['sentiment'].replace(['boredom','sadness','Sad'], 'sad')
df_clean['sentiment'] = df_clean['sentiment'].replace(['hate','anger'], 'Anger')
df_clean['sentiment'] = df_clean['sentiment'].replace(['relief', 'empty', 'Neutral'], 'neutral')
X_train, X_test, y_train, y_test= train_test_split(df_clean['content'], df_clean['sentiment'], test_size=0.2, random_state=42)
onehot_encoder = OneHotEncoder(sparse = False)
y_train_onehot = onehot_encoder.fit_transform((np.array(df_clean['sentiment'])).reshape(-1, 1))

from sklearn.preprocessing import LabelEncoder

# Create a LabelEncoder instance
label_encoder = LabelEncoder()

# Fit the label encoder and transform the categories into numerical labels
y_label_encoder = label_encoder.fit_transform(df_clean['sentiment'])

In [None]:
#from imblearn.over_sampling import RandomOverSampler
tokenizer_roberta = RobertaTokenizerFast.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')

#ros = RandomOverSampler()
#x_train, y_train = ros.fit_resample(np.array(df_clean['content']).reshape(-1, 1), np.array(df_clean['sentiment']).reshape(-1, 1))
#train_os = pd.DataFrame(list(zip([x[0] for x in x_train], y_train)), columns = ['content', 'sentiment'])
X_train = df_clean['content'].values
y_train = df_clean['sentiment'].values

y_train = OneHotEncoder().fit_transform(np.array(y_train).reshape(-1, 1)).toarray()

token_lens = []

for txt in X_train:
    tokens = tokenizer_roberta.encode(txt, max_length=512, truncation=True)
    token_lens.append(len(tokens))
max_length=np.max(token_lens)

MAX_LEN=128

def tokenize_roberta(data, max_len=MAX_LEN) :
    input_ids = []
    attention_masks = []
    for i in range(len(data)):
        encoded = tokenizer_roberta.encode_plus(
            data[i],
            add_special_tokens=True,
            max_length=max_len,
            padding='max_length',
            return_attention_mask=True
        )
        input_ids.append(encoded['input_ids'])
        attention_masks.append(encoded['attention_mask'])
    return np.array(input_ids),np.array(attention_masks)

train_inputs, train_masks = tokenize_roberta(X_train, MAX_LEN)


roberta_model = TFRobertaModel.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
callbacks = [EarlyStopping(monitor='val_categorical_accuracy', patience=5, min_delta=0.01),
             ModelCheckpoint(filepath='best_model.h5', monitor='val_categorical_accuracy', save_best_only=True)]



Some layers from the model checkpoint at cardiffnlp/twitter-roberta-base-emotion were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-emotion.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


In [None]:
def create_model(bert_model, max_len=MAX_LEN):
    inputs = Input(shape=(max_len,), dtype='int32')
    masks = Input(shape=(max_len,), dtype='int32')

    bert_output = bert_model([inputs, masks])[1]

    dense_1 = Dense(128, activation='relu')(bert_output)
    dropout_1 = Dropout(0.5)(dense_1)

    dense_2 = Dense(64, activation='relu')(dropout_1)
    dropout_2 = Dropout(0.5)(dense_2)

    output = Dense(6, activation='softmax')(dropout_2)

    model = Model(inputs=[inputs, masks], outputs=output)

    model.compile(optimizer=Adam(learning_rate=1e-5),
                  loss=CategoricalCrossentropy(),
                  metrics=CategoricalAccuracy())
    return model

model = create_model(roberta_model, MAX_LEN)
history = model.fit([train_inputs, train_masks],y_train,epochs=4,batch_size=32)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [None]:
model.save("emotion_classification_Text_model.h5")

# Optionally, you can also save the weights only
model.save_weights("emotion_classification_model_Text_weights.h5")

In [None]:
roberta_model = TFRobertaModel.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
roberta_model.predict([tokenize_roberta(["It's eleven o'clock 😔"])])



array([[0.02356157, 0.04002122, 0.01045423, 0.04828082, 0.55280364,
        0.32487854]], dtype=float32)

In [None]:
y_train[30:45]

array([[1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0., 0.]])

In [None]:
set(df_clean['sentiment'].values)

{'Anger', 'Happy', 'fun', 'neutral', 'sad', 'worry'}

In [None]:
OneHotEncoder().fit(np.array(df_clean['sentiment'].values).reshape(-1, 1)).get_feature_names_out()

array(['x0_Anger', 'x0_Happy', 'x0_fun', 'x0_neutral', 'x0_sad',
       'x0_worry'], dtype=object)

In [None]:
from keras.models import load_model

# Load the entire model
#Text_model = load_model("/content/emotion_classification_Text_model.h5")
Audio_model = load_model("/content/emotion_classification_Audio_model.h5")
Video_model = load_model("/content/emotion_classification_Video_model.h5")

In [None]:
Video_model.predict(np.array([read_video('/content/1001_IEO_SAD_LO.flv')]))



array([[3.1434891e-01, 1.0206088e-01, 4.4348928e-01, 2.7667569e-05,
        2.5584141e-02, 1.1448909e-01]], dtype=float32)

In [None]:
np.argmax(Video_model.predict(np.array([read_video('/content/1001_IEO_SAD_LO.flv')])))



2

In [None]:
text_model = model
text_model.predict([tokenize_roberta(["It's eleven o'clock 😔"])])



array([[0.02356157, 0.04002122, 0.01045423, 0.04828082, 0.55280364,
        0.32487854]], dtype=float32)

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import cv2
import librosa

# Function to extract features from video
def extract_video_features(video_path):
    # Your video feature extraction code here
    # Example: Using OpenCV to extract color histogram features
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        processed_frame = cv2.resize(frame, (224, 224))
        processed_frame = processed_frame / 255.0  # Normalize pixel values

    cap.release()
    return processed_frame

# Function to extract features from audio
def extract_audio_features(file_path, mfcc=True, chroma=True, mel=True):
    # Your audio feature extraction code here
    # Example: Using librosa to extract MFCC features
    audio, sample_rate = librosa.load(file_path)
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13), axis=1)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate), axis=1)
        result = np.hstack((result, chroma))
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=audio, sr=sample_rate), axis=1)
        result = np.hstack((result, mel))
    return result

# Load your pre-trained text model (replace with your actual model loading code)
def predict_text_emotion(text):
    # Your text prediction code here
    # Example: Using a simple RandomForestClassifier
    # You should replace this with your actual text classification model
    return text_model.predict([tokenize_roberta([text])])[0]

# Load your pre-trained video model (replace with your actual model loading code)
def predict_video_emotion(video_path):
    # Your video prediction code here
    # Example: Extract video features and use a simple RandomForestClassifier
    video_features = extract_video_features(video_path)
    return Video_model.predict(np.array([video_features]))

# Load your pre-trained audio model (replace with your actual model loading code)
def predict_audio_emotion(audio_path):
    # Your audio prediction code here
    # Example: Extract audio features and use a simple RandomForestClassifier
    audio_features = extract_audio_features(audio_path)
    return Audio_model.predict(np.array([audio_features]))

# Example usage
text_path = "It's eleven o'clock 😔"
video_path = "/content/1064_IEO_SAD_HI.flv"
audio_path = "/content/1064_IEO_SAD_HI.wav"

# Let's assume your text, video, and audio models have predicted the following emotions
text_emotion = predict_text_emotion(text_path)  # Example prediction from the text model
video_emotion = predict_video_emotion(video_path)#[0.1, 0.4, 0.1, 0.1, 0.1, 0.2]  # Example prediction from the video model
audio_emotion = predict_audio_emotion(audio_path)#[0.3, 0.2, 0.2, 0.1, 0.1, 0.1]  # Example prediction from the audio model

# Set custom weights for each modality
text_weight = 0.8
video_weight = 0.2
audio_weight = 0.3

# Apply custom weights to each modality's prediction
weighted_text_emotion = text_weight * np.array(text_emotion)
weighted_video_emotion = video_weight * np.array(video_emotion)
weighted_audio_emotion = audio_weight * np.array(audio_emotion)

# Combine the weighted predictions (you can choose a different method, e.g., averaging)
final_emotion = np.argmax(weighted_text_emotion + weighted_video_emotion + weighted_audio_emotion)

print("Weighted Text Emotion:", np.argmax(weighted_text_emotion))
print("Weighted Video Emotion:", np.argmax(weighted_video_emotion))
print("Weighted Audio Emotion:", np.argmax(weighted_audio_emotion))
print("Final Emotion Class:", final_emotion)


In [None]:
from transformers import TFRobertaModel, RobertaTokenizerFast
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy
from keras.optimizers import Adam
def create_model(bert_model, max_len=128):
    inputs = Input(shape=(max_len,), dtype='int32')
    masks = Input(shape=(max_len,), dtype='int32')

    bert_output = bert_model([inputs, masks])[1]

    dense_1 = Dense(128, activation='relu')(bert_output)
    dropout_1 = Dropout(0.5)(dense_1)

    dense_2 = Dense(64, activation='relu')(dropout_1)
    dropout_2 = Dropout(0.5)(dense_2)

    output = Dense(6, activation='softmax')(dropout_2)

    model = Model(inputs=[inputs, masks], outputs=output)

    model.compile(optimizer=Adam(learning_rate=1e-5),
                  loss=CategoricalCrossentropy(),
                  metrics=CategoricalAccuracy())
    return model

roberta_model = TFRobertaModel.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
tokenizer_roberta = RobertaTokenizerFast.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
model1 = create_model(roberta_model)

Some layers from the model checkpoint at cardiffnlp/twitter-roberta-base-emotion were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-emotion.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


In [None]:
model1.load_weights("textmodelweights.h5")

OSError: ignored

In [1]:
!pip install nlpaug
import nlpaug.augmenter.word as naw

# Create an augmentation pipeline
augmenter = naw.SynonymAug(aug_src='wordnet')
sentdict = {'IEO':"It's eleven o'clock",'TIE':"That is exactly what happened",'IOM':"I'm on my way to the meeting",'IWW':"I wonder what this is about",'TAI':"The airplane is almost full",'MTI':"Maybe tomorrow it will be cold",
            'IWL':"I would like a new alarm clock",'ITH':"I think I have a doctor's appointment",'DFA':"Don't forget a jacket",'ITS':"I think I've seen this before",'TSI':"The surface is slick",'WSI':"We'll stop in a couple of minutes"}
testsentences = []
finalemotion=[]
def create(sentence):
  text = sentence + ' 😠'
  augmented_text1 = augmenter.augment(text)
  augmented_text2 = augmenter.augment(text)
  testsentences.append(text)
  testsentences.append(augmented_text1[0])
  testsentences.append(augmented_text2[0])
  finalemotion.append(0)
  finalemotion.append(0)
  finalemotion.append(0)
  text = sentence + ' 😖'
  augmented_text1 = augmenter.augment(text)
  augmented_text2 = augmenter.augment(text)
  testsentences.append(text)
  testsentences.append(augmented_text1[0])
  testsentences.append(augmented_text2[0])
  finalemotion.append(1)
  finalemotion.append(1)
  finalemotion.append(1)
  text = sentence + ' 😱'
  augmented_text1 = augmenter.augment(text)
  augmented_text2 = augmenter.augment(text)
  testsentences.append(text)
  testsentences.append(augmented_text1[0])
  testsentences.append(augmented_text2[0])
  finalemotion.append(2)
  finalemotion.append(2)
  finalemotion.append(2)
  text = sentence + ' 😊'
  augmented_text1 = augmenter.augment(text)
  augmented_text2 = augmenter.augment(text)
  testsentences.append(text)
  testsentences.append(augmented_text1[0])
  testsentences.append(augmented_text2[0])
  finalemotion.append(3)
  finalemotion.append(3)
  finalemotion.append(3)
  text = sentence + ' 😐'
  augmented_text1 = augmenter.augment(text)
  augmented_text2 = augmenter.augment(text)
  testsentences.append(text)
  testsentences.append(augmented_text1[0])
  testsentences.append(augmented_text2[0])
  finalemotion.append(4)
  finalemotion.append(4)
  finalemotion.append(4)
  text = sentence + ' 😢'
  augmented_text1 = augmenter.augment(text)
  augmented_text2 = augmenter.augment(text)
  testsentences.append(text)
  testsentences.append(augmented_text1[0])
  testsentences.append(augmented_text2[0])
  finalemotion.append(5)
  finalemotion.append(5)
  finalemotion.append(5)
for k in sentdict.keys():
  create(sentdict[k])

import re
import random

augmentedemojitext=[]
emotions=[]

def emoji_augmentation(text):
    # Define a dictionary of emoji replacements
    emoji_replacements = {
        "😠": ["😠", "😡", "😤", "😾"],
        "😖": ["😖", "😣", "😞", "😷"],
        "😱": ["😱", "😨", "😰", "😲"],
        "😊": ["😊", "😄", "😁", "😆"],
        "😐": ["😐", "😑", "😶", "😏"],
        "😢": ["😢", "😭", "😓", "😥"],
        # Add more emojis and their possible replacements
    }

    emoji_dict = {"😠":0,"😖": 1,"😱": 2,"😊": 3,"😐": 4,"😢": 5}

    # Use regular expression to find emojis in the text
    emoji_pattern = re.compile(r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F700-\U0001F77F\U0001F780-\U0001F7FF\U0001F800-\U0001F8FF\U0001F900-\U0001F9FF\U0001FA00-\U0001FA6F\U0001FA70-\U0001FAFF\U00002702-\U000027B0\U000024C2-\U0001F251]+')
    matches = emoji_pattern.findall(text)

    # Perform augmentation by randomly replacing emojis
    for match in matches:
      if match in emoji_replacements:
        for replacement in emoji_replacements[match]:
          augmented_text = text
          augmented_text = augmented_text.replace(match, replacement)
          augmentedemojitext.append(augmented_text)
          emotions.append(emoji_dict[match])

for text in testsentences:
  emoji_augmentation(text)


Collecting nlpaug
  Downloading nlpaug-1.1.11-py3-none-any.whl (410 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.5/410.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: nlpaug
Successfully installed nlpaug-1.1.11


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [2]:
import warnings
warnings.filterwarnings('ignore')

import random
import re
import string

import nltk
from nltk import pos_tag
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from transformers import TFRobertaModel, RobertaTokenizerFast
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline

from keras.models import Model
from keras.layers import Dense, Dropout, Input
from keras.optimizers import Adam
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy
from keras.callbacks import EarlyStopping, ModelCheckpoint

from tabulate import tabulate
X_train1, X_test1, y_train1, y_test1 = train_test_split(augmentedemojitext,emotions, test_size=0.1, random_state=42)

from transformers import TFRobertaModel, RobertaTokenizerFast
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy
from keras.optimizers import Adam
tokenizer_roberta = RobertaTokenizerFast.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')

MAX_LEN=128

def tokenize_roberta(data, max_len=MAX_LEN) :
    input_ids = []
    attention_masks = []
    for i in range(len(data)):
        encoded = tokenizer_roberta.encode_plus(
            data[i],
            add_special_tokens=True,
            max_length=max_len,
            padding='max_length',
            return_attention_mask=True
        )
        input_ids.append(encoded['input_ids'])
        attention_masks.append(encoded['attention_mask'])
    return np.array(input_ids),np.array(attention_masks)

def create_model(bert_model, max_len=MAX_LEN):
    inputs = Input(shape=(max_len,), dtype='int32')
    masks = Input(shape=(max_len,), dtype='int32')

    bert_output = bert_model([inputs, masks])[1]

    dense_1 = Dense(128, activation='relu')(bert_output)
    dropout_1 = Dropout(0.5)(dense_1)

    dense_2 = Dense(64, activation='relu')(dropout_1)
    dropout_2 = Dropout(0.5)(dense_2)

    output = Dense(6, activation='softmax')(dropout_2)

    model = Model(inputs=[inputs, masks], outputs=output)

    model.compile(optimizer=Adam(learning_rate=1e-5),
                  loss=CategoricalCrossentropy(),
                  metrics=CategoricalAccuracy())
    return model

roberta_model = TFRobertaModel.from_pretrained('cardiffnlp/twitter-roberta-base-emotion')
model = create_model(roberta_model, MAX_LEN)


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/768 [00:00<?, ?B/s]

tf_model.h5:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some layers from the model checkpoint at cardiffnlp/twitter-roberta-base-emotion were not used when initializing TFRobertaModel: ['classifier']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-emotion.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


In [3]:
train_inputs, train_masks = tokenize_roberta(X_train1, MAX_LEN)#tokenize_roberta(X_train, MAX_LEN)
history = model.fit([train_inputs, train_masks],  OneHotEncoder().fit_transform(np.array(y_train1).reshape(-1, 1)).toarray(),  epochs=4,  batch_size=32)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [4]:
%%capture
textemo=[]
for s in X_test1:
  textemo.append(np.argmax(model.predict([tokenize_roberta([s])])[0]))

In [5]:
te=0
for i in range(0,len(X_test1)):
  if y_test1[i] == textemo[i]:
    te= te+1
print(te/len(X_test1))

0.7701149425287356


In [6]:
from keras.models import load_model
Audio_model = load_model("/content/emotion_classification_Audio_model_With_Augmentation.h5")
Video_model = load_model("/content/drive/MyDrive/emotion_classification_Video_model.h5")
text_model = model

In [10]:
import os
AudiosData = []
data_path = '/content/Audio/AudioWAV'
for folder in os.listdir(data_path):
    file_path = os.path.join(data_path, folder)
    AudiosData.append(folder)

print(len(AudiosData))

ad, AudiosDataTest = train_test_split(AudiosData, test_size=0.3, random_state=42)
testdata=[]
for data in AudiosDataTest:
  testdata.append(data[:-3])

finalemotion = []
for file in testdata:
  if 'ANG' in file:
    finalemotion.append(0)
  elif 'DIS' in file:
    finalemotion.append(1)
  elif 'FEA' in file:
    finalemotion.append(2)
  elif 'HAP' in file:
    finalemotion.append(3)
  elif 'NEU' in file:
    finalemotion.append(4)
  elif 'SAD' in file:
    finalemotion.append(5)

emoji_replacements = {
        "😠": ["😠", "😡", "😤", "😾"],
        "😖": ["😖", "😣", "😞", "😷"],
        "😱": ["😱", "😨", "😰", "😲"],
        "😊": ["😊", "😄", "😁", "😆"],
        "😐": ["😐", "😑", "😶", "😏"],
        "😢": ["😢", "😭", "😓", "😥"],
        # Add more emojis and their possible replacements
    }
def CreateSentences(file,sentence,testsentences):
  if 'ANG' in file:
    testsentences.append(sentence + random.choice(emoji_replacements['😠']))
  elif 'DIS' in file:
    testsentences.append(sentence + random.choice(emoji_replacements['😖']))
  elif 'FEA' in file:
    testsentences.append(sentence + random.choice(emoji_replacements['😱']))
  elif 'HAP' in file:
    testsentences.append(sentence + random.choice(emoji_replacements['😊']))
  elif 'NEU' in file:
    testsentences.append(sentence + random.choice(emoji_replacements['😐']))
  elif 'SAD' in file:
    testsentences.append(sentence + random.choice(emoji_replacements['😢']))

testsentences=[]
sentdict = {'IEO':"It's eleven o'clock",'TIE':"That is exactly what happened",'IOM':"I'm on my way to the meeting",'IWW':"I wonder what this is about",'TAI':"The airplane is almost full",'MTI':"Maybe tomorrow it will be cold",
            'IWL':"I would like a new alarm clock",'ITH':"I think I have a doctor's appointment",'DFA':"Don't forget a jacket",'ITS':"I think I've seen this before",'TSI':"The surface is slick",'WSI':"We'll stop in a couple of minutes"}
for file in testdata :
  CreateSentences(file,sentdict[file[5:8]],testsentences)


7442


In [26]:
testsentences

['I wonder what this is about😠',
 "Don't forget a jacket😨",
 "I think I've seen this before😶",
 "Don't forget a jacket😱",
 'The surface is slick😑',
 'Maybe tomorrow it will be cold😤',
 'The airplane is almost full😣',
 "Don't forget a jacket😲",
 "It's eleven o'clock😢",
 "I'm on my way to the meeting😾",
 "I think I've seen this before😭",
 "I'm on my way to the meeting😆",
 'The airplane is almost full😑',
 'I would like a new alarm clock😑',
 'I would like a new alarm clock😞',
 'I would like a new alarm clock😣',
 "Don't forget a jacket😡",
 "It's eleven o'clock😣",
 "It's eleven o'clock😞",
 "Don't forget a jacket😶",
 "Don't forget a jacket😡",
 "It's eleven o'clock😷",
 "Don't forget a jacket😑",
 "It's eleven o'clock😣",
 "It's eleven o'clock😁",
 "It's eleven o'clock😓",
 "I think I've seen this before😥",
 'The airplane is almost full😱',
 "I think I have a doctor's appointment😨",
 'That is exactly what happened😣',
 "It's eleven o'clock😰",
 'Maybe tomorrow it will be cold😾',
 'Maybe tomorrow it wi

In [12]:
td = testdata
testdata = td[:1800]
print(len(testdata))
print(len(td))

1800
2233


In [21]:
testdata = td[1800:]
testsentences = testsentences[1800:]
finalemotion = finalemotion[1800:]

In [22]:
import numpy as np
import cv2
import librosa

# Function to extract features from video
def extract_video_features(video_path):
    # Your video feature extraction code here
    # Example: Using OpenCV to extract color histogram features
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        processed_frame = cv2.resize(frame, (224, 224))
        processed_frame = processed_frame / 255.0  # Normalize pixel values

    cap.release()
    return processed_frame

# Function to extract features from audio
def extract_audio_features(file_path, mfcc=True, chroma=True, mel=True):
    # Your audio feature extraction code here
    # Example: Using librosa to extract MFCC features
    audio, sample_rate = librosa.load(file_path)
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13), axis=1)
        result = np.hstack((result, mfccs))
    #if chroma:
    #    chroma = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate), axis=1)
    #    result = np.hstack((result, chroma))
    #if mel:
    #    mel = np.mean(librosa.feature.melspectrogram(y=audio, sr=sample_rate), axis=1)
    #    result = np.hstack((result, mel))
    return result

# Load your pre-trained text model (replace with your actual model loading code)
def predict_text_emotion(text):
    # Your text prediction code here
    # Example: Using a simple RandomForestClassifier
    # You should replace this with your actual text classification model
    return text_model.predict([tokenize_roberta([text])])[0]

# Load your pre-trained video model (replace with your actual model loading code)
def predict_video_emotion(video_path):
    # Your video prediction code here
    # Example: Extract video features and use a simple RandomForestClassifier
    video_features = extract_video_features(video_path)
    return Video_model.predict(np.array([video_features]))

# Load your pre-trained audio model (replace with your actual model loading code)
def predict_audio_emotion(audio_path):
    # Your audio prediction code here
    # Example: Extract audio features and use a simple RandomForestClassifier
    audio_features = extract_audio_features(audio_path)
    return Audio_model.predict(np.array([audio_features]))

textemotion_pred=[]
videoemotion_pred=[]
audioemotion_pred=[]
# Example usage
for i in range(0,len(testdata)):
  text = testsentences[i]
  video_path = '/content/Videos/VideoFlash/' + testdata[i] + 'flv' #"/content/Video/1064_IEO_SAD_HI.flv"
  audio_path = '/content/Audio/AudioWAV/'+ testdata[i] + 'wav' #"/content/Audio/1064_IEO_SAD_HI.wav"

    # Let's assume your text, video, and audio models have predicted the following emotions
  textemotion_pred.append(predict_text_emotion(text))#[0.2, 0.3, 0.1, 0.1, 0.2, 0.1]  # Example prediction from the text model
  videoemotion_pred.append(predict_video_emotion(video_path))#[0.1, 0.4, 0.1, 0.1, 0.1, 0.2]  # Example prediction from the video model
  audioemotion_pred.append(predict_audio_emotion(audio_path))#[0.3, 0.2, 0.2, 0.1, 0.1, 0.1]  # Example prediction from the audio model

textemotions=[]
videoemotions=[]
audioemotions=[]
finalemotions=[]
def predict(tw,vw,aw):
  textemotions.clear()
  videoemotions.clear()
  audioemotions.clear()
  finalemotions.clear()
  for i in range(0,len(testdata)):
    # Set custom weights for each modality
    text_weight = tw
    video_weight = vw
    audio_weight = aw

    # Apply custom weights to each modality's prediction
    weighted_text_emotion = text_weight * np.array(textemotion_pred[i])
    weighted_video_emotion = video_weight * np.array(videoemotion_pred[i])
    weighted_audio_emotion = audio_weight * np.array(audioemotion_pred[i])

    # Combine the weighted predictions (you can choose a different method, e.g., averaging)
    final_emotion = np.argmax(weighted_text_emotion + weighted_video_emotion + weighted_audio_emotion)

    textemotions.append(np.argmax(weighted_text_emotion))
    videoemotions.append(np.argmax(weighted_video_emotion))
    audioemotions.append(np.argmax(weighted_audio_emotion))
    finalemotions.append(final_emotion)



In [24]:
#equal weightage to audio and video
def getAccuracy():
  te=0
  ae=0
  ve=0
  fe=0
  for i in range(0,433):
    if finalemotion[i] == textemotions[i]:
      te= te+1
    if finalemotion[i] == audioemotions[i]:
      ae= ae+1
    if finalemotion[i] == videoemotions[i]:
      ve= ve+1
    if finalemotion[i] == finalemotions[i]:
      fe= fe+1
  print(' final emotion : ' + str(fe/433))
  print(' final emotion : ' + str(te/433))
  print(' final emotion : ' + str(ve/433))
  print(' final emotion : ' + str(ae/433))

In [15]:
predict(1,1,1)
getAccuracy()

 final emotion : 0.9816666666666667
 final emotion : 0.8188888888888889
 final emotion : 0.6227777777777778
 final emotion : 0.9394444444444444


In [18]:
tep=[]
aep=[]
vep=[]
for i in textemotion_pred:
  tep.append(i)
for i in videoemotion_pred:
  vep.append(i[0])
for i in audioemotion_pred:
  aep.append(i[0])

In [19]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Example probabilities for each modality
text_probabilities = tep
audio_probabilities = aep
video_probabilities = vep

# Ground truth labels for the samples (replace with your actual labels)
labels = finalemotion[:1800]

# Combine probabilities into a feature matrix
X = np.concatenate([text_probabilities, audio_probabilities, video_probabilities], axis=1)

# Define a neural network model
model = Sequential()
model.add(Dense(1, input_dim=X.shape[1], activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, np.array(labels), epochs=100, verbose=0)

# Get the learned weights
weights = model.get_weights()[0]

# Display the learned weights
textweight, audioweight, videoweight = weights.flatten()
print("Learned Weights - Text: {:.3f}, Audio: {:.3f}, Video: {:.3f}".format(textweight, audioweight, videoweight))


ValueError: ignored

In [25]:
tw=np.array(weights.flatten()[:6])
aw=np.array(weights.flatten()[6:12])
vw=np.array(weights.flatten()[12:])
predict(tw,vw,aw) # aw =1.8
getAccuracy()

 final emotion : 0.8013856812933026
 final emotion : 0.6697459584295612
 final emotion : 0.6004618937644342
 final emotion : 0.7759815242494227
