In [5]:
from transformers import pipeline
from tqdm import tqdm
import numpy as np
from itertools import product
classifier = pipeline("text-classification",model='bhadresh-savani/distilbert-base-uncased-emotion', return_all_scores=True,)

def emotion_scores_function(sample):
    emotion=classifier(sample)
    return emotion[0]


class BigramLM:
    def __init__(self):
        self.vocab = set()
        self.bigram_counts = {}
        self.unigram_counts = {}
        self.bigram_probs = {}
        self.emotions = ['joy','sadness','love','anger','fear','surprise']
        self.all_bigram_probs = {i:{} for i in self.emotions }
        self.total_prob = {i:0 for i in self.emotions   }

    def scale_bigram_probabilities(self, bigram_probabilities):

      # Find the score corresponding to the given emotion label

      total_probability = {}
      for bigram, prob in tqdm( bigram_probabilities.items(), desc="SCALING EMOTIONS"): #IMPLEMNET TQDM
          # Obtain emotional scores for the current bigram
          emotion_scores = emotion_scores_function(" ".join(bigram))

          for score in emotion_scores:
              scaled_probabilities = self.all_bigram_probs[score["label"]]
              scaled_probabilities[bigram] = prob * score["score"]
              self.total_prob[score["label"]]+=(prob * score["score"])

      for i in self.emotions:
        scaled_probabilities = self.all_bigram_probs[i]
        for bigram, prob in scaled_probabilities.items():
            scaled_probabilities[bigram] = prob / self.total_prob[score["label"]]




    def learn_model(self, corpus,smooth ="laplace"):
        # Implement code to learn bigram model from the dataset
        for sentence in corpus:
            tokens = sentence.split()
            # Add a start token at the beginning of each sentence
            tokens = ['<start>'] + tokens
            self.vocab.update(tokens)
        print(len(self.vocab))
        # Generate all possible bigrams using product
        all_bigrams = list(product(self.vocab, repeat=2))

        # Initialize bigram counts and unigram counts
        self.bigram_counts = {bigram: 0 for bigram in all_bigrams}
        self.unigram_counts = {word: 0 for word in self.vocab}

        # Update bigram and unigram counts
        for sentence in corpus:
            tokens = sentence.split()
            # Add a start token at the beginning of each sentence
            tokens = ['<start>'] + tokens
            for i in range(len(tokens) - 1):
                bigram = (tokens[i], tokens[i + 1])
                self.bigram_counts[bigram] += 1
                self.unigram_counts[tokens[i]] += 1

        time_modify_dict = {}
        count =500000
        for i,j in self.bigram_counts.items():
            if j !=0:
              time_modify_dict[i]=j
            else:
              if count:
                time_modify_dict[i]=j
                count-=1
        print(len(time_modify_dict),"kkook")
        self.bigram_counts = time_modify_dict
        # Calculate initial bigram probabilities
        if smooth ==None:
          for bigram, count in tqdm(self.bigram_counts.items(), desc="Generating probalities of bigram"):
              previous_word = bigram[0]
              if count == 0:
                  self.bigram_probs[bigram] = 0
              else:
                  self.bigram_probs[bigram] = count / self.unigram_counts[previous_word]
        elif smooth =="laplace":
          for bigram, count in tqdm(self.bigram_counts.items(), desc="Generating probalities of bigram"):
            previous_word = bigram[0]
            self.bigram_probs[bigram] = (count + 1) / (self.unigram_counts[previous_word] + len(self.vocab))
        elif smooth == "kneser-ney":
            bigram_counts = Counter(zip(corpus, corpus[1:]))
            c_star_1 = sum(1 for count in bigram_counts.values() if count == 1)
            c_star_2 = sum(1 for count in bigram_counts.values() if count == 2)
            d = c_star_1 / (c_star_1 + 2 * c_star_2)
            unigram_counts = Counter(corpus)
            total_bigrams = len(bigram_counts)

            for bigram in tqdm(self.bigram_counts.keys(), desc="Generating probabilities of bigram"):
                discounted_prob = max(bigram_counts.get(bigram, 0) - d, 0) / unigram_counts[bigram[1]]
                backoff_prob = sum(1 for key in bigram_counts.keys() if key[1] == bigram[1]) / total_bigrams
                self.bigram_probs[bigram] = discounted_prob + 0.5 * backoff_prob


        self.scale_bigram_probabilities(self.bigram_probs)


    def generate_next_word(self, current_word,emotion="fear"):

        if not self.bigram_probs:
            raise ValueError("Model has not been trained yet.")
        bigram_probs = self.all_bigram_probs[emotion]
        possible_next_words = [w2 for w1, w2 in bigram_probs if w1 == current_word and w2 !="<start>"]
        emotional_normlization = []

        probabilities = [bigram_probs.get((current_word, w2), 0) for w2 in tqdm(possible_next_words, desc="Choosing the next word")]
        probabilities = np.array(probabilities)

        # Normalize probabilities
        norm = probabilities / sum(probabilities)

        # Choose the next word based on probabilities
        next_word = np.random.choice(possible_next_words, p=norm)

        return next_word


# Example usage:
# Assuming you have a dataset, you can create an instance of BigramLM and train it on the dataset
# For simplicity, let's consider a small dataset:
file_path = 'D:\Downloads\corpus.txt'
corpus=[]
try:
    with open(file_path, 'r') as file:
        for line in file:
            corpus.append( line.strip())
except FileNotFoundError:
    print(f"File '{file_path}' not found.")
except Exception as e:
    print(f"An error occurred: {e}")



bigram_model = BigramLM()
bigram_model.learn_model(corpus,"kneser-ney")

# Generate a sequence of words


Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [None]:
# import pickle

# with open('vocab.pkl', 'wb') as file:
#     pickle.dump(bigram_model.vocab, file)

# with open('bigram_counts.pkl', 'wb') as file:
#     pickle.dump(bigram_model.bigram_counts, file)

# with open('unigram_counts.pkl', 'wb') as file:
#     pickle.dump(bigram_model.unigram_counts, file)

# with open('bigram_probs.pkl', 'wb') as file:
#     pickle.dump(bigram_model.bigram_probs, file)

# with open('model.pkl', 'wb') as file:
#     pickle.dump(bigram_model, file)

# with open('total_prob.pkl', 'wb') as file:
#     pickle.dump(bigram_model.total_prob, file)

# with open('all_bigram_probs.pkl', 'wb') as file:
#     pickle.dump(bigram_model.all_bigram_probs, file)


In [9]:
import pickle
from transformers import pipeline
from tqdm import tqdm
import numpy as np
from itertools import product
# Load vocabulary
with open(r'C:\Users\Manvendra Nema\Notebooks\Submission_NLP_A1\vocab.pkl', 'rb') as file:
    loaded_vocab = pickle.load(file)

# Load bigram counts
with open(r'C:\Users\Manvendra Nema\Notebooks\Submission_NLP_A1\bigram_counts.pkl', 'rb') as file:
    loaded_bigram_counts = pickle.load(file)

# Load unigram counts
with open(r'C:\Users\Manvendra Nema\Notebooks\Submission_NLP_A1\unigram_counts.pkl', 'rb') as file:
    loaded_unigram_counts = pickle.load(file)

# Load bigram probabilities
with open(r'C:\Users\Manvendra Nema\Notebooks\Submission_NLP_A1\bigram_probs.pkl', 'rb') as file:
    loaded_bigram_probs = pickle.load(file)

# Load the entire bigram model
with open(r'C:\Users\Manvendra Nema\Notebooks\Submission_NLP_A1\model.pkl', 'rb') as file:
    bigram_model = pickle.load(file)

# Load total probability
with open(r'C:\Users\Manvendra Nema\Notebooks\Submission_NLP_A1\total_prob.pkl', 'rb') as file:
    loaded_total_prob = pickle.load(file)

# Load all bigram probabilities
with open(r'C:\Users\Manvendra Nema\Notebooks\Submission_NLP_A1\all_bigram_probs.pkl', 'rb') as file:
    loaded_all_bigram_probs = pickle.load(file)


In [69]:
z = []
for i,j in loaded_bigram_probs.items():
    if loaded_bigram_counts[i] and i[0]=='i' and i[1] in ['wanna', 'gain', 'guess', 'once', 'meant']:
        z.append((i,j))
for i in sorted(z,key = lambda x: x[1]):
    print(i)

(('i', 'wanna'), 0.00021706099413935315)
(('i', 'gain'), 0.00021706099413935315)
(('i', 'once'), 0.00021706099413935315)
(('i', 'meant'), 0.00021706099413935315)
(('i', 'guess'), 0.0014108964619057955)


In [68]:
z = []
for i,j in loaded_bigram_probs.items():
    if loaded_bigram_counts[i]: #and i[0]=='i' and i[1] in ['wanna', 'gain', 'guess', 'once', 'meant', 'cry', 'how', 'left', 'stared', 'rely', 'kept', 'know', 'wake', 'invest', 'watch', 'exceptionally', 'figure', 'dream', 'took', 'confused']:
        z.append((i,j))
for i in sorted(z,key = lambda x: x[1],reverse=True):
    print(i)

(('<start>', 'i'), 0.2693486590038314)
(('i', 'feel'), 0.11048404601693075)
(('feel', 'like'), 0.035092684307343996)
(('i', 'am'), 0.031907966138484914)
(('<start>', 'im'), 0.027203065134099615)
(('that', 'i'), 0.026552944962986803)
(('and', 'i'), 0.02311046511627907)
(('im', 'feeling'), 0.022450728363324766)
(('i', 'was'), 0.02192316040807467)
(('to', 'be'), 0.018633540372670808)
(('when', 'i'), 0.01838105337575115)
(('of', 'the'), 0.01829663212435233)
(('but', 'i'), 0.018002423403150423)
(('in', 'the'), 0.017860151642796966)
(('to', 'feel'), 0.017006802721088437)
(('feel', 'so'), 0.015706806282722512)
(('i', 'have'), 0.01562839157803343)
(('like', 'i'), 0.015449915110356536)
(('am', 'feeling'), 0.014821272885789015)
(('want', 'to'), 0.014275388507408745)
(('a', 'little'), 0.014184397163120567)
(('feel', 'that'), 0.01146172350360832)
(('because', 'i'), 0.011402102262604668)
(('at', 'the'), 0.011291460832745237)
(('feeling', 'a'), 0.011259449895447965)
(('don', 't'), 0.0107487702678083

In [65]:
z = []

for em,tu in loaded_all_bigram_probs.items():
    count =15
    print(em)
    for a in tu:
        if count==0:
            break
        if a[0]=='i':
            print(a,": ", loaded_all_bigram_probs[em][a])
            count-=1
        
    # print(loaded_all_bigram_probs['joy'][('i','am')])

joy
('i', 'wanna') :  7.729681491808414e-05
('i', 'gain') :  7.612433806923575e-05
('i', 'guess') :  0.00026765653411676293
('i', 'once') :  3.0840386109186997e-06
('i', 'meant') :  3.3385516403475177e-06
('i', 'cry') :  5.962919619667323e-06
('i', 'how') :  2.6286877569657136e-05
('i', 'left') :  3.3651364254026753e-06
('i', 'stared') :  3.051598102296229e-07
('i', 'rely') :  6.55399021846609e-05
('i', 'kept') :  3.705188181427376e-06
('i', 'know') :  0.0026085764874219045
('i', 'wake') :  0.00015538525806827366
('i', 'invest') :  9.038670055607125e-05
('i', 'watch') :  0.00010465531834702186
sadness
('i', 'wanna') :  2.5197193226794485e-06
('i', 'gain') :  3.0461714354751783e-06
('i', 'guess') :  5.298798608714352e-05
('i', 'once') :  1.1855419700174903e-06
('i', 'meant') :  1.252134151377875e-06
('i', 'cry') :  3.472726403336819e-05
('i', 'how') :  6.246505362600175e-06
('i', 'left') :  0.00013125784438376925
('i', 'stared') :  3.9948499685667466e-07
('i', 'rely') :  8.2246752669418

In [10]:
def generate_word_sequences(bigram_model, emotion, num_sequences=1, max_length=50):
    all_sequences = []

    for _ in range(num_sequences):
        current_word = "<start>"
        generated_sequence = []

        for _ in range(max_length):
            try:
                if current_word!= "<start>":
                    generated_sequence.append(current_word)
                current_word = bigram_model.generate_next_word(current_word, emotion)

            except Exception as e:
                current_word = bigram_model.generate_next_word("<start>", emotion)

        all_sequences.append(' '.join(generated_sequence)[1:])

    return all_sequences

# Example usage:
emotion_input = 'surprise'
num_sequences = 1
generated_sequences = generate_word_sequences(bigram_model, emotion_input, num_sequences,20)
for i, sequence in enumerate(generated_sequences, start=1):
    print(f"Sequence {i}: {sequence}")


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████| 440/440 [00:00<00:00, 110257.72it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 6/6 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 34/34 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 10/10 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 673/673 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 324/324 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 8/8 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Sequence 1:  wonder if class and of dazed a diy project if the weird ones who would love seeing something





In [None]:
def generate_word_sequences(bigram_model, emotion, num_sequences=1, max_length=50):
    all_sequences = []

    for _ in range(num_sequences):
        current_word = "<start>"
        generated_sequence = []

        for _ in range(max_length):
            try:
                if current_word != "<start>":
                    generated_sequence.append(current_word)
                current_word = bigram_model.generate_next_word(current_word, emotion)

            except Exception as e:
                current_word = bigram_model.generate_next_word("<start>", emotion)

        all_sequences.append(' '.join(generated_sequence)[1:])

    return all_sequences


# Example usage:
emotions = ['joy', 'sadness', 'love', 'anger', 'fear', 'surprise']
num_sequences_per_emotion = 50
generated_corpus = []
generated_labels = []

for emotion in emotions:
    generated_sequences = generate_word_sequences(bigram_model, emotion, num_sequences_per_emotion, 25)
    generated_corpus.extend(generated_sequences)
    generated_labels.extend([emotion] * num_sequences_per_emotion)

# Print or use the generated corpus and labels as needed
for i, (sequence, label) in enumerate(zip(generated_corpus, generated_labels), start=1):
    print(f"Sequence {i}: {sequence} - Emotion: {label}")


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 137/137 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1000.31it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████| 433/433 [00:00<00:00, 433031.39it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 21/21 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 388/388 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 433/433 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Sequence 1: m destined to keep my ability to be thankful for people ought to know what may feel let me feeling gracious and pretend to - Emotion: joy
Sequence 2:  can be perfect for some cute hairstyle starts with me feeling not enter into a big city solo for more than inspired because - Emotion: joy
Sequence 3:  sort of students blogs were always feel a little bit too and am beginning to all time can actually prefer peep toe shoes - Emotion: joy
Sequence 4:  enjoy feeling fabulous future career choices friends online community around feeling convinced that were ok and extremely passionate about medical and comfortable in - Emotion: joy
Sequence 5:  feel rather artistic and happy and join in this since its so privileged to learn from today is being very small for the - Emotion: joy
Sequence 6: m discovering pretty carefree and obtain the logic explain to offer as well i work tomorrow and feel so glad to have found myself - Emotion: joy
Sequence 7:  could look into account this patrioti




In [17]:
import os
# Example usage:
emotions = ['joy', 'sadness', 'love', 'anger', 'fear', 'surprise']
# Create a directory to store generated samples
output_directory = "generated_samples"
os.makedirs(output_directory, exist_ok=True)

def generate_and_save_sequences(bigram_model, emotion, num_sequences=50, max_length=20):
    generated_sequences = generate_word_sequences(bigram_model, emotion, num_sequences, max_length)

    # Save the generated sequences to a file
    output_filename = f"{output_directory}/gen_{emotion.lower()}.txt"
    with open(output_filename, 'w') as file:
        for sequence in generated_sequences:
            file.write(f"{sequence}\n")

    return output_filename

# Example usage:
num_sequences_per_emotion = 50
for emotion in emotions:
    generated_file = generate_and_save_sequences(bigram_model, emotion, num_sequences_per_emotion, 25)
    print(f"Generated {num_sequences_per_emotion} samples for {emotion}: {generated_file}")


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 440/440 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 53/53 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 15/15 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 105/105 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for joy: generated_samples/gen_joy.txt


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████| 440/440 [00:00<00:00, 110535.08it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 130/130 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 11/11 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 673/673 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 145/145 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for sadness: generated_samples/gen_sadness.txt


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 440/440 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████████████████████████████████████████████████| 40/40 [00:00<00:00, 38418.17it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 440/440 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 40/40 [00:00<?, ?it/s]
Choosing the next word: 100%|█████████████████████████████████████████████████| 5429/5429 [00:00<00:00, 1472128.03it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 109/109 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for love: generated_samples/gen_love.txt


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 440/440 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 5/5 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 433/433 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 8/8 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 71/71 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 6/6 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 433/433 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for anger: generated_samples/gen_anger.txt


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 440/440 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 4/4 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 268/268 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 43/43 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 10/10 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 162/162 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 737/737 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for fear: generated_samples/gen_fear.txt


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 440/440 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 9/9 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 87/87 [00:00<?, ?it/s]
Choosing the next word: 0it [00:00, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 440/440 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 12/12 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 6/6 [00:00<?, ?it/

Generated 50 samples for surprise: generated_samples/gen_surprise.txt





In [19]:
emotions = ['joy', 'sadness', 'love', 'anger', 'fear', 'surprise']
def read_generated_sequences_and_labels(emotion):
    input_filename = f"{output_directory}/gen_{emotion.lower()}.txt"
    generated_sequences = []
    labels = []  # Assign the emotion label to all sequences

    try:
        with open(input_filename, 'r') as file:
            for line in file:
                generated_sequences.append(line.strip())
                labels.append(emotion)
    except FileNotFoundError:
        print(f"File '{input_filename}' not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

    return generated_sequences, labels

# Example usage:
all_generated_sequences = []
all_generated_labels = []

for emotion in emotions:
    generated_sequences, labels = read_generated_sequences_and_labels(emotion)
    all_generated_sequences.extend(generated_sequences)
    all_generated_labels.extend(labels)


300

In [15]:
# with open(r"D:\Downloads\tfidf_svc_model.pkl", 'rb') as file:
#     loaded_label_encoder, loaded_tfidf_vectorizer, loaded_svc = pickle.load(file)

In [25]:
from sklearn.metrics import accuracy_score, f1_score, classification_report


# Assuming you have loaded the components using pickle
# with open('model.pkl', 'rb') as file:
#     loaded_label_encoder, loaded_tfidf_vectorizer, loaded_svc, loaded_grid_search = pickle.load(file)

# Sample testing data
test_corpus = all_generated_sequences
test_labels = all_generated_labels

# Transform labels using the loaded label encoder
encoded_test_labels = loaded_label_encoder.transform(test_labels)

# Use the loaded TF-IDF vectorizer and SVC model for prediction
X_test = loaded_tfidf_vectorizer.transform(test_corpus)
y_pred = loaded_svc.predict(X_test)
# print(y_pred)
# Decode the predicted labels back to original labels
# decoded_pred_labels = loaded_label_encoder.inverse_transform(y_pred)
# print(decoded_pred_labels)
# Calculate accuracy
ccuracy = accuracy_score(test_labels, y_pred)
print("Accuracy:", accuracy)

# Calculate F1 score
f1 = f1_score(test_labels, y_pred, average='macro')
print("F1 Score:", f1)

# Generate classification report
report = classification_report(test_labels, y_pred, target_names=loaded_label_encoder.classes_)
print("Classification Report:")
print(report)

Accuracy: 0.7733333333333333
F1 Score: 0.7618090454034839
Classification Report:
              precision    recall  f1-score   support

       anger       0.81      0.44      0.57        50
        fear       0.70      0.70      0.70        50
         joy       0.73      0.74      0.73        50
        love       0.81      1.00      0.89        50
     sadness       0.75      0.80      0.78        50
    surprise       0.84      0.96      0.90        50

    accuracy                           0.77       300
   macro avg       0.77      0.77      0.76       300
weighted avg       0.77      0.77      0.76       300

