In [6]:
from transformers import pipeline
from tqdm import tqdm
import numpy as np
from collections import Counter
import math



from itertools import product
classifier = pipeline("text-classification",model='bhadresh-savani/distilbert-base-uncased-emotion', return_all_scores=True,)

def emotion_scores_function(sample): 
    emotion=classifier(sample)
    return emotion[0]


class BigramLM:
    def __init__(self):
        self.vocab = set()
        self.bigram_counts = {}
        self.unigram_counts = {}
        self.bigram_probs = {}
        self.emotions = ['joy','sadness','love','anger','fear','surprise']
        self.all_bigram_probs = {i:{} for i in self.emotions }
        self.total_prob = {i:0 for i in self.emotions   }

    def scale_bigram_probabilities(self, bigram_probabilities):

      # Find the score corresponding to the given emotion label
      
      total_probability = {}
      for bigram, prob in tqdm( bigram_probabilities.items(), desc="SCALING EMOTIONS"): #IMPLEMNET TQDM
          # Obtain emotional scores for the current bigram
          emotion_scores = emotion_scores_function(" ".join(bigram))

          for score in emotion_scores:
              scaled_probabilities = self.all_bigram_probs[score["label"]]
              scaled_probabilities[bigram] = prob * score["score"]
              self.total_prob[score["label"]]+=(prob * score["score"])
      
      for i in self.emotions:
        scaled_probabilities = self.all_bigram_probs[i]
        for bigram, prob in scaled_probabilities.items():
            scaled_probabilities[bigram] = prob / self.total_prob[score["label"]]


      

    def learn_model(self, corpus,smooth ="laplace"):
        # Implement code to learn bigram model from the dataset
        for sentence in corpus:
            tokens = sentence.split()
            # Add a start token at the beginning of each sentence
            tokens = ['<start>'] + tokens
            self.vocab.update(tokens)
        print(len(self.vocab))
        # Generate all possible bigrams using product
        all_bigrams = list(product(self.vocab, repeat=2))

        # Initialize bigram counts and unigram counts
        self.bigram_counts = {bigram: 0 for bigram in all_bigrams}
        self.unigram_counts = {word: 0 for word in self.vocab}

        # Update bigram and unigram counts
        for sentence in corpus:
            tokens = sentence.split()
            # Add a start token at the beginning of each sentence
            tokens = ['<start>'] + tokens
            for i in range(len(tokens) - 1):
                bigram = (tokens[i], tokens[i + 1])
                self.bigram_counts[bigram] += 1
                self.unigram_counts[tokens[i]] += 1
            self.unigram_counts[tokens[i+1]] += 1
        
        time_modify_dict = {}
        count =500000
        for i,j in self.bigram_counts.items():
            if j !=0:
              time_modify_dict[i]=j
            else:
              if count:
                time_modify_dict[i]=j
                count-=1
        print(len(time_modify_dict),"kkook")
        self.bigram_counts = time_modify_dict
        # Calculate initial bigram probabilities
        if smooth ==None:
          for bigram, count in tqdm(self.bigram_counts.items(), desc="Generating probalities of bigram"):
              previous_word = bigram[0]
              if count == 0:
                  self.bigram_probs[bigram] = 0
              else:
                  self.bigram_probs[bigram] = count / self.unigram_counts[previous_word]
        elif smooth =="laplace":
          for bigram, count in tqdm(self.bigram_counts.items(), desc="Generating probalities of bigram"):
            previous_word = bigram[0]
            self.bigram_probs[bigram] = (count + 1) / (self.unigram_counts[previous_word] + len(self.vocab))
        elif smooth == "kneser-ney":
#             bigram_counts = Counter(zip(corpus, corpus[1:]))
            c_star_1 = sum(1 for count in self.bigram_counts.values() if count == 1)
            c_star_2 = sum(1 for count in self.bigram_counts.values() if count == 2)
            d = c_star_1 / (c_star_1 + 2 * c_star_2)
#             unigram_counts = Counter(corpus)
#             total_bigrams = len(bigram_counts)
#             print(unigram_counts)
            for bigram in tqdm(self.bigram_counts.keys(), desc="Generating probabilities of bigram"):
#                 print(bigram[1],self.unigram_counts[bigram[1]])
                discounted_prob = max(self.bigram_counts.get(bigram, 0) - d, 0) / self.unigram_counts[bigram[1]]
                backoff_prob = sum(1 for key in self.bigram_counts.keys() if key[1] == bigram[1]) / len(self.bigram_counts)
                self.bigram_probs[bigram] = discounted_prob + 0.5 * backoff_prob
        
        
        self.scale_bigram_probabilities(self.bigram_probs)


    def generate_next_word(self, current_word,emotion="fear"):

        if not self.bigram_probs:
            raise ValueError("Model has not been trained yet.")
        bigram_probs = self.all_bigram_probs[emotion]
        possible_next_words = [w2 for w1, w2 in bigram_probs if w1 == current_word and w2 !="<start>"]
        emotional_normlization = []
        
        probabilities = [bigram_probs.get((current_word, w2), 0) for w2 in tqdm(possible_next_words, desc="Choosing the next word")]
        probabilities = np.array(probabilities)

        # Normalize probabilities
        norm = probabilities / sum(probabilities)

        # Choose the next word based on probabilities
        next_word = np.random.choice(possible_next_words, p=norm)

        return next_word


# Example usage:
# Assuming you have a dataset, you can create an instance of BigramLM and train it on the dataset
# For simplicity, let's consider a small dataset:
file_path = 'D:\Downloads\corpus.txt'
corpus=[]
try:
    with open(file_path, 'r') as file:
        for line in file:
            corpus.append( line.strip())
except FileNotFoundError:
    print(f"File '{file_path}' not found.")
except Exception as e:
    print(f"An error occurred: {e}")



bigram_model = BigramLM()
bigram_model.learn_model(corpus,"kneser-ney")

# Generate a sequence of words


  from .autonotebook import tqdm as notebook_tqdm
Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [90]:
# import pickle

# with open('kneser-ney_vocab.pkl', 'wb') as file:
#     pickle.dump(bigram_model.vocab, file)

# with open('kneser-ney_bigram_counts.pkl', 'wb') as file:
#     pickle.dump(bigram_model.bigram_counts, file)

# with open('kneser-ney_unigram_counts.pkl', 'wb') as file:
#     pickle.dump(bigram_model.unigram_counts, file)

# with open('kneser-ney_bigram_probs.pkl', 'wb') as file:
#     pickle.dump(bigram_model.bigram_probs, file)

# with open('kneser-ney_model.pkl', 'wb') as file:
#     pickle.dump(bigram_model, file)

# with open('kneser-ney_total_prob.pkl', 'wb') as file:
#     pickle.dump(bigram_model.total_prob, file)

# with open('kneser-ney_all_bigram_probs.pkl', 'wb') as file:
#     pickle.dump(bigram_model.all_bigram_probs, file)


In [8]:
import pickle
from transformers import pipeline
from tqdm import tqdm
import numpy as np
from collections import Counter
import math


# Load vocabulary
with open('kneser-ney_vocab.pkl', 'rb') as file:
    loaded_vocab = pickle.load(file)

# Load bigram counts
with open('kneser-ney_bigram_counts.pkl', 'rb') as file:
    loaded_bigram_counts = pickle.load(file)

# Load unigram counts
with open('kneser-ney_unigram_counts.pkl', 'rb') as file:
    loaded_unigram_counts = pickle.load(file)

# Load bigram probabilities
with open('kneser-ney_bigram_probs.pkl', 'rb') as file:
    loaded_bigram_probs = pickle.load(file)

# Load the entire bigram model
with open('kneser-ney_model.pkl', 'rb') as file:
    bigram_model = pickle.load(file)

# Load total probability
with open('kneser-ney_total_prob.pkl', 'rb') as file:
    loaded_total_prob = pickle.load(file)

# Load all bigram probabilities
with open('kneser-ney_all_bigram_probs.pkl', 'rb') as file:
    loaded_all_bigram_probs = pickle.load(file)


In [26]:
z = []
for i,j in loaded_bigram_probs.items():
    if loaded_bigram_counts[i] and i[0]=='i' and i[1] in ['wanna', 'gain', 'guess', 'once', 'meant']:
        z.append((i,j))
for i in sorted(z,key = lambda x: x[1]):
    print(i)

(('i', 'once'), 0.01605965095305789)
(('i', 'wanna'), 0.04797111687037012)
(('i', 'meant'), 0.04797111687037012)
(('i', 'gain'), 0.09584784950729275)
(('i', 'guess'), 0.8609763059257162)


In [25]:
z = []
for i,j in loaded_bigram_probs.items():
    if loaded_bigram_counts[i]: #and i[0]=='i' and i[1] in ['wanna', 'gain', 'guess', 'once', 'meant', 'cry', 'how', 'left', 'stared', 'rely', 'kept', 'know', 'wake', 'invest', 'watch', 'exceptionally', 'figure', 'dream', 'took', 'confused']:
        z.append((i,j))
for i in sorted(z,key = lambda x: x[1],reverse=True):
    print(i)

(('i', 'm'), 0.9863855208846114)
(('i', 'am'), 0.9426514705041402)
(('a', 'lot'), 0.935500851551415)
(('the', 'rest'), 0.8990279836714317)
(('http', 'www'), 0.8990279836714317)
(('a', 'href'), 0.8877521064074193)
(('i', 've'), 0.8795239211900673)
(('in', 'general'), 0.8653410902362839)
(('i', 'guess'), 0.8609763059257162)
(('a', 'bit'), 0.8274873628268078)
(('the', 'same'), 0.8187350744649923)
(('href', 'http'), 0.8064734359381854)
(('that', 'companion'), 0.7979682567420838)
(('i', 'haven'), 0.7979673033659884)
(('the', 'window'), 0.7979673033659884)
(('i', 'cant'), 0.7890542371403786)
(('i', 'didn'), 0.7885089995249642)
(('a', 'little'), 0.7875310152437279)
(('i', 'don'), 0.7830052241370911)
(('i', 'didnt'), 0.777294678864062)
(('my', 'stomach'), 0.7740289370475271)
(('in', 'order'), 0.7740289370475271)
(('i', 'dont'), 0.745692216221874)
(('as', 'possible'), 0.7417345541467495)
(('my', 'husband'), 0.7417345541467495)
(('my', 'lungs'), 0.7305944698717883)
(('the', 'beach'), 0.730594469

In [22]:
z = []

for em,tu in loaded_all_bigram_probs.items():
    count =15
    print(em)
    for a in tu:
        if count==0:
            break
        if a[0]=='i':
            print(a,": ", loaded_all_bigram_probs[em][a])
            count-=1
        
    # print(loaded_all_bigram_probs['joy'][('i','am')])

joy
('i', 'wanna') :  0.0018977973477367952
('i', 'gain') :  0.0037343438921421026
('i', 'guess') :  0.018145297783523896
('i', 'once') :  2.534920334249538e-05
('i', 'meant') :  8.196837677009532e-05
('i', 'cry') :  0.00048503403802177585
('i', 'how') :  2.5123894011481706e-05
('i', 'left') :  7.083717674728736e-05
('i', 'stared') :  2.9925451434707462e-05
('i', 'rely') :  0.0016091087416302992
('i', 'kept') :  0.0003459119279067267
('i', 'know') :  0.01845243162379497
('i', 'wake') :  0.01245271738564996
('i', 'invest') :  0.004433996166186047
('i', 'watch') :  0.007094613621896652
sadness
('i', 'wanna') :  6.186434269885351e-05
('i', 'gain') :  0.00014943252030826724
('i', 'guess') :  0.003592226095556484
('i', 'once') :  9.744542225456645e-06
('i', 'meant') :  3.074249403437763e-05
('i', 'cry') :  0.002824774804610345
('i', 'how') :  5.9701475862340634e-06
('i', 'left') :  0.0027630187744820483
('i', 'stared') :  3.917543684187345e-05
('i', 'rely') :  0.00020192884682400383
('i', '

In [13]:
def generate_word_sequences(bigram_model, emotion, num_sequences=1, max_length=50):
    all_sequences = []

    for _ in range(num_sequences):
        current_word = "<start>"
        generated_sequence = []

        for _ in range(max_length):
            try:
                if current_word!= "<start>":
                    generated_sequence.append(current_word)
                current_word = bigram_model.generate_next_word(current_word, emotion)
            
            except Exception as e:
                current_word = bigram_model.generate_next_word("<start>", emotion)

        all_sequences.append(' '.join(generated_sequence)[1:])

    return all_sequences

# Example usage:
emotion_input = 'surprise'
num_sequences = 1
generated_sequences = generate_word_sequences(bigram_model, emotion_input, num_sequences,20)
for i, sequence in enumerate(generated_sequences, start=1):
    print(f"Sequence {i}: {sequence}")


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 38/38 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 54/54 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 45/45 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 28/28 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 21/21 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 6/6 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Sequence 1: ve dated because she feels amazing moments and stunned liv was amazingly chill that surprises tv program somehow include





In [14]:
def generate_word_sequences(bigram_model, emotion, num_sequences=1, max_length=50):
    all_sequences = []

    for _ in range(num_sequences):
        current_word = "<start>"
        generated_sequence = []

        for _ in range(max_length):
            try:
                if current_word != "<start>":
                    generated_sequence.append(current_word)
                current_word = bigram_model.generate_next_word(current_word, emotion)

            except Exception as e:
                current_word = bigram_model.generate_next_word("<start>", emotion)

        all_sequences.append(' '.join(generated_sequence)[1:])

    return all_sequences


# Example usage:
emotions = ['joy', 'sadness', 'love', 'anger', 'fear', 'surprise']
num_sequences_per_emotion = 50
generated_corpus = []
generated_labels = []

for emotion in emotions:
    generated_sequences = generate_word_sequences(bigram_model, emotion, num_sequences_per_emotion, 12)
    generated_corpus.extend(generated_sequences)
    generated_labels.extend([emotion] * num_sequences_per_emotion)

# Print or use the generated corpus and labels as needed
for i, (sequence, label) in enumerate(zip(generated_corpus, generated_labels), start=1):
    print(f"Sequence {i}: {sequence} - Emotion: {label}")


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 21/21 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 7/7 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 3/3 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 673/673 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 34/34 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Sequence 1: d gotten married and excitement if ahead of bliss once hows - Emotion: joy
Sequence 2: m feeling grew up wed have supported which aahhh work tomorrow - Emotion: joy
Sequence 3:  bet taylor swift basks in return id hoped it reminds - Emotion: joy
Sequence 4: m having baptized anyone feels really attending during our traditional holidays - Emotion: joy
Sequence 5: ccured while balancing out historical past grandparents i recommend using stellarium - Emotion: joy
Sequence 6: d recommend using stellarium to belong and ethical views lol i - Emotion: joy
Sequence 7: ve gotten married i deal are prepared early days straight home - Emotion: joy
Sequence 8: ve found herself ready to be loaded so honored that arrived - Emotion: joy
Sequence 9: ccured while balancing out historical past event where papamoka shows up - Emotion: joy
Sequence 10: d hoped it won t habitual both internally and taiwan depending - Emotion: joy
Sequence 11: uring our planet and joy surrender to learn that are




In [15]:
import os
# Example usage:
emotions = ['joy', 'sadness', 'love', 'anger', 'fear', 'surprise']
# Create a directory to store generated samples
output_directory = "generated_samples_k"
os.makedirs(output_directory, exist_ok=True)

def generate_and_save_sequences(bigram_model, emotion, num_sequences=50, max_length=20):
    generated_sequences = generate_word_sequences(bigram_model, emotion, num_sequences, max_length)
    
    # Save the generated sequences to a file
    output_filename = f"{output_directory}/gen_{emotion.lower()}.txt"
    with open(output_filename, 'w') as file:
        for sequence in generated_sequences:
            file.write(f"{sequence}\n")

    return output_filename

# Example usage:
for emotion in emotions:
    generated_file = generate_and_save_sequences(bigram_model, emotion, num_sequences_per_emotion, 30)
    print(f"Generated {num_sequences_per_emotion} samples for {emotion}: {generated_file}")


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 137/137 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 129/129 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 145/145 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 11/11 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 9/9 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for joy: generated_samples_k/gen_joy.txt


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 137/137 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 6/6 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 673/673 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 268/268 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for sadness: generated_samples_k/gen_sadness.txt


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 440/440 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 167/167 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 10/10 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for love: generated_samples_k/gen_love.txt


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1996.34it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 105/105 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 19/19 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 7/7 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for anger: generated_samples_k/gen_anger.txt


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 19/19 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 38/38 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 9/9 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 125/125 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 9/9 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for fear: generated_samples_k/gen_fear.txt


Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 16/16 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 19/19 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 388/388 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]
Choosing the next word: 100%|██████████████████████████████████████████████████████████████████| 34/34 [00:00<?, ?it/s]
Choosing the next word: 100%|████████████████████████████████████████████████████████████████| 137/137 [00:00<?, ?it/s]
Choosing the next word: 100%|███████████

Generated 50 samples for surprise: generated_samples_k/gen_surprise.txt





In [17]:
emotions = ['joy', 'sadness', 'love', 'anger', 'fear', 'surprise']
# output_directory = "generated_samples_SAVE"
def read_generated_sequences_and_labels(emotion):
    input_filename = f"{output_directory}/gen_{emotion.lower()}.txt"
    generated_sequences = []
    labels = []  # Assign the emotion label to all sequences

    try:
        with open(input_filename, 'r') as file:
            for line in file:
                generated_sequences.append(line.strip())
                labels.append(emotion)
    except FileNotFoundError:
        print(f"File '{input_filename}' not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

    return generated_sequences, labels

# Example usage:
all_generated_sequences = []
all_generated_labels = []

for emotion in emotions:
    generated_sequences, labels = read_generated_sequences_and_labels(emotion)
    all_generated_sequences.extend(generated_sequences)
    all_generated_labels.extend(labels)


In [10]:
# with open(r"D:\Downloads\tfidf_svc_model.pkl", 'rb') as file:
#     loaded_label_encoder, loaded_tfidf_vectorizer, loaded_svc = pickle.load(file)

In [18]:
from sklearn.metrics import accuracy_score, f1_score, classification_report


# Assuming you have loaded the components using pickle
# with open('model.pkl', 'rb') as file:
#     loaded_label_encoder, loaded_tfidf_vectorizer, loaded_svc, loaded_grid_search = pickle.load(file)

# Sample testing data
test_corpus = all_generated_sequences
test_labels = all_generated_labels

# Transform labels using the loaded label encoder
encoded_test_labels = loaded_label_encoder.transform(test_labels)

# Use the loaded TF-IDF vectorizer and SVC model for prediction
X_test = loaded_tfidf_vectorizer.transform(test_corpus)
y_pred = loaded_svc.predict(X_test)
# print(y_pred)
# Decode the predicted labels back to original labels
# decoded_pred_labels = loaded_label_encoder.inverse_transform(y_pred)
# print(decoded_pred_labels)
# Calculate accuracy
accuracy = accuracy_score(test_labels, y_pred)
print("Accuracy:", accuracy)

# Calculate F1 score
f1 = f1_score(test_labels, y_pred, average='macro')
print("F1 Score:", f1)

# Generate classification report
report = classification_report(test_labels, y_pred, target_names=loaded_label_encoder.classes_)
print("Classification Report:")
print(report)

Accuracy: 0.6633333333333333
F1 Score: 0.6582937316596253
Classification Report:
              precision    recall  f1-score   support

       anger       0.60      0.42      0.49        50
        fear       0.61      0.62      0.61        50
         joy       0.52      0.50      0.51        50
        love       0.72      0.88      0.79        50
     sadness       0.61      0.76      0.68        50
    surprise       0.93      0.80      0.86        50

    accuracy                           0.66       300
   macro avg       0.67      0.66      0.66       300
weighted avg       0.67      0.66      0.66       300

