In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
path = 'model_output.h5'
my_reloaded_model = tf.keras.models.load_model(
       (path),
       custom_objects={'KerasLayer':hub.KerasLayer}
)
result = ""
def predict_res(sentence):
        reviews = [sentence]
        result = my_reloaded_model.predict(reviews)
        return result
#print(predict_res("not a sarcasm"))



In [6]:
#working with text input and dark mode
import tkinter as tk
from tkinter import filedialog, ttk
import moviepy.editor as mp
import simpleaudio as sa
from transformers import pipeline
from PIL import Image, ImageTk
import time
import os
import tempfile

class VideoTranscriberApp:
    def __init__(self, master):
        self.master = master
        self.master.title("Emotion and Sarcasm Detection")

        # Set dark mode colors
        background_color = "#1E1E1E"  # Dark gray background
        button_color = "#3E3E3E"  # Darker gray for buttons
        text_color = "#FFFFFF"  # White text

        self.master.configure(bg=background_color)

        # Button row
        self.button_row = tk.Frame(master, bg=background_color)
        self.button_row.pack()



        # Select video, audio, and transcribe buttons
        self.select_video_button = tk.Button(self.button_row, text="Select Video", command=self.select_video, bg=button_color, fg=text_color)
        self.select_video_button.pack(side=tk.LEFT, padx=5)

        self.select_audio_button = tk.Button(self.button_row, text="Select Audio", command=self.select_audio, bg=button_color, fg=text_color)
        self.select_audio_button.pack(side=tk.LEFT, padx=5)

        self.transcribe_button = tk.Button(self.button_row, text="Predict", command=self.transcribe, bg=button_color, fg=text_color)
        self.transcribe_button.pack(side=tk.LEFT, padx=5)

        self.function_var = tk.StringVar()
        self.function_var.set("Models")

        # Use a themed Combobox for better appearance
        self.function_dropdown = ttk.Combobox(master, textvariable=self.function_var, values=["DistilBert", "Bert"])
        self.function_dropdown.pack(padx=5, pady=10)
        self.function_dropdown.configure(style="Dark.TCombobox")

        # Style for the Combobox
        style = ttk.Style()
        style.theme_use('clam')  # Choose an available theme

        style.configure("TCombobox", fieldbackground=button_color, foreground=text_color)
        style.map("TCombobox", fieldbackground=[('readonly', button_color)])

        # Labels and other widgets
        self.file_path_label = tk.Label(master, text="Selected File:", bg=background_color, fg=text_color)
        self.file_path_label.pack()

        self.transcription_label = tk.Label(master, text="Transcription:", width=60, height=10, wraplength=400, bg=background_color, fg=text_color)
        self.transcription_label.pack(padx=10, pady=10)

        # User text input box
        self.user_text_entry = tk.Entry(master, width=60, bg=button_color, fg=text_color)
        self.user_text_entry.pack(pady=10, padx=10)

        self.play_button = tk.Button(master, text="Play File", command=self.load_file, bg=button_color, fg=text_color)
        self.play_button.pack(padx=10, pady=10)

        # Transcribe text button
        self.transcribe_text_button = tk.Button(self.button_row, text="Predict(TEXT)", command=self.transcribe_text, bg=button_color, fg=text_color)
        self.transcribe_text_button.pack(pady=10, padx=5)
        
        # Video player
        self.video_player = ttk.Frame(master, style="Dark.TFrame")
        self.video_player.pack()
        self.canvas = tk.Canvas(self.video_player, width=640, height=480, bg=background_color)
        self.canvas.pack()

        # Function execution messages
        self.function_message_label = tk.Label(master, text="", font=("Helvetica", 12), bg=background_color, fg=text_color)
        self.function_message_label.pack(pady=10)

        # File path and type
        self.file_path = None
        self.file_type = None



    def select_video(self):
        file_path = filedialog.askopenfilename(filetypes=[("Video Files", "*.mp4;*.avi")])
        self.file_path_label.config(text="Selected File: " + file_path)
        self.file_path = file_path
        self.file_type = "video"

    def select_audio(self):
        file_path = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav;*.mp3")])
        self.file_path_label.config(text="Selected File: " + file_path)
        self.file_path = file_path
        self.file_type = "audio"

    def transcribe(self):
        if self.file_path:
            if self.file_type == "video":
                transcription = self.transcribe_video(self.file_path)
            elif self.file_type == "audio":
                transcription = self.transcribe_audio(self.file_path)
            else:
                transcription = "Unsupported file type. Please select a valid video or audio file."
            self.transcription_label.config(text="Transcription: " + transcription)
        else:
            self.transcription_label.config(text="Please select a file first.")

    def transcribe_text(self):
        user_text = self.user_text_entry.get()
        if user_text:
            self.transcription_label.config(text="Transcription: " + user_text)
            self.execute_function(user_text)
        else:
            self.transcription_label.config(text="Please enter text first.")

    def transcribe_video(self, video_path):
        audio_path = self.extract_audio(video_path)
        transcription = self.transcribe_audio_pipeline(audio_path)
        self.execute_function(transcription)
        return transcription

    def transcribe_audio(self, audio_path):
        transcription = self.transcribe_audio_pipeline(audio_path)
        return transcription

    def transcribe_audio_pipeline(self, audio_path):
        cls = pipeline("automatic-speech-recognition")
        transcription = cls(audio_path)
        transcription = transcription["text"]

        return transcription

    def extract_audio(self, video_path):
        video_clip = mp.VideoFileClip(video_path)
        audio_clip = video_clip.audio

        # Specify an absolute path for the temporary audio file
        temp_audio_file = os.path.join(tempfile.gettempdir(), "temp_audio.wav")

        audio_clip.write_audiofile(temp_audio_file)
        audio_clip.close()  # Close the audio clip to release the file

        return temp_audio_file

    def load_file(self):
        if self.file_path:
            if self.file_type == "video":
                self.load_video()
            elif self.file_type == "audio":
                self.play_audio(self.file_path)
            else:
                print("Unsupported file type. Please select a valid video or audio file.")

    def load_video(self):
        video_clip = mp.VideoFileClip(self.file_path)
        self.video_clip = video_clip
        self.audio_clip = video_clip.audio
        self.play_file()

    def play_audio(self, audio_path):
        # Load and play the audio using simpleaudio
        wave_obj = sa.WaveObject.from_wave_file(audio_path)
        play_obj = wave_obj.play()

        # Sleep for the duration of the audio
        time.sleep(play_obj.duration)

        play_obj.stop()  # Stop audio playback when the audio ends

    def play_file(self):
        # Set canvas size to match video dimensions
        self.canvas.config(width=self.video_clip.size[0], height=self.video_clip.size[1])

        # Load and play the audio using simpleaudio
        wave_obj = sa.WaveObject.from_wave_file(self.extract_audio(self.file_path))
        play_obj = wave_obj.play()

        for frame in self.video_clip.iter_frames(fps=self.video_clip.fps, dtype='uint8'):
            img = Image.fromarray(frame)
            img = ImageTk.PhotoImage(img)
            self.canvas.create_image(0, 0, anchor=tk.NW, image=img)
            self.canvas.image = img
            self.master.update()
            time.sleep(0.6 / self.video_clip.fps)  # Adjust the sleep duration based on the desired speed

        play_obj.stop()  # Stop audio playback when the video ends

    def execute_function(self, msg):
        selected_function = self.function_var.get()

        if selected_function == "DistilBert":
            word = msg
            results = predict_res(word)
            op = HuggingFace_pred(word)
            text1 = "Result: sarcasm with "+op
            text2 = "Result: Not sarcasm with "+op
            if results > 0.3:
                print("sarcasm")
                self.function_message_label.config(text=text1, fg="#FF4500")  # Orange text for sarcasm
            else:
                print("not sarcasm")
                self.function_message_label.config(text=text2, fg="#32CD32")  # Green text for not sarcasm

        elif selected_function == "Bert":
            reviews = [msg]
             
            res = bert_model.predict(reviews)
            print(res)
            if res > 0.3:
                print("sarcasm")
                self.function_message_label.config(text="Sarcasm", fg="#FF4500")  # Orange text for sarcasm
            else:
                print("not sarcasm")
                self.function_message_label.config(text="Not Sarcasm", fg="#32CD32")  # Green text for not sarcasm

            #self.function_message_label.config(text="model 2.", fg="#008B8B")  # Dark cyan text for Model 2
        else:
            self.function_message_label.config(text="Select a Model!.", fg="#FF0000")  # Red text for invalid function


def main():
    root = tk.Tk()

    # Dark theme style
    style = ttk.Style(root)
    style.theme_use('clam')
    style.configure('Dark.TFrame', background='#1E1E1E')  # Dark gray background for frames
    style.configure('Dark.TCombobox', background='#3E3E3E', foreground='#FFFFFF')  # Darker gray for Combobox

    app = VideoTranscriberApp(root)
    root.mainloop()

if __name__ == "__main__":
    main()


MoviePy - Writing audio in C:\Users\ANKIT\AppData\Local\Temp\temp_audio.wav


No model was supplied, defaulted to facebook/wav2vec2-base-960h and revision 55bb623 (https://huggingface.co/facebook/wav2vec2-base-960h).
Using a pipeline without specifying a model name and revision in production is not recommended.


MoviePy - Done.


Some weights of the model checkpoint at facebook/wav2vec2-base-960h were not used when initializing Wav2Vec2ForCTC: ['wav2vec2.encoder.pos_conv_embed.conv.weight_g', 'wav2vec2.encoder.pos_conv_embed.conv.weight_v']
- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0']
You sho

sarcasm


In [3]:
#import keras
from transformers import AutoTokenizer
model_ckpt = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
from transformers import AutoModel
import torch
model = AutoModel.from_pretrained(model_ckpt)

from datasets import load_dataset
emotion = load_dataset('emotion')
emotion.set_format(type='pandas')
classes = emotion['train'].features['label'].names
#classes
from transformers import AutoModelForSequenceClassification
num_labels = len(classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt, num_labels = num_labels).to(device)
def HuggingFace_pred(Trans):  
  text = Trans
  input_encoded = tokenizer(text, return_tensors='pt').to(device)
  with torch.no_grad():
    outputs = model(**input_encoded)

  logits = outputs.logits
  pred = torch.argmax(logits, dim=1).item()
  op = classes[pred]
  return op
model = AutoModelForSequenceClassification.from_pretrained("./NLP-Tutorials-with-HuggingFace/").to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'pre_classifier.bias', 'classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
#load model

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
path = "bertMustard++test.h5"
bert_model = tf.keras.models.load_model(
       (path),
       custom_objects={'KerasLayer':hub.KerasLayer},
       compile=False
)
bert_model.compile() 