In [None]:
from google.colab import drive

# Mount point for Google Drive inside the Colab VM; later cells read inputs
# from and write logs to paths underneath it.
drive_mount_point = "/content/drive"
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install the speech-recognition / text-generation dependencies.
# `%pip` (instead of `!pip`) guarantees the packages land in the environment
# of the running kernel; the extras specifiers are quoted so the shell never
# tries to glob-expand the square brackets.
%pip install "transformers[torch]" "datasets[audio]"
%pip install librosa
%pip install tiktoken



In [None]:
# Extra runtime dependencies. `%pip` installs into the running kernel's
# environment, whereas `!pip` uses whichever pip is first on the shell PATH.
%pip install accelerate
%pip install tqdm

Looking in indexes: https://pypi.org/simple/


In [None]:
import getpass
import os
import re
import time

import huggingface_hub as hub
import librosa
import pandas as pd
import torch
from transformers import pipeline
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    # BitsAndBytesConfig,
)
# Root folder on the mounted Google Drive. Audio inputs are read from
# <base_path>/AudioFiles and the run log is written under <base_path>/AudioLogs.
base_path = "/content/drive/MyDrive/AudioTesting"

audio_dir = os.path.join(base_path, "AudioFiles")
# os.listdir returns entries in arbitrary order and also lists sub-folders.
# Sort for a reproducible processing order and keep regular files only, so a
# stray directory does not make librosa.load fail in the processing loop.
audio_files = sorted(
    name
    for name in os.listdir(audio_dir)
    if os.path.isfile(os.path.join(audio_dir, name))
)

# exist_ok=True replaces the separate "exists?" check and is safe on re-runs.
os.makedirs(os.path.join(base_path, "AudioLogs"), exist_ok=True)

# Empty log table; one row is added per processed audio file.
df = pd.DataFrame(columns=[
    "AUDIO_FILE_NAME",
    "ASR_MODEL_NAME",
    "TG_MODEL_NAME",
    "ASR_LOADING_DURATION",
    "ASR_S2T",
    "TG_LOADING_DURATION",
    "TG_DURATION",
    "TOTAL_DURATION",
    "S2T_OUTPUT",
    "GeneratedQNA",
])

# Wall-clock start of the whole setup phase (login + both model loads).
ultra_offset = time.time()

# SECURITY: never hard-code the Hugging Face token in the notebook. Read it
# from the HF_TOKEN environment variable and fall back to a hidden prompt.
# The token that used to be committed on this line must be revoked at
# https://huggingface.co/settings/tokens.
hf_token = os.environ.get("HF_TOKEN") or getpass.getpass("Hugging Face token: ")
hub.login(token=hf_token)

checkpoint = "mistralai/Mistral-7B-Instruct-v0.2"
audio_checkpoint = "openai/whisper-tiny"

# Optional 4-bit quantisation, currently disabled:
# config = BitsAndBytesConfig(
#    load_in_4bit=True,
#    bnb_4bit_quant_type="nf4",
#    bnb_4bit_use_double_quant=True,
#    bnb_4bit_compute_dtype=torch.bfloat16
# )

# --- Speech-to-text model -------------------------------------------------
offset = time.time()

# Use the first GPU when one is available, otherwise run on CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
whisper = pipeline(
    "automatic-speech-recognition",
    device=device,
    model=audio_checkpoint,
)

audio_load_duration = time.time() - offset

# --- Text-generation model ------------------------------------------------
offset = time.time()
mistral_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
mistral = AutoModelForCausalLM.from_pretrained(
    checkpoint,  # quantization_config=config,
    device_map=device,
)
# Only tokenizer + weights loading is timed; building the pipeline wrapper
# below is not included in model_load_duration.
model_load_duration = time.time() - offset

mistral_pipe = pipeline(
    "text-generation",
    model=mistral,
    tokenizer=mistral_tokenizer,
    do_sample=True,
    max_new_tokens=1024,
    device_map=device,
    temperature=0.1,
)

# Total setup time, measured from just before the login call.
ultra_duration = time.time() - ultra_offset

def _format_duration(seconds):
    """Format a duration in seconds as '<minutes> mins <seconds> seconds'.

    divmod on a float yields float minutes, so 160.25 -> '2.0 mins 40.25 seconds'.
    """
    return "{} mins {:.2f} seconds".format(*divmod(seconds, 60))


# Loop-invariant values, hoisted out of the per-file loop.
qna_pattern = re.compile(r"{(?:[^{}]|)*}")  # flat {...} objects without nested braces
log_columns = list(df.columns)
log_path = os.path.join(base_path, "AudioLogs/AudioLogs.xlsx")
records = []  # one dict per processed file; the log frame is rebuilt from this

for file_name in audio_files:
    generation_offset = time.time()
    filepath = os.path.join(base_path, "AudioFiles", file_name)
    # Load the audio resampled to 16 kHz; the returned sample rate is not needed.
    audio, _ = librosa.load(filepath, sr=16_000)

    # --- Speech to text ---------------------------------------------------
    offset = time.time()
    output = whisper(
        audio,
        return_timestamps=True,
        generate_kwargs={"task": "transcribe"},
        chunk_length_s=30,
    )
    audio_to_text_duration = time.time() - offset

    item = output["text"]

    # NOTE(review): the inline <s> / </s> markers inside [INST] do not look like
    # the instruct format documented for Mistral -- confirm before changing,
    # since editing the prompt changes the model output.
    prompt = "Generate as many as possible difficulty level hard Multiple choice questions with four options and answer using this text:"+" "+item+'''\n Give me output in this JSON array format:[{"question": string, "options":List[string], "answer":string}]'''
    prompt_template= f'''[INST] <s> You have Phd in history and best in generating Multiple choice questions with demanded JSON format. </s> {prompt}[/INST]'''

    # --- Text to questions --------------------------------------------------
    offset = time.time()
    # return_full_text=False makes the pipeline return only the completion.
    # With the default (prompt + completion) the regex below always matched
    # the JSON schema example embedded in the prompt, so a bogus "question"
    # was logged and the "No qna generated" fallback could never trigger.
    mcqs = mistral_pipe(prompt_template, return_full_text=False)
    text_to_qna_duration = time.time() - offset

    generation_duration = time.time() - generation_offset
    generation_minutes, generation_seconds = divmod(generation_duration, 60)
    print("total duration audio to qna : %d mins %d seconds" % (generation_minutes, generation_seconds))

    matches = qna_pattern.findall(mcqs[0]["generated_text"])
    qnas = "\n".join(matches) if matches else "No qna generated"

    records.append({
        # "<parent folder>/<file name>", i.e. the last two path components.
        "AUDIO_FILE_NAME": "/".join(filepath.split("/")[-2:]),
        "ASR_MODEL_NAME": audio_checkpoint,
        "TG_MODEL_NAME": checkpoint,
        "ASR_LOADING_DURATION": _format_duration(audio_load_duration),
        "ASR_S2T": _format_duration(audio_to_text_duration),
        "TG_LOADING_DURATION": _format_duration(model_load_duration),
        "TG_DURATION": _format_duration(text_to_qna_duration),
        "TOTAL_DURATION": _format_duration(generation_duration),
        "S2T_OUTPUT": item,
        "GeneratedQNA": qnas,
    })

    # Rebuild the frame from the collected rows (unique 0..n-1 index, no concat
    # onto an empty frame) and checkpoint after every file so partial results
    # survive an interrupted run.
    df = pd.DataFrame(records, columns=log_columns)
    df.to_excel(log_path, index=False)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Time taken: 2.0 minutes  40.25 seconds


In [None]:
# Write the full log table to an Excel file directly under the project folder.
# NOTE(review): the processing loop checkpoints to AudioLogs/AudioLogs.xlsx,
# whereas this cell writes one level up -- confirm the second copy is intended.
root_log_path = os.path.join(base_path, "AudioLogs.xlsx")
df.to_excel(root_log_path, index=False)

In [None]:
# Display the accumulated log table as this cell's output.
df

Unnamed: 0,AUDIO_FILE_NAME,ASR_MODEL_NAME,TG_MODEL_NAME,EMBEDDING_MODEL_NAME,ASR_LOADING_DURATION,ASR_S2T,TG_LOADING_DURATION,TG_DURATION,EMBEDDING_LOADING_DURATION,TOTAL_DURATION,S2T_OUTPUT,GeneratedQNA,Question_count,PROMPT_USED,REFINED_PROMPT_USED
0,nature-of-society-individuals-and-groups-audio...,openai/whisper-small,mistralai/Mistral-7B-Instruct-v0.2,sentence-transformers/all-MiniLM-L6-v2,0.0 mins 39.40 seconds,2.0 mins 12.76 seconds,2.0 mins 13.01 seconds,13.0 mins 25.96 seconds,0.0 mins 2.05 seconds,18.0 mins 52.49 seconds,"Well, friends, today we are going to discuss ...",['1. What is the definition of society accordi...,51,You are an expert at creating questions based ...,You are an expert at creating practice questi...
1,nature-of-society-individuals-and-groups-audio...,openai/whisper-large-v3,mistralai/Mistral-7B-Instruct-v0.2,sentence-transformers/all-MiniLM-L6-v2,0.0 mins 43.84 seconds,6.0 mins 32.19 seconds,2.0 mins 26.82 seconds,4.0 mins 31.76 seconds,0.0 mins 3.30 seconds,14.0 mins 39.68 seconds,"Well friends, today we are going to discuss t...",['1. What is the definition of society accordi...,20,You are an expert at creating questions based ...,You are an expert at creating practice questi...
2,nature-of-society-individuals-and-groups-audio...,openai/whisper-tiny,mistralai/Mistral-7B-Instruct-v0.2,sentence-transformers/all-MiniLM-L6-v2,0.0 mins 27.04 seconds,1.0 mins 1.36 seconds,2.0 mins 15.83 seconds,5.0 mins 30.72 seconds,0.0 mins 2.73 seconds,9.0 mins 47.84 seconds,"Well first, today we are going to discuss the...",['1. What is the definition of society accordi...,20,You are an expert at creating questions based ...,You are an expert at creating practice questi...
3,ASR/Mod-01 Lec-03 Nature of society Individual...,openai/whisper-base,mistralai/Mistral-7B-Instruct-v0.2,sentence-transformers/all-MiniLM-L6-v2,0.0 mins 19.28 seconds,1.0 mins 29.89 seconds,1.0 mins 12.10 seconds,9.0 mins 45.20 seconds,0.0 mins 1.42 seconds,12.0 mins 48.54 seconds,"Well, first today we are going to discuss the...","[""1. What is the definition of society accordi...",27,You are an expert at creating questions based ...,You are an expert at creating practice questi...
4,ASR/Mod-01 Lec-03 Nature of society Individual...,openai/whisper-tiny,mistralai/Mistral-7B-Instruct-v0.2,sentence-transformers/all-MiniLM-L6-v2,0.0 mins 12.18 seconds,1.0 mins 8.68 seconds,1.0 mins 20.21 seconds,7.0 mins 28.07 seconds,0.0 mins 1.02 seconds,10.0 mins 10.21 seconds,"Well first, today we are going to discuss the...",['1. What is the definition of society accordi...,24,You are an expert at creating questions based ...,You are an expert at creating practice questi...
5,ASR/Mod-01 Lec-03 Nature of society Individual...,openai/whisper-tiny,mistralai/Mistral-7B-Instruct-v0.2,sentence-transformers/all-MiniLM-L6-v2,0.0 mins 15.79 seconds,1.0 mins 9.23 seconds,1.0 mins 24.34 seconds,5.0 mins 29.20 seconds,0.0 mins 1.62 seconds,8.0 mins 20.88 seconds,"Well first, today we are going to discuss the...",['1. What is the definition of society accordi...,19,You are an expert at creating questions based ...,You are an expert at creating practice questi...
6,ASR/Mod-01 Lec-03 Nature of society Individual...,openai/whisper-tiny,mistralai/Mistral-7B-Instruct-v0.2,sentence-transformers/all-MiniLM-L6-v2,0.0 mins 15.42 seconds,1.0 mins 10.78 seconds,1.0 mins 19.33 seconds,5.0 mins 33.74 seconds,0.0 mins 1.41 seconds,8.0 mins 21.31 seconds,"Well first, today we are going to discuss the...",['1. What is the definition of society accordi...,21,You are an expert at creating questions based ...,You are an expert at creating practice questi...
7,ASR/Mod-01 Lec-03 Nature of society Individual...,openai/whisper-tiny,mistralai/Mistral-7B-Instruct-v0.2,sentence-transformers/all-MiniLM-L6-v2,0.0 mins 15.19 seconds,1.0 mins 9.74 seconds,1.0 mins 18.81 seconds,5.0 mins 15.32 seconds,0.0 mins 0.97 seconds,8.0 mins 0.67 seconds,"Well first, today we are going to discuss the...",['1. What is the definition of society accordi...,19,You are an expert at creating questions based ...,You are an expert at creating practice questi...
8,ASR/Mod-01 Lec-03 Nature of society Individual...,openai/whisper-tiny,mistralai/Mistral-7B-Instruct-v0.2,sentence-transformers/all-MiniLM-L6-v2,0.0 mins 32.81 seconds,1.0 mins 12.72 seconds,3.0 mins 46.97 seconds,7.0 mins 8.06 seconds,0.0 mins 2.79 seconds,12.0 mins 45.15 seconds,"Well first, today we are going to discuss the...",['1. What is the definition of society accordi...,21,You are an expert at creating questions based ...,You are an expert at creating practice questi...
9,ASR/Mod-01 Lec-03 Nature of society Individual...,openai/whisper-tiny,mistralai/Mistral-7B-Instruct-v0.2,sentence-transformers/all-MiniLM-L6-v2,0.0 mins 34.79 seconds,1.0 mins 13.49 seconds,2.0 mins 58.15 seconds,7.0 mins 38.80 seconds,0.0 mins 3.92 seconds,12.0 mins 33.11 seconds,"Well first, today we are going to discuss the...","[""1. What is the definition of society accordi...",21,You are an expert at creating questions based ...,You are an expert at creating practice questi...


In [None]:
# Display the log table again. `df` only exists after the processing cell has
# run in the current kernel session; on a fresh kernel this raises NameError.
df

NameError: name 'df' is not defined