In [None]:
! pip install requests ffmpeg-python
! pip install deepgram-sdk --upgrade
! pip install requests



In [None]:
import base64
import json
import os

from deepgram import DeepgramClient, PrerecordedOptions, FileSource

# Deepgram API key.
# Read it from the DG_API_KEY environment variable so a real key never has to
# be typed into (and saved with) the notebook. The original literal is kept as
# the fallback, so the cell behaves exactly as before when the variable is unset.
DG_KEY = os.environ.get("DG_API_KEY", "DG_API_KEY")

# Deepgram options for transcription
options = PrerecordedOptions(
    model="nova-2",
    smart_format=True,
    # process_transcription() below reads paragraph['speaker'] to label lines,
    # so diarization is requested here.
    diarize=True,
)

# Initialize Deepgram client (shared by transcribe_audio)
deepgram = DeepgramClient(DG_KEY)

def transcribe_audio(buffer_data):
    """Send audio content to Deepgram and return the response as a dict.

    Uses the module-level ``deepgram`` client and ``options``.

    Args:
        buffer_data: The audio content to transcribe; it is placed under the
            "buffer" key of the payload handed to ``transcribe_file``.

    Returns:
        The value of ``response.to_dict()``, or ``None`` when any exception is
        raised along the way (the exception is printed, not re-raised).
    """
    try:
        # Payload shape expected by transcribe_file
        source: FileSource = {"buffer": buffer_data}

        prerecorded_api = deepgram.listen.prerecorded.v("1")
        result = prerecorded_api.transcribe_file(source, options)
        return result.to_dict()
    except Exception as err:
        print(f"Exception during transcription: {err}")
    return None

def process_transcription(transcript_data):
    """Convert a transcription response dict into "Q:"/"A:" labelled lines.

    Reads the paragraphs of the first alternative of the first channel. Each
    sentence becomes one newline-terminated line, prefixed with "Q:" when the
    paragraph's speaker is 0 and with "A:" for any other speaker.

    Args:
        transcript_data: Nested dict with the
            results -> channels -> alternatives -> paragraphs -> paragraphs path.

    Returns:
        The labelled transcript text, or "" when any exception occurs (for
        example a missing key); the exception is printed, not re-raised.
    """
    try:
        first_alternative = transcript_data['results']['channels'][0]['alternatives'][0]
        paragraph_list = first_alternative['paragraphs']['paragraphs']

        labelled_lines = []
        for para in paragraph_list:
            prefix = "A:" if para['speaker'] != 0 else "Q:"
            labelled_lines.extend(
                f"{prefix} {sent['text']}\n" for sent in para['sentences']
            )
        return "".join(labelled_lines)

    except Exception as err:
        print(f"Exception during processing: {err}")
        return ""

def save_to_txt(transcript_text):
    """Write the transcript text to ``transcript.txt`` in the working directory.

    The file is opened in write mode, so existing content is replaced. A
    confirmation line is printed on success. Any exception is printed instead
    of being raised.

    Args:
        transcript_text: The text to write.
    """
    try:
        with open('transcript.txt', mode='w') as out_file:
            out_file.write(transcript_text)

        print("Transcript saved successfully.")

    except Exception as err:
        print(f"Exception during TXT saving: {err}")

def main():
    """Run the pipeline: read ``audio.mp3``, transcribe, format, save.

    The audio file is read as bytes and passed to ``transcribe_audio``. If
    that returns a falsy value, nothing further happens. Otherwise the result
    goes through ``process_transcription`` and the text is handed to
    ``save_to_txt``. Any exception raised here (for example a missing audio
    file) is printed rather than propagated.
    """
    try:
        # Load the whole audio file into memory
        with open('audio.mp3', 'rb') as audio_file:
            audio_bytes = audio_file.read()

        transcript_data = transcribe_audio(audio_bytes)
        if not transcript_data:
            # Transcription failed or returned nothing; nothing to save
            return

        # Format as Q:/A: lines and write them out
        save_to_txt(process_transcription(transcript_data))

    except Exception as err:
        print(f"Exception: {err}")


if __name__ == "__main__":
    main()


  response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)


Transcript saved successfully.


In [None]:
def print_transcript_file():
    """Print the contents of ``transcript.txt`` beneath a header banner.

    The file is read from the working directory. Any exception (for example
    the file not existing) is printed instead of being raised.
    """
    try:
        with open('transcript.txt', 'r') as handle:
            saved_text = handle.read()

        # Banner and body each end with a newline, as two separate prints would
        print("\n--- Transcript Content ---\n", saved_text, sep="\n")

    except Exception as err:
        print(f"Exception while reading the file: {err}")

# Show the transcript that was just saved
print_transcript_file()


--- Transcript Content ---

Q: I am interviewing Tulika.
Q: Hi, Tulika.
Q: Hello.
Q: How are you?
A: I'm good.
Q: Can you tell me about your background including your age group, profession, and education level?
A: I'm Tulika Giri currently pursuing pursuing GTech IT from Manipal University, Jaipur.
A: My age is 21.
Q: How would you describe your personality type?
A: I'm socially selective and, like, I mostly am reserved around people.
A: But if I like the vibe, I talk to.
Q: How often do you experience stress or anxiety and what methods do you use to cope with it?
A: I don't easily get stressed.
A: But if I get stressed, I try to distract myself by watching some series or something.
Q: Can you describe your overall mental health and any practices you engage in to maintain it?
A: Overall mental health, I think I'm it's okay only.
A: I'm not, like, very sad or depressed.
A: I kind of try to be happy like I try to keep myself happy.
A: That's my first priority.
A: So if anything that is 

In [None]:
# Code to process the transcript file and generate a modified version
# Importing the necessary library
import re

# Load the labelled transcript produced earlier (one "Q:"/"A:" sentence per line)
with open('transcript.txt', 'r') as file:
    lines = file.readlines()

# Collapse every run of consecutive same-speaker lines into a single turn.
result = []
active_label = None  # label of the turn currently being built ('Q:' or 'A:')
fragments = []       # sentence texts collected so far for that turn

for raw_line in lines:
    stripped = raw_line.strip()  # drop surrounding whitespace and the newline
    label = stripped[:2]
    if label not in ('Q:', 'A:'):
        continue  # lines without a speaker label are ignored

    if label != active_label:
        # Speaker changed: emit the finished turn before starting the new one
        if fragments:
            result.append(f"{active_label} {' '.join(fragments).strip()}")
        active_label, fragments = label, []

    fragments.append(stripped[2:].strip())

# Emit whichever turn was still open at the end of the file
if fragments:
    result.append(f"{active_label} {' '.join(fragments).strip()}")

# Save the merged turns, one per line
with open('new_transcript.txt', 'w') as file:
    file.writelines(turn + '\n' for turn in result)

# Show what was written
with open('new_transcript.txt', 'r') as file:
    print(file.read())

Q: I am interviewing Tulika. Hi, Tulika. Hello. How are you?
A: I'm good.
Q: Can you tell me about your background including your age group, profession, and education level?
A: I'm Tulika Giri currently pursuing pursuing GTech IT from Manipal University, Jaipur. My age is 21.
Q: How would you describe your personality type?
A: I'm socially selective and, like, I mostly am reserved around people. But if I like the vibe, I talk to.
Q: How often do you experience stress or anxiety and what methods do you use to cope with it?
A: I don't easily get stressed. But if I get stressed, I try to distract myself by watching some series or something.
Q: Can you describe your overall mental health and any practices you engage in to maintain it?
A: Overall mental health, I think I'm it's okay only. I'm not, like, very sad or depressed. I kind of try to be happy like I try to keep myself happy. That's my first priority. So if anything that is affecting me, I try to not think about it. Like, it doesn't

In [None]:
import csv

# Read the modified transcript from new_transcript.txt
with open('new_transcript.txt', 'r') as file:
    lines = file.readlines()

# Build question/answer pairs by adjacency: each answer is paired with the
# question that immediately precedes it. Collecting the two lists separately
# and zipping them by index would shift every pair when the transcript opens
# with an answer, and would silently drop a trailing unanswered question.
# `questions` and `answers` are kept as parallel lists of equal length.
questions = []
answers = []
pending_question = None  # question seen but not yet matched with an answer

for line in lines:
    line = line.strip()  # Remove leading and trailing whitespace
    if line.startswith('Q:'):
        if pending_question is not None:
            # Previous question never received an answer; keep it with a blank
            questions.append(pending_question)
            answers.append('')
        pending_question = line[2:].strip()  # Question text without the Q: prefix
    elif line.startswith('A:'):
        # An answer with no preceding question gets a blank question cell
        questions.append(pending_question if pending_question is not None else '')
        answers.append(line[2:].strip())  # Answer text without the A: prefix
        pending_question = None

# A question at the very end of the transcript has no answer yet
if pending_question is not None:
    questions.append(pending_question)
    answers.append('')

# Write the questions and answers to a CSV file
with open('test.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(['Question', 'Answer'])  # Header row
    csvwriter.writerows(zip(questions, answers))  # One row per aligned pair

# Print the content of the CSV file
with open('test.csv', 'r') as file:
    print(file.read())


Question,Answer
"I am interviewing Tulika. Hi, Tulika. Hello. How are you?",I'm good.
"Can you tell me about your background including your age group, profession, and education level?","I'm Tulika Giri currently pursuing pursuing GTech IT from Manipal University, Jaipur. My age is 21."
How would you describe your personality type?,"I'm socially selective and, like, I mostly am reserved around people. But if I like the vibe, I talk to."
How often do you experience stress or anxiety and what methods do you use to cope with it?,"I don't easily get stressed. But if I get stressed, I try to distract myself by watching some series or something."
Can you describe your overall mental health and any practices you engage in to maintain it?,"Overall mental health, I think I'm it's okay only. I'm not, like, very sad or depressed. I kind of try to be happy like I try to keep myself happy. That's my first priority. So if anything that is affecting me, I try to not think about it. Like, it doesn't ma

In [None]:
!pip install sentence-transformers


import csv
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict

# Load the pre-trained model
model = SentenceTransformer('distilbert-base-nli-mean-tokens')

# Load the question/answer pairs from the CSV file into two parallel lists
questions = []
answers = []
# The csv module documentation asks for files to be opened with newline='' so
# the reader handles line endings (including any inside quoted fields) itself.
with open('test.csv', mode='r', newline='') as file:
    for row in csv.DictReader(file):
        questions.append(row['Question'])
        answers.append(row['Answer'])

# Encode all questions. Skipped when there are none, so an empty CSV does not
# reach the model at all.
encodings = model.encode(questions) if questions else []

# Set threshold for cosine similarity: a question whose similarity to the
# previous question is above this value stays in the previous question's group
threshold = 0.50
group_assignment = [-1] * len(questions)  # Initialize group assignment with -1

group_id = 0
if questions:
    # Start with the first question in group 0. Guarded because indexing an
    # empty list here would raise IndexError when the CSV has no rows.
    group_assignment[0] = group_id

# Process the remaining questions and group them
for i in range(1, len(questions)):
    question_lower = questions[i].lower()
    # A question mentioning both "next" and "question" always opens a new group
    if "next" in question_lower and "question" in question_lower:
        group_id += 1
    else:
        # Otherwise compare with the immediately preceding question only
        similarity = cosine_similarity([encodings[i]], [encodings[i-1]])[0][0]
        if similarity <= threshold:
            group_id += 1  # Not similar enough: start a new group
    group_assignment[i] = group_id

# Create groups with Main Question and Follow-up Questions:
# group id -> indices of the questions assigned to it, in original order
groups = defaultdict(list)
for idx, group in enumerate(group_assignment):
    groups[group].append(idx)


def _format_group(group, indices):
    """Return the report text for one group.

    The header reads "Scenario 1" when the group holds more than one question
    and "Scenario 0" otherwise. The first index is reported as the main
    question and the rest as follow-up questions, each followed by its answer.
    Reads the module-level ``questions`` and ``answers`` lists.
    """
    scenario = 1 if len(indices) > 1 else 0
    parts = [f"\nScenario {scenario} (Group {group + 1}):\n"]
    for position, question_idx in enumerate(indices):
        label = "Main Question" if position == 0 else "Follow-up Question"
        parts.append(f"{label}: {questions[question_idx]}\n")
        parts.append(f"Answer: {answers[question_idx]}\n")
    return "".join(parts)


# Build the report once so the file and the console always show the same text
report = "".join(_format_group(group, indices) for group, indices in groups.items())

# Write the grouped questions to final.txt with Scenario labels
with open('final.txt', 'w') as outfile:
    outfile.write(report)

# Print the grouped questions to console with Scenario labels
# (every report line already ends with a newline, hence end="")
print(report, end="")


Scenario 0 (Group 1):
Main Question: I am interviewing Tulika. Hi, Tulika. Hello. How are you?
Answer: I'm good.

Scenario 1 (Group 2):
Main Question: Can you tell me about your background including your age group, profession, and education level?
Answer: I'm Tulika Giri currently pursuing pursuing GTech IT from Manipal University, Jaipur. My age is 21.
Follow-up Question: How would you describe your personality type?
Answer: I'm socially selective and, like, I mostly am reserved around people. But if I like the vibe, I talk to.
Follow-up Question: How often do you experience stress or anxiety and what methods do you use to cope with it?
Answer: I don't easily get stressed. But if I get stressed, I try to distract myself by watching some series or something.
Follow-up Question: Can you describe your overall mental health and any practices you engage in to maintain it?
Answer: Overall mental health, I think I'm it's okay only. I'm not, like, very sad or depressed. I kind of try to be h