In [2]:
import io
import json
import yaml
from supabase import create_client
from elevenlabs import ElevenLabs
import pandas as pd


import yaml
from supabase import create_client
from collections import Counter
import pandas as pd
import numpy as np

# Identifiers of the Supabase project, storage bucket, conversation and video
# that the rest of the notebook works on.
URL = 'https://coltpedcrfibsozxvgvu.supabase.co'
BUCKET_ID = 'transcription'
CONVERSATION_ID = "YzI93GFnFsCOo924uKlo"
video_id = "408bef4d-0f92-4dae-aa5b-f556acaa037f"

# API keys are read from a YAML file outside the notebook so that they are
# never written into a cell.
with open("../env.yaml") as stream:
    KEYS = yaml.safe_load(stream)

SUPABASE_API_KEY = KEYS["SUPABASE_API_KEY"]
ELEVENLABS_API_KEY = KEYS["ELEVENLABS_API_KEY"]

# Service clients shared by every cell below.
supabase = create_client(URL, SUPABASE_API_KEY)
client = ElevenLabs(api_key=ELEVENLABS_API_KEY)



In [3]:
# GETTING THE CONVERSATION
# Fetch the conversation record from ElevenLabs by its id.
conversation = client.conversational_ai.get_conversation(conversation_id=CONVERSATION_ID)

# Archive the raw conversation as UTF-8 encoded JSON in the Supabase storage
# bucket, stored under "<conversation id>.json".
# NOTE(review): the upload presumably fails when the object already exists,
# which would break a second run of this cell — confirm and consider an upsert.
conversation_json = json.dumps(conversation.dict()).encode("utf-8")
transcription_bucket = supabase.storage.from_(BUCKET_ID)
response = transcription_bucket.upload(f"{CONVERSATION_ID}.json", conversation_json)

In [6]:
# EXTRACTING INFO FROM THE CONVERSATION
# The dynamic variables are the values passed to the agent when the
# conversation was initiated; every key below is required (KeyError if absent).
initiation_data = conversation.dict()['conversation_initiation_client_data']
dynamic_variables = initiation_data['dynamic_variables']

# Movie-level information.
MOVIE_TITLE, MOVIE_GENRE = (
    dynamic_variables[key] for key in ("movie_title", "movie_genre")
)

# Scene-level information.
SETTING, SCENE_CONTEXT, CHARACTERS, SCENE_DESCRIPTION, ADDITIONONAL_INFO = (
    dynamic_variables[key]
    for key in (
        "scene_setting",
        "scene_context",
        "characters",
        "scene_description",
        "additional_info",
    )
)

# The user and the script being rehearsed.
USER_NAME, SCREENPLAY_TEXT = (
    dynamic_variables[key] for key in ("user_name", "screenplay_text")
)

# Which role each side of the conversation plays.
USER_ROLE, AGENT_ROLE = (
    dynamic_variables[key] for key in ("user_role", "agent_role")
)

In [7]:
# EXTRACTING TRANSCRIPTION FROM THE CONVERSATION
conversation_data = conversation.dict()
duration = conversation_data['metadata']['call_duration_secs']

# Keep only the speaker, the text and the start time of every utterance.
transcript = pd.DataFrame(conversation_data["transcript"])[["role", "message", "time_in_call_secs"]]

# Upper-case the speaker labels and rename the "USER" speaker to "ACTOR".
transcript['role'] = [
    "ACTOR" if speaker.upper() == "USER" else speaker.upper()
    for speaker in transcript['role']
]

# An utterance ends where the next one starts; the last one ends with the call.
next_start = transcript['time_in_call_secs'].shift(-1)
transcript['time_in_call_secs_end'] = next_start.fillna(duration).astype(int)

In [8]:
def _concat_emotions_audio(row):
    emotion_columns = ["angry", "boredom", "disgust", "fear", "happy", "neutral", "sad", "surprise"]
    emotions = {col: row[col] for col in emotion_columns if row[col] > 0.1}  
    sorted_emotions = sorted(emotions, key=emotions.get, reverse=True)  
    return ', '.join(sorted_emotions) 




In [10]:
# AUDIO
# Load every row of the "audio_emotions" table that belongs to this video.
response = supabase.table("audio_emotions").select("*").eq("video_id", video_id).execute()

# Scale both segment boundaries down by a factor of 1000
# (presumably milliseconds -> seconds; confirm against the table schema).
audio_emotions = pd.DataFrame(response.data).assign(
    start_time=lambda frame: frame["start_time"] / 1000,
    end_time=lambda frame: frame["end_time"] / 1000,
)
# audio_emotions["audio_second_start"] = audio_emotions["start_time"].apply(np.floor).astype(int)
# audio_emotions["audio_second_end"] = audio_emotions["end_time"].apply(np.ceil).astype(int)
# emotion_columns = ["angry", "boredom", "disgust", "fear", "happy", "neutral", "sad", "surprise"]

# audio_processed = audio_emotions[['audio_second_start', 'audio_second_end', *emotion_columns]]
# audio_processed.loc[:,["audio_concatenated_emotions"]] = audio_processed.apply(_concat_emotions_audio, axis=1)
# audio_processed = audio_processed[["audio_second_start", "audio_second_end", "audio_concatenated_emotions"]]

In [11]:
# VIDEO

def _timestamp_to_seconds(timestamp: str) -> float:
    hh, mm, ss, ms = map(int, timestamp.split(":"))
    return hh * 3600 + mm * 60 + ss + ms / 1000

# Load every row of the "video_emotions" table that belongs to this video.
response = supabase.table("video_emotions").select("*").eq("video_id", video_id).execute()
video_emotions = pd.DataFrame(response.data)

# Add a numeric time column and order the rows chronologically.
video_emotions = video_emotions.assign(
    video_time_in_seconds=video_emotions["video_time"].map(_timestamp_to_seconds)
).sort_values('video_time_in_seconds')

emotion_columns = ['angry', 'contempt', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise', 'arousal', 'valence', 'intensity']

# For every transcript line, average the scores of all video rows whose time
# falls inside the line's [start, end] window (both bounds inclusive) and keep
# the columns whose mean is above 0.1, strongest first. A line with no matching
# rows yields an empty string.
frame_times = video_emotions["video_time_in_seconds"]
video_emotions_per_line = []
for line_start, line_end in zip(transcript['time_in_call_secs'], transcript['time_in_call_secs_end']):
    in_window = frame_times.between(line_start, line_end)
    emotions_mean = video_emotions.loc[in_window, emotion_columns].mean()

    scored = [
        (column, emotions_mean[column])
        for column in emotion_columns
        if emotions_mean[column] > 0.1
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    video_emotions_per_line.append(', '.join(column for column, _ in scored))

In [12]:
# Attach the per-line emotion summaries to the transcript.
# `assign` overwrites the columns when they already exist, so re-running this
# cell does not produce duplicate "video_emotions"/"voice_emotions" columns
# (which `pd.concat(axis=1)` would, breaking `line["video_emotions"]` later on).
transcript = transcript.assign(
    video_emotions=video_emotions_per_line,
    # temporary: the voice column reuses the video emotions as a placeholder
    voice_emotions=video_emotions_per_line,
)
transcript

Unnamed: 0,role,message,time_in_call_secs,time_in_call_secs_end,video_emotions,voice_emotions
0,AGENT,"Hi Jacob, are you ready to start where you wil...",0,5,"happy, neutral","happy, neutral"
1,ACTOR,I don't know. You think I'm ready?,5,7,happy,happy
2,AGENT,It's all about how you feel. If you're comfort...,7,16,"neutral, happy","neutral, happy"
3,ACTOR,Let's fucking go.,16,18,,
4,AGENT,Great energy! Just let me know when you're rea...,18,22,,
5,ACTOR,I'm ready.,22,24,,
6,AGENT,Perfect! Please go ahead with your line when y...,24,28,,
7,ACTOR,You fought in the Clone Wars?,28,31,,
8,AGENT,"Yes, I was once a Jedi Knight the same as your...",31,36,,
9,ACTOR,I wish I'd known him.,36,38,,


In [None]:
# PERSONALITY
# Load the "personality" row(s) stored for this video.
response = supabase.table("personality").select("*").eq("video_id", video_id).execute()
personality = pd.DataFrame(response.data)

# Use the first returned row, without its identifier columns.
personality_values = personality.drop(columns=['id', 'video_id']).to_dict(orient='records')[0]

# (label shown in the prompt, column name) for each trait, in display order.
trait_labels = (
    ("Extraversion", 'extraversion'),
    ("Neuroticism", 'neuroticism'),
    ("Agreeableness", 'agreeableness'),
    ("Conscientiousness", 'conscientiousness'),
    ("Openness", 'openness'),
)

# Each score is multiplied by 100 and rendered as a whole-number percentage.
PERSONALITY = ", ".join(
    f"{label}: {personality_values[column]*100:.0f}%" for label, column in trait_labels
)

In [None]:
# Preview the transcript, one utterance per line, with both emotion summaries.
# Single quotes are used for the keys inside the f-strings: reusing the outer
# double quote is only valid syntax from Python 3.12 onwards.
for _, line in transcript.iterrows():
    print(
        f"{line['role']}: {line['message']} "
        f"(Vision: {line['video_emotions']} ; Voice: {line['voice_emotions']})"
    )

"     role                                                                                                                                        message  time_in_call_secs  time_in_call_secs_end  video_emotions  voice_emotions\n0   AGENT                                                                                     Hi Jacob, are you ready to start where you will play LUKE?                  0                      5  happy, neutral  happy, neutral\n1   ACTOR                                                                                                             I don't know. You think I'm ready?                  5                      7           happy           happy\n2   AGENT  It's all about how you feel. If you're comfortable and ready to dive into the scene, we can begin. Just let me know when you're set to start!                  7                     16  neutral, happy  neutral, happy\n3   ACTOR                                                                              

In [None]:
from jinja2 import Template

# Load the Jinja2 prompt template that sits next to the notebook.
with open("prompt_template.txt", "r") as file:
    template_content = file.read()

template = Template(template_content)

# The original cell left the "TRANSCRIPTION" value empty, which is a syntax
# error. Build it as one line per utterance, in the same format the transcript
# preview loop prints.
# NOTE(review): confirm this is the layout prompt_template.txt expects.
TRANSCRIPTION = "\n".join(
    f"{line['role']}: {line['message']} "
    f"(Vision: {line['video_emotions']} ; Voice: {line['voice_emotions']})"
    for _, line in transcript.iterrows()
)

# Values substituted into the template. The keys must match the placeholder
# names used in prompt_template.txt, so the "ADDITIONONAL_INFO" spelling is kept.
data = {
    "USER_NAME": USER_NAME,
    "USER_ROLE": USER_ROLE,
    "MOVIE_TITLE": MOVIE_TITLE,
    "MOVIE_GENRE": MOVIE_GENRE,
    "SETTING": SETTING,
    "SCENE_CONTEXT": SCENE_CONTEXT,
    "SCENE_DESCRIPTION": SCENE_DESCRIPTION,
    "CHARACTERS": CHARACTERS,
    "ADDITIONONAL_INFO": ADDITIONONAL_INFO,
    "SCRIPT": SCREENPLAY_TEXT,
    "TRANSCRIPTION": TRANSCRIPTION,
    "PERSONALITY": PERSONALITY,

    "delivery_time": 3
}

# Render the template with values
final_prompt = template.render(data)

print(final_prompt)

'Personality traits: Extraversion: 45%, Neuroticism: 51%, Agreeableness: 21%, Conscientiousness: 44%, Openness: 33%'

In [None]:
# TODO: assemble the prompt, send it to OpenAI, and save the response to Supabase