In [None]:
import os
from dotenv import load_dotenv
load_dotenv()
from google import genai
from google.genai import types
import re
import nltk

In [None]:
def client_response(text, api_key, model, temperature):
    """Send a noisy VSR transcript to a Gemini model and return its response.

    Args:
        text: Raw transcript text to restore; it is interpolated at the end of
            the prompt after the editing instructions.
        api_key: API key passed to ``genai.Client``.
        model: Model name passed to ``generate_content``.
        temperature: Sampling temperature placed in the generation config.

    Returns:
        Whatever ``client.models.generate_content`` returns, or ``None`` if
        creating the client or generating content raised an exception (the
        error is printed rather than propagated).
    """
    # Assemble the prompt with real line breaks. Backslash continuations inside
    # a string literal would keep each following line's indentation and drop
    # the newlines, flattening the headings and lists into one ragged line.
    prompt = "\n".join([
        "Primary goals (in this order):",
        "1. Restore readability: casing, punctuation, sentence/paragraph breaks.",
        "2. Correct spelling and grammar.",
        "3. Improve coherence and flow while preserving the intended meaning inferred from context.",
        "",
        "Hard constraints:",
        "- Do not invent new facts, names, numbers, dates, or claims not implied by the input or optional context.",
        "- Keep the language the same as the input unless explicitly asked to translate.",
        "- If something is ambiguous or garbled, pick the most ordinary, context-plausible phrasing.",
        "",
        "Editing latitude:",
        "- You may reorder words, replace terms, merge/split sentences, and add punctuation for clarity.",
        "",
        "Quality:",
        "- Aim for natural, idiomatic phrasing.",
        "- Prefer short, clear sentences over long, tangled ones.",
        "",
        f"INPUT VSR TEXT: {text}",
    ])

    try:
        # The client is created inside the try block so that a construction
        # failure follows the same print-and-return-None path as a failed
        # generation call instead of escaping to the caller.
        client = genai.Client(api_key=api_key)
        return client.models.generate_content(
            model=model,
            config=types.GenerateContentConfig(
                system_instruction="You are LipReadFixer, a text-restoration editor for noisy, punctuation-free transcripts in English produced by lip-reading VSR models.",
                temperature=temperature,
            ),
            contents=prompt,
        )
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller handle None.
        print(f"Error generating content: {e}")
        return None

In [None]:
def get_text(response):
    """Collect the text of every part of every candidate in a response.

    Args:
        response: Object that may expose ``candidates``; each candidate may
            expose ``content`` with ``parts``, and each part may expose
            ``text``. ``None`` (or any object lacking these attributes) is
            accepted.

    Returns:
        The non-empty part texts joined with newlines and stripped of leading
        and trailing whitespace; an empty string when nothing is found.
    """
    texts = []

    # ``getattr(..., None) or []`` covers both a missing attribute and an
    # attribute that exists but is None; ``hasattr`` alone would let a None
    # value through and the loop would raise TypeError.
    for cand in getattr(response, "candidates", None) or []:
        content = getattr(cand, "content", None)
        if not content:
            continue
        for part in getattr(content, "parts", None) or []:
            part_text = getattr(part, "text", None)
            if part_text:
                texts.append(part_text)

    return "\n".join(texts).strip()

In [None]:
text = "which i'm asked is doubled and today we're going over to three fundamentals of talking to tampon is their training to this number one is comfort not confidence we're not needed to focus on being comfort and being more than we actually are instead we are focusing on it being uncomfortably being comfortable and relaxed and maybe uncomfortably reacts we can be more ourselves and more authentic to build confront neighbor historian is two things number one scanning to the state just do these different exercises before you grow to get one to be more relaxed and comfortable we should be working out doing out so the specific brilliant exercise change your body is central getting more into your mind will make you more comfortable with you give it and number number two it is repetition so the point you draw the camera the more comfortable you will be on camero so if you haven't talked to haven't talked to camera before do you not expect yourself to be the supercomfortable in your french video you might start to get comfortably on your 10th or your 50th video around them in the first one from the first one the second fundamental to draw it well on camera is the icontact one of the biggest mistakes people make when they're talking this doing this including in the streets and check themselves out while it's annoyed to camera and it feels like they are also a tissue that ignorance themselves because they altered themselves as they looked at themselves they look at themselves what's the northern camera shutting out of the buddhist architect so instead you've got to write down the lens and you know what you don't do for you don't want to go to find the other way you're going to stand down the lines for 100 of the time and just belonging and talking to every like this because i'm angry but few people out so up so today's in the porn and look around just keep talking and then come back and you're with the normal conversations by maintaining like your time most of the time 
when you talk to somebody and they and then you might also think about something you say a battle so you want to miss out about 70 80 percent of icontact on the camera right in the legacy and the rest the legacy and the rest of the top is to whatever you want the third fundamental is dynamism even if you're comfortable in april and you maintain a line content you that you might just be really comfortable to the really stereotype i'm a good springstreaming but you're coming onto the ancient you want to be tiny want to be dynamic in your delivery so how can you be more diamon the thing i struggled with a lot when i first started to go to camera was a modern vost he created many creative videos in books and worked out of bright and at the time come this is being used this personal voice really makes me quite boys me quite boring to listen to you i would always be drawing it on the same level like this to do exercises such as deliberately being exaggerated in your international and you're going to hear more arranged by the phone grades in your earlier which sucks out pretty weird and certainly sound naturally it might be kind of strange but but what this is going to do is over time help you to expand that vocal range and when you actually draw up to court to camera it all come up it will come up with naturally hopefully and i'm doing now for all of them the molecule i had before i think the thing you can do is make yourself more dynamic and authoritarity is and getting into states again before going on time once you're really shaking your body maybe having an offer and maybe just doing this it gives you more energy and this and they saw that it was energy and the final thing that you could be more dynamic is explore the different versions of yourself in a single video video so i might be talking to in a way that's very enthusiastic and stage and a center attention attention and make you feel like have something really important today but in the same video it also takes 
out your culture and just for a purpose or a a little bit slower skin more deeper and introspective and then turn to the video i'm going to wrap up again it's given him more it's a very important thing to get in so just as accident i don't really like to call it actor i even think of it as acting because i'm still being at myself myself i'm reducing it in my own emotions i am connected to my body i m connecting to myself when i was doing this it might be a little might be somewhat of a performance that i am controlling however i'm still being authentically sobering myself to hide that tends i m not acting even though i must be intentionally tensionally changing the way that i'm expressing myself you can like these three social situations as well if somebody's telling a great story and it really engages a story your story they met purpose funny do some things to make that story a bit more attention they didn't mean they act so those are the reasons but why be comfortable numbers you take numbers don't take your high context on point and then let me just feel like number three making sure you can make the mistakes like that in your number 3 make sure the u s dynamic the u s dynamic is enthusiastical camera rather than being monotonous so it's the fundamentals so i hope you guys enjoy this video on this video if you're interested in again and again one pixel in enough which is a genius with a lot of information from the bicycle that's have down you need to have down to the techniques and how to deliver the addiction is what they've talked about and if you would have that printing out on your rule ready to go next let me show you a camera to quickly refer to it to refer to it and then say if you're there for freeze free it's free or being left in the distribute of this video you enjoy that i'm afforded to the video comes to buy clean sheets cheese you know"

def main():
    """Run the transcript clean-up once and print the restored text.

    Fetches the NLTK ``punkt_tab`` resource, reads the API key from the
    ``GEMINI_API_KEY`` environment variable, sends the module-level ``text``
    through ``client_response`` and prints what ``get_text`` extracts.
    """
    # Fetch the NLTK resource quietly before anything else
    nltk.download('punkt_tab', quiet=True)

    # Model settings
    gemini_model = "gemini-2.5-flash"
    sampling_temperature = 0.1

    # Credentials come from the environment
    gemini_key = os.getenv("GEMINI_API_KEY")

    # Query the model, pull the text out of the response and show it
    print(
        get_text(
            client_response(text, gemini_key, gemini_model, sampling_temperature)
        )
    )

In [None]:
# Entry point: run the clean-up pipeline only when this code is executed as
# the main program (not when it is imported from another module).
if __name__ == "__main__":
    main()

### **Backup**

In [None]:
def display_response(response):
    """Tokenize each chunk of a response and rebuild tidily spaced text.

    Args:
        response: Iterable of chunks, each exposing a ``text`` attribute
            (presumably a streaming generation result; confirm against the
            caller). ``None`` is accepted and yields an empty string.

    Returns:
        A single string: each chunk's tokens joined with spaces, with the
        spacing around punctuation and brackets cleaned up, and the chunks
        joined with single spaces.
    """
    sentences = []

    # ``response or []`` tolerates a None response instead of raising TypeError.
    for chunk in response or []:
        chunk_text = getattr(chunk, "text", None)
        if not chunk_text:
            # A chunk without text would crash the tokenizer (None) or add a
            # stray doubled space to the result (empty string), so skip it.
            continue

        words = nltk.word_tokenize(chunk_text)
        sentence = " ".join(words)  # join tokens with spaces

        # --- Clean punctuation spacing ---
        # remove spaces before punctuation
        sentence = re.sub(r'\s+([.,!?;:)])', r'\1', sentence)
        # Don't add space after punctuation if end of text
        sentence = re.sub(r'([.!?"])\s*$', r'\1', sentence)
        # remove spaces after opening brackets
        sentence = re.sub(r'([(])\s+', r'\1', sentence)
        # normalize multiple spaces
        sentence = re.sub(r'\s+', ' ', sentence)
        # Disabled experiment: remove space in between paired delimiters
        #sentence = re.sub(r"(\*\s*\*|``|'')\s*(.*?)\s*(\1)", r"\1\2\1", sentence)
        #sentence = re.sub(r"\s+(['’`´*])\s+", r"\1", sentence) 

        sentences.append(sentence)

    return " ".join(sentences)
        