In [98]:
import os
from urllib.parse import parse_qs, urlparse

import google.generativeai as genai
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi

load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# Generation settings passed to every Gemini model created in this notebook.
model_config = dict(temperature=0.1, top_p=1, top_k=1)

# Instruction text for the summarisation request. It ends with ": " so the
# transcript can be appended directly after it.
summarizer_prompt = (
    "You are a Youtube video summarizer. \n"
    "You will be taking the transcript text and summarizing the content. "
    "Please provide the summary of the text given here : "
)

## Getting the transcript data from YouTube videos
def _video_id_from_url(youtube_video_url):
    """Return the video id embedded in a YouTube URL.

    Recognises ``watch?v=<id>`` (with any other query parameters, in any
    order), ``youtu.be/<id>`` short links, and ``/embed/<id>``,
    ``/shorts/<id>`` and ``/live/<id>`` paths.

    Raises:
        ValueError: if no video id can be found in the URL.
    """
    parts = urlparse(youtube_video_url.strip())

    # Read "v" from the parsed query string so that neighbouring parameters
    # such as "&t=42s" or "feature=share" never end up inside the id.
    ids_in_query = parse_qs(parts.query).get("v")
    if ids_in_query:
        return ids_in_query[0]

    path_segments = [segment for segment in parts.path.split("/") if segment]
    host = (parts.hostname or "").lower()
    if host in ("youtu.be", "www.youtu.be") and path_segments:
        return path_segments[0]
    if len(path_segments) >= 2 and path_segments[0] in ("embed", "shorts", "live"):
        return path_segments[1]

    raise ValueError(
        f"Could not find a YouTube video id in URL: {youtube_video_url!r}"
    )


def extract_transript_details(youtube_video_url):
    """Fetch the transcript of a YouTube video and return it as one string.

    Args:
        youtube_video_url: URL of the video, e.g.
            ``https://www.youtube.com/watch?v=<id>``.

    Returns:
        The ``"text"`` of every transcript segment, each preceded by a single
        space (so a non-empty result starts with a space).

    Raises:
        ValueError: if no video id can be found in the URL.
        Exception: anything raised by ``YouTubeTranscriptApi.get_transcript``
            propagates unchanged.
    """
    video_id = _video_id_from_url(youtube_video_url)
    transcript_segments = YouTubeTranscriptApi.get_transcript(video_id)
    return "".join(" " + segment["text"] for segment in transcript_segments)

# Summarise a transcript with Google Gemini Pro, steered by the given prompt
def generate_gemini_content(transcript_text, prompt):
    """Return Gemini's text reply to ``prompt`` followed by ``transcript_text``.

    A "gemini-pro" model is created on every call with the notebook-level
    ``model_config``. The two strings are concatenated as-is, so ``prompt``
    must carry its own trailing separator.
    """
    gemini = genai.GenerativeModel("gemini-pro", generation_config=model_config)
    request_text = prompt + transcript_text
    return gemini.generate_content(request_text).text


In [99]:
# Instruction text for the claim-extraction request. It ends with a newline,
# so the summary appended after it starts on its own line.
claim_prompt = (
    "You are a medical expert who needs to research the validity of claims for scientific evidence.\n"
    "You will be provided a text. Identify at most 5 health/medicine/science/nutrition related claims that you would like to verify.\n"
    "Ignore opinions. The claims needs to be given in single line points separated by *. "
    "Make sure to include medical/ health related terminology. \n"
)

def generate_gemini_claims(summary, prompt):
    """Return Gemini's text reply to ``prompt`` followed by ``summary``.

    Uses a freshly created "gemini-pro" model configured with the
    notebook-level ``model_config``; the strings are concatenated without
    any added separator.
    """
    claim_model = genai.GenerativeModel('gemini-pro', generation_config=model_config)
    reply = claim_model.generate_content(prompt + summary)
    claims_text = reply.text
    return claims_text

In [100]:
# End-to-end run for one video: transcript -> summary -> candidate claims.
# Each step's result is kept in its own notebook-level variable; `claims` is
# parsed into a list in a later cell.
youtube_link= "https://www.youtube.com/watch?v=iFtqudy39sA"
# Fetch the transcript and flatten it into a single string.
transcript_text=extract_transript_details(youtube_link)
# First Gemini request: summarise the transcript.
summary=generate_gemini_content(transcript_text, summarizer_prompt)
# Second Gemini request: list the claims found in the summary.
claims=generate_gemini_claims(summary, claim_prompt)

I0000 00:00:1724998884.145052  270049 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


In [101]:
# Turn Gemini's bulleted reply into a list of claim strings.
# The bullet style of the reply is not guaranteed, so each line is stripped
# first and "* ", "- " and "• " are all accepted as markers. Exactly the
# two-character marker is removed, leaving any further "*" in the claim intact.
# Lines without a marker, and bullets with no text, are dropped.
BULLET_PREFIXES = ("* ", "- ", "• ")
lines = [line.strip() for line in claims.splitlines()]
claims_list = [line[2:].strip() for line in lines if line.startswith(BULLET_PREFIXES)]
claims_list

['Morning sunlight improves mood, energy, and sleep.',
 'Caffeine consumption after 1.5-2 hours of waking prevents afternoon crashes.',
 'Protein-rich, fiber-filled breakfasts sustain energy throughout the day.',
 'Post-meal walks regulate glucose levels and avoid afternoon slumps.',
 'Cold showers boost dopamine levels and provide a temporary energy boost.']

In [120]:
# Instruction text for the keyword-extraction request.
# NOTE: the text ends without trailing whitespace or a newline.
keyword_prompt = (
    "You are a medical researcher who wants to check the validity of the following claim "
    "by searching for articles from pubmed. \n"
    "Extract at most 3 medical/health/nutrition related keywords summarizing the claim. "
    "The keywords should be single word as much as possible"
)

def generate_gemini_keywords(claims, keyword_prompt):
    """Ask Gemini for search keywords that summarise a claim.

    Args:
        claims: Text of the claim to extract keywords from.
        keyword_prompt: Instruction text placed before the claim.

    Returns:
        The ``text`` of the Gemini response.
    """
    model = genai.GenerativeModel('gemini-pro', generation_config=model_config)
    # Keep the instruction and the claim from running together
    # ("...as much as possibleMorning sunlight...") when the instruction does
    # not already end in whitespace.
    needs_separator = bool(keyword_prompt) and not keyword_prompt[-1].isspace()
    separator = "\n" if needs_separator else ""
    response = model.generate_content(keyword_prompt + separator + claims)
    return response.text


In [121]:
# Bare expression: displays the current claims_list as this cell's output.
claims_list

['Morning sunlight improves mood, energy, and sleep.',
 'Caffeine consumption after 1.5-2 hours of waking prevents afternoon crashes.',
 'Protein-rich, fiber-filled breakfasts sustain energy throughout the day.',
 'Post-meal walks regulate glucose levels and avoid afternoon slumps.',
 'Cold showers boost dopamine levels and provide a temporary energy boost.']

In [122]:

# Request keywords for each claim in turn and print every reply followed by
# two blank lines.
for claim in claims_list:
    response = generate_gemini_keywords(claim, keyword_prompt)
    print(response, end="\n\n\n")

- Sunlight
- Mood
- Sleep


- Caffeine
- Afternoon crashes
- Wakefulness


- Protein
- Fiber
- Breakfast


- Glucose
- Postprandial
- Ambulation


- Dopamine
- Energy
- Cold showers


