In [41]:
import os
import tiktoken
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.chains import LLMChain
from typing import List
from dotenv import load_dotenv

### Functions

In [34]:
def get_transcript_from_multiple_videos(video_urls: List[str]) -> List:
    """Collect the transcript text of each YouTube video, in input order.

    Every URL is loaded through ``YoutubeLoader.from_youtube_url(...).load()``
    and only the ``page_content`` of the first document returned is kept.

    Args:
        video_urls: YouTube video URLs to fetch transcripts for.

    Returns:
        A list with one ``page_content`` value per URL; empty when
        ``video_urls`` is empty.

    Raises:
        IndexError: If a loader returns no documents for a URL.
    """
    return [
        YoutubeLoader.from_youtube_url(url).load()[0].page_content
        for url in video_urls
    ]

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return how many tokens ``string`` produces under a tiktoken encoding.

    Args:
        string: Text to tokenize.
        encoding_name: Name handed to ``tiktoken.get_encoding``
            (the notebook uses ``"cl100k_base"``).

    Returns:
        Length of the sequence returned by the encoding's ``encode`` call.
    """
    return len(tiktoken.get_encoding(encoding_name).encode(string))

def create_db_from_textfile(
    filePath: str,
    embeddings,
    persist_dir: str,
    chunk_size: int,
    chunk_overlap: int = 0,
) -> Chroma:
    """Build a Chroma vector store from a text file.

    The file is loaded with ``TextLoader``, split with a tiktoken-based
    ``RecursiveCharacterTextSplitter`` and the resulting chunks are handed to
    ``Chroma.from_documents``.

    Args:
        filePath: Path of the text file to load.
        embeddings: Embedding object forwarded to Chroma as ``embedding``.
        persist_dir: Directory forwarded to Chroma as ``persist_directory``.
        chunk_size: Chunk size passed to the splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter. Defaults to 0, which is what this function previously
            hard-coded, so existing callers behave the same.

    Returns:
        The store returned by ``Chroma.from_documents``.
    """
    raw_document = TextLoader(filePath).load()
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs = text_splitter.split_documents(raw_document)
    return Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=persist_dir,
    )


### Download transcript and save it to file

In [13]:
# Alternative list of video URLs. Every entry is currently commented out, so
# galpin_videos is an empty list and is not used further down in this cell.
galpin_videos = [
    # "https://www.youtube.com/watch?v=zEYE-vcVKy8&list=PLPNW_gerXa4N_PVVoq0Za03YKASSGCazr",
    # "https://www.youtube.com/watch?v=CyDLbrZK75U&list=PLPNW_gerXa4N_PVVoq0Za03YKASSGCazr&index=2",
    # "https://www.youtube.com/watch?v=oNkDA2F7CjM&list=PLPNW_gerXa4N_PVVoq0Za03YKASSGCazr&index=3",
    # "https://www.youtube.com/watch?v=UIy-WQCZd4M&list=PLPNW_gerXa4N_PVVoq0Za03YKASSGCazr&index=4",
    # "https://www.youtube.com/watch?v=juD99_sPWGU&list=PLPNW_gerXa4N_PVVoq0Za03YKASSGCazr&index=5",
    # "https://www.youtube.com/watch?v=q37ARYnRDGc&list=PLPNW_gerXa4N_PVVoq0Za03YKASSGCazr&index=6"
]

# The single video whose transcript is actually downloaded.
sleep_video =[
    "https://www.youtube.com/watch?v=BMTt8gSl13s"
]

# Choose which URL list to process, then fetch one transcript per URL.
video_urls = sleep_video
transcript = get_transcript_from_multiple_videos(video_urls=video_urls)

# Save the transcripts to disk, each followed by a newline, so later cells can
# work from the file instead of downloading again.
# NOTE(review): open() is called without an explicit encoding, so the platform
# default is used - confirm this is fine for transcripts with non-ASCII text.
filePath = "hub_sleep.txt"
with open(filePath, "w") as file:
    for item in transcript:
        file.write("%s\n" % item)

### Load from saved file and show token count

In [14]:
# Read the saved transcript file back into a single string.
interview = ''
with open(filePath, "r") as file:
    interview = file.read()
# Last expression of the cell, so the notebook displays the token count.
num_tokens_from_string(interview, "cl100k_base")

31075

### Esto es para ir viendo, dado el chunk_size, cómo te quedan los textos

In [15]:
# Preview how the transcript is chunked for a given chunk_size: split the text,
# then show the token count and the content of one chunk.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000, chunk_overlap=0
)
texts = text_splitter.split_text(interview)
idx = 0  # index of the chunk to inspect
# A bare expression is only displayed when it is the last statement of a cell,
# so the token count has to be printed explicitly to appear in the output.
print(num_tokens_from_string(texts[idx], "cl100k_base"))
print(texts[idx])

### Configuraciones

In [20]:
# The tk.env file is expected to contain a line of the form OPENAI_API_KEY="your token"
load_dotenv('tk.env')
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# The key is passed explicitly to the embeddings client.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
# NOTE(review): "gpt-3.5-turbo-0613" looks like a chat model name while OpenAI
# here is imported from langchain.llms - confirm this wrapper is the intended one.
llm = OpenAI(model_name="gpt-3.5-turbo-0613")



### Crear la vector database y persistir. Esto para utilizarla y ahorrar llamadas a la api más abajo

In [35]:
# Build the Chroma store from the saved transcript file and persist it, so the
# cells below can reuse it.
# NOTE(review): re-running this cell presumably recomputes the embeddings and
# writes into the same directory again - verify before re-executing.
persist_directory = 'sleep_db'
chunk_size = 4000
db = create_db_from_textfile(filePath, embeddings, persist_directory, chunk_size)
db.persist()

In [36]:
# Use this to load the persisted store; it saves recomputing the embeddings if
# they were already created.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

#### Acá me traigo el similarity search. Esto por ejemplo se lo puedo pegar al Web ChatGPT como contexto o más abajo se lo paso al chain

In [37]:
# Query the vector store for the chunks most similar to the search phrase.
query1 = "sleep importance"
docs = vectordb.similarity_search(query1)

# Print the first result and keep its text; similarity_result is later passed
# to the LLM chain as the {document} context.
print(docs[0].page_content)
similarity_result = docs[0].page_content


and sleep, and sleep? GINA POE: Let them sleep. ANDREW HUBERMAN: OK. So that's the one exception. What about-- GINA POE: Just like babies. ANDREW HUBERMAN: OK. GINA POE: When you're developing
something in your brain or the rest of your
body, you really need sleep to help
organize that. Sleep is doing really hard
work in organizing our brains and making it develop right. And if we deprive
ourselves of sleep, we will-- actually,
also just like I said, we have a daily clock. We also have a
developmental clock. And we can miss a
developmental window if we don't let ourselves
sleep extra like we need to. ANDREW HUBERMAN: What other
things inhibit growth hormone release or other components
of this first stage of sleep? In other words, if I
go to sleep religiously every night at
10:00 PM, are there things that I, perhaps,
do in the preceding hours of the preceding day
like ingest caffeine or alcohol that can make that
first stage of sleep less effective even if I'm
going to sleep at the same

### Los templates: uno en HTML y otro en Markdown

In [61]:
# Prompt template asking the LLM to write a magazine article as a full HTML page.
# {document} is the only placeholder; the bracketed "[LLM: ...]" notes are
# instructions for the model and are part of the prompt text itself.
templateHTML = """

Given the context:
"{document}"

Please generate an article for an online magazine using the following HTML structure:

---
<!DOCTYPE html>
<html>
<head>
    <title>[LLM: Extract a suitable title from the context or craft one that best represents the main theme.]</title>
</head>
<body>

<!-- Header Section -->
<div class="header">
    <h1>[LLM: Reiterate the blog title.]</h1>
    <h3>By [Author Name]</h3>
    <p>Date: [LLM: Insert today's date in 'Month Day, Year' format.]</p>
</div>

<!-- Main Content -->
<div class="main-content">

    <!-- Introduction -->
    <div class="section introduction">
        <h2>Introduction</h2>
        <p>[LLM: Briefly introduce the topic or the central theme extracted from the context.]</p>
        <p>[LLM: Share why this topic is relevant or timely for the readers.]</p>
    </div>

    <!-- Subheading 1 -->
    <div class="section">
        <h2>[LLM: Extract or generate a subheading title from the context.]</h2>
        <p>[LLM: Extract or write an interesting fact, anecdote, or quote from the context.]</p>
        <p>[LLM: Discuss the main points related to this subheading based on the context.]</p>
    </div>

    <!-- Subheading 2 -->
    <div class="section">
        <h2>[LLM: Extract or generate another subheading title from the context.]</h2>
        <p>[LLM: Dive deeper into the topic using the context, sharing more specifics.]</p>
        <p>[LLM: Consider using bullet points or numbered lists for clarity based on the context.]</p>
    </div>

    <!-- Subheading 3 -->
    <div class="section">
        <h2>[LLM: Extract or generate another subheading title from the context.]</h2>
        <p>[LLM: Discuss contrasting views, recent advancements, or personal experiences from the context.]</p>
        <p>[LLM: Highlight expert opinions or quotes from the context to build credibility.]</p>
    </div>

    <!-- Conclusion -->
    <div class="section conclusion">
        <h2>Conclusion</h2>
        <p>[LLM: Summarize the main points discussed in the article.]</p>
        <p>[LLM: Share your final thoughts or personal takeaways based on the context.]</p>
        <p>[LLM: End with a question or call-to-action (CTA) to engage readers in comments or sharing.]</p>
    </div>

</div>

<!-- Author Bio -->
<div class="author-bio">
    <h3>About the Author</h3>
    <p>[LLM: Craft a short description of the author using the context or provide a generic background. Optionally, use links if mentioned in context.]</p>
</div>

<!-- Related Posts -->
<div class="related-posts">
    <h3>Related Posts</h3>
    <ul>
        <li>[LLM: If there are any related articles mentioned in the context, list them here.]</li>
    </ul>
</div>

<!-- Comments Section -->
<div class="comments-section">
    <h3>Leave a Comment Below:</h3>
    <form>
        <textarea></textarea>
        <button type="submit">Submit</button>
    </form>
</div>

</body>
</html>

"""

In [None]:
# Prompt template asking the LLM to write a magazine article as a Markdown post
# with a front-matter header. {document} is the only placeholder; the bracketed
# "[LLM: ...]" notes are instructions for the model and part of the prompt text.
templateMD = """
Given the context:
"{document}"

Please generate an article for an online magazine using the following structure:

---
layout: post
title: "[LLM: Extract a suitable title from the context or craft one that best represents the main theme.]"
author: "[Your Name or LLM: Suggest an author name based on the context.]"
date: "[LLM: Insert today's date in 'YYYY-MM-DD' format.]"
---

![Header Image](URL_TO_YOUR_IMAGE)

# [LLM: Reiterate the blog title.]

**By [Author Name]**  
*Date [Month Day, Year]*

## Introduction
- [LLM: Briefly introduce the topic or the central theme extracted from the context.]
- [LLM: Share why this topic is relevant or timely for the readers.]

## [LLM: Extract or generate a subheading title from the context.]
- [LLM: Extract or write an interesting fact, anecdote, or quote from the context.]
- [LLM: Discuss the main points related to this subheading based on the context.]

## [LLM: Extract or generate another subheading title from the context.]
- [LLM: Dive deeper into the topic using the context, sharing more specifics.]
- [LLM: Consider using bullet points or numbered lists for clarity based on the context.]

## [LLM: Extract or generate another subheading title from the context.]
- [LLM: Discuss contrasting views, recent advancements, or personal experiences from the context.]
- [LLM: Highlight expert opinions or quotes from the context to build credibility.]

## Conclusion
- [LLM: Summarize the main points discussed in the article.]
- [LLM: Share your final thoughts or personal takeaways based on the context.]
- [LLM: End with a question or call-to-action (CTA) to engage readers in comments or sharing.]

---

#### Author Bio:
[LLM: Craft a short description of the author using the context or provide a generic background. Optionally, use links if mentioned in context.]  

---

### Related Posts
- [LLM: If there are any related articles mentioned in the context, list them here.]

---

**Leave a Comment Below:**

"""

### Finalmente ejecutas la llamada

In [62]:
# Select the prompt template to use, build the chain and generate the article
# from the text retrieved by the similarity search.
# NOTE(review): the saved output of this cell is an HTML page although
# templateMD is selected here, and the templateMD cell shows no execution
# count - the output looks stale; re-run the notebook top to bottom to confirm.
template = templateMD
# The LLM is instantiated again here with the same model name as in the
# configuration cell.
llm = OpenAI(model_name="gpt-3.5-turbo-0613")
# llm = OpenAI(model_name="gpt-4")
# "document" is the single placeholder that both templates define.
prompt = PromptTemplate(input_variables=["document"], template=template)
chain = LLMChain(llm=llm, prompt=prompt)
chain_result = chain.run(document=similarity_result)
print(chain_result)

<!DOCTYPE html>
<html>
<head>
    <title>The Importance of Sleep and its Impact on Brain Development</title>
</head>
<body>

<!-- Header Section -->
<div class="header">
    <h1>The Importance of Sleep and its Impact on Brain Development</h1>
    <h3>By SleepEnthusiast123</h3>
    <p>Date: October 10, 2022</p>
</div>

<!-- Main Content -->
<div class="main-content">

    <!-- Introduction -->
    <div class="section introduction">
        <h2>Introduction</h2>
        <p>Sleep is an essential aspect of our lives, and its significance goes beyond mere rest and relaxation. Recent research has shed light on the crucial role sleep plays in brain development and overall cognitive function. In this article, we will explore the various stages of sleep and delve into how they contribute to the growth and organization of our brains.</p>
        <p>Understanding the impact of sleep on brain development is particularly important for individuals seeking to optimize their mental capabilities and en