In [1]:
import os
from PIL import Image
import requests
from transformers import BlipProcessor, BlipForConditionalGeneration

_BLIP_MODEL_ID = "Salesforce/blip-image-captioning-large"
_BLIP_CACHE = {}  # holds the loaded (processor, model) pair between calls


def _load_blip():
    """Return the BLIP (processor, model) pair, loading the weights only on first use."""
    if "pair" not in _BLIP_CACHE:
        processor = BlipProcessor.from_pretrained(_BLIP_MODEL_ID)
        model = BlipForConditionalGeneration.from_pretrained(_BLIP_MODEL_ID)
        # Store both at once so a failed model load never leaves a half-filled cache.
        _BLIP_CACHE["pair"] = (processor, model)
    return _BLIP_CACHE["pair"]


def generate_caption(image_name, images_dir='docs/images'):
    """
    Caption an image with the BLIP image-captioning model.

    Two captions are generated: a conditional one seeded with the prompt
    "a photography of", which is printed, and an unconditional one, which
    is returned.

    :param image_name: file name of the image inside `images_dir`
    :param images_dir: directory containing the images (defaults to 'docs/images')
    :return: the unconditional caption as a string
    :raises FileNotFoundError: if the image file does not exist (raised by PIL)
    """
    # Loading the processor/model is expensive, so reuse them across calls.
    processor, model = _load_blip()

    # Path to the image in the images directory
    img_path = os.path.join(images_dir, image_name)
    # convert() returns a new in-memory image, so the file handle can be closed right away.
    with Image.open(img_path) as img:
        raw_image = img.convert('RGB')

    # conditional image captioning
    text = "a photography of"
    inputs = processor(raw_image, text, return_tensors="pt")

    out = model.generate(**inputs)
    print( processor.decode(out[0], skip_special_tokens=True))

    # unconditional image captioning
    inputs = processor(raw_image, return_tensors="pt")

    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

# ------------- Retrieval-Augmented Generation  ------------- #

def get_docs():
    """
    Build the knowledge base by reading every ``*.txt`` file under ``docs/lyrics``.
    Each file is loaded as one document.
    :return: the loaded documents
    """
    # TextLoader handles each matched file; the directory loader walks the folder.
    return DirectoryLoader("docs/lyrics", "*.txt", loader_cls=TextLoader).load()

def split_text(docs):
    """
    Break the loaded documents into small overlapping chunks.
    A whole document may be too long for most models, and even when it fits the
    model can struggle to find the relevant context, so we work with chunks.
    :param docs: documents to be split
    :return: the resulting chunks
    """
    # RecursiveCharacterTextSplitter is the recommended splitter for generic text.
    splitter_options = {
        "chunk_size": 250,
        "chunk_overlap": 25,
        "add_start_index": True,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(docs)

def get_data_store(chunks, batch_size=1000):
    """
    Embed the chunks and store them in a new Chroma vector store, inserting
    them in batches of `batch_size`.
    :param chunks: chunks to embed and store
    :param batch_size: maximum number of chunks inserted per call
    :return: the populated store, or None when there is nothing to insert
    """
    # Local sentence-transformers model (the original note mentions OpenAIEmbeddings() rate limits).
    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},  # TODO gpu
    )

    store = None
    for start in range(0, len(chunks), batch_size):
        piece = chunks[start:start + batch_size]
        if store is not None:
            # Every batch after the first is appended to the existing store.
            store.add_documents(piece)
            continue
        # The first batch creates the store.
        store = Chroma.from_documents(documents=piece, embedding=embeddings)

    return store

In [3]:
import os, sys, warnings
from dotenv import load_dotenv, find_dotenv
# Load environment variables from the .env file located by find_dotenv().
# The result is bound to `_` because it is not used afterwards.
# NOTE(review): presumably this provides the Hugging Face API token used later — confirm.
_ = load_dotenv(find_dotenv())

# Build the retrieval index once; `db` is the vector store queried by generate_response.
docs = get_docs()           # Load custom files
chunks = split_text(docs)   # Split into chunks
db = get_data_store(chunks) # Generate vectorstore

  embeddings = HuggingFaceEmbeddings( #  embedding=OpenAIEmbeddings() rate limit


In [4]:
# Number of chunks requested from the retriever (passed as `k` in generate_response);
# also passed to get_retrieved_chunks by postprocess_response as the slice bound.
N_RETRIEVED_CHUNKS = 10

In [5]:
from langchain_community.llms import HuggingFaceHub
from langchain.chains import RetrievalQA

def generate_response(db, prompt):
    """
    Answer `prompt` with an LLM, using chunks retrieved from `db` as context.
    :param db: vector store exposing `as_retriever`
    :param prompt: the question / instruction sent to the chain
    :return: chatbot response
    """
    generation_settings = {
        "max_new_tokens": 512,      # upper bound on the number of generated tokens
        "top_k": 30,                # sample only among the 30 most likely tokens
        "temperature": 0.3,         # low randomness -> more deterministic output
        "repetition_penalty": 1.2,  # discourage repeating the same token sequences
    }
    llm = HuggingFaceHub(
        repo_id="HuggingFaceH4/zephyr-7b-beta",  # model id
        task="text-generation",                  # task the model is intended to perform
        model_kwargs=generation_settings,
    )

    # MMR search returning N_RETRIEVED_CHUNKS chunks per query.
    context_retriever = db.as_retriever(
        search_type="mmr",
        search_kwargs={"k": N_RETRIEVED_CHUNKS},
    )

    # "stuff" chain: the retrieved chunks are placed directly into the LLM prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=context_retriever,
        verbose=False,
    )
    return qa_chain.run(prompt)

In [6]:
def get_retrieved_chunks(response, retriever_k):
    """
    Extract the retrieved context chunks from the raw chain response.

    The response is split on blank lines; the first section is skipped and
    the next `retriever_k` sections are returned.
    NOTE(review): assumes the response echoes the prompt as one leading
    instruction paragraph followed by one section per retrieved chunk —
    confirm against the chain's prompt template.

    :param response: raw text returned by the chain
    :param retriever_k: number of chunks the retriever was asked for
    :return: list with up to `retriever_k` chunk strings
    """
    sections = response.split('\n\n')
    # Sections 1..k hold the chunks, so the end bound must be k + 1;
    # the previous bound of k silently dropped the last chunk.
    return sections[1:retriever_k + 1]

In [7]:
def postprocess_response(response):
    """
    Split the raw chain response into the final answer and the context chunks.
    :param response: raw text returned by the chain
    :return: tuple (answer, retrieved_chunks)
    """
    marker = "Helpful Answer: "
    context_chunks = get_retrieved_chunks(response, N_RETRIEVED_CHUNKS)

    # Keep only what follows the first marker; without a marker keep everything.
    _, found, tail = response.partition(marker)
    answer_text = (tail if found else response).strip()

    return answer_text, context_chunks

In [8]:
def query_rewriting(response):
    """
    Clean a caption by dropping everything up to and including the first
    word that starts with "araf".
    NOTE(review): such tokens ("arafed", "araffe", ...) are presumably an
    artefact of the captioning model — confirm.

    :param response: caption text
    :return: the text following the "araf..." token, stripped; or the whole
             stripped text when no such token is present
    """
    start = response.find("araf")
    if start == -1:
        return response.strip()

    # Skip the whole alphabetic token instead of a fixed six characters, so
    # longer tokens leave no fragment and shorter ones do not eat real text.
    end = start
    while end < len(response) and response[end].isalpha():
        end += 1
    return response[end:].strip()

In [14]:
# Interactive driver: caption every image the user names, then ask the LLM
# for song lyrics about the collected captions.
print("Chatbot: Hola! Por favor inserte el nombre de la imagen que quieras usar para escribir una canción. Las imágenes se encuentran dentro de la carpeta /docs/. Escribe done cuando termines.")
captions = []
user_input = input("You: ")
while user_input != "done":
    captions.append(query_rewriting(generate_caption(user_input)))
    user_input = input("You: ")

# Join with a separator: plain string concatenation glued the captions of
# several images together without any space between them.
cap = ", ".join(captions)

response = generate_response(db, f"Write a song lyrics about \"{cap}\" having intro, verse 1, chorus 1, verse 2, chorus 2, bridge, chorus 3, outro. Ignore the words intro, verse, chorus and outro.")
# Note: `chunks` is rebound here to the context chunks extracted from the response.
out, chunks = postprocess_response(response)
print(f"Chatbot: {out}")

Chatbot: Hola! Por favor inserte el nombre de la imagen que quieras usar para escribir una canción. Las imágenes se encuentran dentro de la carpeta /docs/. Escribe done cuando termines.




a photography of a lake with a mountain in the background
Chatbot: Intro:
Amidst the crisp autumn air,
The mountain stands tall and proud,
A tranquil lake nestled at its base,
Red leaves dancing in the wind like a crowd. Verse 1:
As I gaze upon that distant sight,
My heart skips a beat or two,
Memories flood back in an instant,
Of times when life was brand new. Chorus 1:
Oh, how the beauty takes my breath away,
This scene etched deep within my soul,
With every passing moment,
It grows more radiant, more whole. Verse 2:
Walking along the shoreline's edge,
Leaves rustling underfoot,
Whispers echoing off the stillness,
Nature's symphony, pure and moot. Chorus 2:
Here where time seems to slow,
Each step taken is a gift,
For in these moments, nothing else matters,
Only the mountain, the lake, and the shift. Bridge:
Letting go of what once held sway,
Embracing change with open arms,
Surrendering to the ebb and flow,
Of nature's rhythmic charms. Chorus 3:
May this place always hold such power

In [11]:
# Display `chunks`, which the chat cell binds to the context chunks returned by postprocess_response.
chunks

['[Intro]\nPlease, please tell me now\nPlease, please tell me now\nPlease, please tell me now\nPlease, please tell me now\n[Verse 1]\nI made a break, I run out yesterday\nTried to find my mountain hideaway\nMaybe next year, maybe no go',
 "Mountains come out of the sky and they stand there\nOne mile over we'll be there and we'll see you\nTen true summers we'll be there and laughing tooTwenty four before my love you'll see\nI'll be there with you\n[Outro with Vocalizations]",
 "[Bridge]\n(Hoo hoo hoo)\n[Verse 2]Reach your hand down into the cooler\nDon't drink it if the mountains aren't blueTry to keep it steady as you recline on your black inner tube\n[Chorus]\n[Bridge]\n(Hoo hoo hoo)\n(Hoo hoo hoo)",
 "Cause I'm counting on\nA new beginning, a reason for living\nA deeper meaning, yeah\n[Chorus]\nI want to stand with you on a mountain\nI want to bathe with you in the sea\nI want to lay like this forever\nUntil the sky falls down on me",
 'There are hills and mountains between us\nAlway