In [1]:
import os
from PIL import Image
import requests
from transformers import BlipProcessor, BlipForConditionalGeneration

def generate_caption(image_name):
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

    # Path to the image in the images directory
    img_path = os.path.join('docs/images', image_name)
    raw_image = Image.open(img_path).convert('RGB')

    # conditional image captioning
    text = "a photography of"
    inputs = processor(raw_image, text, return_tensors="pt")

    out = model.generate(**inputs)
    print( processor.decode(out[0], skip_special_tokens=True))

    # unconditional image captioning
    inputs = processor(raw_image, return_tensors="pt")

    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

# ------------- Retrieval-Augmented Generation  ------------- #

def get_docs():
    """
    Loads each file into one document, like knowledge base
    :return: docs
    """

    loader = DirectoryLoader("docs/lyrics", "*.txt", loader_cls=TextLoader)  # Reads custom data from local files

    docs = loader.load()
    return docs

def split_text(docs):
    """
    Get chunks from docs. Our loaded doc may be too long for most models, and even if it fits is can struggle to find relevant context. So we generate chunks
    :param docs: docs to be split
    :return: chunks
    """

    text_splitter = RecursiveCharacterTextSplitter( # recommended splitter for generic text
        chunk_size=250,
        chunk_overlap=25,
        add_start_index=True
    )
    chunks = text_splitter.split_documents(docs)

    return chunks

def get_data_store(chunks, batch_size=1000):
    """
    Store chunks into a db. ChromaDB uses vector embeddings as the key, creates a new DB from the documents
    :param docs:
    :param chunks:
    :return: database
    """
    embeddings = HuggingFaceEmbeddings( #  embedding=OpenAIEmbeddings() rate limit
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'} # TODO gpu
    )

    db = None
    for i in range(0, len(chunks), batch_size):
        batch = chunks[i:i + batch_size]
        if db is None:
            db = Chroma.from_documents(documents=batch, embedding=embeddings)
        else:
            db.add_documents(batch)

    return db

In [3]:
import os, sys, warnings
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

docs = get_docs()           # Load custom files
chunks = split_text(docs)   # Split into chunks
db = get_data_store(chunks) # Generate vectorstore

  embeddings = HuggingFaceEmbeddings( #  embedding=OpenAIEmbeddings() rate limit


In [4]:
from langchain_community.llms import HuggingFaceHub
from langchain.chains import RetrievalQA

def generate_response(db, prompt):
    """
    Generate a response with a LLM based on previous custom context
    :return: chatbot response
    """

    hf_llm = HuggingFaceHub(
        repo_id="HuggingFaceH4/zephyr-7b-beta",  # Model id
        task="text-generation",                  # Specific task the model is intended to perform
        model_kwargs={
            "max_new_tokens": 512,               # The maximum number of tokens to generate in the response.  Limits the length of the generated text to ensure responses are concise or fit within certain constraints.
            "top_k": 30,                         # Limits the sampling pool to the top k tokens, increasing focus on more likely tokens
            "temperature": 0.3,                  # Controls the randomness of predictions, with lower values making the output more deterministic. : Produces more focused and less random text by making the model more confident in its choices.
            "repetition_penalty": 1.2,           # Penalizes repeated tokens to avoid repetitive output.  Discourages the model from repeating the same token sequences, resulting in more varied and natural text.
        },
    )

    chain = RetrievalQA.from_chain_type( # Generate chat model based on previous llm
        llm=hf_llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_type="mmr", search_kwargs={"k": 10}),
        verbose=False
    )

    response = chain.run(prompt)
    return response

In [5]:
def postprocess_response(response):
    answer_start = response.find("Helpful Answer: ")
    if answer_start != -1:
        answer = response[answer_start + len("Helpful Answer: "):].strip()
    else:
        answer = response.strip()

    return answer

In [6]:
def query_rewriting(response):
    answer_start = response.find("araf")
    if answer_start != -1:
        answer = response[answer_start + len("araful"):].strip()
    else:
        answer = response.strip()

    return answer

In [9]:
print("Chatbot: Hola! Por favor inserte el nombre de la imagen que quieras usar para escribir una canción. Las imágenes se encuentran dentro de la carpeta /docs/. Escribe done cuando termines.")
#user_input = ""
user_input = input("You: ")
cap = ""
while user_input != "done":
    cap += query_rewriting(generate_caption(user_input))
    user_input = input("You: ")

response = generate_response(db, f"Write a song lyrics about \"{cap}\" having intro, verse 1, chorus 1, verse 2, chorus 2, bridge, chorus 3, outro. Ignore the words intro, verse, chorus and outro.")
postprocess_response(response)
print(f"Chatbot: {(response)}")

Chatbot: Hola! Por favor inserte el nombre de la imagen que quieras usar para escribir una canción. Las imágenes se encuentran dentro de la carpeta /docs/. Escribe done cuando termines.




a photography of a busy city street with taxis and pedestrians
a photography of a lake with a mountain in the background


FileNotFoundError: [Errno 2] No such file or directory: '/Users/milagros.cordinez/Documents/Facultad/image-lyrics/docs/images/done'