In [None]:
from rag_tests.db.mongo import connection
from rag_tests.core import get_logger
logger = get_logger(__name__)
mongo_db_connector = connection

Connection to database with uri: mongodb://admin:password@localhost:27017 successful


In [2]:
def fetch_posts_from_mongo_by_author_firstname(firstname:str):
    _database = mongo_db_connector.get_database("scrabble")
    collection = _database["posts"]
    return collection.find({"author_id": {"$regex": firstname, "$options": "i"}})

In [4]:
from rag_tests.models.base import BaseModel
from rag_tests.models import PostsRawModel, PostChunkModel, PostCleanedModel, PostEmbeddedChunkModel
from qdrant_client.models import Batch
from rag_tests.models.clean import PostCleanedModel
from rag_tests.data_logic.cleaning_data_handlers import PostCleaningHandler
from rag_tests.data_logic.chunking_data_handlers import PostChunkingHandler
from rag_tests.data_logic.embedding_data_handlers import PostEmbeddingDataHandler
from rag_tests.qdrant_db import QdrantDatabaseConnector
from rag_tests.config import settings
collection_name = f"TEST_VARUN_{settings.EMBEDDING_MODEL_ID.replace('/','_')}"

In [None]:
# def fetch_posts_from_warehouse_by_linkedin_id(linkedin_id:str):
#     _data_warehouse= mongo_db_connector.get_database("srabble_raw")
#     collection = _data_warehouse["linkedin_users_all_data"]

In [5]:
cursor = fetch_posts_from_mongo_by_author_firstname("varun")
raw_posts = []
seen_posts = set()
flag =True
for post in cursor:
    if flag : 
        print(post.keys())
        flag = False
    entry_id = post["_id"]
    if entry_id not in seen_posts:
        seen_posts.add(entry_id)
        # remove _id from post
        post.pop("_id")

        raw_posts.append(PostsRawModel(entry_id=entry_id, **post))


dict_keys(['_id', 'platform', 'content', 'author_id'])


In [6]:
raw_posts= raw_posts[:110]

In [None]:
cleaning_handler  = PostCleaningHandler()
cleaned_posts = []
for raw_post in raw_posts:
    cleaned_obj = cleaning_handler.clean(raw_post)
    cleaned_posts.append(cleaned_obj)

print(f"Number of cleaned posts: {len(cleaned_posts)}")


Number of cleaned posts: 110


In [None]:

chunking_handler = PostChunkingHandler()
chunked_posts = []
for cleaned_post in cleaned_posts:
    chunked_obj = chunking_handler.chunk(cleaned_post)
    chunked_posts.append(chunked_obj)

print(f"Number of chunked posts: {len(chunked_posts)}")


In [None]:

embedding_handler = PostEmbeddingDataHandler()
embedded_posts:list[PostEmbeddedChunkModel] = []
for chunked_post in chunked_posts:
    embedded_obj = embedding_handler.embedd(chunked_post[0])
    embedded_posts.append(embedded_obj)

print(f"Number of embedded posts: {len(embedded_posts)}")


In [None]:

qdrant_db = QdrantDatabaseConnector()
payloads = [
    post.to_payload() for post in embedded_posts
]
ids, vectors, metadatas = zip(*payloads)

collection_name = f"TEST_VARUN_{settings.EMBEDDING_MODEL_ID.replace('/','_')}"
qdrant_db.create_vector_collection(collection_name)
qdrant_db.write_data(collection_name, points=Batch(ids=ids, vectors=vectors, payloads=metadatas))
print(f"Chala bhai daaldi {len(embedded_posts)} posts qdrant mei")

In [7]:
from dotenv import load_dotenv
from rag_tests.llm.chain import GeneralQueryChain
from rag_tests.rag.retreiver import VectorRetriever
from langchain_openai import ChatOpenAI
from rag_tests.config import settings
from rag_tests.llm.prompt_templates import GetToneUnderstandingPromptTemplate


In [None]:
load_dotenv()
print("Starting the process")

author_id = "https://www.linkedin.com/in/varunaggarwal2"
query = """
    My author_id is https://www.linkedin.com/in/varunaggarwal2
    i want to draft a LinkedIn post discussing how to build startups?.
    """
retriever = VectorRetriever(query=query)
print(collection_name)
hits = retriever.retrieve_top_k(k=6, to_expand_to_n_queries=5, collection_name=collection_name)

reranked_hits = retriever.rerank(hits=hits, keep_top_k=5)
for rank, hit in enumerate(reranked_hits):
    print(f"{rank}: {hit}")
# reranked_hits = ["himanshu aggarwal < newline / > and i will talk through the aspiring minds story today at 530pm! < newline / > we will bring you insights using change engine ' s ace framework of building successful startups. < newline / > a. identifying the problem to solve < newline / > b. finding the right business model / markets < newline / > c. developing escape velocity < newline / > do not miss, register here : < newline / > [ url ] out ace here : < newline / > [ url ]", "join < newline / > manish gupta < newline / > and me today evening for an insightful conversation on ' building an ai startup in times of llm '. whether you are a seasoned entrepreneur or just starting your journey, this session is designed to be valuable for both sets of individuals. do not miss.....", 'at change engine, we talk to several ai startups. < doublelinebreak / > what do i look for in startups when making an investing decision? < doublelinebreak / > 1. problem understanding : do the founders truly understand the problem? have they done value chain mapping to know how the process works today, where the inefficiencies are, and why they exist? < doublelinebreak / > 2. product demo : the demo is crucial. is the ui / ux seamless? how well does the ai solve the problem? is it a painkiller or just a', 'the fight for the ace fellowship is heating up. we have 200 + applications from aspiring entrepreneurs who wants to participate in our 6 month program to build a < newline / > hashtag < newline / > # < newline / > jobtech < newline / > startup. < newline / > in parallel, we have started nurturing these entrepreneurs to fine tune their ideas to build scalable and impactful startups. < newline / > mukul singhal < newline / > from < newline / > pravega ventures < newline / > and i will discuss this thursday, " what business models attract', 'congratulations to the inaugural ace startup cohort 2023! < newline / > we are thrilled to announce that we found these kickass entrepreneurs and will be joining their exciting journey to success. with our passion for innovation and years of entrepreneurial experience, we will nurture their thoughts and collectively brew some groundbreaking ideas that can become thriving organizations over the next 6 months. < newline / > watch this space. we will keep you updated as these new founders make progress.']
model = ChatOpenAI(model=settings.OPENAI_MODEL_ID, temperature=0)   
get_tone_understanding_prompt_template = GetToneUnderstandingPromptTemplate()
prompt_template = get_tone_understanding_prompt_template.create_template()
chain = GeneralQueryChain().get_chain(
    llm=model, output_key="tone_understanding", template=prompt_template
)
response = chain.invoke({"author_id":"https://www.linkedin.com/in/varunaggarwal2", "posts":reranked_hits})
logger.info(f"Get Tone Understanding.generate_response() result: \033[93m{response['tone_understanding']}\033[0m")



In [None]:

prompt = f"""
You are a helpful assistant that can draft a LinkedIn post.
USING ONLY THE FOLLOWING tone, patterns and voice of the author {author_id} extracted from similar posts to the current topic, draft me a LinkedIn post discussing how to build startups.

The tone, patterns and voice of the author is: {response['tone_understanding']}
"""
response = model.invoke(prompt)
logger.info(f"Response: \033[93m{response.content}\033[0m")



Response: [93mBuilding a startup is no small feat. It requires a blend of passion, resilience, and a deep understanding of the market. But, how do you navigate this complex journey? Here are some insights from my experience in the AI and deeptech sectors.

Firstly, it's crucial to identify a problem worth solving. Startups thrive on innovation, and the most successful ones are those that address a real, tangible problem. This is particularly true in the AI and deeptech sectors, where the potential for disruption is immense.

Secondly, assemble a team that shares your vision. Building a startup is a collective effort, and having a team that is as passionate about the problem as you are can make all the difference. Remember, a startup is only as strong as its weakest link.

Thirdly, be prepared to pivot. The startup journey is rarely a straight line. It's a series of twists and turns, and being able to adapt to changing circumstances is key. This is especially true in the fast-paced wor

--- Logging error ---
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.12/3.12.5/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py", line 464, in format
    return self._format(record)
           ^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.12/3.12.5/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py", line 460, in _format
    return self._fmt % values
           ~~~~~~~~~~^~~~~~~~
KeyError: 'collection_name'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.12/3.12.5/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py", line 1160, in emit
    msg = self.format(record)
          ^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.12/3.12.5/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py", line 999, in format
    return fmt.format(record)
        