In [39]:
!pip install llama_index
!pip install transformers
!pip install google-generativeai
!pip install sentence-transformers
!pip install pathway
!pip install llama-index-retrievers-pathway



In [40]:
import os
import getpass
import pandas as pd
import pathway as pw
from sentence_transformers import SentenceTransformer
from pathway.xpacks.llm.vector_store import VectorStoreServer
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.retrievers.pathway import PathwayRetriever
import google.generativeai as genai
from google.colab import drive

In [41]:
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

# Load the CSV stored on Drive into a pandas DataFrame.
# NOTE(review): `df` is not referenced by any later cell visible in this notebook,
# and the Pathway reader further down indexes "./data" rather than this path —
# confirm the CSV is also available under ./data if it is meant to be searchable.
csv_path = '/content/drive/MyDrive/1_10_seasons_tbbt.csv'
df = pd.read_csv(csv_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [42]:
import os
import getpass
from google.colab import userdata

# Read the secret named GOOGLE_API_KEY from Colab's userdata store.
# NOTE(review): `api_key` is not used by the later visible cells; the
# genai.configure(...) call fetches the same secret again on its own.
api_key = userdata.get('GOOGLE_API_KEY')

In [43]:
import pathway as pw

# Sources handed to the vector store. The single entry reads the local
# "./data" directory as binary, in streaming mode and with metadata attached,
# to track the files in the data directory.
data_sources = [
    pw.io.fs.read(
        "./data",
        format="binary",
        mode="streaming",
        with_metadata=True,
    ),
]

In [44]:
from sentence_transformers import SentenceTransformer
from typing import List

class CustomEmbedding:
    """Small adapter that exposes document/query embedding on a SentenceTransformer.

    Args:
        model_name: Model name or path passed straight to ``SentenceTransformer``.
            Defaults to 'all-MiniLM-L6-v2', the model this notebook originally
            hard-coded, so ``CustomEmbedding()`` behaves as before.
    """

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, documents: List[str]) -> List[List[float]]:
        """Embed a batch of texts.

        Args:
            documents: Texts to encode.

        Returns:
            One embedding per input text, each converted to a plain list via
            ``.tolist()``. An empty list/tuple yields ``[]`` without calling
            the model.
        """
        if isinstance(documents, (list, tuple)) and not documents:
            # Nothing to encode; skip the model call entirely.
            return []
        return self.model.encode(documents).tolist()

    def embed_query(self, query: str) -> List[float]:
        """Embed a single query string and return its vector as a plain list."""
        # Encode as a one-element batch and take the only row.
        return self.model.encode([query])[0].tolist()

# Instance of CustomEmbedding
# NOTE: `embed_model` is rebound further down to a HuggingFaceEmbedding; when the
# cells run in order, that later object is what the pipeline's transformations
# list receives, not this instance.
embed_model = CustomEmbedding()



In [45]:
from google.colab import output
# Turn on Colab's custom widget manager (support for third-party widgets,
# as reported in this cell's output).
output.enable_custom_widget_manager()

Support for third party widgets will remain active for the duration of the session. To disable support:

In [46]:
from google.colab import output
# Turn the custom widget manager off again, reversing the enable call made in
# the preceding cell.
output.disable_custom_widget_manager()

In [47]:
from pathway.xpacks.llm.vector_store import VectorStoreServer
from llama_index.core.node_parser import TokenTextSplitter
from pathway.xpacks.llm.vector_store import VectorStoreServer
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.retrievers.pathway import PathwayRetriever

In [48]:
!pip install llama-index-embeddings-huggingface



In [49]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import google.generativeai as genai

# Embedding model
# Rebinds `embed_model` (earlier assigned a CustomEmbedding instance) to a
# HuggingFaceEmbedding; this is the object placed in the transformations list below.
# Presumably this is the same MiniLM checkpoint the earlier class loads — confirm.
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [50]:
# Transformations handed to the vector store: a text splitter followed by the
# embedding model.
transformations_example = [
    TokenTextSplitter(
        chunk_size=150,  # presumably counted in tokens — confirm against TokenTextSplitter docs
        chunk_overlap=10,  # overlap between consecutive chunks (same unit as chunk_size)
        separator=" ",
    ),
    embed_model,
]

# Build the Pathway vector store from every entry in `data_sources` plus the
# transformations defined above.
processing_pipeline = VectorStoreServer.from_llamaindex_components(
    *data_sources,
    transformations=transformations_example,
)

# Pathway host and port (also used when the retriever client is created)
PATHWAY_HOST = "127.0.0.1"
PATHWAY_PORT = 8754

# Running the server
# With threaded=True the call returns a started Thread (see the cell output),
# so execution continues to the following cells.
# NOTE(review): re-running this cell presumably tries to start a second server
# on the same port — confirm before re-executing without a kernel restart.
processing_pipeline.run_server(
    host=PATHWAY_HOST, port=PATHWAY_PORT, with_cache=False, threaded=True
)

<Thread(VectorStoreServer, started 134363324872256)>

In [51]:
from llama_index.retrievers.pathway import PathwayRetriever

# Retriever client pointed at the same host/port the vector store server was
# started with.
retriever = PathwayRetriever(host=PATHWAY_HOST, port=PATHWAY_PORT)

In [52]:
!pip install google-generativeai

from google.colab import userdata
import google.generativeai as genai

# Configure the google.generativeai client with the GOOGLE_API_KEY secret read
# from Colab's userdata store.
genai.configure(api_key=userdata.get('GOOGLE_API_KEY'))

# Use a different variable name to avoid overwriting the module
# (i.e. never rebind `genai`). generate_response() reads this global.
# NOTE(review): confirm 'gemini-pro' is still an available model id for this API.
gemini_pro_model = genai.GenerativeModel('gemini-pro')

def generate_response(prompt, context, model=None):
    """Answer ``prompt`` in Sheldon Cooper's voice, grounded in ``context``.

    Args:
        prompt: The user's question; inserted after "Human:" in the prompt.
        context: Background text; inserted after "Context:" in the prompt.
        model: Optional object exposing ``generate_content(str)``. When omitted,
            the notebook-level ``gemini_pro_model`` is used, as before.

    Returns:
        The generated text, or the fixed apology string
        "I'm sorry, I don't have an answer to that." when the response carries
        no usable text (diagnostics are printed in that case).
    """
    if model is None:
        model = gemini_pro_model

    full_prompt = f"""You are simulating a conversation with Sheldon Cooper from the TV show 'The Big Bang Theory'.
    Use the following context to answer the question in Sheldon's characteristic style:

    Context: {context}

    Human: {prompt}

    Sheldon Cooper:"""

    response = model.generate_content(full_prompt)

    # `response.text` is an accessor that raises ValueError when the response
    # has no valid text part (for example a blocked response). hasattr() only
    # swallows AttributeError, so read it under an explicit try/except instead.
    try:
        text = response.text
    except (ValueError, AttributeError):
        text = None

    if text:
        return text

    print("Warning: Model did not generate a response or response is not in expected format. Check prompt, context, and API setup.")  # Log the issue
    print("Response object:", response)  # response object for debugging

    feedback = getattr(response, 'prompt_feedback', None)
    if feedback:
        print("Prompt feedback:", feedback)  # prompt-level block reason, if any

    # Safety ratings are attached to each entry of `candidates` (plural).
    for candidate in getattr(response, 'candidates', None) or []:
        ratings = getattr(candidate, 'safety_ratings', None)
        if ratings:
            print("Safety ratings:", ratings)  # safety blocks

    return "I'm sorry, I don't have an answer to that."  # default response



In [53]:
class CustomQueryEngine:
    """Retrieve-then-generate engine built around a single retriever."""

    def __init__(self, retriever):
        """Keep a reference to the retriever used for every query."""
        self.retriever = retriever

    def query(self, query_str):
        """Retrieve nodes for ``query_str`` and generate a reply from their content.

        The content of each retrieved node is joined with newlines and passed,
        together with the query, to ``generate_response``.
        """
        passages = []
        for hit in self.retriever.retrieve(query_str):
            passages.append(hit.get_content())
        return generate_response(query_str, "\n".join(passages))

# Notebook-wide engine instance wrapping the Pathway retriever; talk_to_sheldon()
# sends its queries through this object.
query_engine = CustomQueryEngine(retriever)

In [54]:
def talk_to_sheldon(query):
    """Send ``query`` through the notebook's ``query_engine`` and return its reply."""
    return query_engine.query(query)

# Example: one question sent through talk_to_sheldon(), with the reply printed.
print(talk_to_sheldon("What's your opinion on String Theory?"))

String Theory? A fascinating concept, but ultimately a speculative and unproven hypothesis that has yet to gain widespread acceptance within the scientific community. It proposes the existence of tiny, vibrating strings as the fundamental building blocks of the universe, but lacks a clear experimental framework for testing its validity. While it offers certain mathematical elegancies, its lack of empirical verification leaves it, at present, as a theoretical construct with limited scientific traction.


In [None]:
# Interactive chat: read a line, forward it to talk_to_sheldon(), print the reply.
# Typing quit / exit / bye (any case, surrounding whitespace ignored) ends the loop.
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or an interrupt while waiting at the prompt:
        # leave the loop cleanly instead of ending the cell with a traceback.
        print("\nGoodbye!")
        break
    if not user_input:
        # Blank line: ask again rather than sending an empty query to the model.
        continue
    if user_input.lower() in ['quit', 'exit', 'bye']:
        print("Goodbye!")
        break
    response = talk_to_sheldon(user_input)
    print("Sheldon:", response)

Sheldon: Greetings, puny human. It is I, the incomparable Sheldon Cooper. How may I assist you on this intellectually deficient planet?
