In [1]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.anthropic import Anthropic
from llama_index.core import Settings
from llama_index.core.memory import ChatMemoryBuffer
from dotenv import load_dotenv
import faiss
import streamlit as st

In [2]:
# Read key/value pairs from a local .env file into the process environment
# (presumably this is where the Anthropic API key lives - confirm).
# The boolean result is left as the last expression so the cell still displays it.
dotenv_loaded = load_dotenv()
dotenv_loaded

True

In [15]:
# 1. Define all the models to be used
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
CHAT_MODEL_NAME = "claude-3-5-haiku-20241022"

# Persona and answer-format instructions for the chat model.
# The text, including its embedded indentation, is kept verbatim.
NIKO_SYSTEM_PROMPT = """You are Niko Canner, an entrepreneur,investor, philosopher, thought leader, and excellent writer.
                Return your answers in language that is accessible, concise, precise, but insightful. 
                Write the response in first person in the voice of Niko. Keep the tone similar to the original text
                that you are summarizing. Each response you give is short, no more than 300 words max.
                For every response, list the titles of the sources you drew the response from at the end. If you
                don't find any sources, write "None" in the sources list.
                """

# Embedding model used to vectorise the documents and the queries.
# `device` is intentionally not hard-coded to "mps": that value only works on
# Apple-silicon machines. With no device given, HuggingFaceEmbedding is expected
# to select an available backend on its own (still MPS where present).
embed_llm = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

# Chat model that writes the final answers in Niko's voice.
query_llm = Anthropic(
    model=CHAT_MODEL_NAME,
    temperature=0.7,
    system_prompt=NIKO_SYSTEM_PROMPT,
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [16]:
# Register the models and chunking parameters as llama-index global defaults,
# so later index/query calls pick them up without explicit arguments.
CHUNK_SIZE = 512  # limit of our chosen embedding model
CHUNK_OVERLAP = 100

Settings.llm = query_llm
Settings.embed_model = embed_llm
Settings.chunk_size = CHUNK_SIZE
Settings.chunk_overlap = CHUNK_OVERLAP

In [17]:
%%time
# 2. Build the RAG vector index from every file under ./niko_posts/
reader = SimpleDirectoryReader(input_dir="./niko_posts/")
documents = reader.load_data()
index = VectorStoreIndex.from_documents(documents)

CPU times: user 2.52 s, sys: 1.47 s, total: 3.98 s
Wall time: 15.3 s


In [18]:
# Wrap the index built above in a query engine; no arguments are passed,
# so the library defaults and the global Settings apply.
qe = index.as_query_engine()

In [19]:
%%time
# Smoke-test question for the query engine (timed by the cell magic above).
question = "What is the most important color in business?"
resp = qe.query(question)

CPU times: user 57.3 ms, sys: 26.5 ms, total: 83.8 ms
Wall time: 7.02 s


In [21]:
# Show the answer held in `resp` as plain text.
print(resp)

In my view, red is the most critical color in business management. The "red test" is a powerful diagnostic tool for understanding an organization's health and potential for improvement. Red represents areas that are not working well - goals, responsibilities, or streams that are underperforming or failing.

The essence of good management is being explicitly clear about what's red, why it's red, and then taking a conscious, deliberate stance toward addressing those challenges. This isn't about perfection, but about transparency and proactive problem-solving. A leadership team that can openly discuss its red areas, explore root causes, and make strategic decisions is far more likely to learn, adapt, and ultimately succeed.

Most companies fail this test. They either ignore their red areas, focus only on easily measurable metrics, or avoid the uncomfortable conversations about what's truly not working. The real value comes from robust, no-holds-barred discussions about why things are red 

In [20]:
%%time

# Load documents (simple version, no parallelization needed)
reader = SimpleDirectoryReader(input_dir="./niko_posts/")
documents = reader.load_data()

# The FAISS index dimension must equal the length of the vectors produced by the
# configured embedding model. Measure it rather than hard-coding it: the model set
# in Settings is bge-small-en-v1.5, whose vectors are 384 wide, not 1024.
dim = len(Settings.embed_model.get_text_embedding("dimension probe"))

# Create FAISS vector store with HNSW index.
# IndexHNSWFlat is the concrete, ready-to-use HNSW index; IndexHNSW is only its base class.
HNSW_NEIGHBORS = 32  # links per node in the HNSW graph (the usual FAISS value)
faiss_index = faiss.IndexHNSWFlat(dim, HNSW_NEIGHBORS)
vector_store = FaissVectorStore(faiss_index=faiss_index)

# A custom vector store has to be handed over through a StorageContext. Passing
# `vector_store=` straight to from_documents() does not attach it, so the index
# would be built on the default in-memory store and FAISS would never be used.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

CPU times: user 2.39 s, sys: 1.46 s, total: 3.85 s
Wall time: 13.6 s


In [22]:
# Recreate the query engine so it uses the `index` most recently assigned above.
qe = index.as_query_engine()

In [23]:
%%time
# Ask the same smoke-test question again so both runs can be compared.
question = "What is the most important color in business?"
resp = qe.query(question)

CPU times: user 44.8 ms, sys: 355 ms, total: 399 ms
Wall time: 7.09 s


In [24]:
# Show the answer held in `resp` as plain text.
print(resp)

In my experience, red is the most critical color in business management. The "red test" is a powerful diagnostic tool for understanding organizational health and performance. When leaders are clear and explicit about what's not working - the "red streams" in their business - they create a powerful engine for learning and improvement.

The red test isn't just about identifying problems; it's about taking deliberate, transparent stances toward those challenges. This might mean deciding to maintain current approaches, adjusting goals, bringing in new advisors, or conducting root cause analyses. The key is having an open, no-holds-barred exploration of why things aren't working.

Most companies fail this test. They either don't clearly identify their red areas or only focus on easily measurable metrics like financial performance or product roadmap deadlines. True management excellence requires a robust assessment of complex variables like customer loyalty, product-market fit, and strategic

In [None]:
# Streamlit setup: page chrome first, then the per-session chat history.
st.set_page_config(
    page_title="Chat with Niko's blog posts",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=None,
)
st.title("Chat with Niko 💬")
st.info("Demo of a RAG chatbot powered by Claude")

# Seed the history with a greeting the first time this session runs.
if "messages" not in st.session_state:
    greeting = {
        "role": "assistant",
        "content": "Ask Niko a question!",
    }
    st.session_state.messages = [greeting]

In [None]:
# Conversation buffer handed to the chat engine; capped by token count.
MEMORY_TOKEN_LIMIT = 15_000
memory = ChatMemoryBuffer.from_defaults(token_limit=MEMORY_TOKEN_LIMIT)

In [None]:
# Create the chat engine once per session and keep it in session state.
# NOTE(review): confirm that the persona set via the LLM-level system_prompt is
# actually applied in "context" chat mode, and that `streaming=True` has an
# effect here (streaming is requested again through stream_chat() later on).
if "chat_engine" not in st.session_state:
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=memory,
        streaming=True,
    )
    st.session_state.chat_engine = chat_engine

In [None]:
# Read the user's input (falsy when nothing was submitted) and, when present,
# record it in the chat history.
prompt = st.chat_input("Ask a question")
if prompt:
    user_turn = {"role": "user", "content": prompt}
    st.session_state.messages.append(user_turn)

In [None]:
# Replay the stored conversation, one chat bubble per history entry.
for past_turn in st.session_state.messages:
    role, content = past_turn["role"], past_turn["content"]
    with st.chat_message(role):
        st.write(content)

In [None]:
# Only answer when the newest history entry is not already an assistant message.
last_role = st.session_state.messages[-1]["role"]
if last_role != "assistant":
    with st.chat_message("assistant"):
        response_stream = st.session_state.chat_engine.stream_chat(prompt)
        # Render the reply as it is generated.
        st.write_stream(response_stream.response_gen)
        # Store the reply text in the history once the stream has been written.
        st.session_state.messages.append(
            {"role": "assistant", "content": response_stream.response}
        )