In [1]:
#!pip install colab-xterm
#%load_ext colabxterm

In [1]:
import json
import os

import chromadb
import requests
from chromadb.utils import embedding_functions

In [None]:
# Initialize ChromaDB; PersistentClient keeps its data under ./chroma_db
chroma_client = chromadb.PersistentClient(path="./chroma_db")

# Define the embedding model (OpenAI's API).
# The key is read from the OPENAI_API_KEY environment variable so a real
# credential never has to be pasted into (and saved with) the notebook.
# The original placeholder is kept as the fallback when the variable is unset.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ.get("OPENAI_API_KEY", "OPENAI-API-KEY"),
    model_name="text-embedding-ada-002",
)

# Create or retrieve the ChromaDB collection
collection = chroma_client.get_or_create_collection(name="knowledge_base", embedding_function=openai_ef)

def fetch_ipfs(cid, timeout=30):
    """Retrieve a file from the public ipfs.io gateway given its CID.

    Args:
        cid: Content identifier appended to ``https://ipfs.io/ipfs/``.
        timeout: Seconds to wait for the gateway before giving up. Without a
            timeout ``requests.get`` can block forever on an unresponsive
            gateway.

    Returns:
        The response body as text when the gateway answers with HTTP 200,
        otherwise ``None`` (non-200 status, timeout, or connection error).
    """
    url = f"https://ipfs.io/ipfs/{cid}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network failures follow the same "return None" contract as a bad
        # status code, so callers only have one failure path to handle.
        print(f"⚠️ Error fetching IPFS CID {cid}: {exc}")
        return None

    if response.status_code == 200:
        print(f"✅ Successfully fetched file from IPFS (CID: {cid})")
        return response.text

    print(f"⚠️ Error fetching IPFS CID {cid}")
    return None

# Example CID (replace with the CID of your own document)
ipfs_cid = "bafkreih2myueo4kzqnhc3kltxw3vt3c3ir6tts7smlqwaex4izmkutf5zq"

# Download the document; fetch_ipfs returns None on a non-200 response
document_text = fetch_ipfs(ipfs_cid)

if not document_text:
    # Covers both a failed download (None) and an empty document body
    print("❌ Failed to retrieve document from IPFS.")
else:
    # Store the document in ChromaDB under an ID derived from the CID
    doc_id = f"ipfs_{ipfs_cid}"
    collection.add(
        ids=[doc_id],
        documents=[document_text],
        metadatas=[{"source": "ipfs", "cid": ipfs_cid}],
    )
    print("✅ Document from IPFS stored in ChromaDB.")




✅ Successfully fetched file from IPFS (CID: bafkreih2myueo4kzqnhc3kltxw3vt3c3ir6tts7smlqwaex4izmkutf5zq)




✅ Document from IPFS stored in ChromaDB.


In [None]:
## Test: retrieve documents from the database to verify that the CIDs were stored correctly
def retrieve_relevant_docs(query, top_k=3):
    """Look up the documents ChromaDB returns for a single query.

    Args:
        query: Text passed to the collection as the only query.
        top_k: Number of results requested from the collection.

    Returns:
        The list of documents for that query, or an empty list when the
        result carries no documents.
    """
    hits = collection.query(query_texts=[query], n_results=top_k)
    documents = hits["documents"]
    if not documents:
        return []
    # One query was sent, so the first entry holds its documents.
    return documents[0]

# Example query (Spanish for "What information does the IPFS file contain?").
# retrieve_relevant_docs is called with its default top_k of 3.
query_text = "¿Qué información contiene el archivo de IPFS?"
retrieved_docs = retrieve_relevant_docs(query_text)

# Print the list of matching document texts
print(f"🔎 Relevant documents found: {retrieved_docs}")



🔎 Relevant documents found: ['esta es una propuesta', 'Titulo de novela "Phiro"\r\nPhiro\r\nUna caja de fósforos que tenia veinticinco fósforos dentro, tenía uno llamado Phiro el cual estaba debajo de los demás en la cajetilla de fósforos y ansiaba saber que pasaría cuando lo sacaran, pasaba cada día esperando en la eterna oscuridad viendo como sacaban uno a uno a sus hermanos, hasta que un día por fin lo sacaron y todo estaba igual de oscuro que dentro de la caja pero cuando lo pasaron por la caja… por unos breves segundos fue feliz ya que acabó con toda la oscuridad que había a su alrededor e ilumino el rostro feliz de una niña.\r\n']


In [None]:
### Test
import os
import time
import psutil
import requests
import chromadb
from chromadb.utils import embedding_functions
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from pyngrok import ngrok
import uvicorn
from threading import Thread
import ollama

# Initialize ChromaDB; PersistentClient keeps its data under ./chroma_db
chroma_client = chromadb.PersistentClient(path="./chroma_db")
# The OpenAI key is read from the OPENAI_API_KEY environment variable so a real
# credential never has to be pasted into (and saved with) the notebook.
# The original placeholder is kept as the fallback when the variable is unset.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ.get("OPENAI_API_KEY", "OPENAI-API-KEY"),
    model_name="text-embedding-ada-002",
)
collection = chroma_client.get_or_create_collection(name="knowledge_base", embedding_function=openai_ef)

# Define FastAPI app
app = FastAPI()

# Request schemas
# Request body for POST /api/v1/chat.
class ChatRequest(BaseModel):
    # Conversation so far; the chat endpoint reads the "content" key of the
    # last item as the user query and forwards the whole list to Ollama.
    messages: list
    # Model name handed to ollama.chat.
    model: str = "mistral"
    # Forwarded to Ollama as the "temperature" option.
    temperature: float = 0.7

# Request body for POST /api/v1/add_cid.
class AddCIDRequest(BaseModel):
    # IPFS content identifier of the document to fetch and store.
    cid: str

# Helper functions
def is_port_in_use(port=8000):
    """Tell whether any process has a connection whose local port is ``port``.

    Walks every process reported by psutil, looks at its connection list
    (treated as empty when psutil reports none) and compares each
    connection's local-address port with ``port``.

    Returns:
        ``True`` as soon as one matching connection is found, else ``False``.
    """
    return any(
        conn.laddr.port == port
        for proc in psutil.process_iter(attrs=["connections"])
        for conn in (proc.info["connections"] or [])
    )

def fetch_ipfs(cid, timeout=30):
    """Retrieve a file from the public ipfs.io gateway given its CID.

    Args:
        cid: Content identifier appended to ``https://ipfs.io/ipfs/``.
        timeout: Seconds to wait for the gateway before giving up. Without a
            timeout ``requests.get`` can block forever on an unresponsive
            gateway, which would stall the API worker handling the request.

    Returns:
        The response body as text when the gateway answers with HTTP 200,
        otherwise ``None`` (non-200 status, timeout, or connection error).
    """
    url = f"https://ipfs.io/ipfs/{cid}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network failures follow the same "return None" contract as a bad
        # status code, so callers only have one failure path to handle.
        print(f"⚠️ Error fetching IPFS CID {cid}: {exc}")
        return None

    if response.status_code == 200:
        print(f"✅ Successfully fetched file from IPFS (CID: {cid})")
        # Debug trace of the full document text (label is Spanish for "DOCUMENT TEXT")
        print(f"TEXTO DOCUMENTO {str(response.text)}")
        return response.text

    print(f"⚠️ Error fetching IPFS CID {cid}")
    return None

# Endpoints
@app.post("/api/v1/add_cid")
def add_cid(request: AddCIDRequest):
    """Fetch a document from IPFS by CID and store it in the ChromaDB collection.

    Args:
        request: Body carrying the ``cid`` to ingest.

    Returns:
        A dict with a ``status`` message and the ``cid``; the status reports
        either that the CID was added or that it was already stored.

    Raises:
        HTTPException: 404 when the document cannot be fetched from IPFS,
            500 when any other unexpected error occurs.
    """
    cid = request.cid
    try:
        # Let ChromaDB filter on the "cid" metadata key instead of loading
        # every stored record and scanning it in Python; this also tolerates
        # records whose metadata has no "cid" entry.
        existing = collection.get(where={"cid": cid}, include=["metadatas"])
        if existing["ids"]:
            return {"status": "❌ CID already exists in ChromaDB", "cid": cid}

        document_text = fetch_ipfs(cid)
        if not document_text:
            raise HTTPException(status_code=404, detail="Failed to fetch CID from IPFS")

        doc_id = f"ipfs_{cid}"
        collection.add(ids=[doc_id], documents=[document_text], metadatas=[{"source": "ipfs", "cid": cid}])
        return {"status": "✅ CID added to ChromaDB", "cid": cid}
    except HTTPException:
        # Deliberate HTTP errors (the 404 above) must reach the client; a
        # blanket handler would otherwise turn them into a 200 with a null body.
        raise
    except Exception as e:
        # Log message is Spanish for "Unexpected CID error"
        print(f"Error en CID no esperado {e}")
        raise HTTPException(status_code=500, detail=f"Unexpected error while adding CID: {e}") from e

@app.post("/api/v1/chat")
def chat(request: ChatRequest):
    """Answer the latest chat message with Ollama, using ChromaDB documents as context.

    The ``content`` of the last message is used as the retrieval query. Up to
    two matching documents are joined into a system message that is placed in
    front of the conversation before it is sent to Ollama.

    Args:
        request: Chat payload with ``messages``, ``model`` and ``temperature``.

    Returns:
        ``{"response": <text>}`` on success, or ``{"error": <message>}`` when
        the Ollama call fails.

    Raises:
        HTTPException: 400 when ``messages`` is empty or its last item is not
            an object with a ``content`` field.
    """
    # Validate up front: an empty list or malformed last message would
    # otherwise raise IndexError/TypeError/KeyError and surface as a bare 500.
    if not request.messages:
        raise HTTPException(status_code=400, detail="'messages' must contain at least one message")
    last_message = request.messages[-1]
    if not isinstance(last_message, dict) or "content" not in last_message:
        raise HTTPException(status_code=400, detail="The last message must be an object with a 'content' field")
    query = last_message["content"]  # The user's query

    stored_docs = collection.count()  # Number of documents in the ChromaDB collection
    top_k = min(2, stored_docs)  # Never request more results than documents stored

    # Build the context from ChromaDB
    documents = []
    if top_k > 0:
        results = collection.query(query_texts=[query], n_results=top_k)
        if results["documents"]:
            # Skip empty/None entries so the join below cannot fail on them
            documents = [doc for doc in results["documents"][0] if doc]
    context = "\n".join(documents) if documents else "No relevant data found."

    # Prepend the context as a system message without mutating the request's own list
    system_message = {"role": "system", "content": f"Use the following information to answer:\n{context}"}
    messages = [system_message, *request.messages]

    # Ask Ollama for a reply based on the messages and the retrieved context
    try:
        response = ollama.chat(
            model=request.model,
            messages=messages,
            options={"temperature": request.temperature}
        )
        return {"response": response["message"]["content"]}
    except Exception as e:
        # Best effort by design: report the failure in the body instead of raising
        return {"error": str(e)}

In [None]:
# Server and ngrok functions
def start_uvicorn():
    """Serve ``app`` with Uvicorn on 0.0.0.0:8000 unless the port is already taken.

    When port 8000 is already in use only a message is printed; no server is
    stopped or started in that case. Otherwise ``uvicorn.run`` is called from
    the current thread.
    """
    if not is_port_in_use(8000):
        print("✅ Uvicorn is starting.")
        uvicorn.run(app, host="0.0.0.0", port=8000)
        return
    # Port busy: report and leave whatever is listening untouched.
    print("🚀 Restarting Uvicorn...")

def restart_ngrok():
    """Loop forever, re-creating the ngrok tunnel to port 8000 every 30 minutes.

    On each pass:
      * if nothing holds port 8000, print a warning and call ``start_uvicorn``;
      * otherwise kill running ngrok processes with ``pkill``, wait 3 seconds,
        open a new tunnel and store its URL in the module-level ``public_url``.

    Errors raised while restarting the tunnel are printed and the loop goes on.
    This function never returns.
    """
    global public_url
    while True:
        if not is_port_in_use(8000):
            print("❌ Uvicorn is not running. Fix it before restarting ngrok.")
            start_uvicorn()
        else:
            try:
                print("🔄 Restarting ngrok...")
                os.system("pkill -f ngrok")
                time.sleep(3)  # Pause between killing ngrok and reconnecting
                tunnel = ngrok.connect(8000)
                public_url = tunnel.public_url
                print(f"🔥 Public API URL: {public_url}")
            except Exception as err:
                print(f"⚠️ Error restarting ngrok: {err}")
        time.sleep(1800)  # Renew every 30 minutes

# Run server and ngrok: start Uvicorn in a background thread, open an ngrok
# tunnel to it, start the periodic tunnel renewal, then block forever.
if __name__ == "__main__":
    # Start Uvicorn in a separate thread (uvicorn.run blocks its caller)
    Thread(target=start_uvicorn).start()
    time.sleep(5)  # Wait for Uvicorn to start (fixed delay, not a readiness check)

    # Start ngrok if Uvicorn is running
    if is_port_in_use(8000):
        print("🔄 Starting ngrok...")
        # Stored at module level; restart_ngrok later overwrites this global
        public_url = ngrok.connect(8000).public_url
        print(f"🔥 Public API URL: {public_url}")
    else:
        print("❌ Uvicorn failed to start. Check manually.")

    # Keep renewing ngrok in a separate thread.
    # restart_ngrok performs its first renewal as soon as it starts, so the
    # tunnel opened above is replaced after a few seconds.
    Thread(target=restart_ngrok).start()

    # Keep the session alive: this loop never exits, so the cell runs until
    # it is interrupted
    while True:
        print("🔄 Keeping the Google Colab session alive...")
        time.sleep(600)

🚀 Restarting Uvicorn...
🔄 Starting ngrok...
🔥 Public API URL: https://e67c-35-243-235-7.ngrok-free.app
🔄 Keeping the Google Colab session alive...
🔄 Restarting ngrok...
🔥 Public API URL: https://7cb4-35-243-235-7.ngrok-free.app
INFO:     2806:2f0:9261:cc05:585e:ebc8:cc45:2326:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     2806:2f0:9261:cc05:585e:ebc8:cc45:2326:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     2806:2f0:9261:cc05:585e:ebc8:cc45:2326:0 - "GET /docs HTTP/1.1" 200 OK
INFO:     2806:2f0:9261:cc05:585e:ebc8:cc45:2326:0 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     2806:2f0:9261:cc05:585e:ebc8:cc45:2326:0 - "POST /api/v1/chat HTTP/1.1" 200 OK
INFO:     2806:2f0:9261:cc05:585e:ebc8:cc45:2326:0 - "GET /docs HTTP/1.1" 200 OK
INFO:     2806:2f0:9261:cc05:585e:ebc8:cc45:2326:0 - "GET /docs HTTP/1.1" 200 OK
