<a href="https://colab.research.google.com/github/sachincredible9/Python_basics/blob/main/newSQL_Rag.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install langchain langchain-community langchain-huggingface streamlit chromadb
!pip install ctransformers[cuda]
!pip install pyngrok
!pip install sqlalchemy



In [2]:
import os
import shutil
# Clear the local Hugging Face Hub cache so that a corrupted or partial model
# download is fetched again from scratch the next time the model is loaded.


def resolve_hf_cache_dir():
    """Return the path of the Hugging Face Hub cache directory.

    The location is resolved in this order:
    1. ``HF_HUB_CACHE`` if set,
    2. ``$HF_HOME/hub`` if ``HF_HOME`` is set,
    3. the default ``~/.cache/huggingface/hub``.

    Returns:
        str: The cache directory path with ``~`` expanded.
    """
    hub_cache = os.environ.get("HF_HUB_CACHE")
    if hub_cache:
        return os.path.expanduser(hub_cache)

    hf_home = os.environ.get("HF_HOME")
    if hf_home:
        return os.path.join(os.path.expanduser(hf_home), "hub")

    return os.path.expanduser('~/.cache/huggingface/hub')


def clear_hf_cache(cache_dir):
    """Recursively delete ``cache_dir`` and report what happened.

    Use with caution: this removes every model stored under ``cache_dir``.

    Args:
        cache_dir: Path of the directory to remove.

    Returns:
        bool: True if the directory existed and was removed, False if it was
        not there to begin with.
    """
    if not os.path.exists(cache_dir):
        print("Hugging Face cache directory not found.")
        return False

    shutil.rmtree(cache_dir)
    print("Hugging Face cache cleared successfully.")
    return True


cache_dir = resolve_hf_cache_dir()
print(f"Hugging Face cache directory: {cache_dir}")

# Assigned to a name so the boolean is not echoed as the cell's output.
cache_cleared = clear_hf_cache(cache_dir)

Hugging Face cache directory: /root/.cache/huggingface/hub
Hugging Face cache cleared successfully.


In [3]:
# Log in to the Hugging Face Hub so that model downloads are authenticated.
# After clearing the cache and running this cell, re-run the cell that starts
# the Streamlit app so it tries downloading the model again.
# If the issue persists, there might be a problem with the specific model file or network connectivity.


from huggingface_hub import login
from google.colab import userdata

# Retrieve the Hugging Face token from Colab secrets.
# `userdata.get` raises (it does not return None) when the secret does not
# exist or this notebook has not been granted access to it, so both cases are
# caught here and reported with the friendly message below instead of a
# traceback.
try:
    hf_token = userdata.get('HF_TOKEN')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    hf_token = None

if hf_token:
    login(token=hf_token)
    print("Successfully logged in to Hugging Face Hub.")
else:
    print("Hugging Face token (HF_TOKEN) not found in Colab secrets. Please add it.")

Successfully logged in to Hugging Face Hub.


In [12]:
%%writefile app.py
import streamlit as st
from langchain_community.llms import CTransformers
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import create_sql_agent
# FIX: Use the generic import to avoid "ModuleNotFoundError"
# from langchain.agents import AgentType
import os
import tempfile

# 1. Page Config
# set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="TinyLlama SQL Chat", layout="centered")
# The title previously contained a mis-encoded (mojibake) lightning-bolt emoji.
st.title("⚡ Fast SQL Chat with TinyLlama")

# 2. Sidebar
st.sidebar.header("Configuration")
# Holds the uploaded SQLite database, or None until the user picks a file.
uploaded_file = st.sidebar.file_uploader("Upload your .sqlite file", type=["sqlite", "db"])

# 3. Model Loading (With STOP TOKENS to prevent hanging)
@st.cache_resource
def load_llm(gpu_layers=50):
    """Load the quantized TinyLlama chat model through ctransformers.

    Decorated with ``st.cache_resource`` so the model is loaded once and
    reused across Streamlit reruns.

    Args:
        gpu_layers: Number of model layers to offload to the GPU. Use 0 on a
            CPU-only runtime.

    Returns:
        CTransformers: The LangChain LLM wrapper around the loaded model.
    """
    return CTransformers(
        model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
        model_type="llama",
        config={
            # Generation/loading options are forwarded to ctransformers via
            # this dict, so gpu_layers has to live here to take effect.
            'gpu_layers': gpu_layers,
            'max_new_tokens': 256,   # Lower limit for speed
            'context_length': 2048,
            'temperature': 0.0,      # Precise answers
            # Stop tokens prevent the model from rambling past its own
            # action and inventing the tool's "Observation" itself.
            'stop': ["Observation:", "\nObservation:", "Observation"],
        },
    )

# Load the model up front. If loading fails, the error is shown in the page
# and `llm` is left undefined, which the main app section checks for.
try:
    llm = load_llm()
except Exception as load_error:
    st.error(f"Model Load Error: {load_error}")

# 4. Main App
@st.cache_resource(show_spinner=False)
def persist_upload(db_bytes):
    """Write the uploaded database bytes to a temp file and return its path.

    Streamlit re-executes this script on every interaction. Caching on the
    file contents means a given upload is written to disk once, instead of
    leaving a new never-deleted temp file behind on each rerun.

    Args:
        db_bytes: Raw contents of the uploaded SQLite file.

    Returns:
        str: Path of the temp file holding ``db_bytes``.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".sqlite") as tmp_file:
        tmp_file.write(db_bytes)
        return tmp_file.name


# `llm` only exists at module level when load_llm() succeeded.
if uploaded_file and "llm" in globals():
    tmp_file_path = persist_upload(uploaded_file.getvalue())

    try:
        db = SQLDatabase.from_uri(f"sqlite:///{tmp_file_path}")
    except Exception as e:
        # e.g. the uploaded file is not a valid SQLite database.
        st.error(f"Database Error: {e}")
        st.stop()
    st.success("Database Connected!")

    agent_executor = create_sql_agent(
        llm=llm,
        db=db,
        agent_type="zero-shot-react-description",
        verbose=True,
        max_iterations=5,                 # Stop after 5 tries
        # NOTE(review): "generate" is only implemented by the legacy Agent
        # classes; with the runnable ReAct agent it raises once the iteration
        # limit is reached, so "force" is used to return a stop message instead.
        early_stopping_method="force",
        # handle_parsing_errors is an AgentExecutor option, so it is passed
        # through agent_executor_kwargs to make sure it reaches the executor.
        agent_executor_kwargs={"handle_parsing_errors": True},
    )

    # Chat history lives in session_state so it survives reruns.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the conversation so far.
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if prompt := st.chat_input("Ask a question..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                try:
                    # invoke() replaces the deprecated Chain.run(); the
                    # executor returns its final answer under "output".
                    result = agent_executor.invoke({"input": prompt})
                    response = result["output"]
                    st.markdown(response)
                    # Also logged to the server's stdout for debugging.
                    print(f"Response: {response}")
                    st.session_state.messages.append({"role": "assistant", "content": response})
                except Exception as e:
                    st.error(f"Error: {e}")

Overwriting app.py


In [5]:
import os
import time
from pyngrok import ngrok

STREAMLIT_PORT = 8501
STREAMLIT_LOG = "streamlit.log"


def get_ngrok_auth_token():
    """Return the ngrok auth token without hardcoding it in the notebook.

    Sources are tried in this order: the ``NGROK_AUTHTOKEN`` environment
    variable, a Colab secret of the same name, and finally an interactive
    hidden prompt.

    Returns:
        str: The auth token entered or found.
    """
    token = os.environ.get("NGROK_AUTHTOKEN")
    if token:
        return token

    try:
        # Colab only: read the token from the notebook's secrets panel.
        from google.colab import userdata
        token = userdata.get("NGROK_AUTHTOKEN")
    except Exception:
        # Deliberate best-effort: not running on Colab, or the secret is
        # missing / not shared with this notebook. Fall through to the prompt.
        token = None
    if token:
        return token

    from getpass import getpass
    return getpass("ngrok auth token: ")


# 1. Configure the ngrok auth token.
# SECURITY: a literal token used to be committed here. Treat that token as
# compromised and revoke it in the ngrok dashboard.
ngrok.set_auth_token(get_ngrok_auth_token())

# 2. Kill old processes just in case
os.system("pkill streamlit")

# 3. Run Streamlit in the background and save its output to a log file
print("Starting Streamlit...")
os.system(f"streamlit run app.py --server.port {STREAMLIT_PORT} > {STREAMLIT_LOG} 2>&1 &")

# 4. Wait for 10 seconds to ensure it didn't crash immediately
time.sleep(10)

# 5. Connect ngrok
try:
    public_url = ngrok.connect(STREAMLIT_PORT).public_url
    print(f"🚀 App is live: {public_url}")
except Exception as e:
    print(f"Ngrok Error: {e}")

# 6. Check for crash logs immediately.
# The log is read in Python because output of an os.system() child process is
# not captured into the notebook cell, which left this section empty before.
print("\n--- Last 5 lines of Server Log ---")
try:
    with open(STREAMLIT_LOG, encoding="utf-8", errors="replace") as log_file:
        print("".join(log_file.readlines()[-5:]), end="")
except FileNotFoundError:
    print(f"({STREAMLIT_LOG} not found)")

Starting Streamlit...
🚀 App is live: https://nonoriginally-unvital-everett.ngrok-free.dev

--- Last 5 lines of Server Log ---


0