In [None]:
!pip install -q gradio
!pip install -q ratelimit backoff
!pip install -q PyPDF2
!pip install -q -U google-genai
!pip install -q pypdf pandas
!pip install pymupdf

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.2/46.2 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for ratelimit (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import functools
import logging
import os
import time

import fitz
import google.generativeai as genai
import gradio as gr
import pandas as pd
from backoff import on_exception, expo
from google.api_core.exceptions import TooManyRequests
from google.colab import userdata
from PyPDF2 import PdfReader
from ratelimit import limits, RateLimitException

# ---------------------------------------------------------------------------
# Quotas and limits
# ---------------------------------------------------------------------------
MAX_TOKENS = 1_000_000         # size of the context window, in tokens
TOKEN_WARNING_THRESHOLD = 0.8  # fraction of MAX_TOKENS at which warnings start
MAX_RPM = 15                   # requests per minute (fed to the rate limiter on call_api)
MAX_RPD = 1500                 # requests per day (not referenced by the code visible in this notebook)
MAX_RETRIES = 3

# ---------------------------------------------------------------------------
# Mutable usage counters shared across requests
# ---------------------------------------------------------------------------
tokens_used = 0                  # running estimate updated by the generation helpers
requests_made_today = 0          # declared here; not updated by the code visible in this notebook
tokens_used_this_minute = 0      # declared here; not updated by the code visible in this notebook
last_request_time = time.time()  # timestamp taken when this cell runs

# ---------------------------------------------------------------------------
# Error log: ERROR-level records are appended to api_errors.log
# ---------------------------------------------------------------------------
logging.basicConfig(
    filename="api_errors.log",
    level=logging.ERROR,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# ---------------------------------------------------------------------------
# Gemini client: the key is read from the Colab secret named "googleapi"
# ---------------------------------------------------------------------------
GOOGLE_API_KEY = userdata.get("googleapi")
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
def retry_on_rate_limit_error(func=None, *, retries=3, wait_seconds=60):
    """Decorator that retries a call when it raises ``RateLimitException``.

    Works both bare (``@retry_on_rate_limit_error``) and configured
    (``@retry_on_rate_limit_error(retries=5, wait_seconds=10)``).

    Args:
        func: Callable to wrap. Supplied automatically in the bare form.
        retries: Maximum number of attempts.
        wait_seconds: Pause between attempts after a rate-limit error.

    Returns:
        The wrapped callable. Calling it returns:
          * whatever ``func`` returns on the first successful attempt;
          * ``None`` if ``func`` raises any other exception (the error is
            printed and logged, not re-raised);
          * the string "❌ Error: Max retry attempts reached." if every
            attempt was rate limited.
    """

    def decorator(target):
        # wraps() keeps the wrapped function's __name__/__doc__ visible to
        # outer decorators, logs and debuggers.
        @functools.wraps(target)
        def wrapper(*args, **kwargs):
            for attempt in range(1, retries + 1):
                try:
                    return target(*args, **kwargs)
                except RateLimitException:
                    if attempt == retries:
                        # No further attempt follows, so waiting would only
                        # delay the failure message.
                        break
                    print(f"⚠️ Rate limit exceeded. Retrying after {wait_seconds} seconds... (Attempt {attempt})")
                    time.sleep(wait_seconds)
                except Exception as e:
                    # Best-effort contract: report the failure, do not crash the caller.
                    logging.exception("Unhandled error in %s", getattr(target, "__name__", target))
                    print(f"❌ Error: {e}")
                    return None
            return "❌ Error: Max retry attempts reached."

        return wrapper

    # Bare usage passes the function directly; configured usage passes only keywords.
    if func is None:
        return decorator
    return decorator(func)

@retry_on_rate_limit_error
@on_exception(expo, RateLimitException, max_tries=MAX_RETRIES)
@limits(calls=MAX_RPM, period=60)
def call_api(prompt, chat=None, text_mode=True):
    """Dispatch a prompt to text generation or chat mode under the rate limit.

    Decorators apply from the bottom up:
      1. ``limits`` allows at most ``MAX_RPM`` calls per 60 seconds and raises
         ``RateLimitException`` for calls over that budget.
      2. ``on_exception`` retries those rate-limited calls with exponential
         back-off, up to ``MAX_RETRIES`` tries.
      3. ``retry_on_rate_limit_error`` is the outer safety net once the
         back-off is exhausted.

    The limiter raises before this body runs, so rate-limit handling belongs
    to the decorators; the body only dispatches.

    Args:
        prompt: Text to send to the model.
        chat: Chat session used when ``text_mode`` is false.
        text_mode: ``True`` for one-shot text generation, ``False`` for chat.

    Returns:
        The value returned by the selected helper (model text or an error string).
    """
    if text_mode:
        return text_generation_with_token_tracking(prompt)
    return chat_mode_with_history_tracking(prompt, chat)

In [None]:
def extract_text_from_files(files):
    """Extract text from .txt, .csv and .pdf uploads and join the results.

    Args:
        files: Iterable of uploads, or ``None``. Each item may be a filesystem
            path (``str`` / ``os.PathLike``; the UI in this notebook declares
            ``gr.Files(type="filepath")``) or an object exposing ``.name``
            and, optionally, ``.read()``.

    Returns:
        One string with each file's text separated by a blank line. A file
        that cannot be read, or has an unsupported extension, contributes a
        message naming the file instead of raising.
    """
    extracted_texts = []

    for file in files or []:
        # Normalise the two upload shapes: plain paths vs. file-like objects.
        if isinstance(file, (str, os.PathLike)):
            path, stream = os.fspath(file), None
        else:
            path = getattr(file, "name", None)
            stream = file if hasattr(file, "read") else None

        label = path if path is not None else repr(file)
        lowered = str(label).lower()  # extensions are matched case-insensitively

        try:
            if lowered.endswith(".txt"):
                if stream is not None:
                    data = stream.read()
                    # Binary handles yield bytes, text handles yield str.
                    text = data.decode("utf-8") if isinstance(data, bytes) else data
                else:
                    with open(path, "r", encoding="utf-8") as handle:
                        text = handle.read()
                extracted_texts.append(text)
            elif lowered.endswith(".csv"):
                df = pd.read_csv(stream if stream is not None else path)
                extracted_texts.append(df.to_string())
            elif lowered.endswith(".pdf"):
                with fitz.open(path) as doc:
                    extracted_texts.append("".join(page.get_text() for page in doc))
            else:
                extracted_texts.append(f"Unsupported file format: {label}")
        except Exception as e:
            # Best-effort: one bad file must not abort the whole batch.
            extracted_texts.append(f"Error reading file {label}: {str(e)}")

    return "\n\n".join(extracted_texts)

In [None]:
def text_generation_with_token_tracking(prompt):
    """Run a one-shot generation and keep the global token estimate up to date.

    Tokens are approximated as whitespace-separated words. The prompt is
    rejected (nothing sent, counter untouched) when it would push the running
    total past ``MAX_TOKENS``; a warning is printed once the total crosses
    ``TOKEN_WARNING_THRESHOLD * MAX_TOKENS``.

    Args:
        prompt: Text to send to the model.

    Returns:
        The model's reply text, or an error string describing what went wrong.
    """
    global tokens_used

    def _report(message):
        # Every failure path both prints and returns the same message.
        print(message)
        return message

    warn_level = TOKEN_WARNING_THRESHOLD * MAX_TOKENS

    # Word count stands in for a real tokenizer.
    prompt_tokens = len(prompt.split())

    # Hard stop: refuse prompts that would overflow the budget.
    if tokens_used + prompt_tokens > MAX_TOKENS:
        return _report(f"❌ Error: Token limit exceeded! (Used: {tokens_used}/{MAX_TOKENS})")

    # Soft warning before the request goes out.
    if tokens_used + prompt_tokens > warn_level:
        print(f"⚠️ Warning: Token usage is nearing the limit ({tokens_used + prompt_tokens}/{MAX_TOKENS}).")

    # The prompt is charged up front, before the API call.
    tokens_used = tokens_used + prompt_tokens

    model = genai.GenerativeModel("gemini-1.5-flash-latest")

    try:
        response = model.generate_content(prompt)

        if response is not None and hasattr(response, "text"):
            reply = response.text

            # Charge the reply as well.
            tokens_used = tokens_used + len(reply.split())

            if tokens_used > warn_level:
                print(f"⚠️ Warning: After generation, token usage is high: {tokens_used}/{MAX_TOKENS} tokens used.")

            # Usage line for debugging
            print(f"📊 Text Generation Token Usage: {tokens_used}/{MAX_TOKENS} tokens used.")
            return reply

        return _report("❌ Error: No response from model.")
    except Exception as e:
        return _report(f"❌ Error during text generation: {e}")

In [None]:
def chat_mode_with_history_tracking(prompt, chat):
    """Send a prompt through a chat session while tracking estimated token usage.

    Tokens are approximated as whitespace-separated words. The estimate for a
    request is the text of every part of every message in ``chat.history``
    plus the new prompt. The request is refused when that estimate would push
    the global counter past ``MAX_TOKENS``.

    Args:
        prompt: The user's new message.
        chat: Chat session exposing ``history`` (messages with ``parts``) and
            ``send_message(prompt)``. ``None`` yields an error string.

    Returns:
        The model's reply text, or an error string describing what went wrong.
    """
    global tokens_used

    # call_api defaults chat to None; fail with a readable message, not an AttributeError.
    if chat is None:
        error = "❌ Error: No chat session provided for chat mode."
        print(error)
        return error

    # Count every text part of every message. Messages can carry several parts
    # or none, so indexing parts[0] would under-count or raise.
    history_tokens = sum(
        len((getattr(part, "text", "") or "").split())
        for message in chat.history
        for part in message.parts
    )

    prompt_tokens = len(prompt.split())
    total_new_tokens = history_tokens + prompt_tokens

    # Check if this chat request will exceed the token limit.
    if tokens_used + total_new_tokens > MAX_TOKENS:
        warning = f"❌ Error: Token limit exceeded in chat mode! (Used: {tokens_used}/{MAX_TOKENS})"
        print(warning)
        return warning

    # Warn if approaching threshold
    if tokens_used + total_new_tokens > TOKEN_WARNING_THRESHOLD * MAX_TOKENS:
        print(f"⚠️ Warning: Chat context token usage is nearing limit ({tokens_used + total_new_tokens}/{MAX_TOKENS}).")

    # NOTE(review): the whole history is added to the cumulative counter on
    # every call, so earlier turns are counted again each time. That fits
    # "tokens sent so far" but not "context window occupancy"; confirm intent.
    tokens_used += total_new_tokens

    # Call the Gemini chat API with the current prompt and history
    try:
        response = chat.send_message(prompt)
        if response is None or not hasattr(response, "text"):
            error = "❌ Error: No response from model in chat mode."
            print(error)
            return error

        # Count tokens in the response and update
        response_tokens = len(response.text.split())
        tokens_used += response_tokens

        # Warn if token usage becomes too high after the response
        if tokens_used > TOKEN_WARNING_THRESHOLD * MAX_TOKENS:
            print(f"⚠️ Warning: After chat response, token usage is high: {tokens_used}/{MAX_TOKENS} tokens used.")

        print(f"📊 Chat Mode Token Usage: {tokens_used}/{MAX_TOKENS} tokens used.")
        return response.text
    except Exception as e:
        error_msg = f"❌ Error during chat mode: {e}"
        print(error_msg)
        return error_msg



In [None]:
def chat_interface(prompt, files, mode):
    """Gradio callback: merge the prompt with uploaded file text and call the model.

    Args:
        prompt: Text typed by the user.
        files: Uploaded files, or a falsy value when nothing was uploaded.
        mode: "Text Generation" selects one-shot generation; any other value
            selects chat mode, which uses the module-level ``chat`` session.

    Returns:
        Whatever ``call_api`` returns for the combined prompt.
    """
    # Text is extracted only when something was actually uploaded.
    file_text = extract_text_from_files(files) if files else ""

    # File content, when present, is appended under a labelled section.
    if file_text:
        combined_prompt = f"{prompt}\n\n[File Content]:\n{file_text}"
    else:
        combined_prompt = prompt

    # Anything other than "Text Generation" is treated as chat mode.
    if mode != "Text Generation":
        return call_api(combined_prompt, chat, text_mode=False)
    return call_api(combined_prompt, text_mode=True)

def build_chatbot(system_instruction):
    """Create a chat session whose model is primed with a system instruction.

    Args:
        system_instruction: Instruction text passed to the model constructor.

    Returns:
        The session returned by ``start_chat``, started with an empty history.
    """
    return genai.GenerativeModel(
        "gemini-1.5-flash-latest",
        system_instruction=system_instruction,
    ).start_chat(history=[])

# System prompt: passed to build_chatbot below as the model's system instruction.
system_prompt = """You are an attentive and supportive academic assistant.
Your task is to provide assistance.
I will provide you the question.

If the answer cannot be found,
kindly respond with 'I don't know'.

After answering each question, please provide a detailed
explanation, breaking down the answer step by step.

If you are ready, I will provide you the question.
"""

# Create one module-level chat session from the system prompt.
# chat_interface reads this global when the selected mode is not "Text Generation",
# so every chat-mode request goes through the same session.
chat = build_chatbot(system_prompt)

# Gradio UI: the three inputs map positionally onto chat_interface(prompt, files, mode).
demo = gr.Interface(
    fn=chat_interface,
    inputs=[
        gr.Textbox(label="Prompt", value=""),
        # NOTE(review): type="filepath" presumably delivers paths rather than open
        # file objects; confirm extract_text_from_files handles that shape.
        gr.Files(label="Upload files (optional)", type="filepath"),
        gr.Radio(["Text Generation", "Chat Mode"], label="Mode", value="Chat Mode")  # Default: Chat Mode
    ],
    outputs="markdown",
    title="Chat with Gemini",
    description="Choose between Text Generation or Chat Mode. Type your question and optionally upload files."
)

# Launch the UI. Per the notebook output, share=True yields a public gradio.live URL
# and debug=True keeps this cell running in Colab so errors and logs stay visible.
demo.launch(share=True, debug=True)

# Debugging token usage: prints the running estimate kept in the global counter.
# With the blocking launch above, this presumably runs only once the server stops.
print(f"Total tokens used: {tokens_used}")

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://56e1f36ba20d0c4c6a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


📊 Chat Mode Token Usage: 1549/1000000 tokens used.
📊 Chat Mode Token Usage: 4647/1000000 tokens used.
📊 Chat Mode Token Usage: 9326/1000000 tokens used.
📊 Chat Mode Token Usage: 15667/1000000 tokens used.
📊 Chat Mode Token Usage: 23603/1000000 tokens used.
📊 Chat Mode Token Usage: 33131/1000000 tokens used.
📊 Chat Mode Token Usage: 44242/1000000 tokens used.
