In [1]:
# Install necessary libraries
!pip install transformers accelerate bitsandbytes

# Import libraries
import torch
import re
import datetime
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load a better model that should still run on Colab
model_name = "microsoft/phi-2" # 2.7B parameter model, better reasoning

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_4bit=True  # Use 4-bit quantization to save memory
)

# Create a text generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15
)

# Math detection and calculation function
def detect_and_solve_math(query):
    # Basic arithmetic regex pattern
    pattern = r'(\d+(?:\.\d+)?)\s*([\+\-\*\/])\s*(\d+(?:\.\d+)?)'
    match = re.search(pattern, query)

    if match:
        try:
            num1 = float(match.group(1))
            operator = match.group(2)
            num2 = float(match.group(3))

            if operator == '+':
                result = num1 + num2
            elif operator == '-':
                result = num1 - num2
            elif operator == '*':
                result = num1 * num2
            elif operator == '/' and num2 != 0:
                result = num1 / num2
            else:
                return None  # Division by zero or unsupported operator

            # Format as integer if result is a whole number
            if result.is_integer():
                result = int(result)

            return f"The answer to {num1} {operator} {num2} is {result}."
        except:
            return None
    return None

# Date and time function
def handle_date_time_query(query):
    now = datetime.datetime.now()

    if re.search(r'what\s+(is|\'s)\s+the\s+time', query.lower()):
        return f"The current time is {now.strftime('%I:%M %p')}."

    if re.search(r'what\s+(is|\'s)\s+the\s+date', query.lower()):
        return f"Today's date is {now.strftime('%A, %B %d, %Y')}."

    if re.search(r'what\s+day\s+(is|\'s)\s+it', query.lower()):
        return f"Today is {now.strftime('%A')}."

    return None

# Simple facts database
simple_facts = {
    "what is the capital of france": "The capital of France is Paris.",
    "what is the capital of japan": "The capital of Japan is Tokyo.",
    "what is the capital of usa": "The capital of the United States is Washington, D.C.",
    "who created you": "I was created as TimiAI, a custom AI assistant.",
    "what is your name": "My name is TimiAI, your personal AI assistant.",
}

# Interactive chat function with improved prompting
def chat_with_ai(prompt):
    # Check for simple facts
    clean_prompt = prompt.lower().strip('?!.,')
    if clean_prompt in simple_facts:
        return simple_facts[clean_prompt]

    # Check if it's a date/time question
    date_time_answer = handle_date_time_query(prompt)
    if date_time_answer:
        return date_time_answer

    # Check if it's a math question
    math_answer = detect_and_solve_math(prompt)
    if math_answer:
        return math_answer

    # If not a special case, use the model with better prompting
    system_prompt = "You are TimiAI, a helpful and accurate assistant. Answer questions directly and accurately."
    formatted_prompt = f"{system_prompt}\n\nHuman: {prompt}\n\nTimiAI:"

    response = pipe(formatted_prompt, max_new_tokens=512)[0]['generated_text']

    # Extract just the assistant's response - adjust based on the model's output format
    try:
        assistant_response = response.split("TimiAI:")[1].strip()
    except:
        # Fallback if the splitting doesn't work
        assistant_response = response.split(prompt)[1].strip()

    return assistant_response

# Create a simple interface with ipywidgets
from ipywidgets import widgets, Layout
from IPython.display import display, HTML

# Display title for TimiAI
display(HTML("<h1 style='color: #4B8BBE;'>TimiAI</h1>"))

input_box = widgets.Textarea(
    value='',
    placeholder='Type your message here...',
    description='You:',
    layout=Layout(width='80%', height='100px')
)

output_box = widgets.Textarea(
    value='Hello! I am TimiAI, your personal assistant. How can I help you today?',
    description='TimiAI:',
    layout=Layout(width='80%', height='200px')
)

def on_send_button_clicked(b):
    user_input = input_box.value
    input_box.value = ''

    if user_input.strip() == '':
        return

    # Get AI response
    ai_response = chat_with_ai(user_input)

    # Update the output box
    output_box.value = ai_response

send_button = widgets.Button(
    description="Send",
    button_style='primary',
    tooltip='Send your message to TimiAI'
)
send_button.on_click(on_send_button_clicked)

# Display the chat interface
display(input_box, send_button, output_box)

print("TimiAI is ready to chat! Type your message and click Send.")

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Device set to use cuda:0


Textarea(value='', description='You:', layout=Layout(height='100px', width='80%'), placeholder='Type your mess…

Button(button_style='primary', description='Send', style=ButtonStyle(), tooltip='Send your message to TimiAI')

Textarea(value='Hello! I am TimiAI, your personal assistant. How can I help you today?', description='TimiAI:'…

TimiAI is ready to chat! Type your message and click Send.
