In [None]:
!pip install transformers accelerate huggingface_hub gradio beautifulsoup4 requests torch

Collecting gradio
  Downloading gradio-5.16.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.0 (from gradio)
  Downloading gradio_client-1.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.9.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.meta

In [None]:
import os
import time
import logging
import requests
import gradio as gr
import torch
from pathlib import Path
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
from bs4 import BeautifulSoup

In [None]:
# Logging setup: every log file written by this notebook lives under ./Logs.
LOGS_DIR = Path("Logs")
os.makedirs(LOGS_DIR, exist_ok=True)  # creates missing parents; no error if it already exists

def log_to_file(message):
    """Append ``message`` as a single line to ``timing_logs.txt`` inside ``LOGS_DIR``.

    The file is opened in append mode on every call, so earlier entries are kept.
    It is written as UTF-8 explicitly so the result does not depend on the
    platform's default text encoding.

    Args:
        message: Text to record; a trailing newline is added automatically.
    """
    with open(LOGS_DIR / "timing_logs.txt", "a", encoding="utf-8") as f:
        f.write(message + "\n")

# Route log records to both Logs/logs.log and the cell output.
# basicConfig() does nothing when the root logger already has handlers, which can
# be the case inside a hosted notebook kernel; force=True removes any existing root
# handlers first, so this configuration always takes effect and re-running the
# cell does not stack duplicate handlers.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s: %(levelname)s: %(message)s]",
    handlers=[
        logging.FileHandler(LOGS_DIR / "logs.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
    force=True,
)
logger = logging.getLogger("UITarsLogger")

In [None]:
# Load the UI-TARS-7B-DPO checkpoint and its matching processor.
model_name = "bytedance-research/UI-TARS-7B-DPO"

# Device that input tensors are moved to before generation.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_name,
    device_map="auto",  # automatic placement of the weights
    torch_dtype=torch.float16,  # half-precision weights for efficiency
)

processor = AutoProcessor.from_pretrained(model_name)

logger.info("UI-TARS-7B-DPO Model Loaded Successfully!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/56.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.69G [00:00<?, ?B/s]

`Qwen2VLRotaryEmbedding` can now be fully parameterized by passing the model config through the `config` argument. All other arguments will be removed in v4.46


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]



preprocessor_config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.58k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/392 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

In [None]:
def extract_website_info(
    url: str,
    query: str,
    timeout: float = 5,
    max_chars: int = 2000,
    min_text_len: int = 30,
) -> str:
    """Fetch ``url`` and build an LLM prompt from its main text plus ``query``.

    Script, style, header, footer, aside and nav elements are removed, then the
    text of every h1/h2/h3/p/li element is collected. Whitespace inside each
    element is collapsed to single spaces before the length filter is applied,
    so an element whose text spans several short lines is judged (and emitted)
    as one line.

    Args:
        url: Address of the page to download.
        query: Question to embed at the end of the prompt.
        timeout: Seconds passed to ``requests.get`` as its timeout.
        max_chars: Maximum number of characters of page text kept in the prompt.
        min_text_len: Elements whose text is not longer than this are dropped.

    Returns:
        The prompt string on success. On failure a human-readable message that
        starts with ``"Error"`` or ``"Failed to retrieve page"``; no exception
        is propagated to the caller.
    """
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return f"Failed to retrieve page: HTTP {response.status_code}"

        soup = BeautifulSoup(response.text, "html.parser")

        # Remove unnecessary elements (scripts, styles, page chrome) before reading text.
        for tag in soup(["script", "style", "header", "footer", "aside", "nav"]):
            tag.decompose()

        # Extract main content, one normalized line per element.
        extracted_text = []
        for tag in soup.find_all(["h1", "h2", "h3", "p", "li"]):
            text = " ".join(tag.get_text().split())
            if len(text) > min_text_len:  # Filter out short/irrelevant text
                extracted_text.append(text)

        full_text = "\n".join(extracted_text)
        log_to_file(f"Extracted text length: {len(full_text)} characters")

        # Without any page text the model would have nothing to ground its answer on.
        if not full_text:
            return "Error: No readable text content found on the page."

        # Cap the context size; slicing is already a no-op for shorter text.
        full_text = full_text[:max_chars]

        return f"""
        Given the following extracted webpage content, answer concisely:
        {full_text}

        Question: {query}
        Answer:
        """
    except requests.exceptions.Timeout:
        return "Error: Website took too long to respond."
    except requests.exceptions.ConnectionError:
        return "Error: Could not connect to the website."
    except Exception as e:
        return f"Error fetching webpage: {str(e)}"

In [None]:
def generate_response(user_input: str) -> str:
    """Generate the model's continuation for ``user_input`` and return only the new text.

    Uses the module-level ``processor``, ``model`` and ``device``. The sequence
    returned by ``model.generate`` starts with the prompt tokens, so they are
    sliced off before decoding; otherwise the "response" would begin with an
    echo of the whole prompt.

    Args:
        user_input: Complete prompt text to feed to the model.

    Returns:
        The decoded newly generated text, with special tokens removed and
        surrounding whitespace stripped.
    """
    inputs = processor(text=[user_input], return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=150,  # Limit response length for faster output
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            num_beams=1,  # Plain sampling, no beam search (so no early_stopping either)
        )

    # Keep only the tokens produced after the prompt.
    new_token_ids = generated_ids[:, prompt_length:]
    return processor.batch_decode(new_token_ids, skip_special_tokens=True)[0].strip()

In [None]:
def gradio_ui(url: str, query: str):
    """Answer ``query`` about the page at ``url``; this is the Gradio callback.

    Steps: validate the inputs, build a prompt with ``extract_website_info``,
    then pass that prompt to ``generate_response``. Stage durations are appended
    to the timing log through ``log_to_file``.

    Args:
        url: Website address typed by the user.
        query: Question typed by the user.

    Returns:
        The generated answer, or a plain-text message when an input is blank,
        the page could not be fetched, or extraction took more than 10 seconds.
    """
    url = (url or "").strip()
    query = (query or "").strip()
    if not url:
        return "Please enter a website URL."
    if not query:
        return "Please enter a question."

    # perf_counter is monotonic, so durations cannot be skewed by clock changes.
    start_time = time.perf_counter()

    # Extract website content
    extracted_info = extract_website_info(url, query)
    extraction_time = time.perf_counter() - start_time
    log_to_file(f"Extraction Time: {extraction_time:.2f} sec")

    # extract_website_info reports failures as strings with these prefixes
    # (a real prompt starts with a newline); show them to the user instead of
    # sending an error message to the model as if it were a prompt.
    if extracted_info.startswith(("Error", "Failed to retrieve page")):
        return extracted_info

    if extraction_time > 10:
        return f"Web extraction took too long ({extraction_time:.2f} sec). Optimization needed."

    # Generate AI response
    llm_start = time.perf_counter()
    response = generate_response(extracted_info)
    llm_time = time.perf_counter() - llm_start
    log_to_file(f"LLM Response Time: {llm_time:.2f} sec")

    total_time = time.perf_counter() - start_time
    log_to_file(f"Total Time Taken: {total_time:.2f} sec")

    return response

In [None]:
def debug_run(url: str, query: str):
    """Run the full pipeline once via ``gradio_ui`` and print the result to stdout.

    Stage timings are recorded by ``gradio_ui`` itself through ``log_to_file``.
    """
    print("Debugging Response:", gradio_ui(url, query))

# Example end-to-end run against a live page; needs network access and the model
# loaded by an earlier cell, since debug_run goes through gradio_ui.
debug_run("https://en.wikipedia.org/wiki/Elon_Musk", "What companies has Elon Musk founded?")

Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.


Debugging Response: 
        Given the following extracted webpage content, answer concisely:
        CEO and product architect of Tesla, Inc.
Founder, CEO, and chief engineer of SpaceX
Owner, CTO and executive chairman of X (formerly Twitter)
Founder of the Boring Company, X Corp., and xAI
Co-founder of Neuralink, OpenAI, Zip2, and X.com (part of PayPal)
President of the Musk Foundation
Head of Department of Government Efficiency temporary organization
Justine Wilson
​ ​(m. 2000; div. 2008)​
Talulah Riley
​ ​(m. 2010; div. 2012)​ 
​
 ​(m. 2013; div. 2016)​
Tesla, Inc.
SolarCity
Energy
criticism
litigation
The Boring Company
Boring Test Tunnel
Hyperloop
X Corp.
Twitter under Elon Musk
Twitter, Inc.
acquisition
Department of Government Efficiency
Elon Musk and trade unions
Tesla
"One Crew over the Crewcoo's Morty"
List of Tesla Autopilot crashes
Elon Reeve Musk (/ˈiːlɒn mʌsk/; born June 28, 1971) is a businessman and U.S. special government employee, best known for his key roles in Tesl

In [None]:
# Gradio front end: two text inputs are passed to gradio_ui, and its return value
# is shown in the output box.
url_box = gr.Textbox(label="Enter Website URL")
question_box = gr.Textbox(label="Enter Your Question")
answer_box = gr.Textbox(label="Generated Response")

iface = gr.Interface(
    fn=gradio_ui,
    inputs=[url_box, question_box],
    outputs=answer_box,
    title="UI-TARS Web Agent",
    description="Enter a website URL and a question, and the AI will scrape the content and generate an answer.",
)

# share=True asks Gradio for a public URL; debug=True keeps this cell running so
# errors and logs stay visible (set debug=False to release the cell).
iface.launch(debug=True, share=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://803c8836deb4150a85.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.
