# Environment setup: installing dependencies

In [None]:
# %%capture
# Install unsloth together with newspaper3k and lxml's html_clean extra.
# NOTE(review): no versions are pinned, so a fresh run may resolve different releases.
!pip install unsloth newspaper3k lxml[html_clean]
# Replace the PyPI build of unsloth installed above with the current GitHub
# source (the "colab-new" extra), bypassing pip's cache.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Packages imported by app.py: Streamlit (UI), BeautifulSoup (HTML parsing), requests (HTTP).
!pip -q install streamlit beautifulsoup4 requests

Collecting unsloth
  Downloading unsloth-2025.5.6-py3-none-any.whl.metadata (46 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.8/46.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting newspaper3k
  Downloading newspaper3k-0.2.8-py3-none-any.whl.metadata (11 kB)
Collecting unsloth_zoo>=2025.5.7 (from unsloth)
  Downloading unsloth_zoo-2025.5.7-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.20-py3-none-any.whl.metadata (10 kB)
Collecting datasets>=3.4.1 (from unsloth)
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting t

Found existing installation: unsloth 2025.5.6
Uninstalling unsloth-2025.5.6:
  Successfully uninstalled unsloth-2025.5.6
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-6kwtavw3/unsloth_c17c7a19efcd4ccdb3309641c4d691b4
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-6kwtavw3/unsloth_c17c7a19efcd4ccdb3309641c4d691b4
  Resolved https://github.com/unslothai/unsloth.git to commit 3e5024ceed423252c2d098b797961276112aba82
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: unsloth
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone
  Created wheel for unsloth: filename=unsloth-2025.5.6-py3-none-any.w

# Main code: Streamlit UI

In [None]:
# Quietly (re)install the packages needed by the Streamlit app written in the next cell.
# NOTE(review): streamlit, requests, beautifulsoup4, unsloth and newspaper3k are
# already installed by the first cell of this notebook, so most of these are repeats.
! pip  -q install streamlit
!pip -q install --upgrade requests
!pip -q install beautifulsoup4
!pip -q install unsloth transformers
# NOTE(review): app.py does not import newspaper — confirm this package is still needed.
!pip -q install newspaper3k

In [None]:
%%writefile app.py
import os
import streamlit as st
import requests
from bs4 import BeautifulSoup
from unsloth import FastLanguageModel
from transformers import TextStreamer
import re
import torch

# Streamlit app config
st.set_page_config(
    page_title="Article Summarizer",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Initial value of every session-state key the app relies on. The same table
# drives both first-run initialisation and the "Clear All" reset below.
_SESSION_DEFAULTS = {
    'url_input': "",
    'text_input_area': "",
    'summary_output_state': "",
    'original_text_state': "",
    'input_processed': False,
    'clear_requested': False,
    'max_new_tokens': 512,  # Default summary length
}

# Session state setup: only fill in keys that do not exist yet, so values
# carried over from a previous rerun are left untouched.
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Handle Clear button action: the button only raises `clear_requested` and
# reruns; the actual reset happens here, before any widget is created.
if st.session_state.clear_requested:
    for _key, _default in _SESSION_DEFAULTS.items():
        st.session_state[_key] = _default


# Load model with caching
@st.cache_resource
def load_model():
    """Load the summarization model and its tokenizer.

    The function is wrapped in ``st.cache_resource``, so the loaded objects
    are reused across Streamlit reruns instead of being reloaded each time.

    Returns:
        tuple: ``(model, tokenizer)`` as returned by
        ``FastLanguageModel.from_pretrained``, with the model switched to
        evaluation mode.

    If loading fails for any reason, the error is shown in the page and the
    script run is halted with ``st.stop()``; nothing is returned in that case.
    """
    load_kwargs = dict(
        model_name="punit16/automatic_news_summarizer",  # Hugging Face model name
        max_seq_length=2048,
        dtype=None,  # presumably lets the loader choose the dtype — TODO confirm
        load_in_4bit=True,
    )

    try:
        model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)
        model.eval()
    except Exception as e:
        st.error(f"Failed to load model: {e}. Ensure the model name is correct and accessible.")
        st.stop()
    else:
        return model, tokenizer


model, tokenizer = load_model()


# Fetch article content from URL
def fetch_url(url):
    """Download a web page and return its main article text as a single line.

    Extraction strategy, in order of preference:
      1. the first ``<article>`` element;
      2. the first ``<div>`` carrying one of a few well-known article classes;
      3. the text of every ``<p>`` element on the page;
      4. all text found on the page.

    Args:
        url: Address of the article. Surrounding whitespace is ignored and
            ``https://`` is assumed when the address has no scheme.

    Returns:
        str: The extracted text with every whitespace run collapsed to one
        space, or a message starting with ``"Error"`` when the URL is missing,
        the request fails, or no text could be extracted. Callers detect
        failure through that ``"Error"`` prefix.
    """
    # Pasted URLs frequently carry stray spaces or newlines; strip them so a
    # whitespace-only value is reported as missing instead of being requested.
    url = str(url).strip() if url else ""
    if not url:
        return "Error: No URL provided."

    # requests rejects addresses without a scheme ("example.com/story"), so
    # default to HTTPS rather than failing on a very common input shape.
    if not re.match(r'^[a-zA-Z][a-zA-Z0-9+.\-]*://', url):
        url = 'https://' + url

    try:
        # Browser-like headers: some sites refuse requests that look automated.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Referer': 'https://www.google.com/'
        }

        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()  # turn 4xx/5xx responses into RequestException

        soup = BeautifulSoup(response.content, 'html.parser')

        # Prefer a semantic <article>; otherwise try common article containers
        # in priority order and keep the first one that exists.
        article_content = soup.find('article')
        if not article_content:
            common_classes = ['article-body', 'article__content', 'entry-content', 'post-content', 'story-body', 'td-post-content', 'body-content']
            for class_name in common_classes:
                article_content = soup.find('div', class_=class_name)
                if article_content:
                    break

        if article_content:
            raw_text = article_content.get_text()
        else:
            # No recognisable container: fall back to every paragraph on the page.
            raw_text = '\n\n'.join(p.get_text() for p in soup.find_all('p'))

        # Last resort: whatever text the page contains.
        if not raw_text:
            raw_text = soup.get_text()

        text_content = re.sub(r'\s+', ' ', raw_text).strip()

        if not text_content:
            return "Error: Could not extract text from URL."

        return text_content

    except requests.exceptions.RequestException as e:
        return f"Error fetching URL: {e}"
    except Exception as e:
        return f"Error parsing content: {e}"


def _system_prompt_for_length(max_new_tokens_limit):
    """Choose the system prompt that matches a generation token budget.

    Budgets up to 256 tokens get the short prompt (no title), budgets up to
    512 get the medium prompt, and anything larger gets the long prompt.

    Returns:
        tuple: ``(system_content, include_title_format)``.
    """
    if max_new_tokens_limit <= 256:
        return """You are a helpful assistant for article summarization.
            Your task is to provide a short summary of the given text article in 2 to 3 sentences. Do not generate a title.
            Return empty if content is not meaningful.
            """, False

    if max_new_tokens_limit <= 512:
        return """You are a helpful assistant for article summarization.
            Your task is to provide a medium-length summary of the given text article, forming a single paragraph containing a minimum of 5 sentences. Ensure you cover all the main topics and distinct points discussed. Generate a title.
            Return empty if content is not meaningful.
            """, True

    return """You are a helpful assistant for article summarization.
            Your task is to provide a long summary of the given text article in 7 to 8 sentences forming a paragraph, or as 2 small paragraphs (2-3 sentences each). Generate a title.
            Return empty if content is not meaningful.
            """, True


def summarize(text_content, tokenizer, model, max_new_tokens_limit):
    """Generate a summary of ``text_content`` with the given model.

    Args:
        text_content: Article text. Empty text, or text starting with
            ``"Error"`` (the failure convention of ``fetch_url``), is not
            summarized.
        tokenizer: Tokenizer providing ``apply_chat_template``, ``decode`` and
            ``eos_token_id``.
        model: Model providing ``to`` and ``generate``.
        max_new_tokens_limit: Maximum number of tokens to generate. It also
            selects the prompt: <= 256 short (2-3 sentences, no title),
            <= 512 medium, larger values long.

    Returns:
        str: The generated summary, ``"Could not summarize."`` when the input
        is unusable, or ``"Summary could not be generated by model."`` when
        the model produced no text.
    """
    if not text_content or text_content.startswith("Error"):
        return "Could not summarize."

    system_content, include_title_format = _system_prompt_for_length(max_new_tokens_limit)

    if include_title_format:
        # Ask for an explicit "Title:" line followed by the summary body.
        system_content += (
            "\nFormat:\nTitle: [Generated Title]\n\n[Generated Summary]\n"
            "                "
        )

    messages = [
        {"role": "system", "content": system_content},
        {"role": "human", "content": text_content},
    ]

    device = "cuda" if torch.cuda.is_available() else "cpu"

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True,
        return_tensors = "pt",
    ).to(device)

    model.to(device)

    with st.spinner(f"Generating summary (max {max_new_tokens_limit} tokens)..."):
        lora_output = model.generate(
            input_ids = inputs,
            max_new_tokens = max_new_tokens_limit,
            use_cache = True,
            temperature = 0.7,
            min_p = 0.5,
            do_sample = True,
            pad_token_id=tokenizer.eos_token_id
        )

    # generate() returns prompt + continuation; keep only the continuation.
    newly_generated_tokens = lora_output[0][len(inputs[0]):]
    summary = tokenizer.decode(newly_generated_tokens, skip_special_tokens=True)
    summary = summary.strip()

    # The short prompt forbids a title; if the model emitted one anyway, drop it.
    if not include_title_format and summary.lower().startswith("title:"):
        match = re.search(r'(?i)Title:\s*.*?\n\n(.*)', summary, re.DOTALL)
        if match:
            summary = match.group(1).strip()
        else:
            # No blank line after the title: fall back to removing the first line.
            lines = summary.split('\n', 1)
            if len(lines) > 1 and lines[0].lower().startswith("title:"):
                summary = lines[1].strip()

    if not summary:
        return "Summary could not be generated by model."

    return summary


# --- Streamlit App Layout ---
st.title("📰 Automatic News Summarizer")
st.markdown("""
    Summarize articles from a URL or provided text. Choose the summary length.
    """, unsafe_allow_html=True)

# Token budget behind each label offered in the summary-length dropdown.
_LENGTH_OPTIONS = {
    "Short (approx. 256 tokens)": 256,
    "Medium (approx. 512 tokens)": 512,
    "Long (approx. 1024 tokens)": 1024,
}

col1, col2 = st.columns(2)

# Left column: every input control.
with col1:
    st.subheader("Input")
    input_type = st.radio("Choose input type:", ("URL", "Text"), key="input_type_radio")

    # Summary length selection ("Medium" is preselected)
    summary_length_option = st.selectbox(
        "Select summary length:",
        options=list(_LENGTH_OPTIONS),
        index=1,
        key="summary_length_selectbox",
    )

    # Map option to tokens; an unrecognised label leaves the stored value as is.
    st.session_state.max_new_tokens = _LENGTH_OPTIONS.get(
        summary_length_option, st.session_state.max_new_tokens
    )

    # Only the widgets for the chosen input type are rendered; either way
    # `process_button` reports whether the matching summarize button was hit.
    if input_type != "URL":
        text_content_input = st.text_area("Paste text here:", height=350, key="text_input_area")
        process_button = st.button("Summarize Text", key="summarize_text_button")
    else:
        url = st.text_input("Enter URL:", key="url_input")
        process_button = st.button("Summarize URL", key="summarize_url_button")

    # Widget-backed state is reset at the top of the next run, so just raise
    # the flag here and rerun.
    if st.button("Clear All", key="clear_button"):
        st.session_state.clear_requested = True
        st.rerun()


# Right column: placeholders that the display section fills in later.
with col2:
    st.subheader("Output")
    summary_placeholder = st.empty()
    original_text_placeholder = st.empty()


# --- Processing Logic ---
# Runs on every press of the summarize button. Blank input is deliberately
# let through: fetch_url() answers with "Error: No URL provided." and the
# text branch with "Please enter some text.", so the user always gets
# feedback instead of a click that appears to do nothing.
if process_button:
    st.session_state.input_processed = True
    st.session_state.summary_output_state = ""
    st.session_state.original_text_state = ""

    selected_max_new_tokens = st.session_state.max_new_tokens

    if input_type == "URL":
        current_input = st.session_state.url_input or ""
        with st.spinner("Fetching article content..."):
            text_content = fetch_url(current_input.strip())
        # fetch_url reports every failure as a string starting with "Error".
        failure_message = text_content if text_content.startswith("Error") else ""
    else:
        current_input = st.session_state.text_input_area or ""
        text_content = current_input.strip()
        failure_message = "" if text_content else "Please enter some text."

    if failure_message:
        # Nothing to summarize: surface the message in the summary slot.
        st.session_state.summary_output_state = failure_message
    else:
        st.session_state.original_text_state = text_content
        st.session_state.summary_output_state = summarize(
            text_content, tokenizer, model, selected_max_new_tokens
        )

    # Rerun so the display section renders from the freshly stored state.
    st.rerun()


# --- Display Results ---
# Both placeholders come from st.empty(), which holds a single element: each
# call made directly on the placeholder replaces the previous one. Rendering
# inside placeholder.container() keeps the heading and the content together.
if st.session_state.input_processed:
    original_text = st.session_state.original_text_state
    summary_text = st.session_state.summary_output_state

    # Prefixes used across the app to mark a failure message.
    failure_prefixes = ("Error", "Could not")
    has_original = bool(original_text) and not original_text.startswith(failure_prefixes)

    if has_original:
        with original_text_placeholder.container():
            st.subheader("Original Text Preview:")
            with st.expander("View original text"):
                word_count = len(original_text.split())
                char_count = len(original_text)
                st.info(f"Words: {word_count} | Chars: {char_count}")
                st.text_area("Full Text", original_text, height=300, key="displayed_original_text", disabled=True)

    if summary_text:
        with summary_placeholder.container():
            if summary_text.startswith(failure_prefixes):
                st.error(summary_text)
                if has_original:
                    # Text was extracted but the summarization step itself failed.
                    st.write("Summary could not be generated based on extracted text.")
            else:
                st.subheader("Summary:")
                st.write(summary_text)

    elif original_text:
        # Input was accepted but no summary text was stored.
        with summary_placeholder.container():
            st.subheader("Summary:")
            st.write("Summary not generated (model output empty).")

Overwriting app.py


In [None]:
# Print this machine's public IPv4 address as reported by ipv4.icanhazip.com.
# NOTE(review): presumably this is the value to enter on the localtunnel landing page — confirm.
!wget -q -O - ipv4.icanhazip.com

34.124.154.96


In [None]:
# Start the Streamlit app in the background (`&`) and, in the foreground, open a
# localtunnel to port 8501, the port Streamlit reports in its startup output.
! streamlit run app.py & npx localtunnel --port 8501

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.124.154.96:8501[0m
[0m
your url is: https://crazy-pens-clean.loca.lt
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
2025-05-21 07:04:36.768238: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747811076.795605    8058 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747811076.804198    8058 cuda_blas.cc:1418] Unable to register cuBLAS fac