In [8]:
%%capture
!pip install unsloth newspaper3k lxml[html_clean]
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip -q install streamlit beautifulsoup4 requests

In [9]:
from google.colab import drive
import os
from unsloth import FastLanguageModel

# 1. Mount Google Drive
drive.mount('/content/drive')

# 2. Define your model's path within Google Drive
model_folder_path = '/content/drive/MyDrive/my_trained_model/my_trained_model' # Update with your model's path

# 3. Check if the folder exists
if os.path.exists(model_folder_path):
  print(f"Model folder found at: {model_folder_path}")
  # 4. List the contents of the folder (optional)
  print(os.listdir(model_folder_path))
else:
  print(f"Model folder not found at: {model_folder_path}")

max_seq_length = 2048
dtype = None
load_in_4bit = True
model, tokenizer = FastLanguageModel.from_pretrained( # Unpack the tuple into model and tokenizer
    model_name=model_folder_path,  # Use the full path here
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
# 4. Set to evaluation mode - now on the model object
model.eval()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model folder found at: /content/drive/MyDrive/my_trained_model/my_trained_model
['special_tokens_map.json', 'tokenizer.json', 'adapter_config.json', 'tokenizer_config.json', 'README.md', 'adapter_model.safetensors']
==((====))==  Unsloth 2025.5.6: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 3072, padding_idx=128004)
        (layers): ModuleList(
          (0): LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear

#testing

In [10]:
from unsloth import FastLanguageModel
# if False:
#     from unsloth import FastLanguageModel
#     model, tokenizer = FastLanguageModel.from_pretrained(
#         model_name = "lora_model",
#         max_seq_length = max_seq_length,
#         dtype = dtype,
#         load_in_4bit = load_in_4bit,
#     )
#     FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# model = FastLanguageModel.from_pretrained('/content/drive/MyDrive/my_trained_model/my_trained_model')

# max_seq_length = 2048
# dtype = None
# load_in_4bit = True
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "/content/drive/MyDrive/my_trained_model/my_trained_model",
#     max_seq_length = max_seq_length,
#     dtype = dtype,
#     load_in_4bit = load_in_4bit,
# )
FastLanguageModel.for_inference(model)
text_content="""
Responding to geopolitical challenges
He then went on to frame the challenging times in geopolitics. ‚ÄúWe meet at a difficult time in world affairs. Two major conflicts are underway, each with its own global repercussions. The Covid pandemic has left many in the developing world deeply devastated. Disruptions of various kinds ‚Äì ranging from extreme climate events to supply chain uncertainties and financial volatility ‚Äì are impacting growth and development. Debt is a serious concern, even as the world falls behind in achieving SDG targets. Technology holds great promise, as well as raising a new host of concerns. How should the members of the SCO respond to these challenges?‚Äù he asked.

Festive offer
‚ÄúThe answers lie in the Charter of our organisation,‚Äù he said, adding, ‚ÄúAnd I urge you to reflect on Article 1 that spells out the goals and tasks of the SCO. Let me summarise it for our collective consideration. The objective is to strengthen mutual trust, friendship and good neighbourliness. It is to develop multi-faceted cooperation, especially of a regional nature. It is to be a positive force in terms of balanced growth, integration and conflict prevention. The Charter was equally clear what the key challenges were. And these were primarily three, that the SCO was committed to combatting: one, terrorism; two, separatism; and three, extremism.‚Äù

Jaishankar stated that only by reaffirming the commitment to the Charter most sincerely that they can fully realise the benefits of cooperation and integration that it envisages. ‚ÄúThis is not just an endeavour for our own benefit. We all realise that the world is moving towards multi-polarity. Globalisation and rebalancing are realities that cannot be denied. Cumulatively, they have created many new opportunities in terms of trade, investment, connectivity, energy flows and other forms of collaboration. There is no question that our region would benefit immensely if we take this forward. Not just that, others too would draw their own inspiration and lessons from such efforts.‚Äù
"""
messages = [
    {
        "role": "system",
        "content": """You are a helpful assistant that specialize in article summarization
            your task is to summarize given text article and generate title for it
            If the provided article doesnt contain coherent and meaningful content,
            just return empty response""",
    },
    {"role": "human", "content": text_content},
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
lora_output = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128,
                   use_cache = True, temperature = 1.5, min_p = 0.1)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Article:   Responding to geopolitical challenges 
Festive offer 

The SCO (Shanghai Cooperation Organisation) seeks to strengthen trust and friendship among its members.  Jaishankar stated that reaffirming commitment to the SCO‚Äôs charter can enable it to achieve the benefits of cooperation and integration it seeks to foster.<|eot_id|>


#main code ui

In [11]:
! pip  -q install streamlit
!pip -q install --upgrade requests
!pip -q install beautifulsoup4
!pip -q install unsloth transformers
!pip -q install newspaper3k

[0m

In [18]:
%%writefile app.py
import os
import streamlit as st
import requests
from bs4 import BeautifulSoup
from unsloth import FastLanguageModel
from transformers import TextStreamer
import re
import torch

# Streamlit app config
st.set_page_config(
    page_title="Article Summarizer",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# Session state setup
if 'url_input' not in st.session_state:
    st.session_state.url_input = ""
if 'text_input_area' not in st.session_state:
    st.session_state.text_input_area = ""
if 'summary_output_state' not in st.session_state:
     st.session_state.summary_output_state = ""
if 'original_text_state' not in st.session_state:
     st.session_state.original_text_state = ""
if 'input_processed' not in st.session_state:
    st.session_state.input_processed = False
if 'clear_requested' not in st.session_state:
    st.session_state.clear_requested = False
if 'max_new_tokens' not in st.session_state:
    st.session_state.max_new_tokens = 512 # Default summary length

# Handle Clear button action
if st.session_state.clear_requested:
    st.session_state.url_input = ""
    st.session_state.text_input_area = ""
    st.session_state.summary_output_state = ""
    st.session_state.original_text_state = ""
    st.session_state.input_processed = False
    st.session_state.max_new_tokens = 512 # Reset length
    st.session_state.clear_requested = False


# Load model with caching
@st.cache_resource
def load_model():
    model_folder_path = '/content/drive/MyDrive/my_trained_model/my_trained_model'
    max_seq_length = 2048
    dtype = None
    load_in_4bit = True

    try:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_folder_path,
            max_seq_length=max_seq_length,
            dtype=dtype,
            load_in_4bit=load_in_4bit,
        )
        model.eval()
        return model, tokenizer
    except Exception as e:
        st.error(f"Failed to load model: {e}. Ensure model path is correct and Drive is mounted.")
        st.stop()

model, tokenizer = load_model()


# Fetch article content from URL
def fetch_url(url):
    if not url:
        return "Error: No URL provided."

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Referer': 'https://www.google.com/'
        }

        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        article_content = soup.find('article')
        if not article_content:
            common_classes = ['article-body', 'article__content', 'entry-content', 'post-content', 'story-body', 'td-post-content', 'body-content']
            for class_name in common_classes:
                article_content = soup.find('div', class_=class_name)
                if article_content: break

        if not article_content:
             paragraphs = soup.find_all('p')
             raw_text = '\n\n'.join([p.get_text() for p in paragraphs])
        else:
             raw_text = article_content.get_text()

        if not raw_text:
             raw_text = soup.get_text()

        text_content = re.sub(r'\s+', ' ', raw_text).strip()

        if not text_content:
             return "Error: Could not extract text from URL."

        return text_content

    except requests.exceptions.RequestException as e:
        return f"Error fetching URL: {e}"
    except Exception as e:
        return f"Error parsing content: {e}"


def summarize(text_content, tokenizer, model, max_new_tokens_limit):
    if not text_content or text_content.startswith("Error"):
        return "Could not summarize."

    system_content = ""
    include_title_format = True

    if max_new_tokens_limit == 256:
        system_content = """You are a helpful assistant for article summarization.
            Your task is to provide a short summary of the given text article in 2 to 3 sentences. Do not generate a title.
            Return empty if content is not meaningful.
            """
        include_title_format = False

    elif max_new_tokens_limit == 1024:
        system_content = """You are a helpful assistant for article summarization.
            Your task is to provide a medium-length summary of the given text article, forming a single paragraph containing a minimum of 5 sentences. Ensure you cover all the main topics and distinct points discussed. Generate a title.
            Return empty if content is not meaningful.
            """

    else:
         system_content = """You are a helpful assistant for article summarization.
            Your task is to provide a long summary of the given text article in 7 to 8 sentences forming a paragraph, or as 2 small paragraphs (2-3 sentences each). Generate a title.
            Return empty if content is not meaningful.
            """

    if include_title_format:
        messages = [
            {
                "role": "system",
                "content": system_content + """
Format:
Title: [Generated Title]

[Generated Summary]
                """,
            },
            {"role": "human", "content": text_content},
        ]
    else:
         messages = [
            {
                "role": "system",
                "content": system_content,
            },
            {"role": "human", "content": text_content},
        ]


    device = "cuda" if torch.cuda.is_available() else "cpu"

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True,
        return_tensors = "pt",
    ).to(device)

    model.to(device)

    with st.spinner(f"Generating summary (max {max_new_tokens_limit} tokens)..."):
        lora_output = model.generate(
            input_ids = inputs,
            max_new_tokens = max_new_tokens_limit,
            use_cache = True,
            temperature = 0.7,
            min_p = 0.5,
            do_sample = True,
            pad_token_id=tokenizer.eos_token_id
        )

    newly_generated_tokens = lora_output[0][len(inputs[0]):]
    summary = tokenizer.decode(newly_generated_tokens, skip_special_tokens=True)
    summary = summary.strip()


    if not include_title_format and summary.lower().startswith("title:"):
         match = re.search(r'(?i)Title:\s*.*?\n\n(.*)', summary, re.DOTALL)
         if match:
              summary = match.group(1).strip()
         else:
             lines = summary.split('\n', 1)
             if len(lines) > 1 and lines[0].lower().startswith("title:"):
                 summary = lines[1].strip()


    if not summary:
        return "Summary could not be generated by model."

    return summary


# --- Streamlit App Layout ---
st.title("üì∞ AI Article Summarizer")
st.markdown("""
    Summarize articles from a URL or provided text. Choose the summary length.
    """, unsafe_allow_html=True)


col1, col2 = st.columns(2)

with col1:
    st.subheader("Input")
    input_type = st.radio("Choose input type:", ("URL", "Text"), key="input_type_radio")

# Summary length selection
    summary_length_option = st.selectbox(
        "Select summary length:",
        options=["Short (approx. 256 tokens)", "Medium (approx. 512 tokens)", "Long (approx. 1024 tokens)"],
        index=1,
        key="summary_length_selectbox"
    )

    # Map option to tokens
    if summary_length_option == "Short (approx. 256 tokens)":
        st.session_state.max_new_tokens = 256
    elif summary_length_option == "Medium (approx. 512 tokens)":
        st.session_state.max_new_tokens = 512
    elif summary_length_option == "Long (approx. 1024 tokens)":
        st.session_state.max_new_tokens = 1024

    if input_type == "URL":
        url = st.text_input("Enter URL:", key="url_input")
        process_button = st.button("Summarize URL", key="summarize_url_button")
    else:
        text_content_input = st.text_area("Paste text here:", height=350, key="text_input_area")
        process_button = st.button("Summarize Text", key="summarize_text_button")

    if st.button("Clear All", key="clear_button"):
        st.session_state.clear_requested = True
        st.rerun()


with col2:
    st.subheader("Output")
    summary_placeholder = st.empty()
    original_text_placeholder = st.empty()


# --- Processing Logic ---
if process_button and ( (input_type == "URL" and (st.session_state.url_input and st.session_state.url_input.strip())) or (input_type == "Text" and st.session_state.text_input_area and st.session_state.text_input_area.strip()) ):
    st.session_state.input_processed = True
    st.session_state.summary_output_state = ""
    st.session_state.original_text_state = ""

    current_input = st.session_state.url_input if input_type == "URL" else st.session_state.text_input_area
    selected_max_new_tokens = st.session_state.max_new_tokens

    if input_type == "URL":
        with st.spinner("Fetching article content..."):
            text_content = fetch_url(current_input)

        if text_content.startswith("Error"):
            st.session_state.summary_output_state = text_content
            st.session_state.original_text_state = ""
        else:
            st.session_state.original_text_state = text_content
            summary = summarize(text_content, tokenizer, model, selected_max_new_tokens)
            st.session_state.summary_output_state = summary

    else:
        text_content = current_input.strip()
        if not text_content:
             st.session_state.summary_output_state = "Please enter some text."
             st.session_state.original_text_state = ""
        else:
            st.session_state.original_text_state = text_content
            summary = summarize(text_content, tokenizer, model, selected_max_new_tokens)
            st.session_state.summary_output_state = summary

    st.rerun()


# --- Display Results ---
if st.session_state.input_processed:
    if st.session_state.original_text_state and not (st.session_state.original_text_state.startswith("Error") or st.session_state.original_text_state.startswith("Could not")):
        original_text_placeholder.subheader("Original Text Preview:")
        with original_text_placeholder.expander("View original text"):
            word_count = len(st.session_state.original_text_state.split())
            char_count = len(st.session_state.original_text_state)
            st.info(f"Words: {word_count} | Chars: {char_count}")
            st.text_area("Full Text", st.session_state.original_text_state, height=300, key="displayed_original_text", disabled=True)


    if st.session_state.summary_output_state:
        if st.session_state.summary_output_state.startswith("Error") or st.session_state.summary_output_state.startswith("Could not"):
            summary_placeholder.error(st.session_state.summary_output_state)
            if st.session_state.original_text_state and not (st.session_state.original_text_state.startswith("Error") or st.session_state.original_text_state.startswith("Could not")):
                 summary_placeholder.write("Summary could not be generated based on extracted text.")
        else:
            summary_placeholder.subheader("Summary:")
            summary_placeholder.write(st.session_state.summary_output_state)

    elif st.session_state.original_text_state and not st.session_state.summary_output_state:
         summary_placeholder.subheader("Summary:")
         summary_placeholder.write("Summary not generated (model output empty).")

Overwriting app.py


In [19]:
!wget -q -O - ipv4.icanhazip.com

34.125.110.161


In [20]:
! streamlit run app.py & npx localtunnel --port 8501

[1G[0K‚†ô[1G[0K‚†π
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K‚†∏[1G[0K‚†º[1G[0K‚†¥[1G[0K[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.125.110.161:8501[0m
[0m
your url is: https://curvy-snails-rescue.loca.lt
ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
2025-05-20 07:23:18.020644: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747725798.044434    8598 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747725798.051809    8598 cuda_blas.cc:1418] Unable to register cuBLAS fact