# Pretraining part (LoRA fine-tuning of Llama-3.2-3B-Instruct)

In [None]:
%%capture
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
from unsloth import FastLanguageModel
import torch
# Loading options for the base model. These are module-level names that later
# cells read as well (max_seq_length is also handed to the SFTTrainer).
max_seq_length = 2048
dtype = None          # passed through to from_pretrained unchanged
load_in_4bit = True   # request 4-bit loading of the base weights

# from_pretrained returns the model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2025.5.4: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# Replace `model` with its LoRA-wrapped version: rank-16 adapters are attached to
# the seven projection modules listed in target_modules.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407, # fixed seed; the same value is used as the trainer seed
    use_rslora = False,
    loftq_config = None,
)

In [None]:
from unsloth.chat_templates import get_chat_template

# Attach the "llama-3.1" chat template to the tokenizer. formatting_prompts_func
# below depends on it through tokenizer.apply_chat_template.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

def formatting_prompts_func(examples):
    """Render every conversation of a batch to one chat-formatted string.

    Args:
        examples: Batch mapping with a "conversations" entry that holds one
            message list per example.

    Returns:
        dict with a single "text" key: the rendered strings, in input order.
        No generation prompt is appended, since each conversation already
        ends with the assistant reply.
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize = False,
                add_generation_prompt = False,
            )
        )
    return {"text": rendered}

from datasets import load_dataset
# Identifier of the dataset to load.
huggingface_dataset_name = "FiscalNote/billsum"

# Loaded without selecting a split; the "train" split is picked later, when the
# conversations column is built.
dataset = load_dataset(huggingface_dataset_name)

In [None]:
from unsloth.chat_templates import standardize_sharegpt

def format_to_conversations(example):
    """Turn one dataset row into a three-message chat conversation.

    Args:
        example: Row mapping. Its "text" entry becomes the user message and
            its "summary" entry becomes the assistant message.

    Returns:
        dict with a single "conversations" key holding the
        [system, user, assistant] message list.
    """
    system_prompt = (
        "You are a helpful assistant that specialize in article summarization "
        "your task is to summarize given text article and generate title for it "
        "If the provided article doesnt contain coherent and meaningful content,"
        "just return empty response"
    )
    roles_and_contents = (
        ("system", system_prompt),
        ("user", example["text"]),
        ("assistant", example["summary"]),
    )
    return {
        "conversations": [
            {"role": role, "content": content}
            for role, content in roles_and_contents
        ]
    }

# Build the "conversations" column from the train split. `dataset` is rebound here
# from the full load_dataset(...) result to the mapped train split, so this cell
# expects `dataset` to still be the freshly loaded object when it runs.
dataset = dataset['train'].map(format_to_conversations)

# Pass the conversations through standardize_sharegpt, then render each one to
# chat-formatted text in batches.
# NOTE(review): formatting_prompts_func returns a "text" key, the same name as the
# column format_to_conversations reads - presumably the original article text is
# overwritten by the rendered prompt at this point; confirm that is intended.
dataset = standardize_sharegpt(dataset)
dataset = dataset.map(formatting_prompts_func, batched=True)

In [None]:
# Spot-check: display the message list that was built for row 5.
dataset[5]["conversations"]

[{'content': 'You are a helpful assistant that specialize in article summarization your task is to summarize given text article and generate title for it If the provided article doesnt contain coherent and meaningful content,just return empty response',
  'role': 'system'},
 {'content': "SECTION 1. SHORT TITLE.\n\n    This Act may be cited as the ``Holocaust Victims Insurance Relief \nAct of 2001''.\n\nSEC. 2. FINDINGS AND PURPOSE.\n\n    (a) Findings.--The Congress finds the following:\n            (1) The Holocaust, including the murder of 6,000,000 \n        European Jews, the systematic destruction of families and \n        communities, and the wholesale theft of their assets, was one \n        of the most tragic crimes in modern history.\n            (2) When Holocaust survivors or heirs of Holocaust victims \n        presented claims to insurance companies after World War II, \n        many were rejected because the claimants did not have death \n        certificates or physical 

In [None]:
# Inspect the first formatted example and report the type (and length, for lists)
# of each column. This cell runs before the trainer is created, so
# train_on_responses_only has NOT been applied yet; the message and variable name
# now say so instead of claiming the opposite.
print("\nChecking sample after chat formatting (before train_on_responses_only):")
formatted_sample = dataset[0]  # first example of the formatted dataset
if isinstance(formatted_sample, dict):
    for key, value in formatted_sample.items():
        if isinstance(value, list):
            print(f"  Key: {key}, Type: {type(value)}, Length: {len(value)}")
        else:
            print(f"  Key: {key}, Type: {type(value)}")
else:
    print(f"  Sample type: {type(formatted_sample)}")




Checking sample after train_on_responses_only:
  Key: text, Type: <class 'str'>
  Key: summary, Type: <class 'str'>
  Key: title, Type: <class 'str'>
  Key: conversations, Type: <class 'list'>, Length: 3


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Supervised fine-tuning trainer over the chat-formatted dataset built above.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    # dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer,model=model),
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4, # 2 x 4 = 8 examples per optimizer step
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60, # short run: training stops after 60 steps
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, chosen by is_bfloat16_supported().
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
    ),
)

In [None]:
from unsloth.chat_templates import train_on_responses_only
# Re-wrap the trainer with train_on_responses_only, telling it which header
# strings open a user turn and an assistant turn in the rendered text.
# NOTE(review): presumably this restricts the loss to the assistant replies -
# confirm against the unsloth documentation.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

In [None]:
# Decode the token ids of one processed example to check the rendered prompt.
# NOTE(review): the recorded output starts with "<|begin_of_text|>" twice - check
# whether the BOS token is added both by the chat template and at tokenization.
tokenizer.decode(trainer.train_dataset[5]["input_ids"])

"<|begin_of_text|><|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\nYou are a helpful assistant that specialize in article summarization your task is to summarize given text article and generate title for it If the provided article doesnt contain coherent and meaningful content,just return empty response<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSECTION 1. SHORT TITLE.\n\n    This Act may be cited as the ``Holocaust Victims Insurance Relief \nAct of 2001''.\n\nSEC. 2. FINDINGS AND PURPOSE.\n\n    (a) Findings.--The Congress finds the following:\n            (1) The Holocaust, including the murder of 6,000,000 \n        European Jews, the systematic destruction of families and \n        communities, and the wholesale theft of their assets, was one \n        of the most tragic crimes in modern history.\n            (2) When Holocaust survivors or heirs of Holocaust victims \n        presented claim

In [None]:
# %%writefile app.py
import os
import streamlit as st
import requests
from bs4 import BeautifulSoup
from unsloth import FastLanguageModel, get_chat_template
from transformers import TextStreamer
import torch # Make sure torch is imported

# --- Model Loading ---
# Load the model and tokenizer once when the app starts
@st.cache_resource(show_spinner=False)
def _load_model_cached(model_path):
    """Load the model/tokenizer pair for ``model_path``; any failure propagates.

    Kept separate from load_my_model so that only successful loads end up in the
    resource cache: a (None, None) failure result is never stored, and a later
    rerun can retry the load.
    """
    # Model loading parameters - these should match your training
    max_seq_length = 2048
    dtype = None # Or torch.float16, torch.bfloat16
    load_in_4bit = True

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,  # Use the full path here
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )
    # Set to evaluation mode
    model.eval()
    return model, tokenizer


def load_my_model(model_path):
    """Return ``(model, tokenizer)`` for the saved model, or ``(None, None)`` on failure.

    Args:
        model_path: Folder that contains the saved model files.

    Returns:
        The loaded ``(model, tokenizer)`` pair. If the folder does not exist or
        loading raises, an error is shown in the app and ``(None, None)`` is
        returned so the caller can stop.
    """
    # Checked on every call (outside the cache) so a folder that becomes available
    # later is picked up on the next rerun.
    if not os.path.exists(model_path):
        st.error(f"Model folder not found at: {model_path}. Please check the path.")
        return None, None

    try:
        # st.spinner only displays while it is used as a context manager; the
        # previous bare call created the spinner object and discarded it.
        with st.spinner(f"Loading model from {model_path}..."):
            model, tokenizer = _load_model_cached(model_path)
    except Exception as e:
        st.error(f"Error loading model from {model_path}: {e}")
        return None, None

    st.success("Model loaded successfully!")
    return model, tokenizer


# Folder holding the saved model files on Google Drive. The Drive must be mounted
# in the environment that runs this script; for a real deployment the model would
# more likely be copied locally or read from different storage.
model_folder_path = '/content/drive/MyDrive/my_trained_model/final_model'

# Load the model and tokenizer; load_my_model returns (None, None) on failure.
model, tokenizer = load_my_model(model_folder_path)

if model is None or tokenizer is None:
    st.stop() # Stop the app if model loading failed


# --- Streamlit App Interface ---

st.title("Automated News Summarizer ")
st.markdown("### Enter a URL or paste your text below:")

# The selected value ("URL" or "Text") decides which input branch runs further down.
input_type = st.radio("Choose the input type:", ("URL", "Text"))

# --- Data Fetching and Processing ---

def fetch_url(url):
    """Download a web page and return its text as one whitespace-normalized string.

    Args:
        url: Page address. A falsy value falls back to "https://www.bing.com/news".

    Returns:
        All text of the page (navigation, ads, etc. included) with runs of
        whitespace collapsed to single spaces. On failure, a message that starts
        with "Error fetching URL:" (request problems, including 4xx/5xx statuses)
        or "An unexpected error occurred during fetching:" (anything else) is
        returned instead of raising.
    """
    # Browser-like headers sent with the request.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }

    try:
        target = url or "https://www.bing.com/news"

        page = requests.get(target, headers=browser_headers, timeout=10)
        page.raise_for_status()

        raw_text = BeautifulSoup(page.content, 'html.parser').get_text()
        # split() + join collapses every run of whitespace into a single space.
        return ' '.join(raw_text.split())
    except requests.exceptions.RequestException as e:
        return f"Error fetching URL: {e}"
    except Exception as e:
        return f"An unexpected error occurred during fetching: {e}"


# --- Summarization Function ---

# Pass tokenizer and model as arguments, they are loaded globally by @st.cache_resource
def summarize(text_content, current_model, current_tokenizer):
    """Generate a summary of ``text_content`` with the given model and tokenizer.

    Args:
        text_content: Article text placed in the user turn of the prompt.
        current_model: Model object exposing ``generate``.
        current_tokenizer: Tokenizer exposing ``apply_chat_template``, ``decode``
            and ``eos_token_id``.

    Returns:
        The decoded newly generated tokens (prompt excluded, special tokens
        skipped). Returns "Model not loaded." when either object is falsy, and a
        message starting with "Error during summarization:" if anything raises.
    """
    if not (current_model and current_tokenizer):
        return "Model not loaded."

    system_prompt = """You are a helpful assistant that specialize in article summarization.
                Your task is to summarize the given text article and generate a title for it.
                If the provided article doesn't contain coherent and meaningful content,
                just return an empty response."""
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text_content},
    ]

    try:
        # Render the chat, append the generation prompt, and tokenize in one step.
        prompt_ids = current_tokenizer.apply_chat_template(
            chat,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        )
        prompt_ids = prompt_ids.to("cuda")

        generation_kwargs = dict(
            input_ids=prompt_ids,
            max_new_tokens=128,   # upper bound on the summary length
            use_cache=True,
            temperature=1.0,
            min_p=0.1,
            do_sample=True,
            pad_token_id=current_tokenizer.eos_token_id,
        )
        # Inference only: no gradients needed.
        with torch.no_grad():
            generated = current_model.generate(**generation_kwargs)

        # The output holds prompt + continuation; keep only what follows the prompt.
        prompt_length = prompt_ids.shape[1]
        new_tokens = generated[0, prompt_length:]
        return current_tokenizer.decode(new_tokens, skip_special_tokens=True)
    except Exception as e:
        return f"Error during summarization: {e}"


# --- Streamlit Logic Based on Input Type ---

# Run the branch that matches the selected input type. Both branches end by
# calling summarize() with the globally loaded model and tokenizer.
if input_type == "URL":
    url = st.text_input("Enter the URL:")
    if st.button("Summarize"):
        if url:
            with st.spinner("Fetching and summarizing..."):
                text_content = fetch_url(url)
                # fetch_url reports failures as a string that STARTS with one of
                # these prefixes. Matching on the prefix (instead of "in") keeps an
                # article that merely mentions the phrase from being treated as a
                # failed download.
                fetch_failed = text_content.startswith(
                    ("Error fetching URL", "An unexpected error occurred")
                )
                if fetch_failed:
                    st.error(text_content) # Display the fetch error
                else:
                    st.subheader("Original Text:")
                    # Display a limited preview of the text content
                    st.text_area("Preview", text_content[:1000] + "...", height=150)
                    st.subheader("Summary:")
                    summary = summarize(text_content, model, tokenizer)
                    st.write(summary)
        else:
            st.warning("Please enter a valid URL.")

else: # input_type == "Text"
    text_content = st.text_area("Paste your text here:", height=300)
    if st.button("Summarize"):
        if text_content:
            with st.spinner("Summarizing..."):
                st.subheader("Original Text:")
                st.write(text_content)
                st.subheader("Summary:")
                summary = summarize(text_content, model, tokenizer)
                st.write(summary)
        else:
            st.warning("Please enter some text.")

# --- End of Streamlit App ---

In [None]:
# Inspect the first example as stored on the trainer: report the type of each
# field, plus the length of lists and the shape of tensors.
print("\nChecking sample from trainer.train_dataset (should be lists before batching):")
sample_from_trainer_dataset = trainer.train_dataset[0]
if isinstance(sample_from_trainer_dataset, dict):
    for key, value in sample_from_trainer_dataset.items():
        if isinstance(value, list):
             print(f"  Key: {key}, Type: {type(value)}, Length: {len(value)}")
        elif isinstance(value, torch.Tensor):
             print(f"  Key: {key}, Type: {type(value)}, Shape: {value.shape}")
        else:
             print(f"  Key: {key}, Type: {type(value)}")
else:
    print(f"  Sample type: {type(sample_from_trainer_dataset)}")


# Now run the training. The result is kept in trainer_stats, which the
# memory/time statistics cell reads afterwards.
# NOTE(review): the recorded output of this cell ends in
# "RuntimeError: PassManager::run failed", so trainer_stats was not assigned in
# that run.
trainer_stats = trainer.train()


Checking sample from trainer.train_dataset (should be lists before batching):
  Key: text, Type: <class 'str'>
  Key: summary, Type: <class 'str'>
  Key: title, Type: <class 'str'>
  Key: conversations, Type: <class 'list'>, Length: 3
  Key: input_ids, Type: <class 'list'>, Length: 1403
  Key: attention_mask, Type: <class 'list'>, Length: 1403
  Key: labels, Type: <class 'list'>, Length: 1403


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 18,949 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856/3,000,000,000 (0.81% trained)


RuntimeError: PassManager::run failed

In [None]:
#@title Show final memory and time stats
import torch

# All memory figures below are converted from bytes to GB and rounded to 3 decimals.
# NOTE(review): start_gpu_memory is read here, in the same cell and from the same
# counter (max_memory_reserved) as used_memory below, so used_memory_for_lora and
# lora_percentage always come out as 0. To be meaningful the baseline has to be
# captured before trainer.train() is called.
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)

# Total memory of GPU 0.
max_memory = round(torch.cuda.get_device_properties(0).total_memory / 1024 / 1024 / 1024, 3)

# Peak reserved memory so far, and the part of it above the baseline.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
# trainer_stats comes from the training cell; it does not exist if training failed.
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

In [None]:
from unsloth.chat_templates import get_chat_template

# Re-apply the "llama-3.1" chat template to the tokenizer (the same call is made
# before training) and switch the model to unsloth's inference mode.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
text_content="""
Responding to geopolitical challenges
He then went on to frame the challenging times in geopolitics. “We meet at a difficult time in world affairs. Two major conflicts are underway, each with its own global repercussions. The Covid pandemic has left many in the developing world deeply devastated. Disruptions of various kinds – ranging from extreme climate events to supply chain uncertainties and financial volatility – are impacting growth and development. Debt is a serious concern, even as the world falls behind in achieving SDG targets. Technology holds great promise, as well as raising a new host of concerns. How should the members of the SCO respond to these challenges?” he asked.

Festive offer
“The answers lie in the Charter of our organisation,” he said, adding, “And I urge you to reflect on Article 1 that spells out the goals and tasks of the SCO. Let me summarise it for our collective consideration. The objective is to strengthen mutual trust, friendship and good neighbourliness. It is to develop multi-faceted cooperation, especially of a regional nature. It is to be a positive force in terms of balanced growth, integration and conflict prevention. The Charter was equally clear what the key challenges were. And these were primarily three, that the SCO was committed to combatting: one, terrorism; two, separatism; and three, extremism.”

Jaishankar stated that only by reaffirming the commitment to the Charter most sincerely that they can fully realise the benefits of cooperation and integration that it envisages. “This is not just an endeavour for our own benefit. We all realise that the world is moving towards multi-polarity. Globalisation and rebalancing are realities that cannot be denied. Cumulatively, they have created many new opportunities in terms of trade, investment, connectivity, energy flows and other forms of collaboration. There is no question that our region would benefit immensely if we take this forward. Not just that, others too would draw their own inspiration and lessons from such efforts.”
"""
# Build the inference prompt: a system instruction plus the article as the user turn.
# The user turn must use the role "user": the fine-tuning conversations were built
# with "user", and the response-only masking is keyed on the "user" header.
# The previous "human" role did not match what the model was trained on.
# NOTE(review): this system prompt differs from the training prompt in whitespace
# (line breaks and indentation) - consider reusing the exact training string.
messages = [
    {
        "role": "system",
        "content": """You are a helpful assistant that specialize in article summarization
            your task is to summarize given text article and generate title for it
            If the provided article doesnt contain coherent and meaningful content,
            just return empty response""",
    },
    {"role": "user", "content": text_content},
]
# Render and tokenize the chat in one step, then move the ids to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
# Stream the generated tokens to the cell output as they are produced;
# skip_prompt=True keeps the prompt itself out of the stream. Up to 512 new
# tokens are generated and the full result is kept in `output`.
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
output = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 512,
                   use_cache = True, temperature = 1.5, min_p = 0.1)

# Model saving



In [None]:
from unsloth.chat_templates import get_chat_template

# Apply the "llama-3.1" chat template to the tokenizer once more before saving.
# NOTE(review): the same call already appears in earlier cells; this repeat looks
# redundant when the notebook is run top to bottom - confirm before removing.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

In [None]:
# Save the model and the tokenizer side by side in the local "lora_model" folder.
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.json')

In [None]:
# Destination folder on Google Drive. The inference cells and the Streamlit app
# load the model back from a Drive folder, so /content/drive must be mounted
# before this cell runs.
final_model_folder_path = '/content/drive/MyDrive/my_trained_model/final_model'

# Save the model to the final model folder
model.save_pretrained(final_model_folder_path)

# Save the tokenizer to the same folder so both can be loaded from one path
tokenizer.save_pretrained(final_model_folder_path)

# Using the model from Drive


# Compulsory part

In [None]:
from google.colab import drive
import os

# Imported here so this cell also works as the first cell of an inference-only
# session, without relying on the import made in the training part.
from unsloth import FastLanguageModel

# 1. Mount Google Drive
drive.mount('/content/drive')

# 2. Define your model's path within Google Drive
model_folder_path = '/content/drive/MyDrive/my_trained_model/final_model' # Update with your model's path

# 3. Stop with a clear error if the folder is missing. Previously the message was
#    only printed and the load below was attempted anyway.
if not os.path.exists(model_folder_path):
    raise FileNotFoundError(f"Model folder not found at: {model_folder_path}")
print(f"Model folder found at: {model_folder_path}")
# List the contents of the folder as a quick sanity check
print(os.listdir(model_folder_path))

# 4. Load model and tokenizer - from_pretrained returns a (model, tokenizer) tuple
max_seq_length = 2048
dtype = None
load_in_4bit = True
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_folder_path,  # Use the full path here
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
# 5. Set to evaluation mode - on the model object, not on the tuple
model.eval()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model folder found at: /content/drive/MyDrive/my_trained_model/my_trained_model
['special_tokens_map.json', 'tokenizer.json', 'adapter_config.json', 'tokenizer_config.json', 'README.md', 'adapter_model.safetensors']


# First method

In [None]:
from unsloth import FastLanguageModel
from google.colab import drive
import os
from transformers import TextStreamer
# from unsloth.chat_templates import get_chat_template

# 1. Mount Google Drive (if not already mounted)
# try:
#     drive.mount('/content/drive')
# except ValueError:
#     pass  # Already mounted

# # 2. Define your model's path within Google Drive
# model_folder_path = '/content/drive/MyDrive/my_trained_model/my_trained_model'  # Full path with subfolder

# # 3. Load model and tokenizer - unpack the tuple here!
# max_seq_length = 2048
# dtype = None
# load_in_4bit = True
# model, tokenizer = FastLanguageModel.from_pretrained( # Unpack the tuple into model and tokenizer
#     model_name=model_folder_path,  # Use the full path here
#     max_seq_length=max_seq_length,
#     dtype=dtype,
#     load_in_4bit=load_in_4bit,
# )
# # 4. Set to evaluation mode - now on the model object
# model.eval()


# 5. Define your text content
text_content="""
Responding to geopolitical challenges
He then went on to frame the challenging times in geopolitics. “We meet at a difficult time in world affairs. Two major conflicts are underway, each with its own global repercussions. The Covid pandemic has left many in the developing world deeply devastated. Disruptions of various kinds – ranging from extreme climate events to supply chain uncertainties and financial volatility – are impacting growth and development. Debt is a serious concern, even as the world falls behind in achieving SDG targets. Technology holds great promise, as well as raising a new host of concerns. How should the members of the SCO respond to these challenges?” he asked.

Festive offer
“The answers lie in the Charter of our organisation,” he said, adding, “And I urge you to reflect on Article 1 that spells out the goals and tasks of the SCO. Let me summarise it for our collective consideration. The objective is to strengthen mutual trust, friendship and good neighbourliness. It is to develop multi-faceted cooperation, especially of a regional nature. It is to be a positive force in terms of balanced growth, integration and conflict prevention. The Charter was equally clear what the key challenges were. And these were primarily three, that the SCO was committed to combatting: one, terrorism; two, separatism; and three, extremism.”

Jaishankar stated that only by reaffirming the commitment to the Charter most sincerely that they can fully realise the benefits of cooperation and integration that it envisages. “This is not just an endeavour for our own benefit. We all realise that the world is moving towards multi-polarity. Globalisation and rebalancing are realities that cannot be denied. Cumulatively, they have created many new opportunities in terms of trade, investment, connectivity, energy flows and other forms of collaboration. There is no question that our region would benefit immensely if we take this forward. Not just that, others too would draw their own inspiration and lessons from such efforts.”
"""

# 6. Prepare messages for inference
# The user turn must use the role "user": the fine-tuning conversations were built
# with "user", and the response-only masking is keyed on the "user" header.
# The previous "human" role did not match what the model was trained on.
messages = [
    {
        "role": "system",
        "content": """You are a helpful assistant that specialize in article summarization
            your task is to summarize given text article and generate title for it
            If the provided article doesnt contain coherent and meaningful content,
            just return empty response""",
    },
    {"role": "user", "content": text_content},
]

# 7. Apply chat template: render and tokenize in one step, append the generation
#    prompt, and move the ids to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# 8. Generate output. The streamer prints new tokens as they are produced;
#    skip_prompt=True keeps the prompt out of that stream.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)
lora_output = model.generate(
    input_ids=inputs,
    streamer=text_streamer,
    max_new_tokens=128,
    use_cache=True,
    temperature=1.5,
    min_p=0.1,
)

# 9. Print decoded output. The whole first sequence is decoded, so this prints
#    the prompt as well as the generated text (see the recorded output, where the
#    prompt follows the streamed summary).
print(tokenizer.decode(lora_output[0]))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Article Title - 
**SCO members can respond to geopolitical challenges and develop cooperation by reaffirming commitment to the Charter.<|eot_id|>
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

You are a helpful assistant that specialize in article summarization
            your task is to summarize given text article and generate title for i

# Second method

In [None]:
from unsloth import FastLanguageModel

# Load the saved model and tokenizer exactly once. The earlier extra
# from_pretrained call loaded the model a second time (the Unsloth banner appears
# twice in the recorded output) and bound the returned (model, tokenizer) tuple to
# `model`, only for it to be overwritten immediately.
max_seq_length = 2048
dtype = None
load_in_4bit = True
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "/content/drive/MyDrive/my_trained_model/my_trained_model",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
# Switch the loaded model to unsloth's inference mode.
FastLanguageModel.for_inference(model)
text_content="""
Responding to geopolitical challenges
He then went on to frame the challenging times in geopolitics. “We meet at a difficult time in world affairs. Two major conflicts are underway, each with its own global repercussions. The Covid pandemic has left many in the developing world deeply devastated. Disruptions of various kinds – ranging from extreme climate events to supply chain uncertainties and financial volatility – are impacting growth and development. Debt is a serious concern, even as the world falls behind in achieving SDG targets. Technology holds great promise, as well as raising a new host of concerns. How should the members of the SCO respond to these challenges?” he asked.

Festive offer
“The answers lie in the Charter of our organisation,” he said, adding, “And I urge you to reflect on Article 1 that spells out the goals and tasks of the SCO. Let me summarise it for our collective consideration. The objective is to strengthen mutual trust, friendship and good neighbourliness. It is to develop multi-faceted cooperation, especially of a regional nature. It is to be a positive force in terms of balanced growth, integration and conflict prevention. The Charter was equally clear what the key challenges were. And these were primarily three, that the SCO was committed to combatting: one, terrorism; two, separatism; and three, extremism.”

Jaishankar stated that only by reaffirming the commitment to the Charter most sincerely that they can fully realise the benefits of cooperation and integration that it envisages. “This is not just an endeavour for our own benefit. We all realise that the world is moving towards multi-polarity. Globalisation and rebalancing are realities that cannot be denied. Cumulatively, they have created many new opportunities in terms of trade, investment, connectivity, energy flows and other forms of collaboration. There is no question that our region would benefit immensely if we take this forward. Not just that, others too would draw their own inspiration and lessons from such efforts.”
"""
# Build the inference prompt: a system instruction plus the article as the user turn.
# The user turn must use the role "user": the fine-tuning conversations were built
# with "user", and the response-only masking is keyed on the "user" header.
# The previous "human" role did not match what the model was trained on.
messages = [
    {
        "role": "system",
        "content": """You are a helpful assistant that specialize in article summarization
            your task is to summarize given text article and generate title for it
            If the provided article doesnt contain coherent and meaningful content,
            just return empty response""",
    },
    {"role": "user", "content": text_content},
]
# Render and tokenize the chat in one step, then move the ids to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
# Stream the generated tokens to the cell output as they are produced;
# skip_prompt=True keeps the prompt out of the stream. At most 128 new tokens are
# generated and the full result is kept in `lora_output`.
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
lora_output = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128,
                   use_cache = True, temperature = 1.5, min_p = 0.1)

==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
How Should SCO Members Respond to Geopolitical Challenges? 
Festive offer: Jaishankar urged fellow S

# Code for the Streamlit app

In [None]:
! pip  -q install streamlit
!pip -q install --upgrade requests
!pip -q install beautifulsoup4
!pip -q install unsloth transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m92.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m125.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.8/46.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m218.5/218.5 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
%%writefile app.py
import os
import streamlit as st
import requests
from bs4 import BeautifulSoup
from unsloth import FastLanguageModel, get_chat_template
from transformers import TextStreamer

# Page header: app title followed by a short instruction line.
PAGE_TITLE = "Automated News Summarizer "
PAGE_PROMPT = "### Enter a URL or paste your text below:"
st.title(PAGE_TITLE)
st.markdown(PAGE_PROMPT)

# The selected option decides which input widget the rest of the script shows.
INPUT_CHOICES = ("URL", "Text")
input_type = st.radio("Choose the input type:", INPUT_CHOICES)

def fetch_url(url):
    """Download a web page and return its text content.

    Args:
        url: Address of the page to fetch. When empty, the Bing News front
            page is fetched instead.

    Returns:
        The text extracted from the page's HTML, or a message starting with
        "Error:" when the page could not be retrieved.
    """
    target = url if url else "https://www.bing.com/news"

    try:
        # A timeout keeps an unresponsive host from hanging the app forever.
        response = requests.get(target, timeout=15)
    except requests.RequestException as exc:
        return f"Error: could not fetch {target} ({exc})"

    # Checking if the request was successful; without this early return the
    # parsing step below would run on a response that has no usable page.
    if response.status_code != 200:
        return f"Error: {target} responded with HTTP status {response.status_code}"

    # Extracting the text content
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.get_text()

# Checkpoint served by the app.
# NOTE(review): defaults to the base model loaded at the top of the notebook;
# set the SUMMARIZER_MODEL environment variable to the saved fine-tuned
# model/adapter to serve the tuned weights — confirm the correct path.
MODEL_NAME = os.environ.get("SUMMARIZER_MODEL", "unsloth/Llama-3.2-3B-Instruct")
MAX_SEQ_LENGTH = 2048
MAX_NEW_TOKENS = 128


@st.cache_resource
def _load_model():
    """Load the model and tokenizer once and reuse them across reruns.

    app.py is executed by Streamlit as its own script, so the model has to be
    loaded here; st.cache_resource keeps the loaded objects alive instead of
    reloading them on every widget interaction.
    """
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = MODEL_NAME,
        max_seq_length = MAX_SEQ_LENGTH,
        dtype = None,
        load_in_4bit = True,
    )
    tokenizer = get_chat_template(tokenizer, chat_template = "llama-3.1")
    FastLanguageModel.for_inference(model)
    return model, tokenizer


# Module-level handles so the UI code can pass `tokenizer` explicitly.
model, tokenizer = _load_model()


def summarize(text_content, tokenizer=None):
    """Generate a title and summary for an article.

    Args:
        text_content: Article text to summarize.
        tokenizer: Tokenizer used to build the prompt and decode the output.
            Defaults to the tokenizer loaded together with the model.

    Returns:
        The generated text as a string; an empty string when ``text_content``
        is blank.
    """
    if not text_content or not text_content.strip():
        return ""

    llm, default_tokenizer = _load_model()
    if tokenizer is None:
        tokenizer = default_tokenizer

    messages = [
    {
        "role": "system",
        "content": """You are a helpful assistant that specialize in article summarization
            your task is to summarize given text article and generate title for it
            If the provided article doesnt contain coherent and meaningful content,
            just return empty response""",
    },
    # The Llama 3.1 chat template copies the role verbatim into the turn
    # header, so the article is sent as "user" rather than "human".
    {"role": "user", "content": text_content},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
    ).to("cuda")

    # The streamer echoes tokens to the server console while generating.
    text_streamer = TextStreamer(tokenizer, skip_prompt = True)
    lora_output = llm.generate(input_ids = inputs, streamer = text_streamer,
                               max_new_tokens = MAX_NEW_TOKENS,
                               use_cache = True, temperature = 1.5, min_p = 0.1)

    # generate() returns the prompt ids followed by the completion ids; drop
    # the prompt and decode the rest so the caller gets readable text instead
    # of a tensor of token ids.
    completion_ids = lora_output[0][inputs.shape[-1]:]
    return tokenizer.decode(completion_ids, skip_special_tokens = True).strip()




# Main UI flow: collect a URL or pasted text, then summarize on button press.
if input_type == "URL":
    url = st.text_input("Enter the URL:")
    if st.button("Summarize"):
        if url:
            with st.spinner("Fetching and summarizing..."):
                # Fetch only once the button is pressed with a URL present, so
                # page loads and reruns do not trigger network requests.
                text_content = fetch_url(url)
                # Treat text that begins with "Error" as a failure message;
                # matching the word anywhere would reject ordinary articles
                # that merely mention it.
                if not text_content.startswith("Error"):
                    st.subheader("Original Text:")
                    st.write(text_content)
                    st.subheader("Summary:")
                    summary = summarize(text_content, tokenizer)
                    st.write(summary)
                else:
                    st.error(text_content)
        else:
            st.warning("Please enter a valid URL.")

else:
    text_content = st.text_area("Paste your text here:")
    if st.button("Summarize"):
        if text_content:
            with st.spinner("Summarizing..."):
                st.subheader("Original Text:")
                st.write(text_content)
                st.subheader("Summary:")
                # summarize() takes the tokenizer as its second argument.
                summary = summarize(text_content, tokenizer)
                st.write(summary)
        else:
            st.warning("Please enter some text.")

Overwriting app.py


In [None]:
# Print this machine's public IPv4 address.
# NOTE(review): presumably needed as the access password on the localtunnel
# landing page — confirm.
!wget -q -O - ipv4.icanhazip.com


34.87.7.137


In [None]:
# Start the Streamlit app in the background (it serves on port 8501) and
# expose that port through a public localtunnel URL.
! streamlit run app.py & npx localtunnel --port 8501

[1G[0K⠙
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠹[1G[0K⠸[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.87.7.137:8501[0m
[0m
[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0Kyour url is: https://green-squids-act.loca.lt
[34m  Stopping...[0m
^C
