In [1]:
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

CUDA available: True
GPU: Tesla T4


In [2]:
!pip install unsloth trl peft accelerate bitsandbytes

Collecting unsloth
  Downloading unsloth-2026.1.4-py3-none-any.whl.metadata (66 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/66.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trl
  Downloading trl-0.27.2-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.49.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting unsloth_zoo>=2026.1.4 (from unsloth)
  Downloading unsloth_zoo-2026.1.4-py3-none-any.whl.metadata (32 kB)
Collecting tyro (from unsloth)
  Downloading tyro-1.0.5-py3-none-any.whl.metadata (12 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.34-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.2 kB)
Collecting datasets!=4.0.*,!=4.1.0,<4.4.0,>=3.4.1 (from unsloth)
  Downloading datasets-4.3.0-py3-none-any.whl.metadata (18 kB)
Collecting hf_transfer (

In [1]:
import os
os.makedirs('data', exist_ok=True)

In [2]:
from unsloth import FastLanguageModel
import torch

model_name = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"

max_seq_length = 2048  # Choose sequence length
dtype = None  # Auto detection

# Load model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=True,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2026.1.4: Fast Llama patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.6.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.34. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

In [3]:
!pip install feedparser beautifulsoup4 requests pandas

Collecting feedparser
  Downloading feedparser-6.0.12-py3-none-any.whl.metadata (2.7 kB)
Collecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading feedparser-6.0.12-py3-none-any.whl (81 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.5/81.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... [?25l[?25hdone
  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6046 sha256=ef235ac0de58ef5630cf94b041cc5a306c27d3eca7e6b322c4e954e274eba1f8
  Stored in directory: /root/.cache/pip/wheels/03/f5/1a/23761066dac1d0e8e683e5fdb27e12de53209d05a4a37e6246
Successfully built sgmllib3k
Installing collected packages: sgmllib3k, feedparser
Successfully installed feedparser-6.0.12 sgmllib3k-1.0.0


In [4]:
import feedparser
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

class NewsScraper:
    def __init__(self, rss_url):
        self.rss_url = rss_url
        self.news_data = []

    def fetch_feeds(self):
        print(f"[*] Fetching feeds from: {self.rss_url}")
        feed = feedparser.parse(self.rss_url)

        for entry in feed.entries:
            item = {
                'title': entry.title,
                'link': entry.link,
                'published': entry.published,
                'summary': entry.summary
            }
            self.news_data.append(item)
        return self.news_data

    def get_full_content(self, url):
        """ดึงเนื้อหาข่าวฉบับเต็มจาก URL"""
        try:
            response = requests.get(url, timeout=10)
            soup = BeautifulSoup(response.content, 'html.parser')

            # ปรับแต่งตามโครงสร้าง HTML ของแต่ละเว็บ (ตัวอย่างนี้เป็นแบบทั่วไป)
            # ส่วนใหญ่เนื้อหาข่าวจะอยู่ในแท็ก <article> หรือ <div> ที่มี class เกี่ยวกับ content
            paragraphs = soup.find_all('p')
            full_text = " ".join([p.get_text() for p in paragraphs])
            return full_text.strip()
        except Exception as e:
            print(f"[!] Error fetching {url}: {e}")
            return ""

    def run(self, limit=5):
        feeds = self.fetch_feeds()
        results = []

        # จำกัดจำนวนเพื่อทดสอบก่อน
        for i, item in enumerate(feeds[:limit]):
            print(f"[+] Scraping ({i+1}/{limit}): {item['title']}")
            content = self.get_full_content(item['link'])
            item['full_content'] = content
            results.append(item)
            time.sleep(1) # ป้องกันการโดนแบน

        return pd.DataFrame(results)

# --- วิธีใช้งาน ---
# แก้ไขในส่วน if __name__ == "__main__":
if __name__ == "__main__":
    URL = "http://feeds.bbci.co.uk/news/world/rss.xml"
    scraper = NewsScraper(URL)
    df = scraper.run(limit=10)

    if df.empty:
        print("[!] ไม่พบข้อมูลข่าว! กรุณาเช็ก URL หรือการเชื่อมต่ออินเทอร์เน็ต")
    elif 'title' not in df.columns:
        print(f"[!] พบข้อมูลแต่คอลัมน์ไม่ถูกต้อง คอลัมน์ที่มีคือ: {df.columns.tolist()}")
    else:
        print("\n--- ผลลัพธ์การดึงข้อมูล ---")
        print(df[['title', 'published']].head())
        df.to_csv('data/raw_news.csv', index=False, encoding='utf-8-sig')

[*] Fetching feeds from: http://feeds.bbci.co.uk/news/world/rss.xml
[+] Scraping (1/10): Thousands without power in freezing Ukraine as renewed Russian strikes continue
[+] Scraping (2/10): Melinda French Gates says ex-husband Bill has questions to answer over Epstein
[+] Scraping (3/10): Andrew and Epstein asked exotic dancer for 'sex acts,' legal letter claims
[+] Scraping (4/10): Israeli strikes kill 20 in Gaza, hospitals say, after soldier wounded by gunfire
[+] Scraping (5/10): Son of Norway's crown princess holds back tears giving evidence at rape trial
[+] Scraping (6/10): German activist jailed in Hungary for attacks at Nazi rally
[+] Scraping (7/10): Fears of new arms race as US-Russia nuclear weapons treaty due to expire
[+] Scraping (8/10): Boy, 14, stabbed art teacher because he had 'too much hatred', he tells police
[+] Scraping (9/10): Greece blames smugglers over migrant deaths but early accounts have been questioned before
[+] Scraping (10/10): Leader of South Africa's 

In [5]:
from unsloth import FastLanguageModel
import json
# สั่งให้โมเดลอยู่ในโหมด "ใช้งาน" (Inference)
FastLanguageModel.for_inference(model)

def analyze_with_unsloth(text):
    # จำกัดความยาวข้อความไม่ให้เกิน Context Window
    input_text = text[:1500]

    # ออกแบบ Prompt ให้กระชับ (Unsloth จะได้ทำงานเร็วขึ้น)
    messages = [
        {
            "role": "system",
            "content": f"""You are an expert International News Analyst.
            Your task is to:
            1. Extract names of public figures.
            2. Especially focus on this group: [{focus_str}].
            3. Analyze the overall sentiment of the news (Positive, Negative, or Neutral).

            Respond only in JSON format."""
        },
        {
            "role": "user",
            "content": f"News Content: {input_text}\n\nReturn JSON in this structure: {{\"persons\": [], \"sentiment\": \"\"}}"
        }
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=256,
        use_cache=True,
        temperature=0.1, # ปรับให้นิ่งที่สุด (ไม่ให้โมเดลไม่รู้)
    )

    decoded_output = tokenizer.batch_decode(outputs)

    # ดึงเฉพาะส่วนที่เป็น JSON ออกมาจากคำตอบ
    full_response = decoded_output[0].split("assistant")[-1].strip()

    try:
        # พยายามทำความสะอาด string เผื่อโมเดลแถมคำอื่นมา
        json_str = full_response[full_response.find("{"):full_response.rfind("}")+1]
        return json.loads(json_str)
    except:
        return {"persons": [], "sentiment": "Error Parsing"}

In [6]:
# Define focus_str before it's used in analyze_with_unsloth
# This is based on a previous focus group definition, providing an initial set of names.
initial_focus_group_list = ['Vladimir Putin', 'Melinda French Gates', 'Bill Gates', 'Jeffrey Epstein','Elon Musk']
focus_str = ", ".join(initial_focus_group_list)

analysis_results = []

# วนลูปให้ AI อ่านข่าวทีละข่าว
for index, row in df.iterrows():
    print(f"[*] AI กำลังอ่านข่าวที่ {index+1}...")
    # เรียกใช้ฟังก์ชันวิเคราะห์ที่เราเขียนไว้ก่อนหน้านี้
    result = analyze_with_unsloth(row['full_content'])
    analysis_results.append(result)

# แปลงผลวิเคราะห์เป็นตาราง
df_analysis = pd.DataFrame(analysis_results)

# รวมตารางข่าว (Raw) เข้ากับผลวิเคราะห์ (AI)
final_df = pd.concat([df.reset_index(drop=True), df_analysis.reset_index(drop=True)], axis=1)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


[*] AI กำลังอ่านข่าวที่ 1...
[*] AI กำลังอ่านข่าวที่ 2...
[*] AI กำลังอ่านข่าวที่ 3...
[*] AI กำลังอ่านข่าวที่ 4...
[*] AI กำลังอ่านข่าวที่ 5...
[*] AI กำลังอ่านข่าวที่ 6...
[*] AI กำลังอ่านข่าวที่ 7...
[*] AI กำลังอ่านข่าวที่ 8...
[*] AI กำลังอ่านข่าวที่ 9...
[*] AI กำลังอ่านข่าวที่ 10...


In [7]:
import plotly.express as px
from collections import Counter

# 1. เตรียมข้อมูลสำหรับนับจำนวนคนที่ถูกพูดถึง (Top Persons)
all_persons = []
for p_list in final_df['persons']:
    if isinstance(p_list, list):
        for person_item in p_list:
            if isinstance(person_item, dict):
                # Try to extract name, otherwise use title or a string representation
                if 'name' in person_item:
                    all_persons.append(person_item['name'])
                elif 'title' in person_item:
                    all_persons.append(person_item['title'])
                else:
                    all_persons.append(str(person_item)) # Fallback
            elif isinstance(person_item, str):
                all_persons.append(person_item)

person_counts = Counter(all_persons).most_common(10)
df_persons = pd.DataFrame(person_counts, columns=['Person', 'Count'])

# 2. สร้างกราฟแท่งแสดงบุคคลที่ถูกพูดถึงบ่อยสุด
fig_persons = px.bar(df_persons, x='Count', y='Person', orientation='h',
             title='Top 10 Public Persons in News',
             color='Count', color_continuous_scale='Viridis')
fig_persons.show()

# 3. สร้างกราฟวงกลมแสดงภาพรวม Sentiment
sentiment_counts = final_df['sentiment'].value_counts().reset_index()
fig_sentiment = px.pie(sentiment_counts, values='count', names='sentiment',
             title='Overall News Sentiment',
             color='sentiment',
             color_discrete_map={'Positive':'#2ecc71', 'Negative':'#e74c3c', 'Neutral':'#f1c40f'})
fig_sentiment.show()

In [None]:
# # เซฟไฟล์ที่วิเคราะห์เสร็จแล้ว (ไฟล์นี้แหละที่จะเอาไปโชว์)
# final_df.to_csv('data/analyzed_news_final.csv', index=False, encoding='utf-8-sig')
# print("[*] วิเคราะห์เสร็จสิ้น! ไฟล์สมบูรณ์อยู่ที่ data/analyzed_news_final.csv")
# print("คอลัมน์ที่มีตอนนี้:", final_df.columns.tolist())

In [8]:
extracted_names = []
for p_list in final_df['persons']:
    if isinstance(p_list, list):
        for person_item in p_list:
            if isinstance(person_item, dict) and 'name' in person_item:
                extracted_names.append(person_item['name'])
            elif isinstance(person_item, str):
                extracted_names.append(person_item)

print("First 20 extracted names:")
print(extracted_names[:20])
print(f"Total unique names extracted: {len(set(extracted_names))}")

First 20 extracted names:
['Volodymyr Zelensky', 'Donald Trump', 'Vladimir Putin', 'Melinda French Gates', 'Bill Gates', 'Jeffrey Epstein', 'Elon Musk', 'Melinda French Gates', 'Bill Gates', 'Jeffrey Epstein', 'Vladimir Putin', 'Melinda French Gates', 'Bill Gates', 'Jeffrey Epstein', 'Elon Musk', 'Andrew Mountbatten-Windsor', 'Abu Mohammed Haboush', 'Vladimir Putin', 'Melinda French Gates', 'Bill Gates']
Total unique names extracted: 19


In [9]:
# สมมติกลุ่มบุคคลที่เราสนใจ (Focus Group)
focus_group = ['Vladimir Putin', 'Melinda French Gates', 'Bill Gates', 'Jeffrey Epstein']

def filter_focus_group(person_list):
    # วนลูปเช็กว่าคนในข่าว อยู่ใน focus_group ของเราไหม
    return [p for p in person_list if any(name in p for name in focus_group)]

# สร้างคอลัมน์ใหม่ที่เก็บเฉพาะคนที่เราสนใจ
final_df['focus_persons'] = final_df['persons'].apply(filter_focus_group)

# กรองเอาเฉพาะข่าวที่มีคนใน Focus Group ปรากฏตัวจริงๆ
df_focus = final_df[final_df['focus_persons'].map(len) > 0].copy()

print(f"พบข่าวที่เกี่ยวข้องกับ Focus Group ทั้งหมด {len(df_focus)} ข่าว")

พบข่าวที่เกี่ยวข้องกับ Focus Group ทั้งหมด 6 ข่าว


In [10]:
# เตรียมข้อมูลสำหรับ Stacked Bar Chart
focus_data = []

# Standardize sentiment values in df_focus for consistent plotting
df_focus['Sentiment_Cleaned'] = df_focus['sentiment'].str.capitalize().head(5)

for _, row in df_focus.iterrows():
    for p in row['focus_persons']:
        # Use the cleaned sentiment column
        focus_data.append({'Person': p, 'Sentiment': row['Sentiment_Cleaned']})

df_plot_focus = pd.DataFrame(focus_data)

# สร้างกราฟ Stacked Bar Chart
import plotly.express as px

fig = px.bar(df_plot_focus, x="Person", color="Sentiment",
             title="Sentiment Analysis of Focus Group Persons",
             color_discrete_map={'Positive':'#2ecc71', 'Negative':'#e74c3c', 'Neutral':'#f1c40f'},
             barmode="group") # ใช้ group เพื่อเปรียบเทียบแท่งบวก/ลบข้างกัน
fig.show()

In [11]:
import numpy as np

# Create a new column 'sentiment_clean'
final_df['sentiment_clean'] = final_df['sentiment'].apply(lambda x: 'Unknown' if x == 'Error Parsing' else str(x).capitalize())

print("Sentiment column cleaned and 'sentiment_clean' column created.")
print(final_df[['sentiment', 'sentiment_clean']].head())

Sentiment column cleaned and 'sentiment_clean' column created.
  sentiment sentiment_clean
0  Negative        Negative
1  Negative        Negative
2  Negative        Negative
3  Negative        Negative
4  Negative        Negative


In [12]:
import ast

def clean_persons_column(persons_str):
    if isinstance(persons_str, list):
        return persons_str # Already a list
    try:
        # Safely evaluate the string to a Python literal
        persons_list = ast.literal_eval(persons_str)
        if isinstance(persons_list, list):
            # Ensure all elements in the list are strings
            return [str(p) for p in persons_list]
    except (ValueError, SyntaxError):
        pass # If conversion fails, return an empty list
    return []

# Apply the cleaning function to create 'persons_clean'
final_df['persons_clean'] = final_df['persons'].apply(clean_persons_column)

print("'persons' column cleaned and 'persons_clean' column created.")
print(final_df[['persons', 'persons_clean']].head())

'persons' column cleaned and 'persons_clean' column created.
                                             persons  \
0  [Volodymyr Zelensky, Donald Trump, Vladimir Pu...   
1  [Melinda French Gates, Bill Gates, Jeffrey Eps...   
2  [Vladimir Putin, Melinda French Gates, Bill Ga...   
3  [Abu Mohammed Haboush, Vladimir Putin, Melinda...   
4           [Marius Borg Høiby, Mette-Marit, Haakon]   

                                       persons_clean  
0  [Volodymyr Zelensky, Donald Trump, Vladimir Pu...  
1  [Melinda French Gates, Bill Gates, Jeffrey Eps...  
2  [Vladimir Putin, Melinda French Gates, Bill Ga...  
3  [Abu Mohammed Haboush, Vladimir Putin, Melinda...  
4           [Marius Borg Høiby, Mette-Marit, Haakon]  


In [13]:
final_df.to_csv('global_news_analysis.csv', index=False, encoding='utf-8-sig')
print("[*] Final DataFrame with cleaned sentiment and persons columns saved to 'global_news_analysis.csv'")

[*] Final DataFrame with cleaned sentiment and persons columns saved to 'global_news_analysis.csv'


In [14]:
!pip install streamlit plotly pandas

Collecting streamlit
  Downloading streamlit-1.54.0-py3-none-any.whl.metadata (9.8 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.54.0-py3-none-any.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m60.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m108.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.54.0


In [21]:
!pip install json_repair

Collecting json_repair
  Downloading json_repair-0.56.0-py3-none-any.whl.metadata (14 kB)
Downloading json_repair-0.56.0-py3-none-any.whl (38 kB)
Installing collected packages: json_repair
Successfully installed json_repair-0.56.0


In [35]:
%%writefile app.py
import streamlit as st
import pandas as pd
import plotly.express as px
import torch
import json_repair
from bs4 import BeautifulSoup
from unsloth import FastLanguageModel

# --- 1. ตั้งค่าและโหลดโมเดลจากโฟลเดอร์ที่ Save ไว้ ---
st.set_page_config(page_title="AI XML Analyst", layout="wide")

@st.cache_resource
def load_local_model():
    # โหลดจากโฟลเดอร์ที่เรา Save ไว้ (mysaved_model)
    # หมายเหตุ: ต้องแน่ใจว่าโฟลเดอร์ mysaved_model อยู่ที่เดียวกับ app.py
    model_path = "mysaved_model"
    max_seq_length = 2048
    dtype = None
    load_in_4bit = True

    try:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name = model_path, # ชี้ไปที่โฟลเดอร์
            max_seq_length = max_seq_length,
            dtype = dtype,
            load_in_4bit = load_in_4bit,
        )
        FastLanguageModel.for_inference(model)
        return model, tokenizer
    except Exception as e:
        # เผื่อหาไฟล์ไม่เจอ ให้โหลดจากเน็ตแทน (Fallback)
        st.warning(f"หาโฟลเดอร์โมเดลไม่เจอ ({e}) กำลังโหลดจาก Unsloth แทน...")
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name = "unsloth/Llama-3.2-3B-Instruct",
            max_seq_length = max_seq_length,
            dtype = dtype,
            load_in_4bit = load_in_4bit,
        )
        FastLanguageModel.for_inference(model)
        return model, tokenizer

# --- 2. ฟังก์ชันแกะ XML Text และวิเคราะห์ ---
def process_xml_text(xml_string, model, tokenizer):
    # ใช้ BeautifulSoup แกะ XML string
    soup = BeautifulSoup(xml_string, 'xml')
    items = soup.find_all('item')

    if not items:
        # เผื่อกรณี user วางมาแค่ text ธรรมดา ไม่มี tag item
        # ให้ลองหาจาก root หรือถือว่าเป็น item เดียว
        if soup.find('title'):
            items = [soup]
        else:
            return []

    results = []

    # Progress Bar
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(items)

    for i, item in enumerate(items):
        # ดึงข้อมูลจาก Tag
        title = item.find('title').get_text() if item.find('title') else "No Title"
        description = item.find('description').get_text() if item.find('description') else ""
        link = item.find('link').get_text() if item.find('link') else ""

        # รวมข้อความเพื่อส่งให้ AI
        full_text = f"Title: {title}\nDescription: {description}"
        input_text = full_text[:1500]

        status_text.text(f"⏳ กำลังวิเคราะห์: {title[:30]}...")

        # --- AI Inference Part ---
        prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
        You are a News Analyst. Extract public figures and sentiment.
        Output JSON only: {{"persons": ["Name1", "Name2"], "sentiment": "Positive/Negative/Neutral"}}
        <|eot_id|><|start_header_id|>user<|end_header_id|>
        News: {input_text}
        <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

        inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            use_cache=True,
            temperature=0.1
        )

        response = tokenizer.batch_decode(outputs)[0].split("assistant")[-1].strip()

        # Parse JSON
        try:
            data = json_repair.loads(response)
            sentiment = str(data.get('sentiment', 'Neutral'))
            persons = data.get('persons', [])
            if isinstance(persons, str): persons = [persons]
            persons = [str(p) for p in persons if isinstance(p, (str, int))]
        except:
            sentiment = "Error"
            persons = []

        results.append({
            "title": title,
            "sentiment_clean": sentiment,
            "persons_clean": persons,
            "link": link
        })

        progress_bar.progress((i + 1) / total)

    status_text.text("✅ วิเคราะห์เสร็จสิ้น!")
    progress_bar.empty()
    return pd.DataFrame(results)

# --- 3. UI หน้าจอ ---
st.title("🤖 AI XML News Analyzer")
st.markdown("วางโค้ด XML (`<item>...</item>`) ลงในช่องด้านล่างเพื่อวิเคราะห์")

# โหลดโมเดล
with st.spinner("กำลังโหลดโมเดล..."):
    try:
        model, tokenizer = load_local_model()
        st.success("Model Loaded Successfully! 🚀")
    except Exception as e:
        st.error(f"Error loading model: {e}")
        st.stop()

# Input Text Area (รับ XML)
xml_input = st.text_area("วาง XML Code ที่นี่:", height=300, placeholder="<item>\n<title>Example News</title>\n...</item>")

if st.button("🚀 เริ่มวิเคราะห์"):
    if not xml_input.strip():
        st.warning("กรุณาวางโค้ด XML ก่อนครับ")
    else:
        df = process_xml_text(xml_input, model, tokenizer)

        if not df.empty:
            st.session_state['data_xml'] = df
        else:
            st.error("ไม่พบข้อมูลใน XML หรือรูปแบบไม่ถูกต้อง")

# --- 4. แสดงผล ---
if 'data_xml' in st.session_state:
    df = st.session_state['data_xml']
    st.divider()

    # Metrics
    c1, c2, c3 = st.columns(3)
    c1.metric("จำนวนข่าว", len(df))
    c2.metric("ข่าวบวก", len(df[df['sentiment_clean']=='Positive']))
    c3.metric("ข่าวลบ", len(df[df['sentiment_clean']=='Negative']))

    # Charts
    col_chart1, col_chart2 = st.columns(2)

    with col_chart1:
        st.subheader("Sentiment Analysis")
        fig_pie = px.pie(df, names='sentiment_clean', color='sentiment_clean',
                     color_discrete_map={'Positive':'#2ecc71', 'Negative':'#e74c3c', 'Neutral':'#f1c40f'})
        st.plotly_chart(fig_pie, use_container_width=True)

    with col_chart2:
        st.subheader("Top Figures")
        all_persons = []
        for p_list in df['persons_clean']:
            all_persons.extend(p_list)

        if all_persons:
            from collections import Counter
            counts = Counter(all_persons).most_common(10)
            df_p = pd.DataFrame(counts, columns=['Name', 'Count'])
            st.plotly_chart(px.bar(df_p, x='Count', y='Name', orientation='h'), use_container_width=True)

    # Table
    st.subheader("ผลลัพธ์การวิเคราะห์")
    st.dataframe(df[['title', 'sentiment_clean', 'persons_clean']])

Overwriting app.py


In [27]:
!curl ipv4.icanhazip.com

34.125.67.43


In [36]:
import subprocess

# Run Streamlit in the background, but capture its output to a log file
process = subprocess.Popen(['streamlit', 'run', 'app.py'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

# Start localtunnel separately
# Make sure to install localtunnel if not already done (!npm install localtunnel)
!npx localtunnel --port 8501

print("Streamlit process started. Check the output above for localtunnel URL. If there's an error, you might need to stop the kernel and check Streamlit logs.")

# To see Streamlit's logs later if it crashes, you can try:
# print("\n--- Streamlit Output ---")
# stdout, stderr = process.communicate(timeout=10) # Adjust timeout as needed
# print(stdout)
# print(stderr)


[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0Kyour url is: https://icy-poets-beg.loca.lt
^C
Streamlit process started. Check the output above for localtunnel URL. If there's an error, you might need to stop the kernel and check Streamlit logs.


In [29]:
# รัน Cell นี้ใน Colab เพื่อ Save Model เก็บไว้ก่อน
# (สมมติว่าตัวแปรโมเดลของคุณชื่อ model และ tokenizer)

model.save_pretrained("mysaved_model")
tokenizer.save_pretrained("mysaved_model")

print("✅ บันทึกโมเดลเรียบร้อยแล้วที่โฟลเดอร์ 'mysaved_model'")

✅ บันทึกโมเดลเรียบร้อยแล้วที่โฟลเดอร์ 'mysaved_model'


In [32]:
import os
from google.colab import files

# 1. ตั้งชื่อโฟลเดอร์ที่จะเก็บโมเดล
output_dir = "mysaved_model"

# 2. สั่ง Save โมเดลและ Tokenizer (จากตัวแปร model ที่คุณมีอยู่)
print(f"💾 กำลังบันทึกโมเดลลงโฟลเดอร์ {output_dir}...")
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

# 3. บีบอัดไฟล์เป็น .zip (เพราะ Colab โหลดโฟลเดอร์โดยตรงไม่ได้)
print("📦 กำลังบีบอัดไฟล์เป็น .zip...")
os.system(f"zip -r {output_dir}.zip {output_dir}")

# 4. สั่งดาวน์โหลดลงเครื่องคอมพิวเตอร์
print("⬇️ กำลังดาวน์โหลด...")
files.download(f"{output_dir}.zip")

💾 กำลังบันทึกโมเดลลงโฟลเดอร์ mysaved_model...
📦 กำลังบีบอัดไฟล์เป็น .zip...
⬇️ กำลังดาวน์โหลด...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>