<a href="https://colab.research.google.com/github/sarveshp1710/RAP-Hackathon/blob/main/Face_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# NOTE(review): duckduckgo-search is installed here, but app.py (written further down) imports `googlesearch` instead.
# newspaper3k and lxml are pinned to exact versions — presumably for compatibility with each other; confirm before unpinning.
!pip install streamlit duckduckgo-search "newspaper3k==0.2.8" "lxml==4.9.4" transformers sentencepiece torch pyngrok -q

In [2]:
# NOTE(review): every package listed here is also installed by the following install cell, so this cell looks redundant.
!pip install streamlit googlesearch-python newspaper3k transformers sentencepiece torch



In [3]:
# Packages behind app.py's imports: streamlit (UI), googlesearch-python (`googlesearch`), newspaper3k (`newspaper`),
# transformers (`pipeline`), and thefuzz (fuzzy matching). sentencepiece, torch and python-Levenshtein are not
# imported directly by app.py — presumably runtime dependencies of the above; confirm.
!pip install streamlit googlesearch-python newspaper3k transformers sentencepiece torch thefuzz python-Levenshtein



In [19]:
%%writefile app.py
# Smart News Chatbot with a Conversational UI
# This script creates a web application using Streamlit that allows users to
# get the latest news about companies, summarized by an AI model, in a chat-like interface.

# --- 1. Installation of Required Libraries ---
# On your local machine, run the following command in your terminal:
# pip install streamlit googlesearch-python newspaper3k transformers sentencepiece torch thefuzz python-Levenshtein

# --- 2. Main Application Code ---
# Save the following code as a Python file (e.g., `app.py`).

import streamlit as st
from googlesearch import search
from newspaper import Article, ArticleException
from transformers import pipeline
import re
import os
import time
from urllib.error import HTTPError
from thefuzz import process, fuzz

# --- Model Loading ---
@st.cache_resource
def load_model():
    """Build the Hugging Face summarization pipeline used by the app.

    Returns:
        The pipeline object on success, or None when construction fails
        (the failure is also reported in the UI through `st.error`).
    """
    # distilbart-cnn-6-6 is chosen as a smaller, faster summarization checkpoint.
    checkpoint = "sshleifer/distilbart-cnn-6-6"
    try:
        return pipeline("summarization", model=checkpoint)
    except Exception as e:
        st.error(f"Error loading AI model: {e}. This can happen on the first run. Please ensure you have an internet connection.")
    return None

# Module-level handle read by generate_summary(); None when loading failed.
summarizer = load_model()

# --- Pre-defined list for typo correction ---
# Known company names used for exact matching and typo suggestions.
# Order is kept as-is; the inline group labels are only a reading aid.
COMMON_COMPANIES = [
    # technology
    "Microsoft", "Apple", "Google", "Amazon", "Facebook", "Meta",
    "Tesla", "NVIDIA", "Netflix", "Intel", "AMD", "IBM", "Oracle",
    "Salesforce", "Adobe", "Twitter", "Uber", "Lyft", "Airbnb",
    # finance
    "Goldman Sachs", "JPMorgan Chase", "Morgan Stanley",
    # retail
    "Walmart", "Target", "Costco", "Home Depot",
    # industrial / automotive
    "Boeing", "Lockheed Martin", "General Electric", "Ford", "General Motors",
    # other
    "Texas Instruments", "Workato", "Allos",
]

# --- Helper Functions ---

def is_greeting(text, threshold=90):
    """Tell whether `text` fuzzily matches one of a fixed set of greetings.

    The input is lower-cased, stripped of punctuation and surrounding
    whitespace, then compared with each greeting via `fuzz.ratio`.

    Args:
        text: Raw user input.
        threshold: Minimum `fuzz.ratio` score that counts as a match.

    Returns:
        True if any greeting scores at least `threshold`, otherwise False.
    """
    normalized = re.sub(r'[^\w\s]', '', text.lower()).strip()
    for candidate in ("hi", "hello", "hey", "how are you", "how r u", "whats up", "sup"):
        if fuzz.ratio(normalized, candidate) >= threshold:
            return True
    return False

def find_closest_match(company_name, choices, threshold=75):
    """Return the choice that best matches `company_name`, if it is close enough.

    Args:
        company_name: The (possibly misspelled) name to look up.
        choices: Candidate names passed to `process.extractOne`.
        threshold: Minimum score the best candidate must reach.

    Returns:
        The best-scoring choice, or None when there is no candidate or its
        score is below `threshold`.
    """
    candidate = process.extractOne(company_name, choices)
    if not candidate:
        return None
    # extractOne yields a sequence whose first two items are (choice, score).
    matched_name, score = candidate[0], candidate[1]
    return matched_name if score >= threshold else None

def _http_status_of(error):
    """Return the HTTP status code carried by an exception, or None if it has none.

    Looks at `error.code` first (the attribute urllib's HTTPError exposes) and
    then at `error.response.status_code` (the shape of requests-style errors).
    """
    code = getattr(error, "code", None)
    if isinstance(code, int):
        return code
    status = getattr(getattr(error, "response", None), "status_code", None)
    return status if isinstance(status, int) else None


def find_news_urls(company_name, max_results=10):
    """Search for news about a company and return relevant, de-duplicated URLs.

    Args:
        company_name: Name placed (quoted) into the search query.
        max_results: Number of results requested from the search backend.

    Returns:
        A list of URL strings in search order, with duplicates and URLs
        containing a blacklisted path segment removed. On failure, a
        human-readable error message (a str) is returned instead; callers
        tell the two apart with `isinstance(result, str)`.
    """
    # Path fragments that indicate non-news pages.
    url_blacklist = ['/press-release/', '/docs/', '/learn/', '/events/', '/careers/', '/about/']
    try:
        query = f'"{company_name}" company news financial stock'
        unique_urls, seen_urls = [], set()
        for url in search(query, num_results=max_results, lang="en"):
            if url in seen_urls or any(keyword in url for keyword in url_blacklist):
                continue
            seen_urls.add(url)
            unique_urls.append(url)
        return unique_urls
    except Exception as e:
        # The search backend may raise a requests-style error rather than
        # urllib's HTTPError (TODO confirm for the installed package), so the
        # status code is read by duck typing instead of by exception type alone.
        status = _http_status_of(e)
        if status == 429:
            return "Search failed due to rate-limiting. Please try again in a few moments."
        if isinstance(e, HTTPError) or status is not None:
            return f"Could not perform search. An HTTP error occurred: {e}"
        return f"An unknown error occurred during search: {e}"

def extract_article_text(url):
    """Fetch and parse the article at `url`.

    Returns:
        A `(title, text)` tuple when the parsed text is at least 300
        characters long; `(None, None)` when the text is shorter or when
        downloading/parsing raises any exception.
    """
    nothing = (None, None)
    try:
        page = Article(url)
        page.download()
        page.parse()
        body = page.text
        # Very short bodies are treated as not being a real article.
        if len(body) >= 300:
            return page.title, body
        return nothing
    except (ArticleException, Exception):
        return nothing

def generate_summary(text, max_chunk_length=2048):
    """Summarize `text` with the module-level `summarizer` pipeline.

    Args:
        text: Article body to summarize.
        max_chunk_length: Only the first `max_chunk_length` characters of
            `text` are handed to the model.

    Returns:
        The summary string, or a fixed fallback message when there is no
        text, no loaded model, or the model call raises.
    """
    if not (text and summarizer):
        return "Could not generate summary."
    try:
        excerpt = text[:max_chunk_length]
        outputs = summarizer(
            excerpt,
            max_length=140,
            min_length=40,
            do_sample=False,
        )
        first = outputs[0]
        return first['summary_text']
    except Exception:
        return "(Could not summarize this article due to its format.)"

def get_news_and_summarize(company_name, max_summaries=3):
    """Search for a company's news and build a Markdown digest of summaries.

    Args:
        company_name: Company to search for.
        max_summaries: Upper bound on the number of article summaries
            included (kept small so multi-company requests stay fast).

    Returns:
        A Markdown string: either the digest (a header followed by one
        section per summarized article, separated by horizontal rules) or a
        plain message explaining why no digest could be produced. An error
        string returned by `find_news_urls` is passed through unchanged.
    """
    with st.spinner(f"Searching Google for news about '{company_name}'..."):
        found = find_news_urls(company_name)
        # A str result is an error message from the search step.
        if isinstance(found, str):
            return found
        if not found:
            return f"Sorry, I couldn't find any recent news articles for '{company_name}'."

        sections = []
        for link in found:
            if len(sections) >= max_summaries:
                break
            headline, body = extract_article_text(link)
            if not (headline and body):
                continue
            digest = generate_summary(body)
            sections.append(f"### {headline}\n{digest}\n\n[Read full article]({link})")

        if not sections:
            return f"I found links for '{company_name}', but couldn't extract high-quality news articles from them."

        header = f"Here are the top news summaries for **{company_name}**:"
        return "\n\n---\n\n".join([header] + sections)

# --- Streamlit Conversational UI ---

st.set_page_config(page_title="Smart News Chatbot", layout="centered")
st.title("🤖 Smart News Chatbot")

# Seed per-session state the first time the script runs for a session;
# on later reruns existing values are left untouched.
_session_defaults = {
    "messages": [{"role": "assistant", "content": "Hi there! Select a mode and let's chat."}],
    "suggestion": None,
    "active_mode": "General Chat",
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# --- UI for Mode Toggle ---
# The radio sits in the wider middle column; its value is stored in
# st.session_state under the "active_mode" key.
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
    st.radio(
        "Select Mode:",
        options=["General Chat", "Company News"],
        horizontal=True,
        label_visibility="collapsed",
        key="active_mode",
    )

# Display chat history
# Messages are rendered as plain Markdown, without raw HTML: the history holds
# user input and text scraped from arbitrary websites, neither of which should
# be allowed to inject markup into the page.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Main chat input and logic loop
if prompt := st.chat_input("Ask about a company or say hi!"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        response = ""
        # True when more than one company was searched in a single prompt;
        # used below to decide whether to report the elapsed time.
        multi_company_search = False
        start_time = time.time()

        # --- Logic for when Company News Mode is ON ---
        if st.session_state.active_mode == "Company News":
            if st.session_state.suggestion:
                # A "Did you mean ...?" question is pending: this prompt is the answer.
                if prompt.lower().strip() in ["yes", "y", "yep", "ok"]:
                    company_to_search = st.session_state.suggestion
                    response = get_news_and_summarize(company_to_search)
                else:
                    response = "Okay, which company would you like me to search for instead?"
                # The suggestion is consumed either way.
                st.session_state.suggestion = None

            # --- Multiple companies, comma separated ---
            elif ',' in prompt:
                companies = [name.strip() for name in prompt.split(',') if name.strip()]
                if companies:
                    all_responses = [get_news_and_summarize(company) for company in companies]
                    # A Markdown horizontal rule separates the per-company results
                    # (used instead of an HTML <hr> so no raw HTML is needed).
                    response = "\n\n---\n\n".join(all_responses)
                    multi_company_search = len(companies) > 1
                else:
                    # The prompt held only commas/whitespace; answer instead of
                    # rendering an empty assistant bubble.
                    response = "Please enter at least one company name."

            else:
                # --- Single company ---
                company_query = prompt.strip()
                exact_match = next(
                    (c for c in COMMON_COMPANIES if c.lower() == company_query.lower()),
                    None,
                )
                if exact_match:
                    response = get_news_and_summarize(exact_match)
                else:
                    suggested_match = find_closest_match(company_query, COMMON_COMPANIES)
                    if suggested_match:
                        response = f"Did you mean **{suggested_match}**? Please say yes or no."
                        st.session_state.suggestion = suggested_match
                    else:
                        response = "I couldn't find a known company by that name. Please try another."

        # --- Logic for when Company News Mode is OFF (General Chat) ---
        else:
            if is_greeting(prompt):
                response = "Hello! How can I help you today?"
            else:
                response = "I can only search for company news. To do that, please select the **Company News** mode above."

        duration = time.time() - start_time

        # Report timing only for responses that came from a news search.
        if 'Here are the top news summaries' in response or multi_company_search:
            response += f"\n\n*(Time taken: {duration:.2f} seconds)*"

        st.markdown(response)

    if response:
        st.session_state.messages.append({"role": "assistant", "content": response})


Overwriting app.py


In [18]:
import os
from getpass import getpass

from pyngrok import ngrok

# Kill any previous ngrok tunnels
ngrok.kill()

# The authtoken is a secret and must not be stored in the notebook. It is read
# from the NGROK_AUTH_TOKEN environment variable, falling back to a hidden
# interactive prompt.
# NOTE(review): a token was previously committed in this cell; it should be
# revoked in the ngrok dashboard and replaced with a new one.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass("Paste your ngrok authtoken: ")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Run the Streamlit app in the background
os.system("streamlit run app.py --server.port 8501 &")

# Create a public URL to access the app
public_url = ngrok.connect(8501)
print(f"🚀 Click here to open your chatbot: {public_url}")

🚀 Click here to open your chatbot: NgrokTunnel: "https://10cf15a8e612.ngrok-free.app" -> "http://localhost:8501"
