<a href="https://colab.research.google.com/github/navneetkrc/Deep_learning_experiments/blob/master/NER_Streamlit_apps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required packages
!pip install transformers torch datasets evaluate pyngrok streamlit
!pip install -U git+https://github.com/facebookresearch/GLINER.git
!pip install -q streamlit-pydantic
!pip install -q streamlit-tags

In [None]:
from google.colab import userdata

# Read the ngrok token from Colab's secret store into a variable.
# Leaving the bare `userdata.get(...)` call as the cell's last expression would
# echo the secret into the cell output (and into the saved notebook).
NGROK_AUTH_TOKEN = userdata.get('NGROK_AUTH_TOKEN')

In [None]:
# Set up ngrok for Streamlit
!pip install pyngrok
from pyngrok import ngrok

# Check if you have an authtoken set, and install one if needed
# Get your authtoken from https://dashboard.ngrok.com/get-started/your-authtoken
import os
NGROK_AUTH_TOKEN = userdata.get('NGROK_AUTH_TOKEN')# Add your token here if you have one

if NGROK_AUTH_TOKEN:
    !ngrok authtoken $NGROK_AUTH_TOKEN
    print("Ngrok authentication successful!")
else:
    print("No Ngrok auth token provided. If tunneling fails, please get a token from https://dashboard.ngrok.com/get-started/your-authtoken")

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Ngrok authentication successful!


In [None]:
# Create Streamlit app file
%%writefile app.py
import base64
import html
import io
import json
import random
import re

import numpy as np
import pandas as pd
import streamlit as st
import torch
from gliner import GLiNER
from PIL import Image
from streamlit_tags import st_tags

# Page-level settings: browser tab title and icon, wide layout, sidebar open on load.
PAGE_SETTINGS = {
    "page_title": "Product Query Entity Extractor",
    "page_icon": "🔍",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**PAGE_SETTINGS)

# Stylesheet for the HTML snippets this app renders through st.markdown
# (entity tags, inline highlights with their labels, example/result boxes).
CUSTOM_CSS = """
<style>
    .main {
        padding: 1.5rem;
    }
    .entity-tag {
        display: inline-block;
        padding: 0.2rem 0.5rem;
        margin: 0.2rem;
        border-radius: 0.5rem;
        font-size: 0.8rem;
    }
    .header-container {
        display: flex;
        align-items: center;
        margin-bottom: 1rem;
    }
    .header-text {
        margin-left: 1rem;
    }
    .sidebar .sidebar-content {
        background-color: #f5f5f5;
    }
    .highlight {
        border-radius: 0.3rem;
        padding: 0.1rem 0.3rem;
        position: relative;
    }
    .entity-label {
        position: absolute;
        top: -0.7rem;
        left: 0;
        font-size: 0.6rem;
        background-color: #555;
        color: white;
        padding: 0 0.3rem;
        border-radius: 0.2rem;
    }
    .example-container {
        background-color: #f9f9f9;
        padding: 1rem;
        border-radius: 0.5rem;
        margin-bottom: 1rem;
    }
    .result-container {
        background-color: #f0f7ff;
        padding: 1rem;
        border-radius: 0.5rem;
        margin-top: 1rem;
    }
</style>
"""

# Inject the stylesheet into the page.
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Header: page title plus a one-line description of what the app does.
st.markdown("""
<div class="header-container">
    <h1>🔍 Product Query Entity Extractor</h1>
</div>
<p>Extract key entities from natural language product search queries using GLiNER (Generalist and Lightweight Model for Named Entity Recognition).</p>
""", unsafe_allow_html=True)

# Initialize session state variables.
# Each key is created only when absent, so values edited by the user survive
# the script reruns Streamlit performs on every interaction.
if 'entity_types' not in st.session_state:
    st.session_state.entity_types = [
        "Brand",             # e.g., Samsung, Apple
        "ProductType",       # e.g., smartphone, TV, earbuds
        "ProductLine",       # e.g., Galaxy
        "StorageCapacity",   # e.g., 100GB
        "MemorySize",        # e.g., 8GB RAM
        "DisplaySize",       # e.g., 43 inches
        "DisplayResolution", # e.g., 4K
        "CameraFeature",     # e.g., quad camera
        "AudioFeature",      # e.g., noise cancellation
        "SmartFeature",      # e.g., smart TV capabilities
        "PriceConstraint",   # e.g., under $400
        "Qualifier",         # e.g., latest, best, wireless
    ]

if 'entity_colors' not in st.session_state:
    # Give every entity type a random light background color. Each channel is
    # drawn from 180..254 so dark text stays readable on top of it.
    # `random` is imported at module level because the sidebar also needs it on
    # reruns, when this first-run-only branch is skipped.
    st.session_state.entity_colors = {
        entity_type: "rgb({},{},{})".format(
            *(int(180 + 75 * random.random()) for _ in range(3))
        )
        for entity_type in st.session_state.entity_types
    }

if 'examples' not in st.session_state:
    # Seed example phrases for each entity type
    st.session_state.examples = {
        "Brand": ["Samsung", "Apple", "Sony", "LG"],
        "ProductType": ["smartphone", "TV", "earbuds", "tablet", "laptop", "monitor"],
        "ProductLine": ["Galaxy", "iPhone", "Surface", "ThinkPad"],
        "StorageCapacity": ["100GB", "256GB", "1TB", "512GB", "64GB"],
        "MemorySize": ["8GB RAM", "16GB RAM", "4GB Ram", "12GB memory"],
        "DisplaySize": ["43 inches", "55 inch", "13.3 inches", "6.7 inch screen"],
        "DisplayResolution": ["4K", "1080p", "HD", "UHD", "Retina", "OLED"],
        "CameraFeature": ["quad camera", "triple lens", "50MP camera", "ultrawide"],
        "AudioFeature": ["noise cancellation", "surround sound", "Dolby Atmos"],
        "SmartFeature": ["smart TV", "voice assistant", "AI capabilities"],
        "PriceConstraint": ["under $400", "less than $1000", "$500-$800", "budget"],
        "Qualifier": ["latest", "best", "wireless", "premium", "budget", "high-end"]
    }

if 'custom_examples' not in st.session_state:
    # Hand-annotated queries. Every entity is [start, end, type], where
    # text[start:end] is the annotated phrase (end is exclusive).
    st.session_state.custom_examples = [
        {
            "text": "Show me latest samsung galaxy smartphones with 100GB of storage and quad camera.",
            "entities": [
                [8, 14, "Qualifier"],         # "latest"
                [15, 22, "Brand"],            # "samsung"
                [23, 29, "ProductLine"],      # "galaxy"
                [30, 41, "ProductType"],      # "smartphones"
                [47, 52, "StorageCapacity"],  # "100GB"
                [68, 79, "CameraFeature"]     # "quad camera"
            ]
        },
        {
            "text": "Find Samsung 4k TV that is 43 inches and has smart TV capabilities",
            "entities": [
                [5, 12, "Brand"],              # "Samsung"
                [13, 15, "DisplayResolution"], # "4k"
                [16, 18, "ProductType"],       # "TV"
                [27, 36, "DisplaySize"],       # "43 inches"
                [45, 53, "SmartFeature"]       # "smart TV"
            ]
        },
        {
            "text": "what are best samsung tablets under $400",
            "entities": [
                [9, 13, "Qualifier"],        # "best"
                [14, 21, "Brand"],           # "samsung"
                [22, 29, "ProductType"],     # "tablets"
                [30, 40, "PriceConstraint"]  # "under $400"
            ]
        }
    ]

if 'gliner' not in st.session_state:
    # The model is loaded on demand from the sidebar button.
    st.session_state.gliner = None

if 'demonstration_examples' not in st.session_state:
    st.session_state.demonstration_examples = []

# -------------------- Sidebar --------------------
with st.sidebar:
    st.header("Configuration")

    # Model Selection.
    # GLiNER.from_pretrained expects a GLiNER checkpoint id from the Hugging Face
    # Hub; bare encoder names such as "roberta-base" are not GLiNER checkpoints.
    model_option = st.selectbox(
        "Base Model",
        [
            "urchade/gliner_small-v2.1",
            "urchade/gliner_medium-v2.1",
            "urchade/gliner_large-v2.1",
        ],
        index=0
    )

    # Entity Management
    st.subheader("Entity Types")

    # Allow adding custom entity types (surrounding whitespace is ignored)
    new_entity = st.text_input("Add new entity type").strip()
    if st.button("Add Entity Type") and new_entity and new_entity not in st.session_state.entity_types:
        st.session_state.entity_types.append(new_entity)
        # Generate a light, readable color for the new entity
        r, g, b = (int(180 + 75 * random.random()) for _ in range(3))
        st.session_state.entity_colors[new_entity] = f"rgb({r},{g},{b})"
        st.success(f"Added entity type: {new_entity}")

    # Show entity types with option to remove.
    # Iterate over a copy because the remove button mutates the list.
    for i, entity in enumerate(list(st.session_state.entity_types)):
        col1, col2 = st.columns([4, 1])
        # Fall back to a neutral color if a type has no color assigned.
        color = st.session_state.entity_colors.get(entity, "rgb(220,220,220)")
        # Entity names are user input, so escape them before injecting raw HTML.
        col1.markdown(f"<div style='background-color: {color}; padding: 0.5rem; border-radius: 0.3rem;'>{html.escape(entity)}</div>", unsafe_allow_html=True)
        if col2.button("×", key=f"remove_{i}"):
            st.session_state.entity_types.remove(entity)
            # st.rerun() is the supported replacement for st.experimental_rerun().
            st.rerun()

    # Initialize model button
    if st.button("Initialize/Reset Model"):
        with st.spinner("Initializing GLINER model..."):
            try:
                st.session_state.gliner = GLiNER.from_pretrained(model_option)

                # Build one single-entity example sentence per example phrase
                prefix = "I want a product with "
                demonstration_examples = []
                for entity_type, example_list in st.session_state.examples.items():
                    for example in example_list:
                        # The phrase always starts right after the fixed prefix;
                        # str.find could match earlier when the phrase also
                        # occurs inside the prefix itself.
                        start_idx = len(prefix)
                        end_idx = start_idx + len(example)
                        demonstration_examples.append({
                            "text": f"{prefix}{example}.",
                            "entities": [[start_idx, end_idx, entity_type]]
                        })

                # Add custom examples
                st.session_state.demonstration_examples = demonstration_examples + st.session_state.custom_examples
                st.success("Model initialized successfully!")
            except Exception as e:
                st.error(f"Error initializing model: {str(e)}")

    # Information about the app
    st.subheader("About")
    st.info("""
    This app uses GLiNER (Generalist and Lightweight Model for Named Entity Recognition) to extract product-related entities from natural language queries.

    GLiNER accepts the entity labels at prediction time, so entity types can be added or removed without retraining the model.
    """)

# -------------------- Main content --------------------
tabs = st.tabs(["Query Analysis", "Batch Processing", "Example Management", "Export & Import"])

# -------------------- Query Analysis Tab --------------------
with tabs[0]:
    st.subheader("Analyze Product Queries")

    # Examples to help users
    with st.expander("Example Queries", expanded=False):
        st.markdown("""
        <div class="example-container">
            <p><strong>Example 1:</strong> Show me latest samsung galaxy smartphones with atleast 100GB of storage and a quad camera setup.</p>
            <p><strong>Example 2:</strong> Find Samsung 4k TV that is atleast 43 inches and has smart TV capabilities</p>
            <p><strong>Example 3:</strong> Show me wireless earbuds with active noise cancellation</p>
            <p><strong>Example 4:</strong> what are best samsung tablets under $400</p>
        </div>
        """, unsafe_allow_html=True)

    # Input query
    query = st.text_area("Enter your product query", height=100)

    # Process button
    process_button = st.button("Extract Entities")

    if process_button and query:
        if st.session_state.gliner is None:
            st.warning("Please initialize the model first using the button in the sidebar.")
        else:
            with st.spinner("Extracting entities..."):
                try:
                    # gliner's inference entry point is predict_entities(text, labels).
                    # Each prediction is a dict with "start", "end", "text",
                    # "label" and "score" keys.
                    predictions = st.session_state.gliner.predict_entities(
                        query,
                        st.session_state.entity_types
                    )

                    # Keep the analysis in session state. A button is only True
                    # during the rerun triggered by its own click, so anything
                    # rendered solely inside this branch (including the Save
                    # button) would vanish as soon as another widget is used.
                    st.session_state.last_analysis = {
                        "query": query,
                        # [start, end, label] triples ordered by position
                        "spans": sorted(
                            [p["start"], p["end"], p["label"]] for p in predictions
                        ),
                    }
                except Exception as e:
                    st.session_state.last_analysis = None
                    st.error(f"Error processing query: {str(e)}")

    analysis = st.session_state.get("last_analysis")
    if analysis:
        analyzed_query = analysis["query"]
        entity_spans = analysis["spans"]
        fallback_color = "rgb(220,220,220)"  # for types that have no color assigned

        # Show results
        st.markdown("<div class='result-container'>", unsafe_allow_html=True)
        st.subheader("Extracted Entities")

        if not entity_spans:
            st.info("No entities were found in this query.")

        # Visualize entities in the query
        st.markdown("### Query with Highlighted Entities")

        # Build the highlighted HTML from the predicted spans. All query text
        # and labels are escaped because they are injected as raw HTML.
        pieces = []
        current_pos = 0
        for start, end, entity_type in entity_spans:
            if start < current_pos:
                # Skip overlapping entities (taking only the first one)
                continue

            # Text before the entity
            pieces.append(html.escape(analyzed_query[current_pos:start]))

            # The highlighted entity with its floating label
            bg_color = st.session_state.entity_colors.get(entity_type, fallback_color)
            pieces.append(
                f"<span class='highlight' style='background-color: {bg_color};'>"
                f"<span class='entity-label'>{html.escape(entity_type)}</span>"
                f"{html.escape(analyzed_query[start:end])}"
                "</span>"
            )
            current_pos = end

        # Any remaining text
        pieces.append(html.escape(analyzed_query[current_pos:]))
        html_text = "".join(pieces)

        st.markdown(f"<p style='font-size: 1.1rem; line-height: 1.6; padding: 1rem; background-color: #f9f9f9; border-radius: 0.5rem;'>{html_text}</p>", unsafe_allow_html=True)

        # Group the distinct entity texts by type, keeping first-seen order
        formatted_results = {}
        for start, end, entity_type in entity_spans:
            values = formatted_results.setdefault(entity_type, [])
            entity_text = analyzed_query[start:end]
            if entity_text not in values:
                values.append(entity_text)

        # Show entity list
        st.markdown("### Entity Summary")
        for entity_type, values in formatted_results.items():
            st.markdown(f"**{entity_type}**:")
            bg_color = st.session_state.entity_colors.get(entity_type, fallback_color)
            tags_html = " ".join(
                f"<span class='entity-tag' style='background-color: {bg_color};'>{html.escape(value)}</span>"
                for value in values
            )
            st.markdown(f"<div>{tags_html}</div>", unsafe_allow_html=True)

        # Add option to save this as a custom example
        st.markdown("### Save as Custom Example")

        if st.button("Save This Query as Custom Example"):
            # Add to custom examples
            st.session_state.custom_examples.append({
                "text": analyzed_query,
                "entities": [list(span) for span in entity_spans]
            })

            # Update demonstration examples (a separate copy, so editing one
            # list never silently changes the other)
            st.session_state.demonstration_examples.append({
                "text": analyzed_query,
                "entities": [list(span) for span in entity_spans]
            })

            # NOTE(review): predict_entities takes only text and labels, so saved
            # examples do not influence predictions; the message no longer claims so.
            st.success("Added as custom example!")

        st.markdown("</div>", unsafe_allow_html=True)

# -------------------- Batch Processing Tab --------------------
with tabs[1]:
    st.subheader("Batch Process Multiple Queries")

    # Option to enter multiple queries
    batch_queries = st.text_area("Enter multiple queries (one per line)", height=150)

    # Process batch button
    process_batch = st.button("Process Batch")

    if process_batch and batch_queries:
        if st.session_state.gliner is None:
            st.warning("Please initialize the model first using the button in the sidebar.")
        else:
            # Split by lines, dropping blank ones
            queries = [q.strip() for q in batch_queries.split('\n') if q.strip()]

            if not queries:
                st.warning("Please enter at least one valid query.")
            else:
                results = []
                progress_bar = st.progress(0)

                # `batch_query` (not `query`) so this loop does not overwrite the
                # module-level `query` variable used by the Query Analysis tab.
                for i, batch_query in enumerate(queries):
                    with st.spinner(f"Processing query {i+1}/{len(queries)}..."):
                        try:
                            # gliner's inference entry point is
                            # predict_entities(text, labels); each prediction is a
                            # dict with "start", "end", "text", "label", "score".
                            predictions = st.session_state.gliner.predict_entities(
                                batch_query,
                                st.session_state.entity_types
                            )

                            # Group distinct entity texts by type, keeping the
                            # order in which they were predicted
                            formatted_results = {}
                            for prediction in predictions:
                                values = formatted_results.setdefault(prediction["label"], [])
                                entity_text = batch_query[prediction["start"]:prediction["end"]]
                                if entity_text not in values:
                                    values.append(entity_text)

                            results.append({
                                "query": batch_query,
                                "entities": formatted_results
                            })

                        except Exception as e:
                            # Record the failure and keep processing the rest
                            results.append({
                                "query": batch_query,
                                "error": str(e)
                            })

                        # Update progress
                        progress_bar.progress((i + 1) / len(queries))

                # Display results
                st.markdown("<div class='result-container'>", unsafe_allow_html=True)
                st.subheader("Batch Results")

                for i, result in enumerate(results):
                    st.markdown(f"### Query {i+1}: {result['query']}")

                    if "error" in result:
                        st.error(f"Error: {result['error']}")
                    else:
                        for entity_type, values in result["entities"].items():
                            st.markdown(f"**{entity_type}**:")
                            bg_color = st.session_state.entity_colors.get(entity_type, "rgb(220,220,220)")
                            # Values come from user text; escape before injecting raw HTML
                            tags_html = " ".join(
                                f"<span class='entity-tag' style='background-color: {bg_color};'>{html.escape(value)}</span>"
                                for value in values
                            )
                            st.markdown(f"<div>{tags_html}</div>", unsafe_allow_html=True)

                    st.markdown("---")

                # Option to download results as JSON, offered as a data-URI link
                # so that clicking it does not trigger a rerun and drop the results
                results_json = json.dumps(results, indent=2)
                b64 = base64.b64encode(results_json.encode()).decode()
                href = f'<a href="data:application/json;base64,{b64}" download="batch_results.json">Download Results as JSON</a>'
                st.markdown(href, unsafe_allow_html=True)

                st.markdown("</div>", unsafe_allow_html=True)


# -------------------- Example Management Tab --------------------
with tabs[2]:
    st.subheader("Manage Entity Examples")

    # Allow user to add examples for each entity type
    selected_entity = st.selectbox("Select Entity Type", st.session_state.entity_types)

    if selected_entity is None:
        # Nothing can be selected when every entity type has been removed;
        # avoid storing examples under a None key.
        st.info("Add an entity type in the sidebar to manage its examples.")
    else:
        # Show current examples
        st.write(f"Current examples for **{selected_entity}**:")

        # One tag editor covers both cases; only the label differs depending
        # on whether the type already has examples.
        has_examples = selected_entity in st.session_state.examples
        example_tags = st_tags(
            label=f"{'Edit' if has_examples else 'Add'} {selected_entity} Examples",
            text="Press enter to add",
            value=st.session_state.examples.get(selected_entity, []),
            key=f"tags_{selected_entity}"
        )
        st.session_state.examples[selected_entity] = example_tags

    # Custom annotated examples
    st.subheader("Custom Annotated Examples")

    if st.session_state.custom_examples:
        for i, example in enumerate(st.session_state.custom_examples):
            with st.expander(f"Example {i+1}: {example['text'][:50]}{'...' if len(example['text']) > 50 else ''}", expanded=False):
                st.write("Text:")
                st.code(example['text'])

                st.write("Entities:")
                for start, end, entity_type in example['entities']:
                    entity_text = example['text'][start:end]
                    st.markdown(f"- **{entity_type}**: '{entity_text}' (positions {start}-{end})")

                if st.button(f"Remove Example #{i+1}"):
                    st.session_state.custom_examples.pop(i)
                    # Rebuild demonstration examples from the per-type phrases
                    # plus the remaining custom examples
                    prefix = "I want a product with "
                    demonstration_examples = []
                    for entity_type, example_list in st.session_state.examples.items():
                        for ex in example_list:
                            # The phrase always starts right after the fixed
                            # prefix; str.find could match inside the prefix.
                            start_idx = len(prefix)
                            end_idx = start_idx + len(ex)
                            demonstration_examples.append({
                                "text": f"{prefix}{ex}.",
                                "entities": [[start_idx, end_idx, entity_type]]
                            })
                    st.session_state.demonstration_examples = demonstration_examples + st.session_state.custom_examples
                    # st.rerun() is the supported replacement for
                    # st.experimental_rerun(); it also ends this loop over the
                    # list that was just mutated.
                    st.rerun()
    else:
        st.info("No custom examples yet. You can add them by processing queries and saving them as examples.")

# -------------------- Export & Import Tab --------------------
with tabs[3]:
    st.subheader("Export & Import Configuration")

    col1, col2 = st.columns(2)

    with col1:
        st.markdown("### Export Configuration")
        if st.button("Export Model Configuration"):
            # Create configuration to export
            config = {
                "entity_types": st.session_state.entity_types,
                "entity_colors": st.session_state.entity_colors,
                "examples": st.session_state.examples,
                "custom_examples": st.session_state.custom_examples,
                "base_model": model_option
            }

            # Convert to JSON
            config_json = json.dumps(config, indent=2)
            b64 = base64.b64encode(config_json.encode()).decode()

            # Create download link
            href = f'<a href="data:application/json;base64,{b64}" download="entity_extractor_config.json">Download Configuration</a>'
            st.markdown(href, unsafe_allow_html=True)

    with col2:
        st.markdown("### Import Configuration")
        uploaded_file = st.file_uploader("Upload Configuration File", type="json")

        if uploaded_file is None:
            # Uploader cleared: forget the last import so the same file can be
            # applied again later.
            st.session_state.imported_config_signature = None
        else:
            # The uploaded file stays in the widget across reruns. Without this
            # guard the import would run on every interaction and overwrite any
            # change made after the upload.
            upload_signature = (uploaded_file.name, uploaded_file.size)
            if st.session_state.get("imported_config_signature") == upload_signature:
                st.caption("This file has already been imported.")
            else:
                try:
                    config = json.load(uploaded_file)

                    # Validate configuration: required keys and their container types
                    expected_types = {
                        "entity_types": list,
                        "entity_colors": dict,
                        "examples": dict,
                        "custom_examples": list,
                    }
                    if not isinstance(config, dict) or any(key not in config for key in expected_types):
                        st.error("Invalid configuration file. Missing required keys.")
                    elif any(not isinstance(config[key], kind) for key, kind in expected_types.items()):
                        st.error("Invalid configuration file. Unexpected value types.")
                    else:
                        # Update session state
                        st.session_state.entity_types = config["entity_types"]
                        st.session_state.entity_colors = config["entity_colors"]
                        st.session_state.examples = config["examples"]
                        st.session_state.custom_examples = config["custom_examples"]

                        # Every entity type needs a color for rendering; give
                        # types missing from the file a neutral one.
                        for entity_type in st.session_state.entity_types:
                            st.session_state.entity_colors.setdefault(entity_type, "rgb(220,220,220)")

                        # Rebuild demonstration examples
                        prefix = "I want a product with "
                        demonstration_examples = []
                        for entity_type, example_list in st.session_state.examples.items():
                            for example in example_list:
                                # The phrase always starts right after the fixed
                                # prefix; str.find could match inside the prefix.
                                start_idx = len(prefix)
                                end_idx = start_idx + len(example)
                                demonstration_examples.append({
                                    "text": f"{prefix}{example}.",
                                    "entities": [[start_idx, end_idx, entity_type]]
                                })

                        st.session_state.demonstration_examples = demonstration_examples + st.session_state.custom_examples
                        st.session_state.imported_config_signature = upload_signature

                        st.success("Configuration imported successfully!")
                        st.info("Please re-initialize the model using the button in the sidebar.")
                except Exception as e:
                    st.error(f"Error importing configuration: {str(e)}")

    # Usage instructions.
    # Every line of the markdown below shares the same indentation so that it
    # can be dedented as a whole; a single line at column 0 would leave the rest
    # indented and rendered as a code block.
    st.subheader("Usage Instructions")
    with st.expander("How to use this app", expanded=False):
        st.markdown("""
        ### Getting Started
        1. First, initialize the model using the button in the sidebar.
        2. Enter your product query in the "Query Analysis" tab and click "Extract Entities".
        3. The app will highlight the entities found in your query.

        ### Batch Processing
        - Use the "Batch Processing" tab to analyze multiple queries at once.
        - Enter one query per line and click "Process Batch".

        ### Improving Accuracy
        - Add more examples for each entity type in the "Example Management" tab.
        - Save processed queries as custom examples to improve the model's accuracy.

        ### Saving Your Work
        - Export your configuration to reuse it later.
        - Import a previously saved configuration to continue your work.
        """)

    # Credits
    st.markdown("---")
    st.markdown("""
    <div style="text-align: center; color: #888;">
        Powered by GLiNER (Generalist and Lightweight Model for Named Entity Recognition)
    </div>
    """, unsafe_allow_html=True)

Overwriting app.py


In [None]:
# Create a function to run Streamlit with ngrok
def run_streamlit_app(port=8501, app_path="app.py", log_path="streamlit.log"):
    """Start the Streamlit app in the background and expose it through ngrok.

    Args:
        port: Local port the Streamlit server listens on and the tunnel targets.
        app_path: Path of the Streamlit script to run.
        log_path: File that receives the server's stdout/stderr, so start-up
            errors (e.g. a missing module) can be inspected afterwards.
    """
    import subprocess
    import sys

    from IPython.display import HTML, display
    from pyngrok import ngrok

    # Stop any existing tunnels
    ngrok.kill()

    # Run the Streamlit app as a background process. Using the kernel's own
    # interpreter (`sys.executable -m streamlit`) guarantees the server sees the
    # packages installed in this environment. The child keeps its own copy of
    # the log file descriptor, so the parent's handle can be closed right away.
    with open(log_path, "w") as log_file:
        subprocess.Popen(
            [
                sys.executable, "-m", "streamlit", "run", app_path,
                "--server.port", str(port),
                "--server.headless", "true",
            ],
            stdin=subprocess.DEVNULL,
            stdout=log_file,
            stderr=subprocess.STDOUT,
        )

    # Set up a tunnel to the Streamlit port. `ngrok.connect` returns an
    # NgrokTunnel object; the address to link to is its `public_url` attribute
    # (interpolating the object itself yields its repr, not a usable URL).
    tunnel = ngrok.connect(port)
    public_url = tunnel.public_url
    print(f"Streamlit app is running at: {public_url}")
    print(f"Server output is written to: {log_path}")

    # Display the public URL
    display(HTML(f"""
    <div style="background-color: #4CAF50; color: white; padding: 10px; border-radius: 5px; margin-bottom: 10px; font-family: Arial, sans-serif;">
        <h3 style="margin: 0;">🚀 Your Streamlit app is running!</h3>
        <p style="margin-top: 5px;">Access it at: <a href="{public_url}" target="_blank" style="color: white; text-decoration: underline;">{public_url}</a></p>
    </div>
    <p style="font-family: Arial, sans-serif; font-size: 14px;">
        <b>Note:</b> The URL will be active as long as this Colab notebook is running.
        Keep this tab open to maintain your app connection.
    </p>
    """))

# Create a function to stop the Streamlit app and ngrok tunnel
def stop_streamlit_app():
    """Shut down the ngrok tunnels and any running `streamlit run` process."""
    import subprocess

    from pyngrok import ngrok

    # Kill ngrok tunnels
    ngrok.kill()

    # Kill Streamlit process. pkill exits with 0 when at least one process
    # matched the pattern and non-zero otherwise, so report what really happened
    # instead of always claiming success.
    result = subprocess.run(["pkill", "-f", "streamlit run"], check=False)

    if result.returncode == 0:
        print("Streamlit app and ngrok tunnel stopped successfully.")
    else:
        print("ngrok tunnel stopped; no running Streamlit process was found.")

In [None]:
# Run the Streamlit app: opens an ngrok tunnel to port 8501, launches
# `streamlit run app.py`, and displays the public URL.
run_streamlit_app()

Streamlit app is running at: NgrokTunnel: "https://3d4a-34-145-99-102.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.145.99.102:8501[0m
[0m
2025-02-25 20:30:31.525 Uncaught app execution
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/streamlit/runtime/scriptrunner/exec_code.py", line 121, in exec_func_with_error_handling
    result = func()
             ^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/streamlit/runtime/scriptrunner/script_runner.py", line 591, in code_to_exec
    exec(code, module.__dict__)
  File "/content/app.py", line 7, in <module>
    from gliner import GLiNER
ModuleNotFoundError: No module named 'gliner'
2025-02-25 

In [None]:
# Stop the Streamlit app when you're done (run this cell to stop).
# This kills the ngrok tunnels and the `streamlit run` process.
stop_streamlit_app()