In [1]:
!pip install openai

Defaulting to user installation because normal site-packages is not writeable
Collecting openai
  Downloading openai-1.98.0-py3-none-any.whl.metadata (29 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.10.0-cp311-cp311-win_amd64.whl.metadata (5.3 kB)
Downloading openai-1.98.0-py3-none-any.whl (767 kB)
   ---------------------------------------- 0.0/767.7 kB ? eta -:--:--
   ---------------------------------------- 767.7/767.7 kB 4.0 MB/s eta 0:00:00
Downloading jiter-0.10.0-cp311-cp311-win_amd64.whl (209 kB)
Installing collected packages: jiter, openai

   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------- ------------------- 1/2 [openai]
   -------------------


[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
!pip install gradio

Defaulting to user installation because normal site-packages is not writeable
Collecting gradio
  Downloading gradio-5.39.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting brotli>=1.1.0 (from gradio)
  Downloading Brotli-1.1.0-cp311-cp311-win_amd64.whl.metadata (5.6 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.6.1-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.11.0 (from gradio)
  Downloading gradio_client-1.11.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting huggingface-hub<1.0,>=0.33.5 (from gradio)
  Downloading huggingface_hub-0.34.3-py3-none-any.whl.metadata (14 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-


[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
!pip install ipywidgets

Defaulting to user installation because normal site-packages is not writeable
Collecting ipywidgets
  Downloading ipywidgets-8.1.7-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.14 (from ipywidgets)
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets)
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl.metadata (20 kB)
Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl (216 kB)
Downloading widgetsnbextension-4.0.14-py3-none-any.whl (2.2 MB)
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   -------------- ------------------------- 0.8/2.2 MB 4.2 MB/s eta 0:00:01
   --------------------------------- ------ 1.8/2.2 MB 4.4 MB/s eta 0:00:01
   ---------------------------------------- 2.2/2.2 MB 4.1 MB/s eta 0:00:00
Installing collected packages: widgetsnbextension, jupyterlab_widgets, ipywidgets

   -------


[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import openai
import json
import sqlite3
import numpy as np
from datetime import datetime
from typing import List, Dict, Optional, Tuple
import re
import logging
from dataclasses import dataclass
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

In [9]:
# Configure logging.
# basicConfig sets the root logger to INFO, so INFO records from libraries
# (e.g. the httpx request lines visible in the cell outputs) are shown too.
logging.basicConfig(level=logging.INFO)
# Logger used by the classes and functions defined in this notebook.
logger = logging.getLogger(__name__)

@dataclass
class KnowledgeItem:
    """One row of the ``knowledge_items`` table, as returned by a search.

    Instances are built in ``KnowledgeInformationSystem.search_knowledge``,
    which decodes the JSON-encoded columns and fills in ``relevance_score``.
    """
    id: int                 # primary key of the row
    content: str            # original text that was added
    summary: str            # stored summary text
    keywords: List[str]     # decoded from the JSON ``keywords`` column
    category: str           # stored category label
    embedding: List[float]  # decoded from the JSON ``embedding`` column
    created_at: str         # ISO-format timestamp written when the item was added
    relevance_score: float = 0.0  # cosine similarity to the query, set by search

class KnowledgeInformationSystem:
    """SQLite-backed knowledge store that uses the OpenAI API for metadata and search.

    Every stored item keeps its raw content together with a generated summary,
    keyword list, category and embedding vector. Keywords and embeddings are
    persisted as JSON text. Search ranks stored items by cosine similarity
    between their embedding and the embedding of the query.
    """

    # Model names are kept in one place so they can be changed (or overridden
    # in a subclass) without touching every API call.
    CHAT_MODEL = "gpt-3.5-turbo"
    EMBEDDING_MODEL = "text-embedding-3-small"

    # Categories the model is asked to choose from; anything else is mapped
    # to DEFAULT_CATEGORY.
    CATEGORIES = (
        "Technology", "Business", "Science", "Education",
        "Health", "Finance", "Legal", "General",
    )
    DEFAULT_CATEGORY = "General"

    def __init__(self, api_key: str, db_path: str = "knowledge_base.db"):
        """Create the OpenAI client and make sure the database table exists.

        Args:
            api_key: OpenAI API key passed to ``openai.OpenAI``.
            db_path: Path of the SQLite database file.

        Raises:
            Any exception raised while constructing the client or creating
            the table is propagated to the caller.
        """
        self.client = openai.OpenAI(api_key=api_key)
        self.db_path = db_path
        self._initialize_database()

    def _initialize_database(self):
        """Create the ``knowledge_items`` table if it does not exist yet."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS knowledge_items (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    content TEXT NOT NULL,
                    summary TEXT,
                    keywords TEXT,
                    category TEXT,
                    embedding TEXT,
                    created_at TEXT,
                    relevance_score REAL DEFAULT 0.0
                )
            ''')
            conn.commit()
        finally:
            # Close even when the statement fails so the file handle is not leaked.
            conn.close()

    def _chat_completion(self, system_prompt: str, user_text: str,
                         max_tokens: int, temperature: float) -> str:
        """Run one system+user chat completion and return the reply text.

        Returns an empty string when the reply carries no text content.
        Exceptions from the API call are propagated; callers decide on the
        fallback.
        """
        response = self.client.chat.completions.create(
            model=self.CHAT_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_text},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
        )
        # content may be None; normalise so callers can always use str methods.
        return response.choices[0].message.content or ""

    def generate_embedding(self, text: str) -> List[float]:
        """Return the embedding vector for ``text``, or ``[]`` if the API call fails."""
        try:
            response = self.client.embeddings.create(
                input=text,
                model=self.EMBEDDING_MODEL
            )
            return response.data[0].embedding
        except Exception as e:
            logger.error(f"Error generating embedding: {e}")
            return []

    def extract_keywords(self, text: str) -> List[str]:
        """Ask the chat model for keywords and return them as a list.

        The reply is split on commas; surrounding whitespace is removed and
        empty entries (e.g. from a trailing comma) are dropped. Returns ``[]``
        if the API call fails.
        """
        try:
            reply = self._chat_completion(
                "Extract 5-10 important keywords from the following text. Return only the keywords separated by commas.",
                text,
                max_tokens=100,
                temperature=0.3,
            )
            return [kw.strip() for kw in reply.split(',') if kw.strip()]
        except Exception as e:
            logger.error(f"Error extracting keywords: {e}")
            return []

    def generate_summary(self, text: str) -> str:
        """Return a short model-written summary of ``text``.

        If the API call fails, falls back to the first 200 characters of the
        text, with an ellipsis only when something was actually cut off.
        """
        try:
            reply = self._chat_completion(
                "Provide a concise summary of the following text in 2-3 sentences.",
                text,
                max_tokens=150,
                temperature=0.3,
            )
            return reply.strip()
        except Exception as e:
            logger.error(f"Error generating summary: {e}")
            return text[:200] + "..." if len(text) > 200 else text

    def categorize_content(self, text: str) -> str:
        """Return one of ``CATEGORIES`` for ``text``.

        The model reply is matched case-insensitively after removing
        surrounding whitespace, quotes and a trailing period. Replies that do
        not match any known category, and API failures, yield "General".
        """
        try:
            reply = self._chat_completion(
                "Categorize the following text into one of these categories: "
                f"{', '.join(self.CATEGORIES)}. Return only the category name.",
                text,
                max_tokens=10,
                temperature=0.1,
            )
            cleaned = reply.strip().strip(".\"'").casefold()
            for category in self.CATEGORIES:
                if category.casefold() == cleaned:
                    return category
            logger.warning(
                f"Unexpected category {reply!r}; using {self.DEFAULT_CATEGORY}"
            )
            return self.DEFAULT_CATEGORY
        except Exception as e:
            logger.error(f"Error categorizing content: {e}")
            return "General"

    def add_knowledge(self, content: str) -> Dict[str, str]:
        """Generate metadata for ``content`` and store it as a new item.

        Returns:
            On success a dict with keys ``status`` ("success"), ``message``,
            ``id``, ``summary``, ``category`` and ``keywords`` (comma-joined).
            On failure a dict with ``status`` ("error") and ``message``.
        """
        try:
            # The embedding is requested first: an item without one can never
            # be returned by search_knowledge, so it is rejected up front
            # before any chat-completion calls are spent on it.
            embedding = self.generate_embedding(content)
            if not embedding:
                return {
                    "status": "error",
                    "message": "Failed to add knowledge: could not generate an embedding for the content",
                }

            summary = self.generate_summary(content)
            keywords = self.extract_keywords(content)
            category = self.categorize_content(content)

            conn = sqlite3.connect(self.db_path)
            try:
                cursor = conn.execute('''
                    INSERT INTO knowledge_items (content, summary, keywords, category, embedding, created_at)
                    VALUES (?, ?, ?, ?, ?, ?)
                ''', (
                    content,
                    summary,
                    json.dumps(keywords),
                    category,
                    json.dumps(embedding),
                    datetime.now().isoformat()
                ))
                conn.commit()
                item_id = cursor.lastrowid
            finally:
                conn.close()

            return {
                "status": "success",
                "message": f"Knowledge item added successfully with ID: {item_id}",
                "id": str(item_id),
                "summary": summary,
                "category": category,
                "keywords": ", ".join(keywords)
            }

        except Exception as e:
            logger.error(f"Error adding knowledge: {e}")
            return {"status": "error", "message": f"Failed to add knowledge: {str(e)}"}

    def search_knowledge(self, query: str, top_k: int = 5) -> "List[KnowledgeItem]":
        """Return up to ``top_k`` stored items ranked by similarity to ``query``.

        Items whose stored embedding is missing or has a different length than
        the query embedding are skipped. Returns ``[]`` when ``top_k`` is not
        positive, when the query embedding cannot be generated, when nothing
        comparable is stored, or when any error occurs.
        """
        try:
            if top_k <= 0:
                return []

            query_embedding = self.generate_embedding(query)
            if not query_embedding:
                return []

            # Columns are named explicitly so the indices below do not depend
            # on the physical column order of the table.
            conn = sqlite3.connect(self.db_path)
            try:
                rows = conn.execute(
                    'SELECT id, content, summary, keywords, category, embedding, created_at '
                    'FROM knowledge_items'
                ).fetchall()
            finally:
                conn.close()

            candidates = []
            for row in rows:
                item_embedding = json.loads(row[5]) if row[5] else []
                # A vector of another length cannot be compared with the query
                # and would otherwise make the whole search fail.
                if item_embedding and len(item_embedding) == len(query_embedding):
                    candidates.append((row, item_embedding))

            if not candidates:
                return []

            # One call for all candidates instead of one call per row.
            scores = cosine_similarity(
                [query_embedding], [vector for _, vector in candidates]
            )[0]

            knowledge_items = [
                KnowledgeItem(
                    id=row[0],
                    content=row[1],
                    summary=row[2],
                    keywords=json.loads(row[3]) if row[3] else [],
                    category=row[4],
                    embedding=vector,
                    created_at=row[6],
                    relevance_score=float(score)
                )
                for (row, vector), score in zip(candidates, scores)
            ]

            # Sort by relevance and return top_k
            knowledge_items.sort(key=lambda x: x.relevance_score, reverse=True)
            return knowledge_items[:top_k]

        except Exception as e:
            logger.error(f"Error searching knowledge: {e}")
            return []

    def get_knowledge_stats(self) -> Dict[str, object]:
        """Return the item count and a per-category breakdown.

        Returns:
            ``{"total_items": int, "categories": {category: count}}``; on any
            error ``{"total_items": 0, "categories": {}}``.
        """
        try:
            conn = sqlite3.connect(self.db_path)
            try:
                total_items = conn.execute(
                    'SELECT COUNT(*) FROM knowledge_items'
                ).fetchone()[0]
                category_counts = dict(conn.execute(
                    'SELECT category, COUNT(*) FROM knowledge_items GROUP BY category'
                ).fetchall())
            finally:
                conn.close()

            return {
                "total_items": total_items,
                "categories": category_counts
            }
        except Exception as e:
            logger.error(f"Error getting stats: {e}")
            return {"total_items": 0, "categories": {}}

    def generate_answer(self, query: str, context_items: "List[KnowledgeItem]") -> str:
        """Answer ``query`` with the chat model, using retrieved items as context.

        Only the first three items of ``context_items`` are put into the
        prompt. Returns a fixed message when no items are given and an error
        message string when the API call fails.
        """
        if not context_items:
            return "No relevant information found in the knowledge base."

        context = "\n\n".join([
            f"Content: {item.content}\nSummary: {item.summary}"
            for item in context_items[:3]  # Use top 3 most relevant items
        ])

        try:
            return self._chat_completion(
                "You are a helpful AI assistant. Answer the user's question based on the provided context from the knowledge base. If the context doesn't contain relevant information, say so.",
                f"Context from knowledge base:\n{context}\n\nQuestion: {query}\n\nPlease provide a comprehensive answer based on the context.",
                max_tokens=500,
                temperature=0.7,
            )
        except Exception as e:
            logger.error(f"Error generating answer: {e}")
            return f"Error generating answer: {str(e)}"

In [10]:
# Global instance shared by the interface callbacks below.
# Stays None until initialize_system() has created a KnowledgeInformationSystem;
# the callbacks check for this and ask the user to initialize first.
knowledge_system = None

def initialize_system(api_key: str) -> str:
    """Create the global knowledge system from an API key.

    The key is stripped of surrounding whitespace (common when pasted). A
    missing or blank key is rejected here instead of being reported as a
    successful initialization.

    Args:
        api_key: OpenAI API key entered by the user.

    Returns:
        A status message for display: success, a request to enter a key, or
        the error raised while creating the system.
    """
    global knowledge_system
    cleaned_key = (api_key or "").strip()
    if not cleaned_key:
        return "❌ Please enter an OpenAI API key"
    try:
        knowledge_system = KnowledgeInformationSystem(cleaned_key)
        return "✅ Knowledge system initialized successfully!"
    except Exception as e:
        return f"❌ Error initializing system: {str(e)}"

def add_knowledge_item(content: str) -> Tuple[str, str, str, str]:
    """Add a knowledge item through the Gradio interface.

    Args:
        content: Text entered by the user; ``None`` and blank text are
            rejected with a message instead of raising.

    Returns:
        ``(status message, summary, category, keywords)``. The last three are
        empty strings whenever the item was not added.
    """
    if knowledge_system is None:
        return "❌ Please initialize the system first", "", "", ""

    # Guard against None as well as blank text so the callback never raises.
    if not content or not content.strip():
        return "❌ Please enter some content", "", "", ""

    result = knowledge_system.add_knowledge(content)

    if result["status"] != "success":
        return result["message"], "", "", ""

    return (
        result["message"],
        result["summary"],
        result["category"],
        result["keywords"]
    )

def search_and_answer(query: str) -> Tuple[str, str]:
    """Search the knowledge base and generate an answer for the interface.

    Args:
        query: Question or search text; ``None`` and blank text are rejected
            with a message instead of raising.

    Returns:
        ``(answer, formatted results)``. The second element lists the score,
        category, summary and keywords of up to three best matches and is
        empty whenever no search was performed or nothing was found.
    """
    max_displayed = 3  # number of matches listed next to the answer

    if knowledge_system is None:
        return "❌ Please initialize the system first", ""

    # Guard against None as well as blank text so the callback never raises.
    if not query or not query.strip():
        return "❌ Please enter a search query", ""

    # Search for relevant items
    results = knowledge_system.search_knowledge(query, top_k=5)

    if not results:
        return "No relevant information found.", ""

    # Generate answer
    answer = knowledge_system.generate_answer(query, results)

    # Format search results in one pass instead of repeated string concatenation
    search_results = "".join(
        f"Result {i} (Score: {item.relevance_score:.3f})\n"
        f"Category: {item.category}\n"
        f"Summary: {item.summary}\n"
        f"Keywords: {', '.join(item.keywords)}\n\n"
        for i, item in enumerate(results[:max_displayed], 1)
    )

    return answer, search_results

def get_system_statistics() -> str:
    """Render the knowledge base statistics as display text.

    Returns a prompt to initialize when no system exists yet; otherwise a
    header, the total item count and, if any categories exist, one line per
    category with its count.
    """
    if not knowledge_system:
        return "❌ Please initialize the system first"

    report = knowledge_system.get_knowledge_stats()

    pieces = [
        "📊 Knowledge Base Statistics\n\n",
        f"Total Items: {report['total_items']}\n\n",
    ]

    per_category = report['categories']
    if per_category:
        pieces.append("Categories:\n")
        pieces.extend(f"- {name}: {amount}\n" for name, amount in per_category.items())

    return "".join(pieces)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [15]:
def create_gradio_interface():
    """Build the Gradio Blocks app for the knowledge system.

    The app has four tabs, each wiring one button to one callback:
    Setup (initialize_system), Add Knowledge (add_knowledge_item),
    Search & Query (search_and_answer) and Statistics (get_system_statistics).

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """

    with gr.Blocks(title="Knowledge Information System", theme=gr.themes.Soft()) as demo:
        # A Markdown heading needs a space after '#', otherwise it renders as plain text.
        gr.Markdown("# Knowledge Information System")
        gr.Markdown("An intelligent knowledge management system powered by OpenAI's GPT and embeddings")

        with gr.Tab("Setup"):
            gr.Markdown("## Initialize System")
            api_key_input = gr.Textbox(
                label="OpenAI API Key",
                placeholder="Enter your OpenAI API key",
                type="password"  # masks the key while it is typed
            )
            init_button = gr.Button("Initialize System", variant="primary")
            init_status = gr.Textbox(label="Status", interactive=False)

            init_button.click(
                initialize_system,
                inputs=[api_key_input],
                outputs=[init_status]
            )

        with gr.Tab("Add Knowledge"):
            gr.Markdown("## Add New Knowledge")
            content_input = gr.Textbox(
                label="Content",
                placeholder="Enter the knowledge content you want to add...",
                lines=6
            )
            add_button = gr.Button("Add Knowledge", variant="primary")

            with gr.Row():
                with gr.Column():
                    add_status = gr.Textbox(label="Status", interactive=False)
                    generated_summary = gr.Textbox(label="Generated Summary", interactive=False)
                with gr.Column():
                    generated_category = gr.Textbox(label="Category", interactive=False)
                    generated_keywords = gr.Textbox(label="Keywords", interactive=False)

            # Output order must match the tuple returned by add_knowledge_item.
            add_button.click(
                add_knowledge_item,
                inputs=[content_input],
                outputs=[add_status, generated_summary, generated_category, generated_keywords]
            )

        with gr.Tab("Search & Query"):
            gr.Markdown("## Search Knowledge Base")
            query_input = gr.Textbox(
                label="Search Query",
                placeholder="Ask a question or search for information...",
                lines=2
            )
            search_button = gr.Button("Search & Answer", variant="primary")

            with gr.Row():
                with gr.Column():
                    answer_output = gr.Textbox(
                        label="AI-Generated Answer",
                        lines=8,
                        interactive=False
                    )
                with gr.Column():
                    search_results = gr.Textbox(
                        label="Relevant Knowledge Items",
                        lines=8,
                        interactive=False
                    )

            # Output order must match the tuple returned by search_and_answer.
            search_button.click(
                search_and_answer,
                inputs=[query_input],
                outputs=[answer_output, search_results]
            )

        with gr.Tab("Statistics"):
            gr.Markdown("## Knowledge Base Statistics")
            stats_button = gr.Button("Refresh Statistics", variant="secondary")
            stats_output = gr.Textbox(
                label="Statistics",
                lines=10,
                interactive=False
            )

            # No inputs: the callback reads the global knowledge system.
            stats_button.click(
                get_system_statistics,
                outputs=[stats_output]
            )

    return demo

# Create and launch the interface.
# When this cell is re-run, the app from the previous run is still serving on
# the fixed port below; close it first so the new launch can bind the port.
previous_demo = globals().get("demo")
if previous_demo is not None:
    previous_demo.close()

demo = create_gradio_interface()

# NOTE(review): share=True publishes the app on a public URL. All visitors
# share the single global knowledge system, so anyone with the link can use
# the API key entered on the Setup tab and write to the database. Confirm this
# exposure is intended before sharing the link.
demo.launch(
    share=True,  # Creates a public link
    server_name="0.0.0.0",  # listen on all network interfaces
    server_port=7862,
    show_error=True
)

* Running on local URL:  http://0.0.0.0:7862


INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:7862/gradio_api/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://localhost:7862/ "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"


* Running on public URL: https://e34fe205a043cce706.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


INFO:httpx:HTTP Request: HEAD https://e34fe205a043cce706.gradio.live "HTTP/1.1 200 OK"




INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
