Creating Fake Database from different departments

In [7]:
import sqlite3
from datetime import datetime, timedelta
import random

# Define sample data for each department
departments = {
    "Finance": [
        ("Financial Report Q1", "This is the content of the Finance Q1 report.", "Executive"),
        ("Investment Analysis", "Detailed investment strategies and outcomes.", "Manager"),
        # Add more as needed
    ],
    "HR": [
        ("Policy Update", "New HR policies on leave and remote work.", "Employee"),
        ("Employee Satisfaction Report", "Analysis of employee satisfaction.", "Manager"),
        # Add more as needed
    ],
    "Technical": [
        ("System Architecture", "Technical specifications for system architecture.", "Engineer"),
        ("Tech Roadmap", "Roadmap for upcoming tech projects and improvements.", "Manager"),
        # Add more as needed
    ]
}

# Function to create and populate each department database
def create_department_db(department, data):
    db_name = f"{department.lower()}_db.sqlite"
    conn = sqlite3.connect(db_name)
    c = conn.cursor()
    
    # Create the documents table with metadata fields
    c.execute('''
        CREATE TABLE IF NOT EXISTS documents (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            title TEXT,
            content TEXT,
            access_level TEXT,
            created_date TEXT
        )
    ''')
    
    # Populate the table with sample data
    for title, content, access_level in data:
        created_date = (datetime.now() - timedelta(days=random.randint(0, 365))).strftime("%Y-%m-%d")
        c.execute("INSERT INTO documents (title, content, access_level, created_date) VALUES (?, ?, ?, ?)", 
                  (title, content, access_level, created_date))
    
    conn.commit()
    print(f"Database for {department} created with sample data.")
    conn.close()

# Create each department's database
for department, data in departments.items():
    create_department_db(department, data)


Database for Finance created with sample data.
Database for HR created with sample data.
Database for Technical created with sample data.


Check the random database created

In [8]:
def check_database(department):
    db_name = f"{department.lower()}_db.sqlite"
    conn = sqlite3.connect(db_name)
    c = conn.cursor()
    
    # Fetch all records from the documents table
    c.execute("SELECT * FROM documents")
    records = c.fetchall()
    
    # Print the results
    print(f"\n--- {department} Database ---")
    for record in records:
        print(record)
    
    conn.close()

In [9]:
for dept in departments:
    check_database(dept)


--- Finance Database ---
(1, 'Financial Report Q1', 'This is the content of the Finance Q1 report.', 'Executive', '2023-12-03')
(2, 'Investment Analysis', 'Detailed investment strategies and outcomes.', 'Manager', '2024-02-17')
(3, 'Financial Report Q1', 'This is the content of the Finance Q1 report.', 'Executive', '2024-09-05')
(4, 'Investment Analysis', 'Detailed investment strategies and outcomes.', 'Manager', '2024-06-22')
(5, 'Financial Report Q1', 'This is the content of the Finance Q1 report.', 'Executive', '2023-11-28')
(6, 'Investment Analysis', 'Detailed investment strategies and outcomes.', 'Manager', '2024-07-17')
(7, 'Financial Report Q1', 'This is the content of the Finance Q1 report.', 'Executive', '2024-10-16')
(8, 'Investment Analysis', 'Detailed investment strategies and outcomes.', 'Manager', '2024-02-09')
(9, 'Financial Report Q1', 'This is the content of the Finance Q1 report.', 'Executive', '2023-11-11')
(10, 'Investment Analysis', 'Detailed investment strategies

In [10]:
departments

{'Finance': [('Financial Report Q1',
   'This is the content of the Finance Q1 report.',
   'Executive'),
  ('Investment Analysis',
   'Detailed investment strategies and outcomes.',
   'Manager')],
 'HR': [('Policy Update',
   'New HR policies on leave and remote work.',
   'Employee'),
  ('Employee Satisfaction Report',
   'Analysis of employee satisfaction.',
   'Manager')],
 'Technical': [('System Architecture',
   'Technical specifications for system architecture.',
   'Engineer'),
  ('Tech Roadmap',
   'Roadmap for upcoming tech projects and improvements.',
   'Manager')]}

In [11]:
import sqlite3
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Initialize the embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')
embedding_dim = model.get_sentence_embedding_dimension()

# Initialize the Faiss index
index = faiss.IndexFlatL2(embedding_dim)

# Dictionary to map document IDs to metadata for retrieval
doc_metadata = {}

# Function to embed and add documents to the Faiss index
def embed_documents_from_db(department):
    db_name = f"{department.lower()}_db.sqlite"
    conn = sqlite3.connect(db_name)
    c = conn.cursor()
    
    # Fetch all documents from the department's database
    c.execute("SELECT id, title, content, access_level FROM documents")
    documents = c.fetchall()
    
    for doc_id, title, content, access_level in documents:
        # Generate an embedding for the content
        embedding = model.encode(content)
        
        # Add embedding to the Faiss index
        index.add(np.array([embedding], dtype=np.float32))
        
        # Store metadata in a dictionary for reference during retrieval
        doc_metadata[len(doc_metadata)] = {
            "doc_id": doc_id,
            "title": title,
            "department": department,
            "access_level": access_level,
            "content": content
        }
    
    conn.close()
    print(f"Documents from {department} embedded and added to Faiss index.")

# List of departments to process
departments = ["Finance", "HR", "Technical"]

# Process each department and embed documents
for dept in departments:
    embed_documents_from_db(dept)

print("All documents embedded and stored in Faiss index.")


ModuleNotFoundError: No module named 'sentence_transformers'