<a href="https://colab.research.google.com/github/jyotidabass/Chatbot/blob/main/chatbot_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Connect to the SQLite database**

In [1]:
import sqlite3
import numpy as np
import click
from sklearn.metrics.pairwise import cosine_similarity

def connect_to_db(db_path='workers.db'):
    """Open a connection to the SQLite worker database.

    Args:
        db_path: Database location handed to ``sqlite3.connect``. Defaults to
            ``'workers.db'`` (the value that used to be hard-coded); pass
            ``':memory:'`` for a throwaway in-memory database.

    Returns:
        An open ``sqlite3.Connection``. The caller owns it and is responsible
        for closing it.
    """
    return sqlite3.connect(db_path)

def create_schema(conn):
    """Create the ``Worker`` table on *conn* unless it already exists.

    Args:
        conn: Open ``sqlite3`` connection on which the DDL statement is run.
    """
    # One row per worker; ``skills`` is stored as plain TEXT and
    # ``full_time`` as an INTEGER.
    worker_table_ddl = '''CREATE TABLE IF NOT EXISTS Worker
                 (id INTEGER PRIMARY KEY,
                 name TEXT,
                 experience TEXT,
                 skills TEXT,
                 full_time INTEGER,
                 hourly_rate INTEGER,
                 company TEXT)'''

    cursor = conn.cursor()
    cursor.execute(worker_table_ddl)

# **Helper functions**

In [2]:
def create_sample_data(conn):
    """Insert three demo workers into the ``Worker`` table and commit.

    Args:
        conn: Open ``sqlite3`` connection whose database already contains the
            ``Worker`` table.
    """
    insert_sql = "INSERT INTO Worker (name, experience, skills, full_time, hourly_rate, company) VALUES (?, ?, ?, ?, ?, ?)"

    # (name, experience, skills, full_time, hourly_rate, company)
    sample_workers = [
        ('John Doe', '5 years', 'Python,SQLAlchemy,Flask', 1, 20, 'Acme Corp'),
        ('Jane Smith', '3 years', 'Java,Spring,MySQL', 0, 18, 'ABC Widgets'),
        ('Bob Johnson', '10 years', 'JavaScript,React,Node.js', 1, 25, 'Cloud9 Solutions'),
    ]

    cursor = conn.cursor()
    cursor.executemany(insert_sql, sample_workers)
    conn.commit()

def get_scalar_values(query):
    """Return the scalar filters (employment type, budget, skills) for *query*.

    Placeholder: *query* is not inspected yet and the same fixed values are
    returned for every input. Replace this with your custom implementation.

    Args:
        query: The user's search text.

    Returns:
        A new dict with the keys ``'full_time'``, ``'budget'`` and ``'skills'``.
    """
    extracted = dict(full_time=True, budget=10000, skills='Python,React,AWS')
    return extracted

def get_semantic_values(query):
    """Return the free-text (semantic) criteria for *query*.

    Placeholder: *query* is not inspected yet and the same fixed value is
    returned for every input. Replace this with your custom implementation.

    Args:
        query: The user's search text.

    Returns:
        A new dict with the single key ``'company'``.
    """
    extracted = dict(company='big tech company')
    return extracted

# **Main function**

In [3]:
@click.command()
@click.argument('query')
@click.option('--embedding_file', default='embedding.npy')
def search_workers(query, embedding_file):
    """Search the worker database for QUERY and print each matching row.

    The embeddings are read from the NumPy file given by --embedding_file.
    """
    # Load the embeddings before opening the database so that a missing or
    # unreadable file cannot leave a connection open.
    embeddings = np.load(embedding_file)

    conn = connect_to_db()
    try:
        # Process query and perform search
        results = perform_search(query, embeddings, conn)

        # Print while the connection is still open: the search may hand back
        # a live cursor that cannot be iterated after close().
        for result in results:
            print(result)
    finally:
        # Release the SQLite handle even if the search or printing fails.
        conn.close()
    print("\n")

def perform_search(query, embeddings, conn):
    """Run the vector search and the scalar filter for *query*.

    Args:
        query: The user's search text.
        embeddings: Embedding array forwarded unchanged to ``vector_search``.
        conn: Open ``sqlite3`` connection used by both search stages.

    Returns:
        Whatever ``combine_search_results`` returns for the merged filters.
    """
    scalar_values = get_scalar_values(query)
    semantic_values = get_semantic_values(query)

    results = vector_search(semantic_values, embeddings, conn)

    # combine_search_results reads a 'company' filter, but that key is only
    # produced by get_semantic_values; passing scalar_values alone raised
    # KeyError. Merge both dicts, letting scalar values win on any overlap.
    filters = {**semantic_values, **scalar_values}
    return combine_search_results(filters, results, conn)

def vector_search(semantic_values, embeddings, conn):
    """Look up workers for the query via embedding similarity.

    Args:
        semantic_values: Dict of semantic criteria passed in by
            ``perform_search``; it is not read anywhere in this body.
        embeddings: Array with a ``reshape`` method (``search_workers`` passes
            the result of ``np.load``); it is flattened into one row vector.
        conn: Open ``sqlite3`` connection.

    Returns:
        The cursor returned by the final ``execute`` call.
    """
    c = conn.cursor()

    # NOTE(review): generate_embedding is not defined in the visible part of
    # this file, and the Worker table built by create_schema has no
    # "embeddings" column -- confirm where both are supposed to come from.
    worker_embeddings = [generate_embedding(worker) for worker in c.execute("SELECT embeddings FROM Worker").fetchall()]
    # Compare the single query row against every worker embedding; presumably
    # this yields one score per worker -- TODO confirm shapes.
    similarities = cosine_similarity(embeddings.reshape(1, -1), worker_embeddings).flatten()
    # NOTE(review): the whole similarities array is bound to the single "?"
    # in ORDER BY, and no Embeddings table is created in the visible source.
    # This query likely does not rank by similarity as intended -- verify.
    results = c.execute("SELECT name, experience, full_time, hourly_rate, company FROM Worker WHERE id IN (SELECT worker_id FROM Embeddings ORDER BY ? LIMIT 5)", (similarities,))
    return results

def combine_search_results(scalar_values, results, conn):
    """Filter workers by the scalar constraints extracted from the query.

    The previous query used ``? ILIKE ANY(skills)``, which is PostgreSQL
    syntax and a syntax error in SQLite, and it required a ``'company'`` key
    that ``get_scalar_values`` never supplies. This version emits plain
    SQLite and treats the company filter as optional.

    Args:
        scalar_values: Dict with the keys ``'full_time'``, ``'budget'`` and
            ``'skills'`` (comma-separated skill names). An optional
            ``'company'`` key adds an exact-match company filter.
        results: Output of the vector search. Accepted for interface
            compatibility; as before, it is not consulted.
        conn: Open ``sqlite3`` connection holding the ``Worker`` table.

    Returns:
        The ``sqlite3.Cursor`` of the executed SELECT, yielding
        ``(name, experience, full_time, hourly_rate, company)`` rows for
        workers that match the employment type, cost at most ``budget`` per
        hour and have at least one of the requested skills. When no skill is
        requested, no skill filter is applied.

    Raises:
        KeyError: If ``'full_time'``, ``'budget'`` or ``'skills'`` is missing.
    """
    c = conn.cursor()

    full_time = scalar_values['full_time']
    budget = scalar_values['budget']
    skills = scalar_values['skills']
    company = scalar_values.get('company')

    conditions = ["full_time = ?", "hourly_rate <= ?"]
    params = [full_time, budget]

    wanted_skills = [s.strip() for s in (skills or '').split(',') if s.strip()]
    if wanted_skills:
        # Wrap the stored list in commas so every skill is delimited on both
        # sides; 'java' then cannot match 'JavaScript'. SQLite's LIKE is
        # case-insensitive for ASCII, which stands in for ILIKE.
        skill_match = "(',' || skills || ',') LIKE ? ESCAPE '\\'"
        conditions.append("(" + " OR ".join([skill_match] * len(wanted_skills)) + ")")
        for skill in wanted_skills:
            # Neutralise LIKE wildcards that appear literally in a skill name.
            escaped = skill.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
            params.append('%,' + escaped + ',%')

    if company is not None:
        conditions.append("company = ?")
        params.append(company)

    # Only fixed SQL fragments are concatenated; every value is bound.
    sql = ("SELECT name, experience, full_time, hourly_rate, company FROM Worker WHERE "
           + " AND ".join(conditions))
    return c.execute(sql, params)

if __name__ == '__main__':
    # Use one connection for both setup steps and always close it; the
    # previous code opened two separate connections and closed neither.
    conn = connect_to_db()
    try:
        create_schema(conn)
        create_sample_data(conn)
    finally:
        conn.close()

    # Generate embeddings and save them to the database
    # Replace this with your custom implementation
    #generate_embeddings(connect_to_db())

    #search_workers(query='your query here', embedding_file='path/to/embedding.npy')