In [None]:
import pandas as pd
import numpy as np
import re
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import openai


# Placeholder credential: the key is empty in the source and has to be
# supplied before the OpenAI client is used.
openai.api_key = ""

# Load the three knowledge-base tables once, at import time. The generator is
# consumed in order, so the CSVs are read orders -> shipments -> products.
orders_df, shipments_df, products_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/Users/krishilparikh/Desktop/SynCom-FinanceMitra/KB/KBorders.csv',
        '/Users/krishilparikh/Desktop/SynCom-FinanceMitra/KB/KBshipments.csv',
        '/Users/krishilparikh/Desktop/SynCom-FinanceMitra/KB/KBproducts.csv',
    )
)

# Shared sentence encoder used both to index the tables and to embed queries.
model = SentenceTransformer('all-MiniLM-L6-v2')


def embed_data(df, columns):
    """Encode each row of ``df`` into one embedding vector.

    For every row, the values of ``columns`` are turned into strings (missing
    values become the empty string) and joined with single spaces; the
    resulting texts are passed to the module-level ``model``.

    Args:
        df: DataFrame holding the rows to embed.
        columns: List of column labels whose values form each row's text.

    Returns:
        The encoder output moved to CPU and converted with ``.numpy()``
        (presumably one row per input row — confirm against the
        sentence-transformers documentation).
    """
    selected = df[columns].fillna("").astype(str)
    row_texts = selected.agg(" ".join, axis=1).tolist()
    encoded = model.encode(row_texts, convert_to_tensor=True)
    return encoded.cpu().numpy()

# Precompute row embeddings for each table from its descriptive columns.
orders_embeddings = embed_data(
    orders_df,
    ["Order ID", "Customer Name", "Product Description", "Order Status"],
)
shipments_embeddings = embed_data(
    shipments_df,
    ["Tracking ID", "Shipping Address", "Shipment Status", "Product Description"],
)
products_embeddings = embed_data(
    products_df,
    ["Product ID", "Product Name", "Product Description", "Price", "Product Category"],
)

# Category name -> embedding matrix. Key order matches ``dataframes`` below.
all_embeddings = dict(
    orders=orders_embeddings,
    shipments=shipments_embeddings,
    products=products_embeddings,
)

# Category name -> source table. Iteration order decides which table the
# substring search inspects first.
dataframes = dict(
    orders=orders_df,
    shipments=shipments_df,
    products=products_df,
)

# Running list of chat messages (role/content dicts) shared by all turns.
conversation_history = list()

# Function to search for relevant data
def search_relevant_data(user_query):
    """Find the knowledge-base row that best matches ``user_query``.

    The lookup runs in two stages:

    1. A case-insensitive *literal* substring search over every column of
       every table in ``dataframes`` (dict order, then column order). The
       first row of the first column containing the query is returned.
    2. If no cell contains the query (or the query is blank), the query is
       embedded with ``model`` and compared by cosine similarity against the
       precomputed ``all_embeddings``; the highest-scoring row across all
       categories is returned.

    Args:
        user_query: Raw text typed by the user.

    Returns:
        A ``(row, category)`` tuple: ``row`` is the matching ``pandas.Series``
        and ``category`` is the key of ``dataframes`` it was taken from.
    """
    needle = user_query.strip()

    # A blank needle is a substring of every cell, which would always pick the
    # first row of the first table; skip straight to the embedding search.
    if needle:
        for category, df in dataframes.items():
            for col in df.columns:
                # regex=False: the query is free-form user text, so characters
                # such as "(", "+", "?" must be matched literally rather than
                # interpreted as (possibly invalid) regular-expression syntax.
                hits = df[col].astype(str).str.contains(
                    needle, case=False, na=False, regex=False
                )
                if hits.any():
                    # Only the first hit is ever used, so stop scanning here.
                    return df[hits].iloc[0], category

    # No literal match: fall back to semantic similarity over all tables.
    query_embedding = model.encode([user_query], convert_to_tensor=True).cpu().numpy()
    similarities = {
        category: cosine_similarity(query_embedding, embeddings).flatten()
        for category, embeddings in all_embeddings.items()
    }

    # Pick the category whose best row scores highest, then that row.
    best_category = max(similarities, key=lambda cat: np.max(similarities[cat]))
    best_index = np.argmax(similarities[best_category])
    return dataframes[best_category].iloc[best_index], best_category

# Function to generate a response using OpenAI API
def generate_response(prompt):
    """Send a chat message list to the OpenAI chat endpoint and return the reply.

    Args:
        prompt: List of message dicts (``role`` / ``content``) forwarded
            unchanged as the ``messages`` argument.

    Returns:
        The text content of the first choice in the response, with leading
        and trailing whitespace stripped.
    """
    request = {
        "model": "gpt-3.5-turbo",
        "messages": prompt,
        "max_tokens": 200,
    }
    completion = openai.ChatCompletion.create(**request)
    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()

# Function to build OpenAI messages from conversation history
def build_prompt(user_query, relevant_data, category):
    """Append this turn's context and question to the shared history.

    Two messages are added to the module-level ``conversation_history``, in
    order: a ``system`` message carrying ``relevant_data`` rendered as a dict,
    then a ``user`` message carrying ``user_query``.

    Args:
        user_query: Text typed by the user for this turn.
        relevant_data: Object exposing ``to_dict()`` (a row from one of the
            knowledge-base tables at the call site in this file).
        category: Name of the table the row came from; interpolated into the
            system message.

    Returns:
        The ``conversation_history`` list itself (not a copy).
    """
    turn_messages = [
        {
            "role": "system",
            "content": f"Here is the relevant data from the {category} category:\n{relevant_data.to_dict()}",
        },
        {"role": "user", "content": user_query},
    ]
    conversation_history.extend(turn_messages)
    return conversation_history

# Main function to process the user query
def process_query(user_input):
    """Handle one chat turn: retrieve data, show it, and ask the model.

    Args:
        user_input: Text typed by the user.

    Returns:
        The assistant's reply text. The reply is also appended to the
        module-level ``conversation_history`` as an ``assistant`` message.
    """
    row, source = search_relevant_data(user_input)

    # Echo the retrieved row as a one-line table so the user can see what the
    # answer is based on.
    heading = f"\nRelevant Data from {source.capitalize()}:\n"
    print(heading)
    table = row.to_frame().T.to_string(index=False)
    print(table)

    messages = build_prompt(user_input, row, source)
    reply = generate_response(messages)

    # Record the answer so later turns include it as context.
    conversation_history.append({"role": "assistant", "content": reply})
    return reply


# Interactive console loop: read a line, answer it, repeat until the user
# types "exit"/"quit" or closes the input stream.
if __name__ == "__main__":
    print("Bot: How can I assist you today?")
    while True:
        try:
            # Strip so that inputs like " exit " are still recognised.
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C or a closed stdin: leave cleanly instead of
            # dumping a traceback.
            print("\nBot: Goodbye!")
            break

        if not user_input:
            # Nothing to search for or send to the model; prompt again.
            continue

        if user_input.lower() in ("exit", "quit"):
            print("Bot: Goodbye!")
            break

        response = process_query(user_input)
        print(f"\nBot: {response}\n")
