In [1]:
from sqlalchemy import create_engine, text, inspect
from llama_index.core import SQLDatabase
# Path to the SQLite database file.
# NOTE: this is an absolute, machine-specific path; adjust it when running elsewhere.
db_file_path = "/Users/skylerwilson/Desktop/PartsWise/co-pilot-v1/data/databases/partswise_island_moto.db"

# Create an engine instance
connection_string = f"sqlite:///{db_file_path}"
engine = create_engine(connection_string)

# Smoke-test the connection with raw SQL: print the first three `parts` rows.
with engine.connect() as connection:
    result = connection.execute(text("SELECT * FROM parts LIMIT 3"))
    for row in result:
        print(row)

tables = ['sales', 'parts']
# Alternative: restrict the wrapper to `tables` and sample more rows.
# sql_database = SQLDatabase(engine, include_tables=tables,sample_rows_in_table_info=5)
sql_database = SQLDatabase(engine, sample_rows_in_table_info=2)  # 2 sample rows per table in the schema description

# List the tables through the public accessor instead of the private
# `_all_tables` attribute, which is an implementation detail of SQLDatabase.
list(sql_database.get_usable_table_names())

(0, 0, 0.0, 0.0, 6, 0.0, 0, -100.0, 'obsolete', 1, 0, 0.0, 1.0, 0.0, 13, 0.0, 0.0, 'bearing ntn 6203llu/2a 40x17x12', '004-153', 9.99, 0, 0.0, 'motovan', 0.0)
(0, 0, 0.0, 0.0, 3, 1.0, 0, -45.99, 'non-essential', 1, 0, 0.0052214272, 0.75, 1.0, 5, 0.0, 0.0, 'hi-flo o-filt hon 15412-hm5-a1', '004hf113', 4.99, 0, 360.0, 'thibault canada', 0.0)
(0, 0, 0.0, 0.0, 9, 1.0, 0, 0.11, 'nearing_obsolete', 1, 0, 0.2523689809, 0.7756696429, 1.0, 8, 0.0, 0.0, 'new style universal cruise ctr', '0069922bc', 18.99, 0, 360.0, 'thibault canada', 0.0)


['sales', 'parts']

In [2]:
import os
from getpass import getpass

# Never store the API key in the notebook: it ends up in version control and
# in every shared copy. Any key that was previously pasted here in plain text
# must be treated as compromised and revoked.
# Use the key already present in the environment, otherwise prompt for it
# without echoing it into the cell output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")


In [20]:
import logging
import os
import re

import numpy as np
import openai
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sqlalchemy import create_engine, text
from llama_index.core import SQLDatabase, PromptTemplate, VectorStoreIndex
from llama_index.core.objects import (
    SQLTableNodeMapping,
    ObjectIndex,
    SQLTableSchema,
)
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index.llms.openai import OpenAI



class NLQueryEngine:
    """Natural-language query front end for the ``sales`` and ``parts`` tables.

    Wraps a LlamaIndex ``SQLTableRetrieverQueryEngine`` built on the given
    SQLAlchemy engine, and prefixes every prompt with a hand-written context
    string (domain conventions plus both table descriptions).
    """

    def __init__(self, engine):
        """Build the SQL database wrapper, the table index and the query engine.

        Args:
            engine: SQLAlchemy engine connected to the inventory database.

        Raises:
            KeyError: If the ``OPENAI_API_KEY`` environment variable is not set.
        """
        # Resolve the API key first so a missing key fails immediately,
        # before any OpenAI-backed component below is constructed.
        openai.api_key = os.environ["OPENAI_API_KEY"]
        self.engine = engine
        self.sql_database, self.table_schema_objs, self.obj_index = self._initialize_table_objects()
        self.context_str_combined = self._create_context_str()
        self.query_engine = self._create_query_engine()

    def _initialize_table_objects(self):
        """Create the SQLDatabase wrapper, the table schema objects and their index.

        Returns:
            tuple: ``(sql_database, table_schema_objs, obj_index)``.
        """
        table_contexts = {
            'sales': "Provides time-based sales count data for individual parts. Use for part-specific sales queries. No price column",
            'parts': "Provides detailed inventory data for individual parts. Use part-specific queries. Combine with 'sales' tables for temporal financial performance"
        }
        # Derive the included tables from the context mapping so the two
        # lists cannot drift apart.
        sql_database = SQLDatabase(
            self.engine,
            sample_rows_in_table_info=2,
            include_tables=list(table_contexts),
        )

        table_schema_objs = [
            SQLTableSchema(table_name=name, context_str=context)
            for name, context in table_contexts.items()
        ]
        table_node_mapping = SQLTableNodeMapping(sql_database)

        obj_index = ObjectIndex.from_objects(
            table_schema_objs,
            table_node_mapping,
            VectorStoreIndex,
        )
        return sql_database, table_schema_objs, obj_index

    def _create_context_str(self):
        """Return the prompt prefix: domain rules followed by the table descriptions."""
        context_str = (
            "Inventory categories: essential, non-essential, nearing obsolescence, obsolete. "
            "Ensure detailed, relevant responses, including 'supplier_name', 'price', and 'quantity'. "
            "Access 'supplier_name' flexibly e.g., ('%bmw'). "
            "All text is lowercase. "
            "Convert percentages to decimals (e.g., '50%' as '0.5'). "
            "Use JOINs prefaced with table names for combining multiple tables. "
            "Calculate COGS as the sum of costs directly associated with goods sold. "
            "Calculate Gross Margin Percentage/Gross Margin as (Sales Revenue - COGS) / Sales Revenue * 100. "
            "Order months chronologically like a calendar (e.g., january, february, ..., december) in query results."
        )
        table_context_str = self._get_table_context_str()
        return context_str + "\n\n" + table_context_str

    def _get_table_context_str(self):
        """Join each table's schema info (plus its description, if any) with blank lines."""
        context_strs = []
        for table_schema_obj in self.table_schema_objs:
            table_info = self.sql_database.get_single_table_info(table_schema_obj.table_name)
            if table_schema_obj.context_str:
                table_info += f" The table description is: {table_schema_obj.context_str}"
            context_strs.append(table_info)
        return "\n\n".join(context_strs)

    def _create_query_engine(self):
        """Create the retriever-backed text-to-SQL engine with the combined context prefix."""
        return SQLTableRetrieverQueryEngine(
            sql_database=self.sql_database,
            table_retriever=self.obj_index.as_retriever(similarity_top_k=1),
            synthesize_response=True,
            llm=OpenAI(temperature=0.1, model="gpt-3.5-turbo-0125"),
            context_str_prefix=self.context_str_combined
        )

    def query(self, user_input, return_sql=False):
        """Run a natural-language query.

        Args:
            user_input: The question to answer.
            return_sql: When True, return only the generated SQL string.

        Returns:
            The engine's response object, or, when ``return_sql`` is True, the
            ``'sql_query'`` entry of the response metadata (``''`` if absent).
        """
        response = self.query_engine.query(user_input)
        if return_sql:
            # A response without metadata would otherwise raise AttributeError.
            metadata = response.metadata or {}
            return metadata.get('sql_query', '')
        return response

def output_sql_query_to_df(result_data, columns):
    """Wrap fetched SQL rows in a DataFrame labelled with ``columns``.

    Args:
        result_data: Sequence of result rows.
        columns: Column labels, one per field in each row.

    Returns:
        pandas.DataFrame: One row per entry of ``result_data``.
    """
    frame = pd.DataFrame(data=result_data, columns=columns)
    return frame

def generate_plotly_visual(df, chart_type, title, x_axis_label, y_axis_label, hover_labels):
    """Build a Plotly figure from a query-result DataFrame.

    The first column of ``df`` supplies the x values (or pie-slice names) and
    the remaining columns are plotted as y series; a pie chart uses only the
    second column as its values.

    Args:
        df: DataFrame with at least two columns.
        chart_type: ``"line"``, ``"bar"``, ``"scatter"`` or ``"pie"``; any
            other value falls back to a scatter plot.
        title: Chart title.
        x_axis_label: X-axis title.
        y_axis_label: Y-axis title.
        hover_labels: Iterable of column names to show on hover (a list, or a
            dict keyed by column name), or ``None`` for no custom hover.
            Names that are not columns of ``df`` are ignored.

    Returns:
        The configured Plotly figure.
    """
    # Callers may pass None when no hover columns were requested; iterating a
    # dict yields its keys, so both lists and dicts are accepted.
    requested_labels = hover_labels if hover_labels is not None else ()
    hover_data = [col for col in requested_labels if col in df.columns]

    # Only override the hover when there is something to show. Setting an
    # empty hovertemplate would blank out Plotly's default hover text.
    hover_kwargs = {}
    if hover_data:
        hover_template = "<br>".join([f"{col}: %{{customdata[{i}]}}" for i, col in enumerate(hover_data)])
        hover_kwargs = {
            "customdata": df[hover_data].values,
            "hovertemplate": hover_template + "<extra></extra>",
        }

    x_column = df.columns[0]
    y_columns = df.columns[1:]

    if chart_type == "line":
        fig = px.line(df, x=x_column, y=y_columns, title=title)
        if hover_kwargs:
            fig.update_traces(**hover_kwargs)

    elif chart_type == "bar":
        fig = go.Figure()
        for col in y_columns:
            fig.add_trace(go.Bar(x=df[x_column], y=df[col], name=col, **hover_kwargs))
        fig.update_layout(barmode='group')

    elif chart_type == "pie":
        fig = px.pie(df, names=x_column, values=df.columns[1], title=title)
        if hover_kwargs:
            fig.update_traces(**hover_kwargs)

    else:
        # "scatter" and any unrecognised chart type share the scatter rendering.
        fig = px.scatter(df, x=x_column, y=y_columns, title=title)
        if hover_kwargs:
            fig.update_traces(**hover_kwargs)

    # Layout shared by every chart type. `title_font` replaces the deprecated
    # `titlefont` axis attribute.
    axis_style = dict(
        showline=True,
        linecolor='Gray',
        linewidth=1,
        tickformat=',',
        title_standoff=10,
        tickfont=dict(size=14),
        title_font=dict(size=16),
    )
    fig.update_layout(
        title={'text': title, 'y':0.95, 'x':0.5, 'xanchor': 'center', 'yanchor': 'top'},
        xaxis_title=x_axis_label,
        yaxis_title=y_axis_label,
        hovermode="closest",
        plot_bgcolor='white',
        paper_bgcolor='white',
        font=dict(family="Arial, sans-serif", size=12, color="Gray"),
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
        legend=dict(x=0.5, y=-0.175, xanchor='center', yanchor='top', orientation='h', bgcolor='rgba(255,255,255,0.8)', bordercolor='Gray', borderwidth=1, font=dict(size=16)),
        margin=dict(l=50, r=50, t=50, b=0)
    )

    return fig




def extract_chart_details(query_engine, user_input):
    """Ask the query engine to propose chart settings for a user request.

    Sends a formatting prompt built from ``user_input`` through
    ``query_engine.query`` and parses the "<label>: <value>" lines of the
    answer.

    Args:
        query_engine: Object whose ``query(prompt)`` returns a response with a
            ``response`` text attribute.
        user_input: The user's original request.

    Returns:
        tuple: ``(chart_type, title, x_axis_label, y_axis_label, hover_data)``.
        Fields missing or blank in the answer keep their defaults
        (``"bar"``, ``"Generated Chart"``, ``"X Axis"``, ``"Y Axis"``, ``[]``).
    """
    text_qa_template_str = (
        "Given the query: '{query_str}', extract the following details, adding hover data to enhance the utility of the charts:\n"
        "1. Chart type (e.g., bar, line, pie, etc.)\n"
        "2. Title for the chart\n"
        "3. X-axis label\n"
        "4. Y-axis label\n"
        "5. Additional columns for hover data\n"
        "\n"
        "Provide the details in the following format:\n"
        "Chart type: <type>\n"
        "Title: <title>\n"
        "X-axis label: <label>\n"
        "Y-axis label: <label>\n"
        "Hover data: <column1>, <column2>, ...\n"
    )
    text_qa_template = PromptTemplate(text_qa_template_str)

    prompt = text_qa_template.format(query_str=user_input)
    response = query_engine.query(prompt)
    # Guard against a response object that carries no text.
    response_text = (response.response or "").strip()
    logging.info(f"LLM Response: {response_text}")

    # Defaults used for every field the answer does not provide.
    chart_type = "bar"
    title = "Generated Chart"
    x_axis_label = "X Axis"
    y_axis_label = "Y Axis"
    hover_data = []

    # Chart types the plotting code knows how to draw.
    known_chart_types = ("line", "bar", "scatter", "pie")

    for line in response_text.split('\n'):
        if ":" not in line:
            continue
        value = line.split(":", 1)[1].strip()
        if not value:
            # A blank value should not overwrite the default.
            continue
        if "Chart type:" in line:
            requested = value.lower()
            # Reduce answers such as "bar chart" to the plain keyword.
            chart_type = next(
                (known for known in known_chart_types if known in requested),
                requested,
            )
        elif "Title:" in line:
            title = value
        elif "X-axis label:" in line:
            x_axis_label = value
        elif "Y-axis label:" in line:
            y_axis_label = value
        elif "Hover data:" in line:
            # Normalise to snake_case column names and drop empty entries
            # (e.g. from a trailing comma).
            hover_data = [
                col.strip().lower().replace(' ', '_')
                for col in value.split(',')
                if col.strip()
            ]
    return chart_type, title, x_axis_label, y_axis_label, hover_data


def output_sql_query_to_graph(result_data, columns, chart_type, title, x_axis_label, y_axis_label, hover_labels):
    """Render SQL result rows as a Plotly chart and display it.

    Args:
        result_data: Sequence of result rows.
        columns: Column labels for ``result_data``.
        chart_type: Chart type forwarded to ``generate_plotly_visual``.
        title: Chart title.
        x_axis_label: X-axis title.
        y_axis_label: Y-axis title.
        hover_labels: Column names to show on hover; may be empty or ``None``.
    """
    result_df = pd.DataFrame(result_data, columns=columns)
    # Always hand the plotting function a plain list. Passing None when no
    # hover columns were requested made it fail while iterating the labels.
    hover_columns = list(hover_labels) if hover_labels else []
    fig = generate_plotly_visual(result_df, chart_type, title, x_axis_label, y_axis_label, hover_columns)
    fig.show()

# Longer keywords are distinctive enough to match anywhere in the text
# (e.g. "visualize", "charts", "scatterplot").
_VISUAL_SUBSTRINGS = ("visual", "chart", "graph", "plot")
# Short keywords must match as whole words, otherwise ordinary words such as
# "barcode", "online" or "baseline" would be classified as chart requests.
_VISUAL_WORDS_RE = re.compile(r"\b(?:bar|pie|line)s?\b")


def parse_intent(user_input):
    """Classify a request as a chart request or a plain-text question.

    Args:
        user_input: The user's request.

    Returns:
        str: ``"visual"`` when the request mentions a charting keyword,
        otherwise ``"text"``.
    """
    lowered = user_input.lower()
    if any(keyword in lowered for keyword in _VISUAL_SUBSTRINGS):
        return "visual"
    if _VISUAL_WORDS_RE.search(lowered):
        return "visual"
    return "text"

def query_output(query_engine, user_input):
    """Answer a request as a chart, a DataFrame or a text answer.

    The request is sent to the query engine once; the SQL it generated is then
    executed to obtain the raw rows.

    Args:
        query_engine: ``NLQueryEngine`` instance (provides ``query`` and ``engine``).
        user_input: The user's request.

    Returns:
        ``None`` after displaying a chart for visual requests; a DataFrame
        when the result has five or more rows; otherwise the engine's answer
        as a string.
    """
    intent = parse_intent(user_input)
    # One LLM round trip only: the same response supplies both the SQL and the
    # synthesized answer, so the text answer always matches the executed SQL.
    response = query_engine.query(user_input)
    sql_query = (response.metadata or {}).get('sql_query', '')
    if not sql_query:
        # Nothing to execute; fall back to whatever the engine answered.
        return str(response)

    # NOTE(review): this executes LLM-generated SQL verbatim. Consider a
    # read-only connection or a SELECT-only check before running it.
    with query_engine.engine.connect() as connection:
        result = connection.execute(text(sql_query))
        result_data = result.fetchall()
        columns = list(result.keys())
    logging.info(f"Executed SQL Query: {sql_query}")
    logging.info(f"Resulting Data: {result_data}")

    if intent == "visual":
        chart_type, title, x_axis_label, y_axis_label, hover_labels = extract_chart_details(query_engine, user_input)
        output_sql_query_to_graph(result_data, columns, chart_type, title, x_axis_label, y_axis_label, hover_labels)
        return None

    if len(result_data) >= 5:
        # Larger results are easier to read as a table than as prose.
        return output_sql_query_to_df(result_data, columns)
    return str(response)

def main(db_file_path=None, user_input=None):
    """Run one request end to end and print the result.

    Args:
        db_file_path: Path to the SQLite database. Defaults to the original
            machine-specific location.
        user_input: Request to run. Defaults to the monthly bar-chart prompt.
    """
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    if db_file_path is None:
        # NOTE: absolute, machine-specific default; pass `db_file_path` to override.
        db_file_path = "/Users/skylerwilson/Desktop/PartsWise/co-pilot-v1/data/databases/partswise_island_moto.db"
    if user_input is None:
        # Other prompts used while testing:
        #user_input = "what brand had the most gross profit in October 2023"
        #user_input = "build a pie chart of each inventory catagory as a percentage of the total inventory and the total cost of each category?"
        #user_input = "Generate a graph that compares the average gross profit margin percentage month by month for the year 2023 for the following brands: BMW, Ducati, Triumph, and Polaris. Each brand should be represented separately in the results"
        user_input = "build a bar chart of each sales revenue, cogs, and gross profit for each month in 2023?"
    engine = create_engine(f"sqlite:///{db_file_path}")
    query_engine = NLQueryEngine(engine)
    response = query_output(query_engine, user_input)
    print(response)

if __name__ == "__main__":
    main()



2024-06-12 20:11:01,918 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-06-12 20:11:02,184 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-06-12 20:11:02,283 - INFO - > Table desc str: Inventory categories: essential, non-essential, nearing obsolescence, obsolete. Ensure detailed, relevant responses, including 'supplier_name', 'price', and 'quantity'. Access 'supplier_name' flexibly e.g., ('%bmw'). All text is lowercase. Convert percentages to decimals (e.g., '50%' as '0.5'). Use JOINs prefaced with table names for combining multiple tables. Calculate COGS as the sum of costs directly associated with goods sold. Calculate Gross Margin Percentage/Gross Margin as (Sales Revenue - COGS) / Sales Revenue * 100. Order months chronologically like a calendar (e.g., january, february, ..., december) in query results.

Table 'sales' has columns: id (INTEGER), part_number (VARCHAR), month (VARCHAR), year (INTEGER), qu

None


### Build Functions that are used to analyze inventory data and assess problem areas --> create tools from these functions

##### Key Problem Areas:
- High months no sale: stocked parts are not selling --> pricing issue, quantity issue, poor ordering, or cyclicality?
- Improper quantity: quantity below reorder point w/ no current orders --> poor management or long lead time?
- Large negative on hand: selling parts we don't have --> poor stocking
- Margin/pricing issues: low margin + high sales = need to increase price, and vice versa
- Large percentage of obsolescence: need to blow off these parts --> sell at a loss to recoup invested capital
- Low ROI: either the parts are not selling or they are too expensive to hold in inventory and should be ordered just-in-time
- Special orders with no sales: could mean we aren't charging the customer before ordering, or are special-ordering parts we shouldn't
- Stockouts of high sales volume parts: indicates a stockout of parts that have lots of sales --> poor inventory management
- High day supply
- High carrying cost

##### Define thresholds
- Margin below 40% but sales greater than the avg 12 month rolling sales for non-obsolete parts
- ROI below 25%
- Day supply greater than 65 days


In [None]:
#knowledge database build
#design: problem --> solution --> reference(s)


In [4]:
from datetime import datetime
from llama_index.core.tools import FunctionTool, QueryEngineTool, ToolMetadata
from sqlalchemy import create_engine, text
import pandas as pd

# Path to your database file (absolute, machine-specific)
db_file_path = "/Users/skylerwilson/Desktop/PartsWise/co-pilot-v1/data/databases/partswise_island_moto.db"
# Module-level engine: every analyze_* function in this cell reads this
# global `engine` rather than taking a connection as a parameter.
connection_string = f"sqlite:///{db_file_path}"
engine = create_engine(connection_string)

def analyze_roi(threshold=25):
    """Return the parts whose ``roi`` is below ``threshold``.

    Args:
        threshold: Upper bound (exclusive) compared against the ``roi`` column.

    Returns:
        pandas.DataFrame: part_number, description, quantity, price and roi
        of every matching row in ``parts``.
    """
    bind_params = {'threshold': threshold}
    with engine.connect() as connection:
        query = text("""
            SELECT
                part_number,
                description, 
                quantity,
                price,     
                roi
            FROM
                parts p
            WHERE roi < :threshold
        """)
        cursor = connection.execute(query, bind_params)
        rows = cursor.fetchall()
        return pd.DataFrame(rows, columns=cursor.keys())

def analyze_inventory():
    """Return the parts whose ``inventory_category`` is ``'obsolete'``.

    Returns:
        pandas.DataFrame: part_number, description, quantity, price and
        inventory_category of every obsolete row in ``parts``.
    """
    with engine.connect() as connection:
        query = text("""
            SELECT 
                part_number,
                description,
                quantity,
                price,
                inventory_category
            FROM parts
            WHERE inventory_category = 'obsolete'
        """)
        cursor = connection.execute(query)
        rows = cursor.fetchall()
        return pd.DataFrame(rows, columns=cursor.keys())

def analyze_days_supply(threshold=60):
    """Return non-obsolete parts whose ``annual_days_supply`` exceeds ``threshold``.

    Args:
        threshold: Lower bound (exclusive) compared against ``annual_days_supply``.

    Returns:
        pandas.DataFrame: part_number, description, quantity, price,
        inventory_category and annual_days_supply of every matching row.
    """
    # NOTE(review): the default here is 60, while the notebook's threshold
    # notes mention a day supply greater than 65 days. Confirm which is intended.
    bind_params = {'threshold': threshold}
    with engine.connect() as connection:
        query = text("""
            SELECT 
                part_number,
                description,
                quantity,
                price,
                inventory_category,
                annual_days_supply
            FROM parts
            WHERE inventory_category != 'obsolete'
            AND annual_days_supply > :threshold
        """)
        cursor = connection.execute(query, bind_params)
        rows = cursor.fetchall()
        return pd.DataFrame(rows, columns=cursor.keys())

def analyze_special_orders():
    """Return parts that were special-ordered this year but have no sales.

    A part qualifies when ``special_orders_ytd > 0`` and the total
    ``quantity_sold`` across its ``sales`` rows is zero, including parts that
    have no ``sales`` rows at all.

    Returns:
        pandas.DataFrame: part_number, description, quantity, price,
        special_orders_ytd and total_quantity_sold (always 0) per part.
    """
    with engine.connect() as connection:
        # LEFT JOIN keeps parts without any sales rows. An inner join would
        # silently drop them, even though they are the clearest "no sales" case.
        # COALESCE turns the NULL sum of such parts into 0.
        query = text("""
            SELECT
                p.part_number,
                p.description,
                p.quantity,
                p.price,
                p.special_orders_ytd,
                COALESCE(SUM(s.quantity_sold), 0) AS total_quantity_sold
            FROM parts p
            LEFT JOIN sales s ON p.part_number = s.part_number
            WHERE p.special_orders_ytd > 0
            GROUP BY p.part_number, p.description, p.quantity, p.price, p.special_orders_ytd
            HAVING COALESCE(SUM(s.quantity_sold), 0) = 0
        """)
        result = connection.execute(query)
        special_orders = pd.DataFrame(result.fetchall(), columns=result.keys())
    return special_orders

def analyze_stockouts(threshold_value=10):
    """Return parts whose sales dropped to nothing right after a high-sales month.

    For every part, each month with ``quantity_sold > threshold_value`` is
    compared with the part's next recorded month; the pair is reported when
    the next month has zero sales or does not exist.

    Args:
        threshold_value: Minimum (exclusive) units sold for a month to count
            as high-sales.

    Returns:
        pandas.DataFrame: One row per flagged (part, month) pair, with the part
        details plus the previous and next month, year and sales.
    """
    # `sales.month` holds lowercase month names (e.g. 'march'), so ordering by
    # the raw column would be alphabetical (april, august, december, ...) and
    # LEAD would not return the chronologically next month. Map the names to
    # calendar numbers first and order the window by those.
    query = text("""
        WITH SalesWithMonthNumber AS (
            SELECT
                part_number,
                month,
                year,
                quantity_sold,
                CASE LOWER(month)
                    WHEN 'january' THEN 1
                    WHEN 'february' THEN 2
                    WHEN 'march' THEN 3
                    WHEN 'april' THEN 4
                    WHEN 'may' THEN 5
                    WHEN 'june' THEN 6
                    WHEN 'july' THEN 7
                    WHEN 'august' THEN 8
                    WHEN 'september' THEN 9
                    WHEN 'october' THEN 10
                    WHEN 'november' THEN 11
                    WHEN 'december' THEN 12
                END AS month_number
            FROM sales
        ),
        PreviousMonthSales AS (
            SELECT
                part_number,
                month,
                year,
                quantity_sold,
                LEAD(quantity_sold) OVER (PARTITION BY part_number ORDER BY year, month_number) AS next_month_sales,
                LEAD(month) OVER (PARTITION BY part_number ORDER BY year, month_number) AS next_month,
                LEAD(year) OVER (PARTITION BY part_number ORDER BY year, month_number) AS next_year
            FROM SalesWithMonthNumber
        ),
        PotentialStockouts AS (
            SELECT
                part_number,
                month AS previous_month,
                year AS previous_year,
                quantity_sold AS previous_month_sales,
                next_month,
                next_year,
                next_month_sales AS current_month_sales
            FROM PreviousMonthSales
            WHERE quantity_sold > :high_sales_threshold
            AND (next_month_sales IS NULL OR next_month_sales = 0)
        )
        SELECT
            p.part_number,
            p.description,
            p.quantity,
            p.price,
            ps.previous_month,
            ps.previous_year,
            ps.previous_month_sales,
            ps.next_month,
            ps.next_year,
            ps.current_month_sales
        FROM
            parts p
        JOIN PotentialStockouts ps ON p.part_number = ps.part_number
    """)
    with engine.connect() as connection:
        result = connection.execute(query, {'high_sales_threshold': threshold_value})
        result_df = pd.DataFrame(result.fetchall(), columns=result.keys())
    return result_df

def analyze_negative_on_hand():
    """Return the parts whose ``negative_on_hand`` value is non-zero.

    Returns:
        pandas.DataFrame: part_number, description, quantity, price and
        negative_on_hand of every matching row in ``parts``.
    """
    query = text("""
        SELECT
            part_number,
            description,
            quantity,
            price, 
            negative_on_hand
        FROM parts
        WHERE negative_on_hand != 0
    """)
    with engine.connect() as connection:
        cursor = connection.execute(query)
        rows = cursor.fetchall()
        return pd.DataFrame(rows, columns=cursor.keys())

def compile_analysis_results():
    """Run every inventory analysis with its default settings.

    Returns:
        dict: Maps a result name to the DataFrame produced by the
        corresponding ``analyze_*`` function.
    """
    # A dict literal evaluates its values in source order, so the analyses
    # run in the order listed.
    return {
        'low_roi_parts': analyze_roi(),
        'obsolete_parts': analyze_inventory(),
        'high_days_supply_parts': analyze_days_supply(),
        'special_orders': analyze_special_orders(),
        'potential_stockouts': analyze_stockouts(),
        'negative_on_hand_parts': analyze_negative_on_hand(),
    }

# Run all analyses once and keep the result for the rest of the notebook.
results = compile_analysis_results()


#need to implement the knowledge database to provide strategic advice based on the compiled analysis

    
#Other tools for the co-pilot

def get_current_year_month():
    """
    Get the current year and month. For temporal queries like: "how many sales of part 123456 have sold this year so far?"

    Returns:
        tuple: A tuple containing the current year and month.
    """
    # NOTE(review): docstring and signature are kept verbatim because this
    # function is wrapped by FunctionTool.from_defaults, which presumably
    # builds the tool description from them. Confirm before rewording.
    now = datetime.now()
    return (now.year, now.month)

# Wrap the date helper as a FunctionTool so it can be offered as a co-pilot tool.
date_tool = FunctionTool.from_defaults(fn=get_current_year_month)


In [24]:
# *** For testing queries only ***

from sqlalchemy import create_engine, text
import pandas as pd

# Path to your database file (absolute, machine-specific)
db_file_path = "/Users/skylerwilson/Desktop/PartsWise/co-pilot-v1/data/databases/partswise_island_moto.db"

# Create an engine instance
connection_string = f"sqlite:///{db_file_path}"
engine = create_engine(connection_string)

#threshold_value = 25  # Define your threshold value here

# Define the query: supplier with the highest gross profit for March 2023,
# where gross profit is quantity_sold * (price - cost_per_unit) summed per supplier.
# The `\n` sequences sit inside a normal (non-raw) string, so Python turns them
# into real newlines before the SQL reaches the database.
query = text("""
    SELECT p.supplier_name, SUM((s.quantity_sold * p.price) - (s.quantity_sold * p.cost_per_unit)) AS gross_profit\nFROM sales s\nJOIN parts p ON s.part_number = p.part_number\nWHERE s.month = 'march' AND s.year = 2023\nGROUP BY p.supplier_name\nORDER BY gross_profit DESC\nLIMIT 1;
             """)

# Execute the query within a managed connection; rows are fetched before the
# connection is closed at the end of the `with` block.
with engine.connect() as connection:
    result = connection.execute(query)
    result_df = pd.DataFrame(result.fetchall(), columns=result.keys())

# Print the result
print(result_df)

      supplier_name  gross_profit
0  bmw group canada      19050.89


In [2]:
from datetime import datetime
from llama_index.core.tools import FunctionTool, QueryEngineTool, ToolMetadata

def get_current_year_month():
    """
    Get the current year and month.

    Returns:
        tuple: A tuple containing the current year and month.
    """
    # NOTE(review): a function with this name is also defined in another cell
    # of this notebook; whichever cell runs last wins. The docstring is kept
    # verbatim because FunctionTool.from_defaults presumably builds the tool
    # description from it. Confirm before rewording.
    now = datetime.now()
    return (now.year, now.month)

 
# Wrap the date helper as a FunctionTool so it can be offered as a co-pilot tool.
date_tool = FunctionTool.from_defaults(fn=get_current_year_month)
    
'''
Will incorporate this later, once the marketplace is finished. Will be used to execute bulk actions within the marketplace, like
getting all obsolete inventory from a particular brand, reducing the price by 50%, and preparing it for bulk upload onto the marketplace

# Define metadata for your tool
bulk_action_metadata = ToolMetadata(
    name="bulk_action_tool",
    description=("Agent that executes bulk actions like price adjustments on"
                " retrieved data that satisfies provided conditions about the parts."
                "Use a detailed plain text question as input to the tool.")
)

# Set up the QueryEngineTool with the sql_agent and its metadata
bulk_action_tool = [
    QueryEngineTool(
        query_engine=engine,
        metadata=bulk_action_metadata,
    ),
]
'''

# This will be adjusted to incorporate the knowledge database when I finish accumulating it
# Need to figure out if i can use multiple engines for these
# Metadata (name + description) that identifies the inventory analyzer tool.
inventory_analyzer_metadata = ToolMetadata (
    name="inventory_analyzer_tool",
    description=("Agent that analyzes inventory data including 'months_no_sale', 'obsolescence_risk', "
                 "'sales_to_stock_ratio', 'rolling_12_month_sales','rolling_3_month_sales', and '12_month_turnover' "
                 "and makes suggestions to reduce and prevent obsolescence."
                 "Use a detailed plain text question as input to the tool.")
)

# NOTE(review): `engine` is not defined in this cell. Elsewhere in the notebook
# that name is bound to a SQLAlchemy Engine (from `create_engine`), whereas
# QueryEngineTool presumably expects a LlamaIndex query engine. Confirm which
# object is intended here before using the tool.
inventory_analyzer_tool = [
    QueryEngineTool(
        query_engine=engine,
        metadata=inventory_analyzer_metadata,
    )
]