In [6]:
from sqlalchemy import inspect
from sqlalchemy import create_engine
from sqlalchemy import text
import pandas as pd

In [8]:

# Database Path
#db_file_path = r"C:\Users\vivia\co-pilot-v1\data\databases\partswise_island_moto.db"
db_file_path = r"C:\Users\vivia\co-pilot-v1\data\databases\parts_database.db"
engine = create_engine(f"sqlite:///{db_file_path}")

In [9]:
from json import loads, dumps

# getting parts info: part number, model, price, quantity, brand, description, and year
with engine.connect() as connection:
    query = text("""SELECT p.part_number, m.model_name, p.price, p.quantity, p.brand, p.description, y.year
                    FROM model_year my
                    JOIN models m ON my.model_id=m.id
                    JOIN parts p ON my.part_number=p.part_number
                    JOIN years y ON my.year_id=y.id
                 """)
    parts_df = connection.execute(query)
    
parts_df = pd.DataFrame(parts_df.fetchall(), columns=['partNumber', 'modelName', 'price', 'quantity', 'brand', 'description', 'year'])
parts_df.to_csv(r'C:\Users\vivia\co-pilot-v1\Notebooks\parts.csv', index=False)

In [10]:
parts_df

Unnamed: 0,partNumber,modelName,price,quantity,brand,description,year
0,48710822A,1200 S Touring Brasil,333.989990,1,Ducati,New Ducati OEM Multistrada Windscreen Clear,2019
1,46628532317,Premium Abs,79.989998,1,Bmw,BMW R1200 GS Liscence Plate Holder BMW R1200 G...,2021
2,82718841AA,Panigale V4 Sp2 30∞ Anniversario 916,15.650000,1,Ducati,"New, OEM stickers Ducati LH Reflector Support ...",2008
3,77214031A,Xdiavel Standard Standard\t,8.420000,1,Ducati,New in Origional Box with OE stickers Ducati H...,2021
4,2879769-266,Indian Pursuit Dark Horse Icon With Premium Pa...,799.989990,1,Polaris/Indian,"New in Origional Box Fuel Tank, Cruiser Black,...",2014
...,...,...,...,...,...,...,...
2395,57322212A,Monster 821 All Types,399.989990,1,Ducati,takeoff 2016 Ducati Monster OEM 821 Exhaust As...,2016
2396,59522411B,Scrambler Icon 800,300.079987,1,Ducati,New Takeoff Ducati Scrambler Seat,2020
2397,2879769-266,Indian Pursuit Elite,799.989990,1,Polaris/Indian,"New in Origional Box Fuel Tank, Cruiser Black,...",2014
2398,18127712864,R1200 Gs Adventure 10,891.080017,1,Bmw,Used takeoff BMW Rear Muffler,2008


In [11]:
import os
import openai

os.environ["OPENAI_API_KEY"] = "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"
openai.api_key = os.environ["OPENAI_API_KEY"]


In [56]:
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, Float, Integer
from sqlalchemy.orm import sessionmaker, relationship, declarative_base

Base = declarative_base()

class Part(Base):
    __tablename__ = 'parts'
    # name = Column(String)
    # category = Column(String)
    part_number = Column(String, primary_key=True, unique=True)
    description = Column(String)
    # additional_details = Column(String)
    quantity = Column(Integer)
    price = Column(Float)
    make = Column(String)
    # allowed_categories = Column(String)

class Model(Base):
    __tablename__ = 'models'
    id = Column(Integer, primary_key=True, autoincrement=True)
    part_number = Column(String, ForeignKey('parts.part_number'))
    model_name = Column(String, unique=True)

class Year(Base):
    __tablename__ = 'years'
    id = Column(Integer, primary_key=True, autoincrement=True)
    part_number = Column(String, ForeignKey('parts.part_number'))
    year = Column(Integer, unique=True)

# model year junction table
class ModelYear(Base):
    __tablename__ = 'model_year'
    id = Column(Integer, primary_key=True, autoincrement=True)
    part_number = Column(Integer, ForeignKey('parts.part_number'))
    model_id = Column(Integer, ForeignKey('models.id'), nullable=True)
    year_id = Column(Integer, ForeignKey('years.id'), nullable=True)


In [57]:
db_path = r'C:\Users\vivia\co-pilot-v1\Notebooks\bulk_upload.db'
engine = create_engine(f'sqlite:///{db_path}')
csv_path = r"C:\Users\vivia\co-pilot-v1\Notebooks\alberta_honda_data_synthetic.csv"

Base.metadata.drop_all(engine)

Base.metadata.create_all(engine)

Session = sessionmaker(bind=engine)
session = Session()

parts_df = pd.read_csv(csv_path)
parts_df = parts_df.drop_duplicates(subset=['part_number'])

parts_data = parts_df.to_dict('records')

In [58]:
# making parts table

for part in parts_data:
    new_part = Part(
        part_number = part['part_number'],
        description = part.get('description'),
        quantity = part.get('quantity'),
        price = part.get('price'),
        make = part.get('make')    
    )
    session.add(new_part)
    session.flush()

session.commit()

In [59]:

import numpy as np

def get_unique_elements(col):
    col_data = np.array(parts_df[col])
    unique_col = set()

    for entry in col_data:
        if "," in entry:
            separate_entries = entry.split(", ")
            for element in separate_entries:
                unique_col.add(element)
        else:
            unique_col.add(entry)
    
    return unique_col

# getting unique model names
unique_models = get_unique_elements('model')

# insert models into the database
for model in unique_models:
    new_model = Model(model_name=model)
    session.add(new_model)
session.commit()

In [60]:
# getting unique years
unique_years = get_unique_elements('year')

# insert years into the database
for year in unique_years:
    new_year = Year(year=year)
    session.add(new_year)
session.commit()

In [54]:
parts_data

[{'part_number': '04301-RNA-307',
  'description': 'P/S BELT',
  'quantity': 1,
  'price': 68.992,
  'make': 'Honda',
  'model': 'CR-Z, HR-V',
  'year': '2020'},
 {'part_number': '04711-3A0-A00ZZ',
  'description': 'FACE FR BUMPER',
  'quantity': 2,
  'price': 298.9375,
  'make': 'Honda',
  'model': 'Pilot',
  'year': '2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019'},
 {'part_number': '04711-T0A-A90ZZ',
  'description': 'FACE FR BPR(DOT)',
  'quantity': 1,
  'price': 336.625,
  'make': 'Honda',
  'model': 'Crosstour, Element, HR-V, Insight, Pilot',
  'year': '2018'},
 {'part_number': '04711-T1W-A91ZZ',
  'description': 'FACE FR (DOT)',
  'quantity': 2,
  'price': 382.6625,
  'make': 'Honda',
  'model': 'Insight',
  'year': '2018'},
 {'part_number': '04711-T20-A00ZZ',
  'description': 'FACE FR BUMPER',
  'quantity': 1,
  'price': 311.35,
  'make': 'Honda',
  'model': 'Crosstour',
  'year': '2018, 2019, 2020, 2021'},
 {'part_number': '04711-T7W-A80ZZ',
  'description': 'FACE 

In [61]:
for row in parts_data:
    part_number = row['part_number']
    model_name = row.get('model')
    year_value = row.get('year')

    # retrieve model_id if model_name is available
    if model_name:
        model_id = session.query(Model.id).filter_by(model_name=model_name).first()
        model_id = model_id[0] if model_id else None
    else:
        model_id = None
    
    # retrieve year_id if year_value is available
    if year_value:
        year_id = session.query(Year.id).filter_by(year=year_value).first()
        year_id = year_id[0] if year_id else None
    else:
        year_id = None
    
    new_model_year = ModelYear(part_number=part_number, model_id=model_id, year_id=year_id)
    session.add(new_model_year)

session.commit()
session.close()

In [62]:
from sqlalchemy import create_engine, inspect

engine = create_engine(f'sqlite:///{db_path}')

# Create an inspector
inspector = inspect(engine)

# Get the list of table names
tables = inspector.get_table_names()

# Print the structure of each table
for table in tables:
    print(f"\nTable: {table}")
    columns = inspector.get_columns(table)
    for column in columns:
        print(f"  Column: {column['name']} - {column['type']}")

# Additionally, print foreign key information
for table in tables:
    print(f"\nForeign Keys for Table: {table}")
    foreign_keys = inspector.get_foreign_keys(table)
    for fk in foreign_keys:
        print(f"  Foreign Key: {fk['constrained_columns']} -> {fk['referred_table']}.{fk['referred_columns']}")



Table: model_year
  Column: id - INTEGER
  Column: part_number - INTEGER
  Column: model_id - INTEGER
  Column: year_id - INTEGER

Table: models
  Column: id - INTEGER
  Column: part_number - VARCHAR
  Column: model_name - VARCHAR

Table: parts
  Column: part_number - VARCHAR
  Column: description - VARCHAR
  Column: quantity - INTEGER
  Column: price - FLOAT
  Column: make - VARCHAR

Table: years
  Column: id - INTEGER
  Column: part_number - VARCHAR
  Column: year - INTEGER

Foreign Keys for Table: model_year
  Foreign Key: ['part_number'] -> parts.['part_number']
  Foreign Key: ['model_id'] -> models.['id']
  Foreign Key: ['year_id'] -> years.['id']

Foreign Keys for Table: models
  Foreign Key: ['part_number'] -> parts.['part_number']

Foreign Keys for Table: parts

Foreign Keys for Table: years
  Foreign Key: ['part_number'] -> parts.['part_number']


In [1]:
import sqlite3
import csv

# csv_path = r'C:\Users\vivia\co-pilot-v1\Notebooks\parts.csv'
csv_path = r'C:\Users\vivia\co-pilot-v1\Notebooks\alberta_honda_data_synthetic.csv'

def csv_to_sql(csv_path):
    db_path = r'C:\Users\vivia\co-pilot-v1\Notebooks\bulk_upload.db'
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()

    with open(csv_path) as f:
        reader = csv.reader(f)
        data = list(reader)

        csv_rows_dict = {
            'part_number': 'VARCHAR',
            'description': 'VARCHAR',
            'quantity': 'INT',
            'price': 'INT',
            'make': 'VARCHAR',
            'model': 'VARCHAR',
            'year': 'VARCHAR'
        }

        for row in data:
            print(row)
    # deletes the previous table
    # cur.execute('''DROP TABLE IF EXISTS parts''')

    # # makes new table with new data
    # cur.execute('''CREATE TABLE parts (
    #             part_number VARCHAR,
    #             description VARCHAR,
    #             quantity INT,
    #             price INT,
    #             make VARCHAR,
    #             model VARCHAR,
    #             year VARCHAR
    #             )''' )
    # for row in data:
    #     cur.execute("INSERT INTO parts (part_number, description, quantity, price, make, model, year) values (?, ?, ?, ?, ?, ?, ?)", row)

    # conn.commit()
    # conn.close()

    bulk_upload_db = r'C:\Users\vivia\co-pilot-v1\Notebooks\bulk_upload.db'
    engine = create_engine(f"sqlite:///{bulk_upload_db}")

    return engine

In [2]:
csv_to_sql(csv_path)

['part_number', 'description', 'quantity', 'price', 'make', 'model', 'year']
[]
['04301-RNA-307', 'P/S BELT', '1', '68.992', 'Honda', 'CR-Z, HR-V', '2020']
[]
['04711-3A0-A00ZZ', 'FACE FR BUMPER', '2', '298.9375', 'Honda', 'Pilot', '2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019']
[]
['04711-T0A-A90ZZ', 'FACE FR BPR(DOT)', '1', '336.625', 'Honda', 'Crosstour, Element, HR-V, Insight, Pilot', '2018']
[]
['04711-T1W-A91ZZ', 'FACE FR (DOT)', '2', '382.6625', 'Honda', 'Insight', '2018']
[]
['04711-T20-A00ZZ', 'FACE FR BUMPER', '1', '311.35', 'Honda', 'Crosstour', '2018, 2019, 2020, 2021']
[]
['04711-T7W-A80ZZ', 'FACE FR (DOT)', '0', '368.4125', 'Honda', 'Clarity', '2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021']
[]
['04711-TBA-A00ZZ', 'FACE FR BUMPER', '1', '393.45', 'Honda', 'Civic', '2022']
[]
['04711-TBA-A50ZZ', 'FACE FR BUMPER', '2', '424.5625', 'Honda', 'Ridgeline', '2022, 2023']
[]
['04711-TG7-A50ZZ', 'FACE FR BUMPER', '1', '383.15', 'Honda', 'CR-Z, Odyssey, S2000',

NameError: name 'create_engine' is not defined

In [8]:
from llama_index.core import SQLDatabase, VectorStoreIndex
from llama_index.core.objects import (
    SQLTableNodeMapping,
    ObjectIndex,
    SQLTableSchema,
)
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index.llms.openai import OpenAI

import os

# logfile = r"C:\Users\vivia\co-pilot-v1\Notebooks\logfile.txt"
# if os.path.exists(logfile):
#   os.remove(logfile)

# def write_to_log(text):
#     logfile = r"C:\Users\vivia\co-pilot-v1\Notebooks\logfile.txt"
#     f = open(logfile, "a")
#     f.write(text + "\n")
#     f.close()

engine = csv_to_sql(csv_path)

def setup_nlsql_query_engine():
    def initialize_table_objects():
        sql_database = SQLDatabase(engine, sample_rows_in_table_info=2, include_tables=['parts'])

        parts_context = "Provides detailed inventory data for individual parts, including the model name, price, quantity, brand, year, and a description of the part."

        table_context_str = "The Table description is: " + parts_context

        table_node_mapping = SQLTableNodeMapping(sql_database)
        table_schema_objs = [
            SQLTableSchema(table_name='parts', context_str=parts_context),
        ]
        obj_index = ObjectIndex.from_objects(
            table_schema_objs,
            table_node_mapping,
            VectorStoreIndex,
        )
        return sql_database, table_schema_objs, obj_index, table_context_str

    sql_database, table_schema_objs, obj_index, table_context_str = initialize_table_objects()

    context_str = (
        "Convert percentages to decimals (e.g., '50%' as '0.5')."
        "Use LOWER and LIKE for searching brand, modelName or description because of case sensitivity."
        "Pay close attention to filtering criteria mentioned in the question and incorporate them using the WHERE clause in your SQL query."
        "If the question involves multiple conditions, use logical operators such as AND, OR to combine them effectively."
        "If the question involves grouping of data (e.g., finding totals or averages for different categories), use the GROUP BY clause along with appropriate aggregate functions."
        "Consider using aliases for tables and columns to improve readability of the query, especially in case of complex joins or subqueries."
        "If necessary, use subqueries or common tables expressions (CTEs) to break down the problem into smaller, more manageable parts."
        "Ensure detailed, relevant responses."

    ) 
    context_str_combined = context_str + "\n\n" + table_context_str

    query_engine = SQLTableRetrieverQueryEngine(
        sql_database=sql_database,
        table_retriever=obj_index.as_retriever(similarity_top_k=1),
        synthesize_response=True,
        llm=OpenAI(temperature=0.1, model="gpt-3.5-turbo-0125"),
        context_str_prefix=context_str_combined
    )

    return query_engine

query_engine = setup_nlsql_query_engine()

def process_user_input_to_sql(user_input):
    response = query_engine.query(user_input)
    # print(f"RESPONSE: {response.metadata}")
    sql_query = response.metadata.get('sql_query', '').replace('\n', ' ').replace('\r', ' ').strip()
    # print(f"SQL QUERY after adjustment: {sql_query}")
    if sql_query.startswith('sql'):
        sql_query = sql_query[3:].strip()
    print(f"SQL: {sql_query}")
    # write_to_log(str(sql_query))
    return sql_query

def query_output(user_input):
    sql_query = process_user_input_to_sql(user_input)
    # print(f"SQL QUERY Output: {sql_query}")

    with engine.connect() as connection:
        result = connection.execute(text(sql_query))
        result_data = result.fetchall()
        if len(result_data) >= 5:
            result_df = pd.DataFrame(result_data, columns=result.keys())
            return result_df
        else:
            response = query_engine.query(sql_query)
            return str(response)

user_input = "Get all parts from honda"
response = query_output(user_input)
print(response)


OperationalError: database is locked

In [None]:

test_inputs = [
    "Get all bmw parts and reduce their price by 50%",
    "Get all mufflers between the years 2000 and 2005",
    "Get all parts between the years 2010 and 2020 and reduce the price by 20%",
    "Get all ducati parts made in 2019",
    "Get all new parts and reduce their price by 10%",
    "Take all parts less than $300",
    "Get all parts between $500 and $1500 and reduce their price by 30%",
    "Get all parts from polaris that were made after 2018 and reduce their price by 15%",
    "Get all parts from polaris/indian that were made after 2018 and reduce their price by 15%",
    "Get all used parts and reduce their price by 50%",
    "Get all parts that have more than 2 on quantity and reduce their price by 40%",
]

for test in test_inputs:
    response = query_output(test)
    write_to_log(test)
    if isinstance(response, pd.DataFrame):
        response = response.to_string()
    write_to_log(response)
    write_to_log("**********************************************************************************************")

write_to_log("DONE WITH ALL TESTS")
