### Imports

In [9]:
from langchain_google_community import GoogleSearchAPIWrapper
from langchain.tools import WikipediaQueryRun
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain.agents import AgentExecutor
from langchain_openai import OpenAI, ChatOpenAI
from langchain_core.tools import Tool
from langchain_community.utilities import WikipediaAPIWrapper

import os

In [10]:
# Publish the OpenAI API key through the process environment.
# NOTE: '...' is a placeholder -- fill in a real key locally and never commit it.
os.environ.update({'OPENAI_API_KEY': '...'})

# Tiny Hacking Assignment!

## Generate SQL / Pandas code from natural language
There are two datasets in this repo (in the /data folder). Your goal is to translate the following questions into SQL queries (or Python pandas code) using agents.
ReAct prompting might help you reach a better solution.

1. (easy) Write an SQL query to retrieve the full names (first name and last name) and email addresses of all customers who have registered in the database since January 1, 2023.
2. Calculate the lifetime value of each customer. Lifetime value is defined as the total sum of orders placed by each customer from their first order to the present. The query should return the customer's first name, last name, and their lifetime value, sorted by lifetime value in descending order.
3. Construct an SQL query to identify customers who placed orders in their first month but did not place any orders in the subsequent month. The query should return the CustomerID, first name, last name, and the date of their last order. Additionally, provide a count of how many orders were placed in the first month of their activity.
4. Devise an SQL query that lists each customer along with their most recent order details. The output should include the customer's first name, last name, and the details of their most recent order (OrderID, OrderDate, TotalAmount, and OrderStatus). Additionally, calculate the number of days that have elapsed since their last order. Sort the results by the elapsed days in ascending order.


Here is a description of the datasets:
### Table 1: Customers
This table stores information about the customers who purchase items from the e-commerce store.

Columns:
- CustomerID (INT): A unique identifier for each customer.
- FirstName (VARCHAR): The first name of the customer.
- LastName (VARCHAR): The last name of the customer.
- Email (VARCHAR): The email address of the customer.
- PhoneNumber (VARCHAR, optional): The phone number of the customer, which can be null if not provided.
- CreateDate (DATETIME): The date and time when the customer was added to the database.

### Table 2: Orders
This table records details about orders placed by customers.

Columns:
- OrderID (INT): A unique identifier for each order.
- CustomerID (INT): A reference to CustomerID from the Customers table, linking each order to a specific customer.
- OrderDate (DATETIME): The date and time when the order was placed.
- ShippingAddress (VARCHAR): The address where the order is to be shipped.
- TotalAmount (DECIMAL): The total amount of the order in dollars.
- OrderStatus (VARCHAR): The status of the order (e.g., "Pending", "Shipped", "Delivered").

These tables are connected via the CustomerID in the Orders table, which acts as a foreign key referencing the CustomerID in the Customers table. This setup allows you to track which customer placed which order and to perform detailed analysis and reporting on customer behavior and order details.

### Let's code!

In [None]:
from langchain_google_community import GoogleSearchAPIWrapper
from langchain.tools import WikipediaQueryRun
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain.agents import AgentExecutor
from langchain_openai import OpenAI, ChatOpenAI
from langchain_core.tools import Tool

# Initialize the chat-oriented LLM wrapper for OpenAI's gpt-4o model.
# No key is passed here; ChatOpenAI is expected to pick it up from the
# OPENAI_API_KEY environment variable.
chat_llm = ChatOpenAI(model='gpt-4o')

# Initialize the prompt.
# For agents, you need to think of how the agent is allowed to 'think'.
# Here, this is done by giving the agent access to an 'agent_scratchpad', where
# it can put all intermediate messages in.
#
# The scratchpad is supplied as a list of chat messages (tool calls and tool
# results), so it is inserted with a MessagesPlaceholder rather than being
# interpolated into the system string: string interpolation would flatten the
# messages into their textual repr and lose the tool-call structure.
initial_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
        Determine the most fitting SBI code for the following dutch company: Stedin

        TOOLS:
        google-search

        Begin!
        """),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Debugging aid: makes the values flowing into the agent visible while it runs.
def input_print(param):
    """Print *param* to stdout after an ``INPUT:`` label and return it unchanged."""
    label = "\nINPUT:"
    print(label, param)
    return param

# Tools the agent is allowed to call. The prompt advertises a "google-search"
# tool, so a tool with exactly that name is registered here.
# NOTE: GoogleSearchAPIWrapper needs Google credentials (GOOGLE_API_KEY and
# GOOGLE_CSE_ID environment variables) and fails at construction without them.
google_search = GoogleSearchAPIWrapper()
tools = [
    Tool(
        name="google-search",
        description="Search Google for recent results.",
        func=google_search.run,
    ),
]

# Initialize the agent by chaining together several parts.
agent = (
    {
        # Convert the (action, observation) steps taken so far into chat messages.
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(x["intermediate_steps"]),
        # Not a prompt variable: this entry exists only for its side effect of
        # printing the raw agent input on every iteration.
        "print": lambda x: input_print(x),
    }
    | initial_prompt                    # The inputs are fed to the prompt.
    | chat_llm.bind_tools(tools)        # The formatted prompt is handed to the LLM, which might, or might not, invoke the bound tools.
    | OpenAIToolsAgentOutputParser()    # The LLM output is parsed by a custom output parser into tool calls or a final answer.
)

# AgentExecutor requires the tool list: it is what actually runs the tool calls
# the model requests and feeds the observations back into the loop.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# The prompt has no user-supplied variables, so an empty input dict suffices.
agent_executor.invoke({})

## Run your queries

In [7]:
import sqlite3
import pandas as pd

# Read the two CSV datasets into pandas DataFrames.
customers_df = pd.read_csv('data/customers.csv')
orders_df = pd.read_csv('data/orders.csv')

# Open a SQLite database that lives purely in memory.
conn = sqlite3.connect(':memory:')

# Copy each DataFrame into a SQL table (any existing table of that name is
# replaced; the DataFrame index is not written as a column).
customers_df.to_sql(name='Customers', con=conn, if_exists='replace', index=False)
orders_df.to_sql(name='Orders', con=conn, if_exists='replace', index=False)

# Helper that runs a SQL statement against the shared SQLite connection.
def execute_query(query):
    """Run *query* on the module-level ``conn`` and return the rows as a DataFrame."""
    frame = pd.read_sql_query(sql=query, con=conn)
    return frame

# Example SQL query: every order above 100.00 together with the customer's
# name, largest order first.
query = """
SELECT c.FirstName, c.LastName, o.OrderID, o.TotalAmount
FROM Customers c
JOIN Orders o ON c.CustomerID = o.CustomerID
WHERE o.TotalAmount > 100.00
ORDER BY o.TotalAmount DESC
"""

# Execute the query, releasing the connection even if the query raises so a
# failed run does not leave the database handle open.
try:
    result = execute_query(query)
finally:
    # Closing discards the in-memory database; execute_query cannot be used
    # after this point without reconnecting and reloading the tables.
    conn.close()

In [6]:
# Show the DataFrame produced by the example query as the cell output.
result

Unnamed: 0,FirstName,LastName,OrderID,TotalAmount
0,Andrew,Cardenas,161,498.26
1,Christian,Burns,163,494.40
2,James,Johnson,14,493.40
3,Michael,Hoffman,35,492.81
4,Vincent,Jones,63,490.78
...,...,...,...,...
164,Erin,Hall,80,123.14
165,James,Johnson,115,110.13
166,Elizabeth,Sanders,158,109.27
167,Stephanie,Stevenson,24,107.98
