# GenAI with Python: AI Agents from Zero to Hero

###### [Read the Article on TDS]()

## 0 - Setup

In [1]:
#pip install ollama==0.4.7
import ollama
llm = "qwen2.5"

In [2]:
stream = ollama.generate(model=llm, prompt='''can you train a machine learning model?''', stream=True)
for chunk in stream:
    print(chunk['response'], end='', flush=True)

I can provide guidance and information on how to train a machine learning model, but I don't have the capability to directly train models myself. However, I can certainly help you with:

1. **Choosing an appropriate algorithm** for your specific problem.
2. **Data preparation**—cleaning data, handling missing values, normalizing features, etc.
3. **Feature selection and engineering**—choosing relevant features and creating new ones if necessary.
4. **Model training**—using libraries like scikit-learn, TensorFlow, or PyTorch to train models.
5. **Hyperparameter tuning**—optimizing model parameters for better performance.
6. **Validation and testing**—ensuring your model generalizes well to unseen data.
7. **Deployment**—suggestions on how to deploy the trained model in a production environment.

If you have specific questions about any of these steps or need help with code, feel free to ask!

## 1 - DataScience Team

### 1.1. SQL Agent

###### Tools

In [41]:
# Create DB
from langchain_community.utilities.sql_database import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///database.db")

In [42]:
# SQL Tools
from langchain_community.tools.sql_database.tool import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool

## Tables
def get_tables() -> str:
    return ListSQLDatabaseTool(db=db).invoke("")

tool_get_tables = {'type':'function', 'function':{
  'name': 'get_tables',
  'description': 'Returns the name of the tables in the database.',
  'parameters': {'type': 'object', 
                 'required': [],
                 'properties': {}
}}}

## Columns
def get_columns(tables: str) -> str:
    tool = InfoSQLDatabaseTool(db=db)
    return tool.invoke(tables)

tool_get_columns = {'type':'function', 'function':{
  'name': 'get_columns',
  'description': 'Returns the name of the columns in the table.',
  'parameters': {'type': 'object', 
                 'required': ['tables'],
                 'properties': {'tables': {'type':'str', 'description':'table name. Example Input: table1, table2, table3'}}
}}}

## SQL
def sql_check(sql: str) -> str:
    p = f'''Double check if the SQL query is correct: {sql}. You MUST just SQL code without comments'''
    res = ollama.generate(model=llm, prompt=p)["response"]
    return res.replace('sql','').replace('```','').replace('\n',' ').strip()

tool_sql_check = {'type':'function', 'function':{
  'name': 'sql_check',
  'description': 'Before executing a query, always review the SQL query and correct the code if necessary',
  'parameters': {'type': 'object', 
                 'required': ['sql'],
                 'properties': {'sql': {'type':'str', 'description':'SQL code'}}
}}}

## Query
def sql_exec(sql: str) -> str:
    return QuerySQLDataBaseTool(db=db).invoke(sql)
    
tool_sql_exec = {'type':'function', 'function':{
  'name': 'sql_exec',
  'description': 'Execute a SQL query',
  'parameters': {'type': 'object', 
                 'required': ['sql'],
                 'properties': {'sql': {'type':'str', 'description':'SQL code'}}
}}}

In [43]:
# Final Answer as a Tool
def final_answer(text:str) -> str:
    return text

tool_final_answer = {'type':'function', 'function':{
  'name': 'final_answer',
  'description': 'Returns a natural language response to the user',
  'parameters': {'type': 'object', 
                 'required': ['text'],
                 'properties': {'text': {'type':'str', 'description':'natural language response'}}
}}}

final_answer(text="hi")

'hi'

In [44]:
dic_tools = {'final_answer':final_answer,
             'get_tables':get_tables, 
             'get_columns':get_columns,
             'sql_check':sql_check,
             'sql_exec':sql_exec}

###### Prompt

In [45]:
prompt_sql = '''
You are an expert Data Engineer, retrieve data from the database using SQL.
You have access to the following tools:
- tool 'final_answer' to return a text response.
- tool 'get_tables' to find the tables in the database.
- tool 'get_columns' to find the columns in the table.
- tool 'sql_check' to double check the SQL query.
- tool 'sql_exec' to execute the SQL query and save the results.



'''

###### Utils

In [46]:
import re

def utils_save_data(table: str, data: str, file: str) -> str:
    str_cols = get_columns(table)
    lst_cols = re.findall(r'"([^"]+)"\s+\w+', str_cols)
    df = pd.DataFrame(data=eval(data), columns=lst_cols)
    print(df.head())
    df.to_csv(file_name, sep=',', na_rep='', index=False)
    return f"Data are saved as '{file}'"

In [47]:
def use_tool(agent_res:dict, dic_tools:dict) -> dict:
    ## use tool
    if "tool_calls" in agent_res["message"].keys():
        for tool in agent_res["message"]["tool_calls"]:
            t_name, t_inputs = tool["function"]["name"], tool["function"]["arguments"]
            if f := dic_tools.get(t_name):
                ### calling tool
                print('🔧 >', f"\x1b[1;31m{t_name} -> Inputs: {t_inputs}\x1b[0m")
                ### tool output
                t_output = f(**tool["function"]["arguments"])
                print(t_output)
                ### final res
                res = t_output
            else:
                print('🤬 >', f"\x1b[1;31m{t_name} -> NotFound\x1b[0m")
    ## don't use tool
    if agent_res['message']['content'] != '':
        res = agent_res["message"]["content"]
        t_name, t_inputs = '', ''
    return {'res':res, 'tool_used':t_name, 'inputs_used':t_inputs}

In [48]:
def run_agent(llm, messages, available_tools):
    tool_used, local_memory = '', ''
    while tool_used != 'final_answer':
        ### use tools
        try:
            agent_res = ollama.chat(model=llm, 
                                    messages=messages,
                                    #format="json", #or schema
                                    #stream=False,
                                    #options={"num_ctx":2048},
                                    tools=[v for v in available_tools.values()])
            dic_res = use_tool(agent_res, dic_tools)
            res, tool_used, inputs_used = dic_res["res"], dic_res["tool_used"], dic_res["inputs_used"]
        ### error
        except Exception as e:
            print("⚠️ >", e)
            res = f"I tried to use {tool_used} but didn't work. I will try something else."
            print("👽 >", f"\x1b[1;30m{res}\x1b[0m")
            messages.append( {"role":"assistant", "content":res} )
        ### update memory
        if tool_used not in ['','final_answer']:
            local_memory += f"\nTool used: {tool_used}.\nInput used: {inputs_used}.\nOutput: {res}"
            messages.append( {"role":"assistant", "content":local_memory} )
            available_tools.pop(tool_used)
            if len(available_tools) == 1:
                messages.append( {"role":"user", "content":"now activate the tool final_answer."} )
        ### tools not used
        if tool_used == '':
            break
        
        ### custom
        if tool_used ==  'get_columns':
            table = inputs_used
        if tool_used == 'sql_exec':
            utils_save_data(table=table, data=res, file="data.csv")
            
    return res

###### Start a chat

In [49]:
messages = [{"role":"system", "content":prompt_sql}]
memory = '''
To solve the task, you must follow a ReAct cycle of 'THOUGHT', 'ACTION', and 'OBSERVATION'. At each step:
1) 'THOUGHT' don't use any tool, just explain your your reasoning towards solving the task and the tools that you want to use.
2) If the user agree, you move to 'ACTION' and follow your plan to solve the task. But if the user doesn't agree, try to formulate a new plan.
3) 'OBSERVATION' don't use any tool, just comment whatever important information you will use as input for the next step.
4) In the end, you have to return a final answer using the 'final_answer' tool.
'''
while True:
    ## User
    q = input('🙂 >')
    if q == "quit":
        break
    messages.append( {"role":"user", "content":q} )

    ## Memory
    #messages.append( {"role":"user", "content":memory} )      
    
    ## Model
    available_tools = {"final_answer":tool_final_answer, "get_tables":tool_get_tables, "get_columns":tool_get_columns,
                       "sql_check":tool_sql_check, "sql_exec":tool_sql_exec}
    res = run_agent(llm, messages, available_tools)
    
    ## Response
    print("👽 >", f"\x1b[1;30m{res}\x1b[0m")
    messages.append( {"role":"assistant", "content":res} )

🙂 > extract all the males above 18 years old


🔧 > [1;31mget_tables -> Inputs: {}[0m
titanic
🔧 > [1;31mget_columns -> Inputs: {'tables': 'titanic'}[0m

CREATE TABLE titanic (
	"PassengerId" INTEGER, 
	"Survived" INTEGER, 
	"Pclass" INTEGER, 
	"Name" TEXT, 
	"Sex" TEXT, 
	"Age" REAL, 
	"SibSp" INTEGER, 
	"Parch" INTEGER, 
	"Ticket" TEXT, 
	"Fare" REAL, 
	"Cabin" TEXT, 
	"Embarked" TEXT
)

/*
3 rows from titanic table:
PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
1	0	3	Braund, Mr. Owen Harris	male	22.0	1	0	A/5 21171	7.25	None	S
2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Thayer)	female	38.0	1	0	PC 17599	71.2833	C85	C
3	1	3	Heikkinen, Miss. Laina	female	26.0	0	0	STON/O2. 3101282	7.925	None	S
*/
⚠️ > cannot access local variable 'res' where it is not associated with a value
👽 > [1;30mI tried to use get_columns but didn't work. I will try something else.[0m


KeyError: 'get_columns'

In [None]:
messages

### 1.2. ML Agent

In [None]:
import io
import contextlib

def code_exec(code:str) -> str:
    output = io.StringIO()
    with contextlib.redirect_stdout(output):
        try:
            exec(code)
        except Exception as e:
            print(f"Error: {e}")
    return output.getvalue()

tool_code_exec = {'type':'function', 'function':{
  'name': 'code_exec',
  'description': 'Execute python code. Use always the function print() to get the output.',
  'parameters': {'type': 'object', 
                 'required': ['code'],
                 'properties': {
                    'code': {'type':'str', 'description':'code to execute'},
}}}}

code_exec("from datetime import datetime; print(datetime.now().strftime('%H:%M'))")

In [None]:
prompt_py = '''
[GOAL] You are an expert Data Analyst, you write Python code to analyze data.
You must answer every question from the user, you can use the list of tools provided to you.
After you finish your job, use the 'final_answer' tool to answer the user.

[RETURN] You must generate and execute Python code to create plots.

[WARNINGS] ALWAYS execute the following code exactly as it is: 'df=pd.read_csv(path); print(df.head())'.
If you create a plot, ALWAYS add 'plt.show()' at the end.

[CONTEXT] Start by loading the data.
'''

###### ML Agent

In [None]:
def train_model(code:str) -> str:
   
    return

tool_train_model = {'type':'function', 'function':{
  'name': 'code_exec',
  'description': 'Execute python code. Use always the function print() to get the output.',
  'parameters': {'type': 'object', 
                 'required': ['code'],
                 'properties': {
                    'code': {'type':'str', 'description':'code to execute'},
}}}}

train_model("from datetime import datetime; print(datetime.now().strftime('%H:%M'))")

In [None]:
def save_model(code:str) -> str:
   
    return

tool_save_model = {'type':'function', 'function':{
  'name': 'code_exec',
  'description': 'Execute python code. Use always the function print() to get the output.',
  'parameters': {'type': 'object', 
                 'required': ['code'],
                 'properties': {
                    'code': {'type':'str', 'description':'code to execute'},
}}}}

save_model("from datetime import datetime; print(datetime.now().strftime('%H:%M'))")

In [None]:
prompt_ml = '''
[GOAL] You are an expert Data Scientist, you train machine learning models.
You must answer every question from the user, you can use the list of tools provided to you.
After you finish your job, use the 'final_answer' tool to answer the user.

[RETURN] The final output must be a model object saved on the computer.

[WARNINGS] 

[CONTEXT] You must answer every question from the user, you can use the list of tools provided to you.
Once you have collected plenty of information to answer the user's question use the 'final_answer' tool.
If the user doesn't ask a specific question, you MUST use the 'final_answer' tool directly.
'''

In [None]:
dic_tools = {'final_answer':final_answer,
             'get_tables':get_tables, 
             'get_schema':get_schema, 
             'sql_exec':sql_exec, 
             'sql_check':sql_check,
             'code_exec':code_exec,
             'train_model':train_model,
             'save_model':save_model}

###### Utils functions

In [None]:
def use_tool(agent_res:dict, dic_tools:dict) -> dict:
    ## use tool
    if "tool_calls" in agent_res["message"].keys():
        for tool in agent_res["message"]["tool_calls"]:
            t_name, t_inputs = tool["function"]["name"], tool["function"]["arguments"]
            if f := dic_tools.get(t_name):
                ### calling tool
                print('🔧 >', f"\x1b[1;31m{t_name} -> Inputs: {t_inputs}\x1b[0m")
                ### tool output
                t_output = f(**tool["function"]["arguments"])
                print(t_output)
                ### final res
                res = t_output
            else:
                print('🤬 >', f"\x1b[1;31m{t_name} -> NotFound\x1b[0m")
    ## don't use tool
    if agent_res['message']['content'] != '':
        res = agent_res["message"]["content"]
        t_name, t_inputs = '', ''
    return {'res':res, 'tool_used':t_name, 'inputs_used':t_inputs}

###### Workflow

In [None]:
messages_sql = [{"role":"system", "content":prompt_sql}]
messages_py  = [{"role":"system", "content":prompt_py}]
messages_ml  = [{"role":"system", "content":prompt_ml}]
shared_memory = ''

while True:    
    ## User
    q = input('🙂 >')
    if q == "quit":
        break
    
    messages_sql.append( {"role":"user", "content":q} )
    
    ## Model
    available_tools = {"final_answer":tool_final_answer, 
                       "get_tables":tool_get_tables, "get_schema":tool_get_schema,
                       "sql_exec":tool_sql_exec, "sql_check":sql_check}
    tool_used, memory = '', ''
    while tool_used != 'final_answer':
        ### use tools
        try:
            agent_res = ollama.chat(model=llm, messages=messages_sql, 
                                    tools=[v for v in available_tools.values()])
            dic_res = use_tool(agent_res, dic_tools)
            res, tool_used, inputs_used = dic_res["res"], dic_res["tool_used"], dic_res["inputs_used"]
        ### error
        except Exception as e:
            print("⚠️ >", e)
            res = f"I tried to use {tool_used} but didn't work. I will try something else."
            print("👽 >", f"\x1b[1;30m{res}\x1b[0m")
            messages.append( {"role":"assistant", "content":res} )
        ### update memory
        if tool_used not in ['','final_answer']:
            memory += f"\nTool used: {tool_used}.\nInput used: {inputs_used}.\nOutput: {res}"
            messages.append( {"role":"assistant", "content":memory} )
            available_tools.pop(tool_used)
            if len(available_tools) == 1:
                messages.append( {"role":"user", "content":"now activate the tool final_answer."} )
        ### tools not used
        if tool_used == '':
            break
    
    ## Response
    print("👽 >", f"\x1b[1;30m{res}\x1b[0m")
    messages.append( {"role":"assistant", "content":res} )