In [24]:
import json
import os

import pandas as pd
from fuzzywuzzy import process

from extraction_functions import extract_data_etfs, extract_data_stocks

# Sample payment-transaction records, stored column-wise (five values per column).
# NOTE(review): neither `data` nor `df` is referenced by any later cell visible in
# this notebook; it looks like leftover scaffolding. Confirm before removing.
data = {
    "transaction_id": ["T1001", "T1002", "T1003", "T1004", "T1005"],
    "customer_id": ["C001", "C002", "C003", "C002", "C001"],
    "payment_amount": [125.50, 89.99, 120.00, 54.30, 210.20],
    "payment_date": [
        "2021-10-05",
        "2021-10-06",
        "2021-10-07",
        "2021-10-05",
        "2021-10-08",
    ],
    "payment_status": ["Paid", "Unpaid", "Paid", "Paid", "Pending"],
}

# Build a DataFrame from the column-wise dict above
df = pd.DataFrame(data)


# Lower-case instrument name -> identifier used to locate its data file
# ("data/<identifier>.txt"). The identifiers are the Yahoo IDs echoed in the
# file contents shown in the outputs below.
company_names = {
    "ucb": "UCB.BR",
    "abinbev": "ABI.BR",
    "aedifica": "AED.BR",
    "ageas": "AGS.BR",
    "aperam": "APAM.AS",
    "argenx": "ARGX.BR",
    "belgian national bank": "BNB.BR",
    "cofinimmo": "COFB.BR",
    "elia": "ELI.BR",
    "ishares msci belgium etf": "EWK",
    "groupe bruxelles lambert sa": "GBLB.BR",
    "galapagos nv": "GLPG.AS",
    "kbc bank": "KBC.BR",
    "melexis nv": "MELE.BR",
    "proximus": "PROX.BR",
    "sofina": "SOF.BR",
    "solvay": "SOLB.BR",
    "umicore": "UMI.BR",
    "unified post group": "UPG.BR",
    "warehouses de pauw": "WDP.BR",
    "xior student housing": "XIOR.BR",
}

# Candidate names handed to the fuzzy matcher, in the mapping's insertion order
companies = list(company_names)
# Minimum similarity score (0-100 scale) for a fuzzy match to be accepted
threshold = 80


def find_best_match(user_input, company_list, threshold):
    """Return the entry of ``company_list`` most similar to ``user_input``.

    Args:
        user_input: The text to look up.
        company_list: Candidate names to match against.
        threshold: Minimum similarity score (0-100 scale) for a match to count.

    Returns:
        The best-matching candidate when its score is at least ``threshold``;
        otherwise the sentinel string ``"none"`` (also returned when there is
        no candidate to match against).
    """
    match = process.extractOne(user_input, company_list)

    # extractOne yields None when there is nothing to match (e.g. an empty
    # candidate list); unpacking it directly would raise TypeError.
    if match is None:
        return "none"

    # Index rather than unpack so a result tuple with extra trailing fields
    # does not raise.
    best_match, best_score = match[0], match[1]
    return best_match if best_score >= threshold else "none"


def read_file_to_string(file_name):
    """Read a UTF-8 text file and return its full contents.

    Failures are reported through the return value rather than raised:
    a missing file yields ``"Error: File not found."`` and any other
    I/O failure yields ``"Error: Could not read the file."``.
    """
    try:
        with open(file_name, encoding="utf-8") as handle:
            return handle.read()
    except FileNotFoundError:
        return "Error: File not found."
    except OSError:
        # IOError is an alias of OSError on Python 3
        return "Error: Could not read the file."


def retrieve_stock_etf_info(companies: list, stock_name: str, threshold: int) -> str:
    """Return the stored description of a stock or ETF, looked up by approximate name.

    The name is lower-cased and fuzzy-matched against ``companies``. On a match,
    the module-level ``company_names`` mapping gives the identifier used to build
    the path ``data/<identifier>.txt``, whose text is returned.

    Args:
        companies: Candidate instrument names to match against.
        stock_name: Name supplied by the caller; matching ignores case.
        threshold: Minimum similarity score (0-100 scale) for a match to count.

    Returns:
        A JSON string. ``{"information we have on stock/etf": <file text>}`` when
        an instrument is matched, ``{"error": ...}`` when it is not.
    """
    # Fuzzy-match the lower-cased input against the known instrument names
    matched_name = find_best_match(stock_name.lower(), companies, threshold)

    # "none" is find_best_match's no-match sentinel; a matched name that has no
    # entry in company_names is treated the same way instead of raising KeyError.
    identifier = company_names.get(matched_name) if matched_name != "none" else None
    if identifier is None:
        return json.dumps({"error": "stock or etf not found."})

    # NOTE: read_file_to_string reports failures as an "Error: ..." string, which
    # is passed through here as the information text.
    file_content = read_file_to_string("data/" + identifier + ".txt")
    return json.dumps({"information we have on stock/etf": file_content})

In [26]:
# Smoke test: the partial name "ishare" should still resolve to the ETF entry
retrieve_stock_etf_info(companies, "ishare", 80)

'{"information we have on stock/etf": "Yahoo ID of ETF: EWK\\ndescription of the ETF: The fund generally will invest at least 80% of its assets in the component securities of its underlying index and in investments that have economic characteristics that are substantially identical to the component securities of its underlying index. The index is designed to measure the performance of the large-, mid- and small-capitalization segments of the equity market in Belgium. The fund is non-diversified.\\nETF Fund Overview: {\'categoryName\': \'Miscellaneous Region\', \'family\': \'iShares\', \'legalType\': \'Exchange Traded Fund\'}\\nETF Fund Operations:                                    EWK Category Average\\nAttributes                                             \\nAnnual Report Expense Ratio      0.005              0.0\\nAnnual Holdings Turnover         0.120             <NA>\\nTotal Net Assets             17577.820         17577.82\\nETF Asset Classes: {\'cashPosition\': 0.0146, \'stockP

In [27]:
# Tool schema passed to the chat-completion request via `tools=`.
# Only `stock_name` is exposed to the model; the other two arguments of
# retrieve_stock_etf_info (`companies`, `threshold`) are bound in
# `names_to_functions` with functools.partial.
tools = [
    {
        "type": "function",
        "function": {
            "name": "retrieve_stock_etf_info",
            "description": "get info about a certain stock or etf given the name of the instrument",
            "parameters": {
                "type": "object",
                "properties": {
                    "stock_name": {
                        "type": "string",
                        "description": "The stock or ETF name",
                    }
                },
                "required": ["stock_name"],
            },
        },
    }
]

In [28]:
import functools

# Dispatch table: tool name requested by the model -> Python callable.
# `companies` and `threshold` are pre-bound so the model only has to supply
# `stock_name`. The module-level `threshold` constant is reused here rather
# than repeating its value as a literal.
names_to_functions = {
    "retrieve_stock_etf_info": functools.partial(
        retrieve_stock_etf_info, companies=companies, threshold=threshold
    ),
}

In [30]:
def get_api_key(platform, file_path):
    """Return the API key stored for ``platform`` in a ``name = "key"`` style file.

    Each line containing ``=`` is treated as ``<platform> = <key>``; surrounding
    whitespace and double quotes around the key are removed. Lines without
    ``=`` are ignored.

    Args:
        platform: Name to look up (compared exactly, after stripping whitespace).
        file_path: Path of the keys file.

    Returns:
        The key of the first line whose name equals ``platform``, or ``None``
        when no line matches.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    with open(file_path, "r") as file:
        for line in file:
            # Split on the FIRST "=" only, so a key that itself contains "="
            # (e.g. base64 padding) is returned whole instead of truncated.
            name, separator, value = line.partition("=")
            if separator and name.strip() == platform:
                return value.strip().strip('"')
    return None


# Prefer the MISTRAL_API_KEY environment variable. Otherwise read the key from a
# local keys file; its location can be overridden with MISTRAL_KEYS_FILE so the
# notebook is not tied to one machine's absolute path.
key = os.environ.get("MISTRAL_API_KEY") or get_api_key(
    "mistral",
    os.environ.get("MISTRAL_KEYS_FILE", "/Users/naimsassine/Desktop/DSAI/keys"),
)

In [32]:
# Conversation history sent to the model. Later cells append the assistant's
# tool call and the tool result to this same list.
messages = [
    {"role": "user", "content": "give me the latest selling price of the UCB stock"}
]

In [33]:
import os
from mistralai import Mistral


# `key` is produced by the get_api_key(...) cell above
api_key = key
model = "mistral-small-latest"

client = Mistral(api_key=api_key)
# First round-trip: send the user question together with the tool schema.
# NOTE(review): tool_choice="any" is understood to force the model to call a
# tool rather than answer directly; confirm against the Mistral docs.
response = client.chat.complete(
    model=model,
    messages=messages,
    tools=tools,
    tool_choice="any",
)
response

ChatCompletionResponse(id='ff3982d1d77645f8aa92263b4b6966a8', object='chat.completion', model='mistral-small-latest', usage=UsageInfo(prompt_tokens=105, completion_tokens=30, total_tokens=135), created=1731169613, choices=[ChatCompletionChoice(index=0, message=AssistantMessage(content='', tool_calls=[ToolCall(function=FunctionCall(name='retrieve_stock_etf_info', arguments='{"stock_name": "UCB"}'), id='oi1p9fY7U', type='function')], prefix=False, role='assistant'), finish_reason='tool_calls')])

In [34]:
# Keep the assistant's tool-call message in the history so the follow-up request
# carries the full conversation. Re-running this cell appends it again.
messages.append(response.choices[0].message)

In [35]:
import json

# Only the first tool call of the assistant message is handled
tool_call = response.choices[0].message.tool_calls[0]
function_name = tool_call.function.name
# The arguments arrive as a JSON-encoded string; decode them into a dict of keyword arguments
function_params = json.loads(tool_call.function.arguments)
print("\nfunction_name: ", function_name, "\nfunction_params: ", function_params)


function_name:  retrieve_stock_etf_info 
function_params:  {'stock_name': 'UCB'}


In [40]:
# Look up the callable registered under the name the model requested and call it
# with the decoded arguments as keyword arguments. A name that is not registered
# in names_to_functions raises KeyError.
function_result = names_to_functions[function_name](**function_params)
function_result

'{"information we have on stock/etf": "Yahoo ID of Stock: UCB.BR\\nlast traded price: 178.0\\ncurrency: EUR\\nstock value change since market open: -0.25\\nstock value change since market last close: -0.75\\nstock exchange on which stock is traded: BRU\\nvolume: 115187\\nturnover: 20503286.0\\nmarket Cap: 33770160128\\nnumber of shares outstanding: 189720000\\ncompany information in json format: [{\'maxAge\': 1, \'name\': \'Mr. Jean-Christophe  Tellier\', \'age\': 64, \'title\': \'CEO & Executive Director\', \'yearBorn\': 1959, \'fiscalYear\': 2023, \'totalPay\': 4099149, \'exercisedValue\': 0, \'unexercisedValue\': 1781620}, {\'maxAge\': 1, \'name\': \'Ms. Sandrine  Dufour CFA\', \'age\': 57, \'title\': \'Executive VP, CFO & Chief Corporate Development\', \'yearBorn\': 1966, \'fiscalYear\': 2023, \'exercisedValue\': 0, \'unexercisedValue\': 0}, {\'maxAge\': 1, \'name\': \'Dr. Kirsten  Lund-Jurgensen Ph.D.\', \'age\': 63, \'title\': \'Executive Vice President of Supply & Technology Sol

In [41]:
# Feed the tool output back to the model, linked to the originating tool call by its id
messages.append(
    {
        "role": "tool",
        "name": function_name,
        "content": function_result,
        "tool_call_id": tool_call.id,
    }
)

# Second round-trip: no tools are passed this time; the model is asked to
# produce the final answer from the conversation, which now includes the tool result.
response = client.chat.complete(model=model, messages=messages)
response.choices[0].message.content

'The latest selling price of the UCB stock is €178.0. The stock value has changed by -0.75 since the last close and by -0.25 since the market opened.'