# 1. Prompting

In [None]:
# !pip3 install mistralai

In [6]:
import os
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage

In [7]:
# os.environ["MISTRAL_API_KEY"] = "nope :)"

api_key = os.environ["MISTRAL_API_KEY"]
model = "mistral-large-latest"
# model = "open-mistral-7b"
# model = "open-mixtral-8x22b"

# These open source models could be downloaded for free and hosted by yourself
# but if you use the API it would cost a bit

[Pricing Guide](https://mistral.ai/technology/#pricing) <-- Here you can see how much each model costs

In [8]:
client = MistralClient(api_key=api_key)

In [9]:
def mistral(user_message, 
            model="open-mixtral-8x22b",
            is_json=False):
    client = MistralClient(api_key=api_key)
    messages = [ChatMessage(role="user", content=user_message)]

    if is_json:
        chat_response = client.chat(
            model=model, 
            messages=messages,
            response_format={"type": "json_object"})
    else:
        chat_response = client.chat(
            model=model, 
            messages=messages)

    return chat_response.choices[0].message.content

In [10]:
mistral("Hello, what can you do?", model=model)

"Hello! I'm an assistant designed to help you with a variety of tasks. I can provide information, answer questions, set reminders, help manage your schedule, and much more. I'm also capable of engaging in friendly conversation. How can I assist you today?"

In [11]:
response = mistral("Tell me a great joke", model=model)
print(response)

Sure, here's a light-hearted joke for you:

Why don't scientists trust atoms?

Because they make up everything!


## Classification

In [12]:
prompt = """
    You are a bank customer service bot. 
    Your task is to assess customer intent and categorize customer 
    inquiry after <<<>>> into one of the following predefined categories:
    
    card arrival
    change pin
    exchange rate
    country support 
    cancel transfer
    charge dispute
    
    If the text doesn't fit into any of the above categories, 
    classify it as:
    customer service
    
    You will only respond with the predefined category. 
    Do not provide explanations or notes. 
    
    ###
    Here are some examples:
    
    Inquiry: How do I know if I will get my card, or if it is lost? I am concerned about the delivery process and would like to ensure that I will receive my card as expected. Could you please provide information about the tracking process for my card, or confirm if there are any indicators to identify if the card has been lost during delivery?
    Category: card arrival
    Inquiry: I am planning an international trip to Paris and would like to inquire about the current exchange rates for Euros as well as any associated fees for foreign transactions.
    Category: exchange rate 
    Inquiry: What countries are getting support? I will be traveling and living abroad for an extended period of time, specifically in France and Germany, and would appreciate any information regarding compatibility and functionality in these regions.
    Category: country support
    Inquiry: Can I get help starting my computer? I am having difficulty starting my computer, and would appreciate your expertise in helping me troubleshoot the issue. 
    Category: customer service
    ###
    
    <<<
    Inquiry: {inquiry}
    >>>
    Category:
"""

In [13]:
response = mistral(f"Please correct the spelling and grammar of \
this prompt and return a text that is the same prompt,\
with the spelling and grammar fixed: {prompt}", model)

In [14]:
print(response)

You are a bank customer service bot.
Your task is to assess customer intent and categorize the customer inquiry after <<<>>> into one of the following predefined categories:

* card arrival
* change pin
* exchange rate
* country support
* cancel transfer
* charge dispute

If the text doesn't fit into any of the above categories, classify it as:

* customer service

You will only respond with the predefined category. Do not provide explanations or notes.

---

Here are some examples:

Inquiry: How do I know if I will receive my card, or if it is lost? I am concerned about the delivery process and would like to ensure that I will receive my card as expected. Could you please provide information about the tracking process for my card, or confirm if there are any indicators to identify if the card has been lost during delivery?
Category: card arrival

Inquiry: I am planning an international trip to Paris and would like to inquire about the current exchange rates for Euros as well as any as

In [15]:
mistral(
    response.format(
        inquiry="I am inquiring about the availability of your cards in the EU"
    ), model=model
)

'country support'

## Information Extraction with JSON Mode

In [16]:
medical_notes = """
A 60-year-old male patient, Mr. Johnson, presented with symptoms
of increased thirst, frequent urination, fatigue, and unexplained
weight loss. Upon evaluation, he was diagnosed with diabetes,
confirmed by elevated blood sugar levels. Mr. Johnson's weight
is 210 lbs. He has been prescribed Metformin to be taken twice daily
with meals. It was noted during the consultation that the patient is
a current smoker. 
"""

In [17]:
prompt = f"""
Extract information from the following medical notes:
{medical_notes}

Return json format with the following JSON schema: 

{{
        "age": {{
            "type": "integer"
        }},
        "gender": {{
            "type": "string",
            "enum": ["male", "female", "other"]
        }},
        "diagnosis": {{
            "type": "string",
            "enum": ["migraine", "diabetes", "arthritis", "acne"]
        }},
        "weight": {{
            "type": "integer"
        }},
        "smoking": {{
            "type": "string",
            "enum": ["yes", "no"]
        }}
}}
"""

In [18]:
response = mistral(prompt, model, is_json=True)
print(response)

{"./medical-notes": {"age": 60, "gender": "male", "diagnosis": "diabetes", "weight": 210, "smoking": "yes"}}


## Personalization

In [19]:
email = """
Dear mortgage lender, 

What's your 30-year fixed-rate APR, how is it compared to the 15-year 
fixed rate?

Regards,
Anna
"""

In [20]:
prompt = f"""

You are a mortgage lender customer service bot, and your task is to 
create personalized email responses to address customer questions.
Answer the customer's inquiry using the provided facts below. Ensure 
that your response is clear, concise, and directly addresses the 
customer's question. Address the customer in a friendly and 
professional manner. Sign the email with "Lender Customer Support."   
      
# Facts
30-year fixed-rate: interest rate 6.403%, APR 6.484%
20-year fixed-rate: interest rate 6.329%, APR 6.429%
15-year fixed-rate: interest rate 5.705%, APR 5.848%
10-year fixed-rate: interest rate 5.500%, APR 5.720%
7-year ARM: interest rate 7.011%, APR 7.660%
5-year ARM: interest rate 6.880%, APR 7.754%
3-year ARM: interest rate 6.125%, APR 7.204%
30-year fixed-rate FHA: interest rate 5.527%, APR 6.316%
30-year fixed-rate VA: interest rate 5.684%, APR 6.062%

# Email
{email}
"""

In [21]:
response = mistral(prompt, model)
print(response)

Subject: Information on Our 30-Year and 15-Year Fixed-Rate Mortgages

Dear Anna,

Thank you for reaching out to us with your inquiry about our fixed-rate mortgage options. I'm happy to provide you with the information you're looking for.

Our current 30-year fixed-rate mortgage has an interest rate of 6.403% and an Annual Percentage Rate (APR) of 6.484%. On the other hand, our 15-year fixed-rate mortgage comes with an interest rate of 5.705% and an APR of 5.848%.

Comparatively, the 15-year fixed-rate mortgage has a lower interest rate and APR than the 30-year fixed-rate mortgage. While the monthly payments for the 15-year term may be higher due to the shorter repayment period, the overall interest you'll pay over the life of the loan will be significantly less.

I hope this information helps you make an informed decision about which option is best suited for your financial goals. If you have any further questions or would like more detailed information, please don't hesitate to reach 

## Summarization

In [22]:
newsletter = """
European AI champion Mistral AI unveiled new large language models and formed an alliance with Microsoft. 

What’s new: Mistral AI introduced two closed models, Mistral Large and Mistral Small (joining Mistral Medium, which debuted quietly late last year). Microsoft invested $16.3 million in the French startup, and it agreed to distribute Mistral Large on its Azure platform and let Mistral AI use Azure computing infrastructure. Mistral AI makes the new models available to try for free here and to use on its La Plateforme and via custom deployments.

Model specs: The new models’ parameter counts, architectures, and training methods are undisclosed. Like the earlier, open source Mistral 7B and Mixtral 8x7B, they can process 32,000 tokens of input context. 

Mistral Large achieved 81.2 percent on the MMLU benchmark, outperforming Anthropic’s Claude 2, Google’s Gemini Pro, and Meta’s Llama 2 70B, though falling short of GPT-4. Mistral Small, which is optimized for latency and cost, achieved 72.2 percent on MMLU.
Both models are fluent in French, German, Spanish, and Italian. They’re trained for function calling and JSON-format output.
Microsoft’s investment in Mistral AI is significant but tiny compared to its $13 billion stake in OpenAI and Google and Amazon’s investments in Anthropic, which amount to $2 billion and $4 billion respectively.
Mistral AI and Microsoft will collaborate to train bespoke models for customers including European governments.
Behind the news: Mistral AI was founded in early 2023 by engineers from Google and Meta. The French government has touted the company as a home-grown competitor to U.S.-based leaders like OpenAI. France’s representatives in the European Commission argued on Mistral’s behalf to loosen the European Union’s AI Act oversight on powerful AI models. 

Yes, but: Mistral AI’s partnership with Microsoft has divided European lawmakers and regulators. The European Commission, which already was investigating Microsoft’s agreement with OpenAI for potential breaches of antitrust law, plans to investigate the new partnership as well. Members of President Emmanuel Macron’s Renaissance party criticized the deal’s potential to give a U.S. company access to European users’ data. However, other French lawmakers support the relationship.

Why it matters: The partnership between Mistral AI and Microsoft gives the startup crucial processing power for training large models and greater access to potential customers around the world. It gives the tech giant greater access to the European market. And it gives Azure customers access to a high-performance model that’s tailored to Europe’s unique regulatory environment.

We’re thinking: Mistral AI has made impressive progress in a short time, especially relative to the resources at its disposal as a startup. Its partnership with a leading hyperscaler is a sign of the tremendous processing and distribution power that remains concentrated in the large, U.S.-headquartered cloud companies.
"""

In [23]:
prompt = f"""
You are a commentator. Your task is to write a report on a newsletter. 
When presented with the newsletter, come up with interesting questions to ask,
and answer each question. 
Afterward, combine all the information and write a report in the markdown
format. 

# Newsletter: 
{newsletter}

# Instructions: 
## Summarize:
In clear and concise language, summarize the key points and themes 
presented in the newsletter.

## Interesting Questions: 
Generate three distinct and thought-provoking questions that can be 
asked about the content of the newsletter. For each question:
- After "Q: ", describe the problem 
- After "A: ", provide a detailed explanation of the problem addressed 
in the question.
- Enclose the ultimate answer in <>.

## Write a analysis report
Using the summary and the answers to the interesting questions, 
create a comprehensive report in Markdown format. 
"""

In [24]:
response = mistral(prompt, model)
print(response)

# Summary:

European AI company Mistral AI has launched two new large language models, Mistral Large and Mistral Small, and has formed a strategic partnership with Microsoft. The models are available for free trial and use on Mistral's La Plateforme and via custom deployments. Mistral Large outperformed several other models on the MMLU benchmark, while Mistral Small is optimized for latency and cost. Both models support multiple languages and are trained for function calling and JSON-format output. Microsoft's investment in Mistral AI is significant but smaller compared to its investment in OpenAI and those of Google and Amazon in Anthropic. The partnership has divided European lawmakers and regulators, with concerns over potential antitrust violations and data access.

# Interesting Questions:

Q1: How significant is the partnership between Mistral AI and Microsoft in terms of resources and market access for both companies?
A1: The partnership is highly significant for both parties. M

# 2. Model Selection

- Mistral Small: Good for simple tasks, fast inference, lower cost.
- Mistral Medium: Good for intermediate tasks such as language transformation.
- Mistral Large: Good for complex tasks that require advanced reasoning.

## Mistral Large

### Math

In [25]:
prompt = """
Calculate the difference in payment dates between the two \
customers whose payment amounts are closest to each other \
in the following dataset. Do not write code.

# dataset: 
'{
  "transaction_id":{"0":"T1001","1":"T1002","2":"T1003","3":"T1004","4":"T1005"},
    "customer_id":{"0":"C001","1":"C002","2":"C003","3":"C002","4":"C001"},
    "payment_amount":{"0":125.5,"1":89.99,"2":120.0,"3":54.3,"4":210.2},
"payment_date":{"0":"2021-10-05","1":"2021-10-06","2":"2021-10-07","3":"2021-10-05","4":"2021-10-08"},
    "payment_status":{"0":"Paid","1":"Unpaid","2":"Paid","3":"Paid","4":"Pending"}
}'
"""

In [26]:
response_small = mistral(prompt, model="mistral-small-latest")
print(response_small)

First, let's find the two customers with the closest payment amounts:

1. Customer C001 has a payment amount of 125.5 and Customer C003 has a payment amount of 120.0, which are the closest amounts in the dataset.

Next, let's find the difference in their payment dates:

1. Customer C001's payment date is 2021-10-05, and Customer C003's payment date is 2021-10-07.

The difference in payment dates between these two customers is 2 days.


In [27]:
response_large = mistral(prompt, model)
print(response_large)

To solve this problem without writing code, we'll first need to identify the two closest payment amounts and then calculate the difference in payment dates between the corresponding transactions.

1. Find the two closest payment amounts:
- Sort the payment amounts in ascending order: 54.3, 89.99, 120.0, 125.5, 210.2
- The smallest difference between any two payments is 125.5 - 120.0 = 5.5 (between transaction T1001 and T1003)

2. Identify the corresponding transactions and their payment dates:
- Transaction T1001: payment date 2021-10-05
- Transaction T1003: payment date 2021-10-07

3. Calculate the difference in payment dates:
- The difference between 2021-10-07 (T1003) and 2021-10-05 (T1001) is 2 days.

So, the difference in payment dates between the two customers whose payment amounts are closest to each other in the given dataset is 2 days.


### Expense Reporting

In [28]:
transactions = """
McDonald's: 8.40
Safeway: 10.30
Carrefour: 15.00
Toys R Us: 20.50
Panda Express: 10.20
Beanie Baby Outlet: 25.60
World Food Wraps: 22.70
Stuffed Animals Shop: 45.10
Sanrio Store: 85.70
"""

prompt = f"""
Given the purchase details, how much did I spend on each category:
1) restaurants
2) groceries
3) stuffed animals and props
{transactions}
"""

In [29]:
response_small = mistral(prompt, model="mistral-small-latest")
print(response_small)

To calculate the total amount spent on each category, we need to group the purchases accordingly:

1) Restaurants:
- McDonald's: 8.40
- Panda Express: 10.20
Total spent on restaurants: 8.40 + 10.20 = 18.60

2) Groceries:
- Safeway: 10.30
- Carrefour: 15.00
- World Food Wraps: 22.70
Total spent on groceries: 10.30 + 15.00 + 22.70 = 48.00

3) Stuffed animals and props:
- Toys R Us: 20.50
- Beanie Baby Outlet: 25.60
- Stuffed Animals Shop: 45.10
- Sanrio Store: 85.70
Total spent on stuffed animals and props: 20.50 + 25.60 + 45.10 + 85.70 = 176.90

So, you spent $18.60 on restaurants, $48.00 on groceries, and $176.90 on stuffed animals and props.


In [30]:
response_large = mistral(prompt, model="mistral-large-latest")
print(response_large)

Sure, I can help you categorize your spending. Here's the breakdown:

1) Restaurants:
   - McDonald's: $8.40
   - Panda Express: $10.20
   - World Food Wraps: $22.70
   - Total for restaurants: $8.40 + $10.20 + $22.70 = $41.30

2) Groceries:
   - Safeway: $10.30
   - Carrefour: $15.00
   - Total for groceries: $10.30 + $15.00 = $25.30

3) Stuffed animals and props:
   - Toys R Us: $20.50
   - Beanie Baby Outlet: $25.60
   - Stuffed Animals Shop: $45.10
   - Sanrio Store: $85.70
   - Total for stuffed animals and props: $20.50 + $25.60 + $45.10 + $85.70 = $176.90

So, you spent $41.30 on restaurants, $25.30 on groceries, and $176.90 on stuffed animals and props.


### Writing and checking code

In [31]:
user_message = """
Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.

You may assume that each input would have exactly one solution, and you may not use the same element twice.

You can return the answer in any order.

Your code should pass these tests:

assert twoSum([2,7,11,15], 9) == [0,1]
assert twoSum([3,2,4], 6) == [1,2]
assert twoSum([3,3], 6) == [0,1]
"""

In [32]:
print(mistral(user_message, model="mistral-large-latest"))

Here's a Python solution for the problem:

```python
def twoSum(nums, target):
    num_dict = {}
    for i, num in enumerate(nums):
        complement = target - num
        if complement in num_dict:
            return [num_dict[complement], i]
        num_dict[num] = i
    return []

assert twoSum([2,7,11,15], 9) == [0,1]
assert twoSum([3,2,4], 6) == [1,2]
assert twoSum([3,3], 6) == [0,1]
```

In this solution, we first create an empty dictionary called `num_dict`. Then, we iterate through the `nums` list with enumerate to keep track of the current index `i` and number `num`.

For each number, calculate its complement (i.e., `target - num`). If the complement is found in the `num_dict`, we have found the pair of numbers that sum up to the target, so we return their indices. Otherwise, add the current number and its index to the `num_dict`.

Finally, if no pair is found, the function returns an empty list (though it's not necessary in this case, as the question states there is always 

### Natively Fluent in English, French, Spanish, German, and Italian

In [33]:
user_message = """
Lequel est le plus lourd une livre de fer ou un kilogramme de plume
"""

In [34]:
print(mistral(user_message, model="mistral-large-latest"))

Une livre de fer pèse environ 0,453 kilogramme, tandis qu'un kilogramme de plumes pèse un kilogramme. Donc, un kilogramme de plumes est plus lourd qu'une livre de fer. Cependant, il est important de noter que le volume occupé par un kilogramme de plumes serait beaucoup plus grand que celui d'une livre de fer, car les plumes sont moins denses que le fer.


# 3. Function Calling

In [35]:
!pip3 install pandas "mistralai>=0.1.2"

Collecting pandas
  Using cached pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl.metadata (19 kB)
Collecting numpy>=1.26.0 (from pandas)
  Using cached numpy-2.0.0-cp312-cp312-macosx_14_0_arm64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl (11.3 MB)
Using cached numpy-2.0.0-cp312-cp312-macosx_14_0_arm64.whl (5.0 MB)
Using cached pytz-2024.1-py2.py3-none-any.whl (505 kB)
Using cached tzdata-2024.1-py2.py3-none-any.whl (345 kB)
Installing collected packages: pytz, tzdata, numpy, pandas
Successfully installed numpy-2.0.0 pandas-2.2.2 pytz-2024.1 tzdata-2024.1


In [36]:
import pandas as pd

In [37]:
data = {
    "transaction_id": ["T1001", "T1002", "T1003", "T1004", "T1005"],
    "customer_id": ["C001", "C002", "C003", "C002", "C001"],
    "payment_amount": [125.50, 89.99, 120.00, 54.30, 210.20],
    "payment_date": [
        "2021-10-05",
        "2021-10-06",
        "2021-10-07",
        "2021-10-05",
        "2021-10-08",
    ],
    "payment_status": ["Paid", "Unpaid", "Paid", "Paid", "Pending"],
}
df = pd.DataFrame(data)

In [38]:
df.head()

Unnamed: 0,transaction_id,customer_id,payment_amount,payment_date,payment_status
0,T1001,C001,125.5,2021-10-05,Paid
1,T1002,C002,89.99,2021-10-06,Unpaid
2,T1003,C003,120.0,2021-10-07,Paid
3,T1004,C002,54.3,2021-10-05,Paid
4,T1005,C001,210.2,2021-10-08,Pending


### Without funciton calling

In [43]:
data = """
    "transaction_id": ["T1001", "T1002", "T1003", "T1004", "T1005"],
    "customer_id": ["C001", "C002", "C003", "C002", "C001"],
    "payment_amount": [125.50, 89.99, 120.00, 54.30, 210.20],
    "payment_date": [
        "2021-10-05",
        "2021-10-06",
        "2021-10-07",
        "2021-10-05",
        "2021-10-08",
    ],
    "payment_status": ["Paid", "Unpaid", "Paid", "Paid", "Pending"],
}
"""
transaction_id = "T1001"

prompt = f"""
Given the following data, what is the payment status for \
 transaction_id={transaction_id}?

data:
{data}

"""

In [44]:
response = mistral(prompt, model)
print(response)

Based on the provided data, the payment status for transaction_id T1001 is "Paid".


### With function calling

### Step 1. User: specify tools and query

#### Tools

In [45]:
import json

In [46]:
def retrieve_payment_status(df: data, transaction_id: str) -> str:
    if transaction_id in df.transaction_id.values:
        return json.dumps(
            {"status": df[df.transaction_id == transaction_id].payment_status.item()}
        )
    return json.dumps({"error": "transaction id not found."})

In [48]:
status = retrieve_payment_status(df, transaction_id="T1001")
print(status)

{"status": "Paid"}


In [49]:
type(status)

str

In [50]:
def retrieve_payment_date(df: data, transaction_id: str) -> str:
    if transaction_id in df.transaction_id.values:
        return json.dumps(
            {"date": df[df.transaction_id == transaction_id].payment_date.item()}
        )
    return json.dumps({"error": "transaction id not found."})

In [51]:
date = retrieve_payment_date(df, transaction_id="T1002")
print(date)

{"date": "2021-10-06"}


In [52]:
tool_payment_status = {
    "type": "function",
    "function": {
        "name": "retrieve_payment_status",
        "description": "Get payment status of a transaction",
        "parameters": {
            "type": "object",
            "properties": {
                "transaction_id": {
                    "type": "string",
                    "description": "The transaction id.",
                }
            },
            "required": ["transaction_id"],
        },
    },
}

In [53]:
type(tool_payment_status)

dict

In [54]:
tool_payment_date = {
    "type": "function",
    "function": {
        "name": "retrieve_payment_date",
        "description": "Get payment date of a transaction",
        "parameters": {
            "type": "object",
            "properties": {
                "transaction_id": {
                    "type": "string",
                    "description": "The transaction id.",
                }
            },
            "required": ["transaction_id"],
        },
    },
}

In [55]:
type(tool_payment_status)

dict

In [56]:
tools = [tool_payment_status, tool_payment_date]

In [57]:
type(tools)

list

In [58]:
tools

[{'type': 'function',
  'function': {'name': 'retrieve_payment_status',
   'description': 'Get payment status of a transaction',
   'parameters': {'type': 'object',
    'properties': {'transaction_id': {'type': 'string',
      'description': 'The transaction id.'}},
    'required': ['transaction_id']}}},
 {'type': 'function',
  'function': {'name': 'retrieve_payment_date',
   'description': 'Get payment date of a transaction',
   'parameters': {'type': 'object',
    'properties': {'transaction_id': {'type': 'string',
      'description': 'The transaction id.'}},
    'required': ['transaction_id']}}}]

#### functools

In [59]:
import functools

In [60]:
names_to_functions = {
    "retrieve_payment_status": functools.partial(retrieve_payment_status, df=df),
    "retrieve_payment_date": functools.partial(retrieve_payment_date, df=df),
}

In [61]:
names_to_functions["retrieve_payment_status"](transaction_id="T1001")

'{"status": "Paid"}'

In [62]:
tools

[{'type': 'function',
  'function': {'name': 'retrieve_payment_status',
   'description': 'Get payment status of a transaction',
   'parameters': {'type': 'object',
    'properties': {'transaction_id': {'type': 'string',
      'description': 'The transaction id.'}},
    'required': ['transaction_id']}}},
 {'type': 'function',
  'function': {'name': 'retrieve_payment_date',
   'description': 'Get payment date of a transaction',
   'parameters': {'type': 'object',
    'properties': {'transaction_id': {'type': 'string',
      'description': 'The transaction id.'}},
    'required': ['transaction_id']}}}]

#### User query

In [63]:
from mistralai.models.chat_completion import ChatMessage

chat_history = [
    ChatMessage(role="user", content="What's the status of my transaction?")
]

### Step 2. Model: Generate function arguments

In [65]:
response = client.chat(
    model=model, messages=chat_history, tools=tools, tool_choice="auto"
)

response

ChatCompletionResponse(id='f431b7d633f7448b9d3ec742bdf10b21', object='chat.completion', created=1719736113, model='mistral-large-latest', choices=[ChatCompletionResponseChoice(index=0, message=ChatMessage(role='assistant', content='To find out the status of your transaction, I need the transaction ID. Please provide me with the transaction ID.', name=None, tool_calls=None, tool_call_id=None), finish_reason=<FinishReason.stop: 'stop'>)], usage=UsageInfo(prompt_tokens=161, total_tokens=184, completion_tokens=23))

In [67]:
print(response.choices[0].message.content)

To find out the status of your transaction, I need the transaction ID. Please provide me with the transaction ID.


#### Save chat history

In [68]:
chat_history.append(
    ChatMessage(role="assistant", content=response.choices[0].message.content)
)
chat_history.append(ChatMessage(role="user", content="My transaction ID is T1001."))
chat_history

[ChatMessage(role='user', content="What's the status of my transaction?", name=None, tool_calls=None, tool_call_id=None),
 ChatMessage(role='assistant', content='To find out the status of your transaction, I need the transaction ID. Please provide me with the transaction ID.', name=None, tool_calls=None, tool_call_id=None),
 ChatMessage(role='user', content='My transaction ID is T1001.', name=None, tool_calls=None, tool_call_id=None)]

In [69]:
response = client.chat(
    model=model, messages=chat_history, tools=tools, tool_choice="auto"
)

In [70]:
print(response.choices[0].message)

role='assistant' content='' name=None tool_calls=[ToolCall(id='fYw57YWGJ', type=<ToolType.function: 'function'>, function=FunctionCall(name='retrieve_payment_status', arguments='{"transaction_id": "T1001"}'))] tool_call_id=None


In [71]:
chat_history.append(response.choices[0].message)

### Step 3. User: Execute function to obtain tool results

In [72]:
tool_function = response.choices[0].message.tool_calls[0].function
print(tool_function)

name='retrieve_payment_status' arguments='{"transaction_id": "T1001"}'


In [73]:
tool_function.name

'retrieve_payment_status'

In [74]:
tool_function.arguments

'{"transaction_id": "T1001"}'

In [75]:
args = json.loads(tool_function.arguments)
print(args)

{'transaction_id': 'T1001'}


In [76]:
function_result = names_to_functions[tool_function.name](**args)
function_result

'{"status": "Paid"}'

In [77]:
tool_msg = ChatMessage(role="tool", name=tool_function.name, content=function_result)
chat_history.append(tool_msg)

In [78]:
chat_history

[ChatMessage(role='user', content="What's the status of my transaction?", name=None, tool_calls=None, tool_call_id=None),
 ChatMessage(role='assistant', content='To find out the status of your transaction, I need the transaction ID. Please provide me with the transaction ID.', name=None, tool_calls=None, tool_call_id=None),
 ChatMessage(role='user', content='My transaction ID is T1001.', name=None, tool_calls=None, tool_call_id=None),
 ChatMessage(role='assistant', content='', name=None, tool_calls=[ToolCall(id='fYw57YWGJ', type=<ToolType.function: 'function'>, function=FunctionCall(name='retrieve_payment_status', arguments='{"transaction_id": "T1001"}'))], tool_call_id=None),
 ChatMessage(role='tool', content='{"status": "Paid"}', name='retrieve_payment_status', tool_calls=None, tool_call_id=None)]

### Step 4. Model: Generate final answer

In [79]:
response = client.chat(model=model, messages=chat_history)
response.choices[0].message.content

'The status of your transaction T1001 is "Paid". Is there anything else I can assist you with?'

# 4. RAG from Scratch

In [80]:
!pip3 install faiss-cpu "mistralai>=0.1.2"

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp312-cp312-macosx_11_0_arm64.whl.metadata (3.7 kB)
Collecting numpy<2.0,>=1.0 (from faiss-cpu)
  Using cached numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl.metadata (61 kB)
Downloading faiss_cpu-1.8.0.post1-cp312-cp312-macosx_11_0_arm64.whl (6.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
[?25hUsing cached numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl (13.7 MB)
Installing collected packages: numpy, faiss-cpu
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.0
    Uninstalling numpy-2.0.0:
      Successfully uninstalled numpy-2.0.0
Successfully installed faiss-cpu-1.8.0.post1 numpy-1.26.4


In [82]:
URL = "https://www.deeplearning.ai/the-batch/issue-255/"

#### Parse with BeautifulSoup

In [84]:
!pip3 install requests bs4

Collecting requests
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting charset-normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl.metadata (33 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Using cached urllib3-2.2.2-py3-none-any.whl.metadata (6.4 kB)
Collecting beautifulsoup4 (from bs4)
  Using cached beautifulsoup4-4.12.3-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4->bs4)
  Using cached soupsieve-2.5-py3-none-any.whl.metadata (4.7 kB)
Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Using cached charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl (119 kB)
Using cached urllib3-2.2.2-py3-none-any.whl (121 kB)
Using cached beautifulsoup4-4.12.3-py3-none-any.whl (147 kB)
Using cached soupsieve-2.5-py3-none-any.whl (36 kB)
In

In [85]:
import requests
from bs4 import BeautifulSoup
import re

response = requests.get(
    URL
)
html_doc = response.text
soup = BeautifulSoup(html_doc, "html.parser")
tag = soup.find("div", re.compile("^prose--styled"))
text = tag.text
print(text)

Dear friends,On Monday, a number of large music labels sued AI music makers Suno and Udio for copyright infringement. Their lawsuit echoes The New York Times’ lawsuit against OpenAI in December. The question of what’s fair when it comes to AI software remains a difficult one. I spoke out in favor of OpenAI’s side in the earlier lawsuit. Humans can learn from online articles and use what they learn to produce novel works, so I’d like to be allowed to use AI to do so. Some people criticized my view as making an unjustifiable equivalence between humans and AI. This made me realize that people have at least two views of AI: I view AI as a tool we can use and direct to our own purposes, while some people see it as akin to a separate species, distinct from us, with its own goals and desires.If I’m allowed to build a house, I want to be allowed to use a hammer, saw, drill, or any other tool that might get the job done efficiently. If I’m allowed to read a webpage, I’d like to be allowed to re

In [86]:
# save as text file
file_name = "AI_monopolies_etc.txt"
with open(file_name, 'w') as file:
    file.write(text)

### Chunking

In [87]:
chunk_size = 512
chunks = [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]

In [88]:
len(chunks)

36

### Get embeddings

In [107]:
import numpy as np

In [113]:
def get_text_embedding(txt):
    embeddings_batch_response = client.embeddings(model="mistral-embed", input=txt)
    return embeddings_batch_response.data[0].embedding

In [123]:
import numpy as np

text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks]).astype('float')

### Store in a vector database

In [128]:
text_embeddings.shape

(36, 1024)

In [133]:
import faiss

# Make sure text_embeddings is a numpy array
assert isinstance(text_embeddings, np.ndarray)

# Add index to faiss index and cast text embedding to np.ndarray
index = faiss.IndexFlatIP(text_embeddings.shape[1])

In [135]:
type(text_embeddings)

numpy.ndarray

In [140]:
text_embeddings_copy = np.array(text_embeddings, copy=True, dtype=np.float32)

In [141]:
index.add(text_embeddings_copy)

ValueError: input not a numpy array

In [121]:
question = "What are the ways that AI can reduce emissions in Agriculture?"
question_embeddings = np.array([get_text_embedding(question)])

In [None]:
question_embeddings

### Search for similar chunks

In [None]:
D, I = index.search(question_embeddings, k=2)
print(I)

In [None]:
retrieved_chunk = [chunks[i] for i in I.tolist()[0]]
print(retrieved_chunk)

In [None]:
prompt = f"""
Context information is below.
---------------------
{retrieved_chunk}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""

In [None]:
from mistralai.models.chat_completion import ChatMessage


def mistral(user_message, model=model, is_json=False):
    messages = [ChatMessage(role="user", content=user_message)]

    if is_json:
        chat_response = client.chat(
            model=model, messages=messages, response_format={"type": "json_object"}
        )
    else:
        chat_response = client.chat(model=model, messages=messages)

    return chat_response.choices[0].message.content

# RAG notebook does not work...

---