In [1]:
import torch
import torch.nn.functional as F

from torch import Tensor
from transformers import AutoTokenizer, AutoModel


def last_token_pool(last_hidden_states: Tensor,
                    attention_mask: Tensor) -> Tensor:
    """Select, for each sequence in the batch, the hidden state of its last attended token.

    Args:
        last_hidden_states: Tensor indexed here as ``[batch, position, ...]``.
        attention_mask: Tensor indexed here as ``[batch, position]``; its row
            sums are used to locate the final attended position
            (assumes a 0/1 mask -- TODO confirm against the tokenizer).

    Returns:
        ``last_hidden_states[:, -1]`` when the last mask column sums to the
        batch size (left-padded or unpadded batch); otherwise, for every row,
        the hidden state at index ``attention_mask.sum(dim=1) - 1``.
    """
    num_rows = attention_mask.shape[0]

    # If the final column of the mask is set for every row, the final position
    # already holds each sequence's last token.
    if attention_mask[:, -1].sum() == num_rows:
        return last_hidden_states[:, -1]

    # Otherwise derive each row's last attended index from its mask sum and
    # gather one vector per row.
    last_positions = attention_mask.sum(dim=1) - 1
    row_ids = torch.arange(last_hidden_states.shape[0], device=last_hidden_states.device)
    return last_hidden_states[row_ids, last_positions]


def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the instructed query text from a task description and a query.

    Returns ``Instruct: <task_description>``, a newline, then ``Query: <query>``.
    """
    template = 'Instruct: {}\nQuery: {}'
    return template.format(task_description, query)


# Each query must come with a one-sentence instruction that describes the task
task = 'Given a web search query, retrieve relevant passages that answer the query'
queries = [
    get_detailed_instruct(task, 'how much protein should a female eat'),
    get_detailed_instruct(task, 'summit define')
]
# No need to add instruction for retrieval documents
documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments."
]
# Queries come first, documents second; the score matrix below relies on this order.
input_texts = queries + documents

tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-mistral-7b-instruct')
model = AutoModel.from_pretrained('intfloat/e5-mistral-7b-instruct')

max_length = 4096
# Tokenize the input texts
batch_dict = tokenizer(input_texts, max_length=max_length, padding=True, truncation=True, return_tensors='pt')

# Inference only: disable autograd so the forward pass does not keep
# activations around for a backward pass that never happens.
with torch.no_grad():
    outputs = model(**batch_dict)
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])

# normalize embeddings
embeddings = F.normalize(embeddings, p=2, dim=1)

# Rows = queries, columns = documents. Split on len(queries) rather than a
# hard-coded 2 so the cell keeps working when the lists above are edited.
num_queries = len(queries)
scores = (embeddings[:num_queries] @ embeddings[num_queries:].T) * 100
print(scores.tolist())


tokenizer_config.json:   0%|          | 0.00/981 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.28G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[[82.91365814208984, 47.97157669067383], [46.95110321044922, 81.74346923828125]]


In [2]:
from litellm import completion
import os
import dotenv

# Load variables from a local .env file (if present) into the process environment.
dotenv.load_dotenv()

## API keys
# The previous `os.environ[K] = os.getenv(K)` was a no-op when K was set and
# raised an opaque "TypeError: str expected, not NoneType" when it was not.
# Check for the key explicitly so a missing key produces a readable error.
if not os.getenv("ANTHROPIC_API_KEY"):
    raise RuntimeError("ANTHROPIC_API_KEY is not set; add it to the environment or to .env")

messages = [{ "content": "Hello, how are you?","role": "user"}]

# openai call (requires OPENAI_API_KEY)
# response = completion(model="gpt-3.5-turbo", messages=messages)

# anthropic call
response = completion(model="claude-3-5-sonnet-20240620", messages=messages)
print(response.choices[0].message.content)

# gemini call
if not os.getenv("GEMINI_API_KEY"):
    raise RuntimeError("GEMINI_API_KEY is not set; add it to the environment or to .env")
response = completion(
    model="gemini/gemini-pro",
    messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}]
)
print(response.choices[0].message.content)

Hello! As an AI language model, I don't have feelings, but I'm functioning well and ready to assist you. How can I help you today?
```python
def say_hi(name):
  """Says hi to a person.

  Args:
    name: The name of the person to say hi to.

  Returns:
    A string saying hi to the person.
  """

  return f"Hi, {name}!"
```


In [30]:
def llm(system_prompt: str, user_prompt: str) -> str:
    ''' Send a system prompt and a user prompt to Claude 3 Haiku through litellm
    and return the text of the first choice. Change the model name below to
    plug in a different LLM. '''
    # ensure your LLM imports are all within this function
    from litellm import completion

    # One system turn followed by one user turn.
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # define your own LLM here
    reply = completion(model='claude-3-haiku-20240307', temperature=0, messages=chat)
    first_choice = reply.choices[0]
    return first_choice.message.content

# Smoke test: call llm() once with a sentiment-classification system prompt.
# The call is the last expression in the cell, so the returned string is shown as the cell output.
llm(system_prompt = 'You are a classifier to classify the sentiment of a sentence, do not mention the preamble.', 
    user_prompt = 'It is a hot and sunny day')

'The sentiment of the sentence "It is a hot and sunny day" is positive. The description of the weather as "hot and sunny" suggests a pleasant and enjoyable day, which indicates a positive sentiment.'

In [31]:
from strictjson import *
import os
import dotenv
# Load variables from a local .env file (if present) into the process environment.
dotenv.load_dotenv()

# The previous `os.environ[K] = os.getenv(K)` was a no-op when K was set and
# raised an opaque "TypeError: str expected, not NoneType" when it was not.
# Check for the key explicitly so a missing key produces a readable error.
if not os.getenv('ANTHROPIC_API_KEY'):
    raise RuntimeError('ANTHROPIC_API_KEY is not set; add it to the environment or to .env')

# Ask for a dict with exactly these keys; each value describes what the field
# should contain. `llm` is the two-argument wrapper defined in an earlier cell.
res = strict_json(system_prompt = 'You are a classifier',
                    user_prompt = 'It is a beautiful and sunny day',
                    output_format = {'Sentiment': 'Type of Sentiment',
                                    'Adjectives': 'Array of adjectives',
                                    'Words': 'Number of words'},
                    llm = llm)
print(res)

{'Sentiment': 'Positive', 'Adjectives': ['beautiful', 'sunny'], 'Words': 6}


In [58]:
import base64
import anthropic
import httpx

# NOTE(review): absolute, machine-specific path to a local file (not a URL,
# despite the name) -- point this at a project-relative location before sharing.
image_url = "/Users/ob1/projects/sgllm/resilience.png"
image_media_type = "image/png"
# Remote-image alternative:
# image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
# Read the local file inside a context manager so the handle is closed
# promptly instead of being left open until garbage collection.
with open(image_url, "rb") as image_file:
    image_data = base64.b64encode(image_file.read()).decode("utf-8")

# Send the image together with a text instruction, and force the reply to come
# back as a call to the `solve_math_problem` tool (see `tool_choice`).
message = anthropic.Anthropic().messages.create(
    model="claude-3-5-sonnet-20240620",
    max_tokens=1024,
    temperature=0,
    tools=[
        {
            "name": "solve_math_problem",
            "description": "Solve a math problem using well-structured JSON.",
            "input_schema": {
                "type": "object",
                "properties": {
                    "chain_of_thought": {
                        "type": "string",
                        "description": "Chain of thought for solving the math problem.",
                    },
                    "correct_answers": {
                        "type": "array",
                        "description": "Correct answers to the math problem.",
                    }
                },
                "required": ["correct_answers", "chain_of_thought"],
            },
        }
    ],
    tool_choice={"type": "tool", "name": "solve_math_problem"},
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image_media_type,
                        "data": image_data,
                    },
                },
                {"type": "text", "text": "Solve all math problems in the image."},
            ],
        }
    ],
)



In [59]:
# Print the content of the response from the forced tool call above
# (the captured output shows a list containing one ToolUseBlock).
print(message.content)

[ToolUseBlock(id='toolu_01PSdoRLz6BDopPZBdHjpvWp', input={'chain_of_thought': 'To solve the math problems in this image, we need to analyze the pattern of the word "RESILIENCE" and count the occurrences of the letter \'E\'.\n\nFor question (a):\n1. The word RESILIENCE has 3 E\'s.\n2. In the first 100 letters, there would be 3 complete repetitions of RESILIENCE (9 letters each), which is 27 letters.\n3. The remaining 73 letters would include 8 complete words (72 letters) plus 1 letter.\n4. So we have (3 * 3) + (8 * 3) + 0 = 9 + 24 = 33 E\'s in the first 100 letters.\n\nFor question (b):\n1. We need to find how many letters are there up to the 101st \'E\'.\n2. Each RESILIENCE has 3 E\'s and is 9 letters long.\n3. To get to the 101st \'E\', we need 33 full repetitions of RESILIENCE plus 2 more E\'s.\n4. 33 * 9 = 297 letters for the full repetitions\n5. The 34th repetition will contribute 7 more letters (RESILIE) to reach the 101st \'E\'\n6. Total letters: 297 + 7 = 304\n\nTherefore, there

In [64]:
# eval() executes its string argument as Python code; here the argument is a fixed literal.
# NOTE(review): never pass untrusted or model-generated text to eval().
eval("2 ** 8")

256

In [66]:
from sympy.abc import r

In [68]:

from sympy import pi, symbols

# Create the symbolic variable r (this rebinds the name `r` in the notebook namespace)
r = symbols("r")

# Build the symbolic expression pi * r**2; it is not evaluated numerically here
expr = pi * r**2

# Last expression in the cell, so the notebook displays it
expr

pi*r**2

In [69]:
# Evaluate expr to a floating-point number with the symbol r replaced by 5
expr.evalf(subs={r:5})

78.5398163397448