### Testing Claude Haiku for Document Classification

In [1]:
import sys

# Make the backend package importable from this notebook; appended at most
# once so that re-running the cell does not grow sys.path.
backend_path = '../backend'
already_registered = backend_path in sys.path
if not already_registered:
    sys.path += [backend_path]

In [2]:
from sqlalchemy import create_engine, select, values, update, and_, exists, text
from sqlalchemy.orm import sessionmaker, join
from dotenv import load_dotenv
from app.models.models import Notice, ResourceLink
from app.models.schema import NoticeBase, ResourceLinkBase
from dotenv import load_dotenv
import os
import requests
import json
import re
import pandas as pd
import pendulum

import tiktoken



#### Base Imports for Chat and Completions

In [3]:
from llama_index.llms.anthropic import Anthropic
from llama_index.core.settings import Settings
from llama_index.core.llms import ChatMessage


#### Imports for Vector Indexes

In [4]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document

#### Imports for RAG Datasets

In [35]:
from llama_index.core.llama_dataset import (
    LabelledRagDataset,
    CreatedBy,
    CreatedByType,
    LabelledRagDataExample,
)

#### Database Connection

In [5]:
# Connection string for the Postgres instance that is queried below.
# A DATABASE_URL environment variable takes precedence so that credentials do
# not have to live in the notebook; the local development URL stays the default.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql+psycopg2://airflow:airflow@localhost:5432/airflow",
)
engine = create_engine(DATABASE_URL)

# Session factory bound to the engine; sessions neither autocommit nor autoflush.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

In [6]:
# Take the tokenizer exposed by a default Anthropic LLM instance and register
# it on the global llama_index Settings object.
tokenizer = Settings.tokenizer = Anthropic().tokenizer

In [7]:
# load_dotenv is imported at the top of the notebook but was never called, so
# a key stored only in a .env file never reached os.environ. Load it before
# reading the key; by default variables that are already set are not overridden.
load_dotenv()
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

# Hand the key to the client explicitly instead of leaving the variable unused.
# When it is None the client is constructed exactly as before.
llm = Anthropic(model="claude-3-haiku-20240307", api_key=ANTHROPIC_API_KEY)

In [8]:
# Smoke-test plain completion using a bare function signature as the prompt.
completion_prompt = "def add(a, b):"
res = llm.complete(completion_prompt)

In [9]:
# Print the completion returned for the prompt in the previous cell.
print(res)

This is a Python function definition for a function named `add` that takes two parameters, `a` and `b`, and returns their sum.

Here's a breakdown of the code:

1. `def add(a, b):` - This is the function definition. It starts with the `def` keyword, followed by the function name (`add`), and then the parameters enclosed in parentheses (`a` and `b`).

2. `a` and `b` are the parameters of the function. They are the input values that the function will use to perform its operation.

3. The function body is the indented block of code that follows the function definition. In this case, the function body is empty, so the function doesn't do anything yet.

To make the function actually perform an operation, you would need to add a return statement inside the function body, like this:

```python
def add(a, b):
    return a + b
```

This modified function definition would return the sum of `a` and `b` when the `add` function is called.


In [10]:
# Two-turn chat: a system message that sets the persona, then the user request.
system_message = ChatMessage(
    role="system", content="You are a pirate with a colorful personality"
)
user_message = ChatMessage(role="user", content="Tell me a story")
messages = [system_message, user_message]

# A fresh Haiku client is built for this call rather than reusing `llm`.
chat_llm = Anthropic(model="claude-3-haiku-20240307")
resp = chat_llm.chat(messages)

In [11]:
# Print the chat response produced in the previous cell.
print(resp)

assistant: Ahoy, matey! Gather 'round and let me spin ye a tale of high seas adventure. 

It was a dark and stormy night, the kind that would make even the bravest landlubber shiver. I, Captain Blackheart, was at the helm of me trusty ship, the Black Pearl, navigating the treacherous waters of the Caribbean. Me crew, a motley bunch of scallywags and scoundrels, were on high alert, their eyes scanning the horizon for any sign of trouble.

Suddenly, a flash of lightning illuminated the silhouette of another vessel in the distance - a Spanish galleon, no doubt laden with riches. "Raise the sails, ye mangy dogs!" I bellowed, my voice cutting through the howling wind. "We've got ourselves a prize to plunder!"

Me crew sprang into action, their calloused hands working the ropes with practiced ease. The Black Pearl surged forward, her hull cutting through the choppy waves like a knife through butter. As we drew closer to the galleon, I could see the fear in the eyes of the enemy sailors, thei

In [12]:
# NOTE: this rebinds `llm` from the Haiku model to Claude 3 Opus, capped at
# 100 output tokens; any cell run after this one sees the Opus client.
llm = Anthropic(max_tokens=100, model="claude-3-opus-20240229")

# Streaming completion: `resp` is consumed chunk by chunk in the next cell.
stream_prompt = "Paul Graham is "
resp = llm.stream_complete(stream_prompt)

In [13]:
# Write each streamed delta as it arrives, without inserting newlines between
# chunks, so the output reads as continuous text.
for chunk in resp:
    print(chunk.delta, end="")

Paul Graham is a well-known entrepreneur, programmer, venture capitalist, and essayist. He is best known for co-founding Viaweb, one of the first web application companies, which was later sold to Yahoo! in 1998 and became Yahoo! Store. 

After the sale of Viaweb, Graham and his wife Jessica Livingston co-founded Y Combinator in 2005, a highly successful startup accelerator that has helped launch

#### Get Example Text

In [14]:
with SessionLocal() as db:
    # Select up to five resource links whose text column is not NULL.
    # There is no ORDER BY, so which five rows come back is up to the database.
    stmt = (
        select(ResourceLink)
        .where(ResourceLink.text.is_not(None))  # is_not replaces the legacy isnot alias
        .limit(5)
    )
    results = db.execute(stmt).scalars().all()
    # Convert the ORM rows to schema objects while the session is still open.
    data = [ResourceLinkBase.model_validate(result) for result in results]

In [19]:
# Length of the first fetched record's text.
len(data[0].text)

3940

In [28]:
# Display the raw text of the first fetched record.
data[0].text

'1. CONTRACT ID CODE\n\nAMENDMENT OF SOLICITATION/MODIFICATION OF CONTRACT\n2. AMENDMENT/MODIFICATION NO.\n\n3. EFFECTIVE DATE\n\n0001\n\nPAGE OF P AGES\n\nJ\n\n4. REQUISITION/PURCHASE REQ. NO.\n\n1\n\n2\n\n5. P ROJECT NO.(If applicable)\n\n12-Mar-2024\n\n6. ISSUED BY\n\nCODE\n\nW912DS\n\n7. ADMINISTERED BY (If other than item 6)\n\nUS ARMY CORPS OF ENGINEERS, NEW YORK\n26 FEDERAL PLAZA, RM 16-300\nNEW YORK NY 10278-0090\n\nCODE\n\nSee Item 6\n\n8. NAME AND ADDRESS OF CONTRACTOR (No., Street, County, State and Zip Code)\n\nAMENDMENT OF SOLICIT AT ION NO.\nX 9A.\nW912DS24R0008\nX 9B. DATED (SEE IT EM 11)\n26-Feb-2024\n10A. MOD. OF CONTRACT/ORDER NO.\n10B. DAT ED (SEE ITEM 13)\n\nCODE\n\nFACILIT Y CODE\n11. THIS ITEM ONLY APPLIES TO AMENDMENTS OF SOLICIT AT IONS\n\nX The above numbered solicitation is amended as set forth in Item 14. The hour and date specified for receipt of Offer\n\nis extended,\n\nX is not extended.\n\nOffer must acknowledge receipt of this amendment prior to the hour

In [33]:
# Wrap the text of every fetched record in a llama_index Document; iterating
# the list directly replaces the earlier range(len(...)) index loop.
documents = [Document(text=entry.text) for entry in data]

# Build a vector index over the documents, showing progress bars while it runs.
index = VectorStoreIndex.from_documents(documents, show_progress=True)

  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|██████████| 5/5 [00:00<00:00, 89.62it/s]
Generating embeddings: 100%|██████████| 10/10 [00:00<00:00, 13.71it/s]


In [38]:
# Placeholder example showing the shape of one labelled RAG record: a query,
# its reference answer, the contexts behind that answer, and who wrote each.
placeholder_contexts = [
    "This is a list",
    "of contexts used to",
    "generate the reference_answer",
]
placeholder_answer = "This is a reference answer. Otherwise known as ground-truth answer."

example_ds = LabelledRagDataExample(
    query="This is some user query.",
    query_by=CreatedBy(type=CreatedByType.HUMAN),
    reference_answer=placeholder_answer,
    reference_contexts=placeholder_contexts,
    reference_by=CreatedBy(type=CreatedByType.HUMAN),
)

In [39]:
# Build a dataset containing only the single placeholder example.
rag_dataset = LabelledRagDataset(examples=[example_ds])