In [1]:
!pip install -qU \
    datasets==2.14.4 \
    langchain==0.0.274 \
    pinecone-client==2.2.2 \
    openai==0.27.9

In [2]:
from datasets import load_dataset

# Fetch only the "train" split of the agent-conversation dataset used for fine-tuning.
data = load_dataset("jamescalam/agent-conversations-retrieval-tool", split="train")
data  # display the dataset summary (features and row count)

Dataset({
    features: ['messages'],
    num_rows: 270
})

In [3]:
# Peek at the first training record: select row 0, then read its "messages" field.
data[0]["messages"]

[{'role': 'system',
  'content': 'Assistant is a large language model trained by OpenAI.\n\nAssistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n\nAssistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n\nOverall, Assistant is a powerful system

In [4]:
# Export the dataset to a local "conversations.jsonl" file; this is the file that gets
# uploaded for fine-tuning in the next section.
data.to_json("conversations.jsonl")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

1103809

## Running Training

First we upload the training file:

In [5]:
import os
import openai

# Read the OpenAI key from the environment; the placeholder must be replaced with a real
# key (or OPENAI_API_KEY exported beforehand) for the upload to succeed.
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") or "YOUR_API_KEY"
openai.api_key = os.environ["OPENAI_API_KEY"]

# Upload the exported conversations for fine-tuning. The file is opened in binary mode so
# the bytes are sent unchanged regardless of the platform's default text encoding, and the
# `with` block guarantees the handle is closed once the upload returns (or raises).
with open("conversations.jsonl", "rb") as training_file:
    res = openai.File.create(
        file=training_file,
        purpose='fine-tune'
    )
res

<File file id=file-lNA3Mipra1v7Q9ckvqTn84UA at 0x7ddc84ffae80> JSON: {
  "object": "file",
  "id": "file-lNA3Mipra1v7Q9ckvqTn84UA",
  "purpose": "fine-tune",
  "filename": "file",
  "bytes": 1103809,
  "created_at": 1693167014,
  "status": "uploaded",
  "status_details": null
}

In [6]:
# Keep the uploaded file's ID; it is passed as `training_file` when the fine-tuning job is created.
file_id = res["id"]
file_id

'file-lNA3Mipra1v7Q9ckvqTn84UA'

We then create the fine-tuning job _(note, it can take some time before the file above is ready)_.

In [7]:
# Start a fine-tuning job on top of gpt-3.5-turbo, trained on the file uploaded earlier.
res = openai.FineTuningJob.create(
    model="gpt-3.5-turbo",
    training_file=file_id,
)
res  # display the newly created job object

<FineTuningJob fine_tuning.job id=ftjob-NLYU2lRW6AjOkAswAu52lkDi at 0x7ddc8503e750> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-NLYU2lRW6AjOkAswAu52lkDi",
  "model": "gpt-3.5-turbo-0613",
  "created_at": 1693167129,
  "finished_at": null,
  "fine_tuned_model": null,
  "organization_id": "org-f8Ugk8IIbz8etfgd5WdFJngy",
  "result_files": [],
  "status": "created",
  "validation_file": null,
  "training_file": "file-lNA3Mipra1v7Q9ckvqTn84UA",
  "hyperparameters": {
    "n_epochs": 3
  },
  "trained_tokens": null
}

In [8]:
# Keep the job ID; the retrieve / list_events calls below look the job up by this value.
job_id = res["id"]
job_id

'ftjob-NLYU2lRW6AjOkAswAu52lkDi'

We can retrieve info for our fine-tuning job like so:

In [9]:
# Fetch the current state of the job (status, finished_at, fine_tuned_model, ...).
openai.FineTuningJob.retrieve(job_id)

<FineTuningJob fine_tuning.job id=ftjob-NLYU2lRW6AjOkAswAu52lkDi at 0x7ddc850660c0> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-NLYU2lRW6AjOkAswAu52lkDi",
  "model": "gpt-3.5-turbo-0613",
  "created_at": 1693167129,
  "finished_at": null,
  "fine_tuned_model": null,
  "organization_id": "org-f8Ugk8IIbz8etfgd5WdFJngy",
  "result_files": [],
  "status": "running",
  "validation_file": null,
  "training_file": "file-lNA3Mipra1v7Q9ckvqTn84UA",
  "hyperparameters": {
    "n_epochs": 3
  },
  "trained_tokens": null
}

The `"finished_at"` value is still `null`, so fine-tuning isn't yet complete. We can check for events from our fine-tuning job while we wait:

In [10]:
# List the events recorded so far for this fine-tuning job.
openai.FineTuningJob.list_events(id=job_id)

<OpenAIObject list at 0x7ddc8503dfd0> JSON: {
  "object": "list",
  "data": [
    {
      "object": "fine_tuning.job.event",
      "id": "ftevent-aunKCogMZVKxsfuULBI5wzet",
      "created_at": 1693167130,
      "level": "info",
      "message": "Fine tuning job started",
      "data": null,
      "type": "message"
    },
    {
      "object": "fine_tuning.job.event",
      "id": "ftevent-IU02P4hK6Lr2OSJJw4ipwmeO",
      "created_at": 1693167129,
      "level": "info",
      "message": "Created fine-tune: ftjob-NLYU2lRW6AjOkAswAu52lkDi",
      "data": null,
      "type": "message"
    }
  ],
  "has_more": false
}

We can set up a check for fine-tuning completion (or wait for OpenAI to send us an email telling us that the job has completed):

In [11]:
from time import sleep

# Poll the fine-tuning job until it is done, printing one dot per check.
# Besides a populated `finished_at`, also stop on a terminal `status`: a job that fails or
# is cancelled may never get a `finished_at` timestamp, which would otherwise leave this
# loop spinning forever. After the loop, `res` holds the last job object fetched.
while True:
    res = openai.FineTuningJob.retrieve(job_id)
    job_done = (
        res["finished_at"] is not None
        or res.get("status") in ("succeeded", "failed", "cancelled")
    )
    if job_done:
        break
    print(".", end="")
    sleep(100)  # seconds between status checks

.

KeyboardInterrupt: ignored

Once complete, we can see our model details in `res`:

In [None]:
# `res` is the job object most recently fetched by the polling loop.
res

We access our fine-tuned model name:

In [None]:
# Read the fine-tuned model name from the job object (it is null while the job is unfinished).
ft_model = res["fine_tuned_model"]
ft_model

Finally, we use our new model!

In [12]:
# Hard-coded name of an already fine-tuned model; this reassigns `ft_model` regardless of
# the value read from `res` in the previous cell. Replace it with your own model name
# when running this notebook yourself.
ft_model = 'ft:gpt-3.5-turbo-0613:pinecone::7s8gnk9R'

In [13]:
import requests

# Download the helper module and save it next to the notebook as chains.py so that
# `from chains import VectorDBChain` works in the following cell.
res = requests.get(
    'https://raw.githubusercontent.com/pinecone-io/examples/master/learn/generation/openai/fine-tuning/gpt-3.5-agent-training/chains.py',
    timeout=30,  # do not hang the notebook indefinitely on a stalled connection
)
# Fail loudly on an HTTP error instead of writing the error body into chains.py.
res.raise_for_status()
# Explicit encoding so the written source does not depend on the platform default.
with open("chains.py", 'w', encoding="utf-8") as fp:
    fp.write(res.text)

In [15]:
from langchain.agents import Tool
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferWindowMemory
from chains import VectorDBChain

# Chat model wrapper pointing at the fine-tuned model selected above.
llm = ChatOpenAI(model_name=ft_model, temperature=0.5)

# Windowed conversation memory (k=5), exposed to the agent under "chat_history";
# "output" names the result key that is recorded from each agent call.
memory = ConversationBufferWindowMemory(
    k=5,
    memory_key="chat_history",
    output_key="output",
    return_messages=True,
)

# Vector search over the "llama-2-arxiv-papers" Pinecone index. Credentials are read from
# the environment (see app.pinecone.io); the placeholder fallbacks must be replaced.
vdb = VectorDBChain(
    index_name="llama-2-arxiv-papers",
    environment=os.getenv("PINECONE_ENV") or "YOUR_ENV",
    pinecone_api_key=os.getenv("PINECONE_API_KEY") or "YOUR_KEY",
)

# Expose the vector search to the agent as a tool.
vdb_tool = Tool(
    name=vdb.name,
    func=vdb.query,
    description="This tool allows you to get research information about LLMs.",
)

In [16]:
from langchain.agents import AgentType, initialize_agent

# Assemble the conversational chat agent from the fine-tuned model, the vector search
# tool, and the conversation memory.
agent = initialize_agent(
    tools=[vdb_tool],
    llm=llm,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    memory=memory,
    max_iterations=3,
    early_stopping_method="generate",
    return_intermediate_steps=True,  # adds 'intermediate_steps' to the returned dict
    verbose=True,  # print the chain trace while running
)

In [17]:
# First question of the conversation; the memory is still empty at this point.
agent("tell me about Llama 2?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Vector Search Tool",
    "action_input": "Llama 2 information"
}
```[0mLlama 2 information
[-0.013843749649822712, 0.01913735456764698, -0.017765453085303307, -0.01960851438343525, -0.007406890857964754, 0.023308495059609413, -0.019359076395630836, -0.006499217823147774, -0.008557071909308434, -0.03156762942671776, 0.011681962758302689, 0.015478947199881077, 0.009249952621757984, -0.0033795239869505167, 0.004604190122336149, 0.010199198499321938, 0.0141901895403862, -0.007843405939638615, 0.024112235754728317, 0.01829204149544239, -0.00408452982082963, -0.0030417446978390217, 0.008633289486169815, -0.022837337106466293, 0.0012237998889759183, -0.0005486746085807681, 0.014716778881847858, -0.008058198727667332, 0.009506318718194962, -0.01578381471335888, 0.03905073553323746, 0.0026156234089285135, -0.03641778975725174, -0.010545639321208, -0.02688375674188137, -0.019372934475541115, -0.001163172884844

{'input': 'tell me about Llama 2?',
 'chat_history': [],
 'output': 'Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) ranging in scale from 7 billion to 70 billion parameters. These models, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc, are optimized for dialogue use cases. They outperform open-source chat models on most benchmarks tested and may be a suitable substitute for closed-source models. The approach to fine-tuning and safety is detailed in the work. Llama 2 is intended for commercial and research use in English, with tuned models intended for assistant-like chat and pretrained models adaptable for various natural language generation tasks.',
 'intermediate_steps': [(AgentAction(tool='Vector Search Tool', tool_input='Llama 2 information', log='```json\n{\n    "action": "Vector Search Tool",\n    "action_input": "Llama 2 information"\n}\n```'),
   ['Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Ta

In [18]:
# Follow-up question; the previous exchange is now passed along as chat history.
agent("what makes llama 2 so special?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Vector Search Tool",
    "action_input": "Llama 2 features and advantages"
}
```[0mLlama 2 features and advantages
[-0.007560313679277897, 0.014035549946129322, -0.019672317430377007, -0.002987486543133855, -0.004100747872143984, 0.02533726766705513, -0.024097178131341934, -0.00222476152703166, -0.027620157226920128, -0.029593026265501976, 0.0096459174528718, 0.018051745370030403, 0.0035652550868690014, -0.009364078752696514, -0.0043403105810284615, 0.007828060537576675, 0.013986228033900261, 0.0006363381398841739, 0.007813967764377594, 0.021631093695759773, -0.006552741397172213, -0.0042768968269228935, 0.00280957599170506, -0.029283003881573677, -0.011618785560131073, 0.009364078752696514, 0.030748562887310982, -0.011477867141366005, 0.004231098107993603, 0.006443529389798641, 0.027930179610848427, 0.0001197812962345779, -0.03821728006005287, -0.007377118803560734, -0.03801999241113663, -0.00901882

{'input': 'what makes llama 2 so special?',
 'chat_history': [HumanMessage(content='tell me about Llama 2?', additional_kwargs={}, example=False),
  AIMessage(content='Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) ranging in scale from 7 billion to 70 billion parameters. These models, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc, are optimized for dialogue use cases. They outperform open-source chat models on most benchmarks tested and may be a suitable substitute for closed-source models. The approach to fine-tuning and safety is detailed in the work. Llama 2 is intended for commercial and research use in English, with tuned models intended for assistant-like chat and pretrained models adaptable for various natural language generation tasks.', additional_kwargs={}, example=False)],
 'output': 'Llama 2 is special because it features a collection of pretrained and fine-tuned large language models optimized for dialogue use cases. The

In [19]:
# Third question in the same conversation, again with the earlier exchanges as chat history.
agent("tell me about llama 2 red teaming?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Vector Search Tool",
    "action_input": "Llama 2 red teaming"
}
```[0mLlama 2 red teaming
[-0.024061694741249084, -0.027173474431037903, -0.011430328711867332, -0.0008598336717113853, -0.0025880311150103807, 0.021222878247499466, -0.02164597250521183, -0.002226355019956827, -0.0065954700112342834, -0.04864202067255974, 0.01959874853491783, 0.01131431944668293, 0.025044361129403114, -0.01090487465262413, 0.001903917407616973, -0.0031458993908017874, 0.015900099650025368, 0.0015149450628086925, 0.022041767835617065, 0.002535144565626979, -0.014235024340450764, 0.003722533816471696, 0.004114918410778046, -0.012044495902955532, -0.015804562717676163, -0.02099085971713066, 0.014207728207111359, -0.01070015225559473, 0.036085717380046844, -0.010529550723731518, 0.04015286639332771, -0.009314864873886108, -0.013477551750838757, 0.012419819831848145, -0.02238297276198864, -0.019830767065286636, 0.0060870763

{'input': 'tell me about llama 2 red teaming?',
 'chat_history': [HumanMessage(content='tell me about Llama 2?', additional_kwargs={}, example=False),
  AIMessage(content='Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) ranging in scale from 7 billion to 70 billion parameters. These models, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc, are optimized for dialogue use cases. They outperform open-source chat models on most benchmarks tested and may be a suitable substitute for closed-source models. The approach to fine-tuning and safety is detailed in the work. Llama 2 is intended for commercial and research use in English, with tuned models intended for assistant-like chat and pretrained models adaptable for various natural language generation tasks.', additional_kwargs={}, example=False),
  HumanMessage(content='what makes llama 2 so special?', additional_kwargs={}, example=False),
  AIMessage(content='Llama 2 is special because it fea

---