### Prepare

In [None]:
import cohere
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if any) before reading the key.
load_dotenv()

# The Cohere credential is looked up under the environment variable API_KEY;
# the lookup yields None when that variable is not set.
API_KEY = os.environ.get('API_KEY')

# Build the client shared by the cells below and ask the SDK to check the
# key right away, so a bad credential surfaces here rather than mid-notebook.
co = cohere.Client(API_KEY)
co.check_api_key()


### Example setup

In [None]:
# Baseline request: ask the stock model a question before any fine-tuning.
prompt = "What is Autonomous Database?"

# Sampling settings for the request, gathered in one place.
generate_options = dict(
    model="base-light",
    max_tokens=500,
    temperature=1,
    k=0,
    p=0.75,
    return_likelihoods='ALL',
)
response = co.generate(prompt=prompt, **generate_options)

# Only the first generation is reported.
first_generation = response.generations[0]
print(f"Likelihood: {first_generation.likelihood}")
print(f"generated text: {first_generation.text}")


### Create custom model - generative

#### Scrape data for fine-tuning

In [None]:
from bs4 import BeautifulSoup, element
from urllib import request
import pandas as pd

# Scrape the Autonomous Database FAQ page into prompt/completion pairs.
# Each <h4> is treated as a question; the <p> siblings that follow it, up to
# the next <h4>, are concatenated into the answer.
url = 'https://www.oracle.com/database/technologies/datawarehouse-bigdata/adb-faqs.html'

# The context manager closes the connection even if parsing raises.
with request.urlopen(url) as page:
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(page, 'html.parser')


def _paragraph_parts(paragraph):
    """Return the text fragments of a <p>, rendering links as ``text<href>``."""
    parts = []
    for child in paragraph.contents:
        if child.name == 'a':
            parts.append(f"{child.text}<{child.get('href')}>")
        else:
            parts.append(str(child.text))
    return parts


def _collect_faq_rows(soup):
    """Return one ``{'prompt', 'completion'}`` dict per answered <h4> question."""
    rows = []
    for h4 in soup.find_all('h4'):
        answer_parts = []
        for sibling in h4.next_siblings:
            if not isinstance(sibling, element.Tag):
                continue
            if sibling.name == 'h4':
                # The next question starts here, so this answer is complete.
                break
            if sibling.name == 'p':
                answer_parts.extend(_paragraph_parts(sibling))

        # Store the heading's text, not the Tag object itself.
        question = h4.get_text().strip()
        completion = ''.join(answer_parts).replace('\n', '').strip()

        # Emitting after the sibling scan (rather than only when another <h4>
        # is found) keeps the final question on the page. Headings without
        # any answer text are skipped: they cannot serve as training pairs.
        if completion:
            rows.append({'prompt': question, 'completion': completion})
    return rows


# A loop-local name is used for the question inside the helper so the
# notebook-level `prompt` string from the example cell is not overwritten.
# Building the frame once avoids re-copying it on every row.
df = pd.DataFrame(_collect_faq_rows(soup), columns=['prompt', 'completion'])

# The file is written without index or header: just prompt,completion rows.
df.to_csv('./training-data.csv', index=False, header=False)

# Last expression in the cell so the notebook actually renders the preview.
df.head()


In [None]:
from cohere.custom_model_dataset import CsvDataset

# Load the prompt/completion pairs produced by the scraping cell. The path
# must match the file that cell writes ('./training-data.csv'); the previous
# './data/training-data.csv' pointed at a location nothing in this notebook
# creates.
# NOTE(review): if a curated copy is meant to live under ./data/, change the
# scraping cell's output path instead and restore the old value here.
dataset = CsvDataset(train_file="./training-data.csv", delimiter=",")

# Submit the dataset to Cohere as a generative custom-model training job.
finetune = co.create_custom_model(name="custom-generative-model", dataset=dataset, model_type="GENERATIVE")

# Show what the SDK returned for the submitted job.
print(finetune)


In [None]:
# Re-bind the question explicitly: the scraping cell reuses the name `prompt`
# as its loop variable, which leaves it pointing at a BeautifulSoup element
# instead of this string.
prompt = "What is Autonomous Database?"

# NOTE(review): this still targets "base-light"; presumably the intent is to
# query the fine-tuned model created above once training has finished.
# Confirm and substitute that model's id.
response = co.generate(
    prompt=prompt,
    model="base-light",
    max_tokens=500,
    temperature=1,
    k=0,
    p=0.75,
    return_likelihoods='ALL'
)
