In [1]:
import json

from fireworks.client import Fireworks
import numpy as np
import pandas as pd
import pronouncing
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Pin the sentiment model and revision explicitly instead of relying on the
# library default. These are the exact values the pipeline reported falling
# back to when no model was supplied, so results are unchanged, but a future
# change of the default can no longer silently alter them.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="af0f99b",
)

# Sentence-embedding model.
# NOTE(review): trust_remote_code=True runs Python code shipped in the model
# repository at load time; only keep it for repositories you trust, and
# consider pinning a revision for this model as well.
embeddings_model = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)

  from tqdm.autonotebook import tqdm, trange
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [2]:
# Make sure FIREWORKS_API_KEY is set to your account's key before running this
# cell: no key is passed to the constructor here.
# NOTE(review): presumably the client reads the key from the environment — confirm.
client = Fireworks()

In [9]:
# Instruction placed in the system turn of every conversation, both when
# building training examples and when querying a model.
system_message = 'Translate the English sentence to Danish. Your response must contain ONLY the translated sentence.'


def format_translation_for_fireworks(english_sentence, danish_sentence):
    """Wrap one English/Danish pair as a three-turn chat example.

    Args:
        english_sentence: Source sentence, used as the user turn.
        danish_sentence: Reference translation, used as the assistant turn.

    Returns:
        A dict with a single "messages" key holding the system, user and
        assistant turns (in that order), each a {"role", "content"} dict.
    """
    turns = (
        ("system", system_message),
        ("user", english_sentence),
        ("assistant", danish_sentence),
    )
    return {"messages": [{"role": speaker, "content": text} for speaker, text in turns]}

# Load the parallel corpus; the 'English' and 'Danish' columns are read below.
df = pd.read_csv('english_to_danish.csv')

# Build one chat-formatted training example per row. Pairing the two columns
# directly avoids the unused enumerate() index and the row[1][...] tuple
# indexing that iterrows() required.
json_objs = [
    format_translation_for_fireworks(english, danish)
    for english, danish in zip(df['English'], df['Danish'])
]

# Write the examples as JSON Lines: one JSON object per line.
dataset_file_name = 'danish_training_data.jsonl'
# The encoding is pinned so the file does not depend on the platform default.
# json.dump escapes non-ASCII characters by default, so the content written is
# the same as before.
with open(dataset_file_name, 'w', encoding='utf-8') as f:
    for obj in json_objs:
        json.dump(obj, f)
        f.write('\n')

In [10]:
# Upload our dataset to fireworks as a dataset named danish-training-data-v1.
# {dataset_file_name} is interpolated by IPython from the Python variable of
# that name before the shell command runs.
!firectl create dataset danish-training-data-v1 {dataset_file_name}

2024/06/28 14:19:18 There are updates available.
Current version: 1.1.1
Latest version: 1.2.0

To upgrade to the latest version, run
  $ sudo firectl upgrade

20.63 KiB / 20.63 KiB [------------------------------------] 100.00% ? p/s 100ms


In [12]:
# List the account's datasets to confirm that danish-training-data-v1 was
# uploaded and its STATE is READY.
!firectl list datasets

2024/06/28 14:19:32 There are updates available.
Current version: 1.1.1
Latest version: 1.2.0

To upgrade to the latest version, run
  $ sudo firectl upgrade

NAME                     CREATE TIME          STATE  DISPLAY_NAME
chatbot-arena-10k-v1     2024-06-16 09:33:47  READY  
chatbot-arena-1k-v1      2024-06-16 10:44:33  READY  
chatbot-arena-1k-v2      2024-06-18 09:26:18  READY  
chatbot-arena-2k-v1      2024-06-18 13:52:21  READY  
chatbot-arena-2k-v3      2024-06-18 17:50:47  READY  
chatbot-arena-4k-v1      2024-06-18 15:02:57  READY  
chatbot-arena-v1         2024-06-18 19:13:53  READY  
chatbot-arena-v2         2024-06-18 19:15:32  READY  
chatbot-arena-v3         2024-06-18 19:23:27  READY  
danish-training-data-v1  2024-06-28 14:19:19  READY  
poem-training-data-v1    2024-06-25 23:03:20  READY  

Total size: 11


In [13]:
# Create a fine-tuning job on the uploaded dataset. Job settings are read from
# danish_fine_tuning_config.yaml, which is not shown in this notebook.
!firectl create fine-tuning-job --settings-file danish_fine_tuning_config.yaml --display-name danish-translation-v1 --dataset danish-training-data-v1 

2024/06/28 14:24:13 There are updates available.
Current version: 1.1.1
Latest version: 1.2.0

To upgrade to the latest version, run
  $ sudo firectl upgrade

Name: accounts/sdkramer10-5e98cb/fineTuningJobs/f283a791b01b4c4b80d3d75bf5c22bcf
Display Name: danish-translation-v1
Create Time: 2024-06-28 14:24:14
State: CREATING
Dataset: accounts/sdkramer10-5e98cb/datasets/danish-training-data-v1
Created By: sdkramer10@gmail.com
Container Version: 
Model Id: 
Wandb Url: 
Conversation:
  Jinja Template: {%- set _mode = mode | default('generate', true) -%}
{%- set stop_token = '<|eot_id|>' -%}
{%- set message_roles = ['SYSTEM', 'USER', 'ASSISTANT'] -%}
{%- set ns = namespace(initial_system_message_handled=false, last_assistant_index_for_eos=-1, messages=messages) -%}
{%- for message in ns.messages -%}
    {%- if not message.get('role') -%}
        {{ raise_exception('Key [role] is missing. Original input: ' +  message|tojson) }}
    {%- endif -%}
    {%- if message['role'] | upper not in messa

In [59]:
# List the account's models. The fine-tuned model appears under the same id as
# the fine-tuning job created above; that id is what model_id is set to.
!firectl list models

2024/06/28 15:24:14 There are updates available.
Current version: 1.1.1
Latest version: 1.2.0

To upgrade to the latest version, run
  $ sudo firectl upgrade

NAME                              CREATE TIME          KIND           CHAT  PUBLIC  STATE      STATUS MESSAGE
489aabe3b8274492a226629f28aa8d4f  2024-06-25 23:13:41  HF_PEFT_ADDON  true  false   PREPARING  
4fe290a74b72458cafe0c9d8881e5d37  2024-06-18 19:36:34  HF_PEFT_ADDON  true  false   PREPARING  
8ec0b7dd59b54c09926ff87e14c02f3d  2024-06-25 23:19:30  HF_PEFT_ADDON  true  false   PREPARING  
cc8324868ff04936855cffb392dba3b8  2024-06-18 19:31:50  HF_PEFT_ADDON  true  false   PREPARING  
f283a791b01b4c4b80d3d75bf5c22bcf  2024-06-28 14:28:15  HF_PEFT_ADDON  true  false   PREPARING  

Total size: 5


In [43]:
# Base model identifier on Fireworks.
base_model_name = "accounts/fireworks/models/llama-v3-8b-instruct"

# Account and fine-tuned model identifiers, as shown by `firectl list models`.
account_id = 'sdkramer10-5e98cb'
model_id = 'f283a791b01b4c4b80d3d75bf5c22bcf'

# Fully qualified name of the fine-tuned model, passed to the chat API.
ft_model_name = f'accounts/{account_id}/models/{model_id}'

In [62]:
# Deploy the fine-tuned model identified by model_id (interpolated by IPython).
# The matching `firectl undeploy` is in the last cell of the notebook.
!firectl deploy {model_id}

2024/06/28 15:28:52 There are updates available.
Current version: 1.1.1
Latest version: 1.2.0

To upgrade to the latest version, run
  $ sudo firectl upgrade



In [None]:
def generate_translations(model, english_sentences):
    """Translate each English sentence with the given chat model.

    One chat-completion request is sent per sentence, using the module-level
    ``system_message`` as the system turn and temperature 0. Each reply is
    printed as it arrives and collected unmodified.

    Args:
        model: Model name passed to the chat-completions API.
        english_sentences: Iterable of English sentences to translate.

    Returns:
        A list with the raw reply text for each sentence, in input order.

    Note:
        Replies are not post-processed. The captured output of this notebook
        shows replies that begin with ``<|start_header_id|>`` tokens and carry
        stray whitespace, so callers may need to clean them.
    """
    translations = []
    for english_sentence in english_sentences:
        chat = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": english_sentence},
        ]
        completion = client.chat.completions.create(
            model=model,
            messages=chat,
            temperature=0,
        )

        translated = completion.choices[0].message.content
        print(translated)
        translations.append(translated)
    return translations


# Translate the dataset's English sentences with the fine-tuned model.
# NOTE: these are the same sentences that were used to build the training file.
generate_translations(model=ft_model_name, english_sentences=df['English'].tolist())

<|start_header_id|><|start_header_id|>Solen skinner i dag.
<|start_header_id|> Hun løber fem miles hver morgen.
<|start_header_id|><|start_header_id|>Min yndlingsfarve er blå.
<|start_header_id|><|start_header_id|>De planlægger en overraskelsfest.
<|start_header_id|><|start_header_id|>Han spiller guitar meget godt.
<|start_header_id|><|start_header_id|>Vi skal færdige dette projekt inden fredag.
<|start_header_id|>Har du set den nye film endnu?
<|start_header_id|> Hun taler tre sprog flydende.

<|start_header_id|> Jeg elsker at spise pizza med ekstra ost.
<|start_header_id|>Han nyder sig til at lytte til klassisk musik.
<|start_header_id|><|start_header_id|>Mødet starter præcis kl. 9.
<|start_header_id|>Kan du lide noget med i weekenden?

<|start_header_id|> Vi burde tage en tur i parken.
<|start_header_id|>

<|start_header_id|> Hun vil gerne rejse rundt omkring i verden.
<|start_header_id|> Dette restaurant server smagfuld mad.
<|start_header_id|><|start_header_id|>De maler deres hus 

In [None]:
# Undeploy the fine-tuned model identified by model_id, reversing the earlier
# `firectl deploy`.
!firectl undeploy {model_id}