In [None]:
# basics
import ast
import datetime
import re
import sys
import time

# load self-written func
sys.path.append("func/")      # add path 
import prompts

# Read the API credentials from the local dotenv file so no key is written in the notebook
from dotenv import dotenv_values
ENV_VAR = dotenv_values("../env/.env")
# Both lookups raise KeyError if the corresponding entry is absent from the file
Gemini_key, HF_key = (ENV_VAR[key_name] for key_name in ('Gemini_key', 'HF_key'))

# DS
import pandas as pd
import numpy as np

# LLM
import google.generativeai as genai     # Gemini model

In [5]:
# get credentials
# Uses HF_key and Gemini_key, which are read from the dotenv file in the first cell.
# NOTE(review): the captured output of this cell reports the token permission and the
# local token cache path; consider clearing it before sharing the notebook.
from huggingface_hub import login
login(token=HF_key)   # authenticate this session against the Hugging Face Hub
genai.configure(api_key=Gemini_key)   # hand the Gemini API key to the google.generativeai client

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\user\.cache\huggingface\token
Login successful


In [6]:
# Model handle reused by every generate_content() call further down.
# NOTE(review): confirm the 'gemini-pro' model name is still served by the API before re-running.
gemini = genai.GenerativeModel('gemini-pro')   # load Gemini model

## Get prompt arguments
* 30 types of conversation tone
* 60 types of restaurants (two per tone)

In [14]:
# Sampling settings applied to the tone-list request only
generation_config = genai.GenerationConfig(
    temperature=1,
    top_k=32,
    top_p=0.3,
)
# Prompt asking Gemini for the tone vocabulary, one tone per line
input_text = """
List 30 types of most common tone in conversation. Your response should have great variety and contain only the answer. Each tone should be separated by a new line
"""
# Keep the whole response object; the text is extracted in the next cell
tones_str = gemini.generate_content(
    input_text,
    generation_config=generation_config,
)

In [None]:
# re-format the output:
# Turn the raw reply into a clean list of tone names.
#  * only a leading list marker ("- ", "* ", "1. ", "1) ") is stripped, so text inside a
#    tone name is left alone;
#  * blank lines (e.g. from a trailing newline) are dropped instead of becoming '' tones;
#  * duplicates are removed in first-seen order, because the collection loop below stores
#    its results in a dict keyed by tone.
tones = [
    re.sub(r'^\s*(?:[-*]\s+|\d+[.)]\s+)', '', line).strip()
    for line in tones_str.text.splitlines()
]
tones = list(dict.fromkeys(tone for tone in tones if tone))

In [18]:
# Prompt asking Gemini for the restaurant-type vocabulary, one type per line
input_text = """
List 60 types of common restaurants. Your response should have great variety and contain only the answer. Each response should be separated by a new line
"""
# No generation_config is passed for this request; the response object is kept as-is
food_str = gemini.generate_content(
    input_text,
)

In [1]:
# re-format the output:
# Turn the raw reply into a clean list of restaurant types.
#  * the pattern is a raw string: '\d' and '\s' in a normal string literal are invalid
#    escape sequences and trigger a warning on recent Python versions;
#  * it is anchored to the start of each line so only the list marker ("1. ", "1) ",
#    "- ", "* ") is removed;
#  * blank lines are dropped, since an empty entry would shift the pairwise slicing
#    food_types[i*2:2*(i+1)] used by the collection loop.
food_text = food_str.text
food_types = [
    re.sub(r'^\s*(?:\d+[.)]\s+|[-*]\s+)', '', line).strip()
    for line in food_text.splitlines()
]
food_types = [food for food in food_types if food]

In [7]:
# Preview a single calibration prompt: second tone, fourth and fifth restaurant types
input_text = prompts.restaurant_calib_data(
    tone=tones[1],
    food_type=food_types[3:5],
)
print(input_text)



I'm working on building a chatbot for restaurant recommendations. Can you assist me in generating training data in the following format? 
Please create 20 conversations, each must contain user input and model outputs, and encapsulated within a Python dictionary. All conversations should be wrapped up into a Python list.

Ensure the conversations cover a variety of scenarios, preferences, and inquiries related to dining experiences, focusing on Brewery or Buffet restaurants.
The tone of user input should be Aggressive, and the sentence structure in each example should be different, 
reflecting different users and their unique styles of communication.

Remember to use double quotes ("") to enclose the text in the training data.

Example format:

[
  {
    "user": "Hi there! Wondering if you can help me find a spot for a cozy dinner tonight.",
    "AI assistant": "Of course! For a cozy dinner experience, I recommend [Restaurant Name]. They offer intimate ambiance and a menu filled with 

In [12]:
# get calibration data from Gemini:
# One request per tone, each paired with two restaurant types. Raw replies are stored
# in `calib_data_raw` ({tone: reply text}) and parsed in the next cell.
N_TONES = 30          # number of distinct tones to collect
MAX_ATTEMPTS = 5      # failed requests tolerated for one tone before it is skipped
RETRY_WAIT_SEC = 5    # base pause between retries, multiplied by the attempt number

generation_config = genai.GenerationConfig(temperature=0.7, top_k=64)
calib_data_raw = {}   # buffer to save Gemini output:
failed_tones = []     # tones skipped after MAX_ATTEMPTS failures
i = 0                 # index of the tone currently being requested
err = 0               # total number of failed requests
attempts = 0          # failed requests for the current tone
start_time = time.time()
# The second condition stops the loop cleanly when the tone list is exhausted.
while len(calib_data_raw) < N_TONES and i < len(tones):
    tone = tones[i]
    food_type = food_types[i*2:2*(i+1)]   # two restaurant types per tone
    if tone in calib_data_raw:
        # Duplicate tone: move on, otherwise `i` never advances and the loop never ends.
        i += 1
        continue
    print(f"{i}, Tone: {tone}, Food type: {str(food_type)}")
    try:
        input_text = prompts.restaurant_calib_data(tone=tone, food_type=food_type)
        output = gemini.generate_content(input_text, generation_config=generation_config)
        calib_data_raw[tone] = output.text   # save output
    except Exception as exc:
        # `Exception` (not a bare except) keeps a kernel interrupt able to stop the cell;
        # the real error is printed because not every failure is a network problem.
        err += 1
        attempts += 1
        print(f"{err} Error at: {i} ({type(exc).__name__}: {exc})")
        if attempts >= MAX_ATTEMPTS:
            print(f"Giving up on tone '{tone}' after {attempts} attempts")
            failed_tones.append(tone)
            i += 1
            attempts = 0
        else:
            time.sleep(RETRY_WAIT_SEC * attempts)   # back off before retrying the same tone
    else:
        i += 1
        attempts = 0
    print(f"Accumulated time spent: {(time.time() - start_time)/60:.4f} minutes")

if len(calib_data_raw) < N_TONES:
    print(f"Only {len(calib_data_raw)} of {N_TONES} tones collected; skipped: {failed_tones}")

0, Tone: Assertive, Food type: ['American', 'Barbecue']
Accumulated time spent: 0.3292 minutes
1, Tone: Aggressive, Food type: ['Breakfast', 'Brewery']
Accumulated time spent: 0.6882 minutes
2, Tone: Authoritative, Food type: ['Buffet', 'Burger joint']
Accumulated time spent: 1.0274 minutes
3, Tone: Calm, Food type: ['Cafe', 'Chinese']
Accumulated time spent: 1.3316 minutes
4, Tone: Commanding, Food type: ['Coffee shop', 'Creperie']
Accumulated time spent: 1.6456 minutes
5, Tone: Confident, Food type: ['Diner', 'Fast food']
Accumulated time spent: 1.9313 minutes
6, Tone: Conversational, Food type: ['Fine dining', 'Food truck']
Accumulated time spent: 2.2476 minutes
7, Tone: Critical, Food type: ['Gastropub', 'Greek']
Accumulated time spent: 2.5385 minutes
8, Tone: Cynical, Food type: ['Hamburger stand', 'Health food']
Accumulated time spent: 2.9127 minutes
9, Tone: Defensive, Food type: ['Hot dog stand', 'Indian']
Accumulated time spent: 3.2936 minutes
10, Tone: Demeaning, Food type: [

In [28]:
# format the output:
def _parse_conversations(raw_text):
    """Extract the list of conversation dicts from one raw model reply.

    Markdown code fences (with an optional language tag) are removed, then the text
    between the first '[' and the last ']' is parsed as a Python literal.

    Raises ValueError, SyntaxError or TypeError when no well-formed list is found.
    """
    # Remove only the fence markers; the word "python" inside a conversation is kept.
    body = re.sub(r"```[A-Za-z]*", "", raw_text)
    start, end = body.find("["), body.rfind("]")
    if start == -1 or end < start:
        raise ValueError("no list literal found in model output")
    # The reply is untrusted text: literal_eval accepts literals only, whereas eval()
    # would execute any expression the model happened to return.
    parsed = ast.literal_eval(body[start:end + 1])
    if not isinstance(parsed, list):
        raise ValueError("model output is not a list")
    return parsed


def _first_present(record, *keys):
    """Return the value of the first key in `keys` that `record` holds with a non-None value."""
    for key in keys:
        if record.get(key) is not None:
            return record[key]
    return None


chat_lst = []
unparsed_tones = []   # tones whose reply could not be parsed
for key, val in calib_data_raw.items():
    try:
        cur_lst = _parse_conversations(val)
    except (ValueError, SyntaxError, TypeError) as exc:
        # One malformed reply should not discard every other tone.
        unparsed_tones.append(key)
        print(f"Skipping tone '{key}': {type(exc).__name__}: {exc}")
        continue
    for item in cur_lst:
        if not isinstance(item, dict):
            continue
        # Replies use either "user"/"AI assistant" or "input"/"output" as keys;
        # the first pair takes precedence when both are present.
        chat_lst.append({
            "user_tone": key,
            "user_input": _first_present(item, "user", "input"),
            "model_output": _first_present(item, "AI assistant", "output"),
        })
# Explicit columns keep the frame well-formed even when nothing could be parsed.
calib_data = pd.DataFrame(chat_lst, columns=['user_tone', 'user_input', 'model_output'])
if unparsed_tones:
    print(f"{len(unparsed_tones)} tone(s) could not be parsed: {unparsed_tones}")
# calib_data.isna().mean()
# Raw-string pattern; replaces ':', whitespace and '.' so the timestamp is filename-safe.
now_timestamp = re.sub(r'[:\s.]', '_', str(datetime.datetime.now()))
calib_data.to_csv(f"data/restaurant_chat_{now_timestamp}.csv", index=False, encoding="utf-8-sig")

In [64]:
# display output:
pd.set_option('display.max_colwidth', None)   # show the full conversation text, not a truncated preview
# Fixed seed so the preview is identical on every run; the size is capped because
# sample() raises when asked for more rows than the frame holds.
display(calib_data.sample(n=min(5, len(calib_data)), random_state=42))

Unnamed: 0,user_tone,user_input,model_output
455,Sarcastic,I'm looking for a place that's both kid-friendly and has a great selection of craft beers.,Kid-friendly and craft beers? [Restaurant Name] fits the bill. They have a dedicated kids' menu and an impressive tap list featuring local brews.
52,Authoritative,"Hi there, I'm in the mood for a buffet restaurant with a sushi bar.","Sushi lovers, unite! [Restaurant Name] combines a delectable buffet with a fully stocked sushi bar. Dive into a culinary adventure that will satisfy your cravings for both variety and freshness."
202,Demeaning,I'm in the mood for something healthy and light.,"For a healthy and refreshing Japanese meal, try [Restaurant Name]. They offer a variety of light dishes such as sashimi, salads, and steamed entrees."
460,Sarcastic,I'm looking for a place that serves up mouthwatering burgers and fries.,"Burgers and fries, you say? [Restaurant Name] is your burger heaven. Their patties are juicy, their fries are crispy, and their milkshakes are the perfect complement."
380,Ironic,I'm craving some Thai food that will make my taste buds dance.,Prepare for a culinary adventure at [Restaurant Name]. Their tantalizing dishes are a symphony of flavors that will awaken your senses and leave you craving more.


In [None]:
# push data onto huggingface:
from datasets import Dataset

# Convert the parsed conversations into a Hugging Face dataset
finetuning_dataset = Dataset.from_pandas(calib_data)
# Hub repo ids take the form "<namespace>/<repo_name>"; the previous value repeated the
# namespace ("tctsung/tctsung/..."), which is not a valid repo id.
finetuning_dataset.push_to_hub("tctsung/chat_restaurant_recommendation")