# In [53]:
import ast
import json

import pandas as pd

# Sample data creation function for multilingual support

# Question templates keyed by language code, then by the canonical English
# question. Defined once at module level instead of being rebuilt per row.
_QUESTION_TEMPLATES = {
    "en": {
        "Can you tell me about the tour?": "Can you tell me about the '{tour_name}' tour?",
        "What kind of tour is it?": "What kind of tour is '{tour_name}'?",
        "Where is the tour located?": "Where is the '{tour_name}' tour located?",
        "What can I expect on the tour?": "What can I expect on the '{tour_name}' tour?",
        "What is the terrain like on the tour?": "What is the terrain like on the '{tour_name}' tour?",
        "Are there any accommodations recommended?": "Are there any accommodations recommended for the '{tour_name}' tour?",
        "Where can I stay if I take the tour?": "Where can I stay if I take the '{tour_name}' tour?",
        "Can you recommend a tour near": "Can you recommend a tour near {location}?",
        "I am looking for a tour around": "I am looking for a tour around {location}, any suggestions?",
        "What are some good hiking tours in the": "What are some good hiking tours in the {region}?",
        "Are there any easy hikes in the": "Are there any easy hikes in the {region}?",
    },
    "de": {
        "Can you tell me about the tour?": "Können Sie mir etwas über die Tour '{tour_name}' erzählen?",
        "What kind of tour is it?": "Was für eine Tour ist die '{tour_name}'?",
        "Where is the tour located?": "Wo befindet sich die Tour '{tour_name}'?",
        "What can I expect on the tour?": "Was kann ich auf der Tour '{tour_name}' erwarten?",
        "What is the terrain like on the tour?": "Wie ist das Gelände auf der Tour '{tour_name}'?",
        "Are there any accommodations recommended?": "Gibt es Empfehlungen für Unterkünfte für die Tour '{tour_name}'?",
        "Where can I stay if I take the tour?": "Wo kann ich übernachten, wenn ich die Tour '{tour_name}' mache?",
        "Can you recommend a tour near": "Können Sie eine Tour in der Nähe von {location} empfehlen?",
        "I am looking for a tour around": "Ich suche eine Tour in der Nähe von {location}, haben Sie Vorschläge?",
        "What are some good hiking tours in the": "Was sind einige gute Wandertouren in der {region}?",
        "Are there any easy hikes in the": "Gibt es einfache Wanderungen in der {region}?",
    },
    "it": {
        "Can you tell me about the tour?": "Puoi parlarmi del tour '{tour_name}'?",
        "What kind of tour is it?": "Che tipo di tour è '{tour_name}'?",
        "Where is the tour located?": "Dove si trova il tour '{tour_name}'?",
        "What can I expect on the tour?": "Cosa posso aspettarmi dal tour '{tour_name}'?",
        "What is the terrain like on the tour?": "Com'è il terreno durante il tour '{tour_name}'?",
        "Are there any accommodations recommended?": "Ci sono alloggi raccomandati per il tour '{tour_name}'?",
        "Where can I stay if I take the tour?": "Dove posso soggiornare se faccio il tour '{tour_name}'?",
        "Can you recommend a tour near": "Puoi consigliarmi un tour vicino a {location}?",
        "I am looking for a tour around": "Sto cercando un tour vicino a {location}, qualche suggerimento?",
        "What are some good hiking tours in the": "Quali sono alcuni buoni tour escursionistici nella {region}?",
        "Are there any easy hikes in the": "Ci sono escursioni facili nella {region}?",
    },
}


def _parse_literal_cell(value, default):
    """Parse a CSV cell that holds a Python literal without executing code.

    Args:
        value: Raw cell value. A string is parsed with ``ast.literal_eval``;
            an existing dict/list is returned unchanged.
        default: Returned when the cell is missing (None/NaN) or blank.

    Returns:
        The parsed literal, or ``default`` for a missing/blank cell.

    Raises:
        ValueError, SyntaxError: If the string is not a plain Python literal.
        TypeError: If the cell is neither a string, a container, nor missing.
    """
    if isinstance(value, str):
        text = value.strip()
        # literal_eval only accepts literals, so expressions embedded in the
        # data file are rejected instead of being executed as eval() would.
        return ast.literal_eval(text) if text else default
    if isinstance(value, (dict, list)):
        return value
    if pd.isna(value):
        # read_csv turns empty cells into NaN.
        return default
    raise TypeError(
        f"Expected a string holding a Python literal, got {type(value).__name__}"
    )


def _tour_fields(row):
    """Extract the values the conversation templates need from one CSV row."""
    tour_info = _parse_literal_cell(row['Tour Info'], {})
    housing = _parse_literal_cell(row['Housing Recommendations'], [])
    return {
        "tour_name": row['Tour Name'],
        "description": row['Description'],
        "character": row['Character'],
        "location": tour_info.get('Location', 'Unknown'),
        "region": tour_info.get('Gebirgsgruppe', 'region'),
        "category": tour_info.get('Tourenkategorie', 'tour'),
        "highlights": tour_info.get('Highlights', 'special features'),
        # Only the first housing recommendation is used in the answers.
        "lodging": housing[0]['Title'] if housing else 'not specified',
    }


def _build_conversation(tour, templates):
    """Return the alternating user/assistant messages for one tour."""
    tour_name = tour["tour_name"]
    description = tour["description"]
    character = tour["character"]
    location = tour["location"]
    region = tour["region"]
    lodging = tour["lodging"]

    # str.format ignores unused keyword arguments, so every template can be
    # given the full set of placeholders.
    placeholders = {"tour_name": tour_name, "location": location, "region": region}

    # NOTE(review): only the questions are translated; the assistant answers
    # below are English for every language.
    exchanges = [
        (
            "Can you tell me about the tour?",
            f"The '{tour_name}' tour is an exciting adventure that {description}. It offers a unique experience with {character}.",
        ),
        (
            "What kind of tour is it?",
            f"{tour_name} is a {tour['category']}. It is known for its {tour['highlights']}.",
        ),
        (
            "Where is the tour located?",
            f"The '{tour_name}' tour is located in the beautiful {region}, known for its stunning landscapes and natural beauty.",
        ),
        (
            "What can I expect on the tour?",
            f"On the '{tour_name}' tour, you can expect {character}. It's a perfect blend of adventure and relaxation.",
        ),
        (
            "What is the terrain like on the tour?",
            "The terrain is gentle and suitable for a comfortable hike, making it ideal for hikers of all levels.",
        ),
        (
            "Are there any accommodations recommended?",
            f"Yes, one highly recommended accommodation is {lodging}, which offers excellent amenities and a great location.",
        ),
        (
            "Where can I stay if I take the tour?",
            f"You can stay at {lodging} if you take the '{tour_name}' tour. It's a comfortable and convenient place to rest after your adventures.",
        ),
        (
            "Can you recommend a tour near",
            f"You can consider the '{tour_name}' tour, which is located near {location}. It's a great choice for exploring the area.",
        ),
        (
            "I am looking for a tour around",
            f"You can try the '{tour_name}' tour, which is near {location}. It's well-regarded for its scenic routes and enjoyable experience.",
        ),
        (
            "What are some good hiking tours in the",
            f"One good hiking tour in the {region} is the '{tour_name}' tour. It offers beautiful trails and breathtaking views.",
        ),
        (
            "Are there any easy hikes in the",
            f"Yes, the '{tour_name}' is an easy hike in the {region}, perfect for beginners and families.",
        ),
    ]

    conversation = []
    for question_key, answer in exchanges:
        conversation.append(
            {"role": "user", "content": templates[question_key].format(**placeholders)}
        )
        conversation.append({"role": "assistant", "content": answer})
    return conversation


def create_multilingual_fine_tuning_dataframe(data_path, languages):
    """Build chat-style fine-tuning records for every tour in a CSV file.

    The CSV must provide the columns 'Tour Name', 'Description', 'Character',
    'Tour Info' (a dict literal) and 'Housing Recommendations' (a list literal
    of dicts with a 'Title' key). Missing 'Tour Info' / 'Housing
    Recommendations' cells fall back to the default answer fragments.

    Args:
        data_path: Path or file-like object accepted by ``pandas.read_csv``.
        languages: Iterable of language codes; supported codes are the keys
            of ``_QUESTION_TEMPLATES`` ('en', 'de', 'it').

    Returns:
        A list of ``{"messages": [message]}`` dicts, one per message, ordered
        by language, then by CSV row, then by position in the conversation
        (user question followed by its assistant answer).

    Raises:
        KeyError: If a language code is unsupported or a column is missing.
        ValueError, SyntaxError: If a literal cell cannot be parsed safely.
    """
    data = pd.read_csv(data_path)

    # Parse each row once; the extracted fields are the same for all languages.
    tours = [_tour_fields(row) for _, row in data.iterrows()]

    formatted_data = []
    for lang in languages:
        try:
            templates = _QUESTION_TEMPLATES[lang]
        except KeyError:
            raise KeyError(
                f"Unsupported language {lang!r}; "
                f"supported: {sorted(_QUESTION_TEMPLATES)}"
            ) from None
        for tour in tours:
            # NOTE(review): every message becomes its own single-message
            # entry (kept for compatibility with existing consumers); confirm
            # whether question/answer pairs per entry were intended.
            formatted_data.extend(
                {"messages": [message]}
                for message in _build_conversation(tour, templates)
            )

    return formatted_data

# Use the function to create the fine-tuning records
file_path = '../Data/tour_info.csv'
languages = ['en', 'de', 'it']  # Add more languages as needed
formatted_data = create_multilingual_fine_tuning_dataframe(file_path, languages)

# Save the formatted data as JSON Lines (one JSON object per line).
# NOTE(review): the file keeps its '.json' extension although the content is
# JSONL; rename only together with whatever reads this file.
jsonl_file_path = '../Data/multilingual_formatted_tour_info_for_fine_tuning.json'
with open(jsonl_file_path, 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps German/Italian characters readable in the file.
    f.writelines(
        json.dumps(entry, ensure_ascii=False) + "\n" for entry in formatted_data
    )

# Convert the formatted data to a DataFrame with "input" and "output" columns.
# The entries carry no language tag, but the function emits them grouped by
# language in the order of `languages`, with the same number of entries per
# language, so the language follows from the entry's position.
entries_per_language = len(formatted_data) // max(len(languages), 1)

paired_rows = []
open_row = None  # user question still waiting for its assistant answer
for position, entry in enumerate(formatted_data):
    entry_language = languages[position // entries_per_language]
    for message in entry["messages"]:
        if message["role"] == "user":
            if open_row is not None:
                # Previous question never got an answer; keep it anyway.
                paired_rows.append(open_row)
            open_row = {
                "input": message["content"],
                "output": "",
                "language": entry_language,
            }
        elif message["role"] == "assistant":
            if open_row is None:
                # Answer without a preceding question.
                open_row = {"input": "", "output": "", "language": entry_language}
            open_row["output"] = message["content"]
            paired_rows.append(open_row)
            open_row = None
if open_row is not None:
    paired_rows.append(open_row)

formatted_dataframe = pd.DataFrame(
    paired_rows, columns=["input", "output", "language"]
)

# Save the DataFrame to a CSV file
csv_file_path = '../Data/multilingual_formatted_tour_info_for_fine_tuning.csv'
formatted_dataframe.to_csv(csv_file_path, index=False)
