In [10]:
import ast
import json

import pandas as pd

# Sample data creation function for multilingual support
def create_multilingual_fine_tuning_dataframe(data, languages):
    """Build a question/answer DataFrame for fine-tuning from tour records.

    For every requested language and every tour row, eleven question/answer
    pairs are generated. The questions are rendered from per-language
    templates; the answers are assembled from the row's own fields.

    Args:
        data: Anything ``pd.read_csv`` accepts (path or file-like object), or
            an already-loaded ``pd.DataFrame``. The frame must provide the
            columns ``'Tour Name'``, ``'Description'``, ``'Character'``,
            ``'Tour Info'`` and ``'Housing Recommendations'``.
            ``'Tour Info'`` holds a dict (or its Python-literal string form)
            from which ``'Location'``, ``'Tourenkategorie'`` and
            ``'Gebirgsgruppe'`` are read. ``'Housing Recommendations'`` holds
            a list (or its literal string form) whose first element, if any,
            is a dict with a ``'Title'`` key.
        languages: Iterable of language codes; ``'en'``, ``'de'`` and ``'it'``
            are available.

    Returns:
        A ``pd.DataFrame`` with the columns ``question`` and ``answer``,
        ordered by language first and by input row second.

    Raises:
        KeyError: If a requested language has no question templates.
        ValueError, SyntaxError: If a ``'Tour Info'`` or
            ``'Housing Recommendations'`` string is not a plain Python literal.
    """
    tours = data if isinstance(data, pd.DataFrame) else pd.read_csv(data)

    # Question templates per language, built once per call instead of once per
    # row. The inner keys are the untranslated question stems.
    translations = {
        "en": {
            "Can you tell me about the tour?": "Can you tell me about the '{tour_name}' tour?",
            "What kind of tour is it?": "What kind of tour is '{tour_name}'?",
            "Where is the tour located?": "Where is the '{tour_name}' tour located?",
            "What can I expect on the tour?": "What can I expect on the '{tour_name}' tour?",
            "What is the terrain like on the tour?": "What is the terrain like on the '{tour_name}' tour?",
            "Are there any accommodations recommended?": "Are there any accommodations recommended for the '{tour_name}' tour?",
            "Where can I stay if I take the tour?": "Where can I stay if I take the '{tour_name}' tour?",
            "Can you recommend a tour near": "Can you recommend a tour near {location}?",
            "I am looking for a tour around": "I am looking for a tour around {location}, any suggestions?",
            "What are some good hiking tours in the": "What are some good hiking tours in the {region}?",
            "Are there any easy hikes in the": "Are there any easy hikes in the {region}?",
        },
        "de": {
            "Can you tell me about the tour?": "Können Sie mir etwas über die '{tour_name}' Tour erzählen?",
            "What kind of tour is it?": "Was für eine Tour ist '{tour_name}'?",
            "Where is the tour located?": "Wo befindet sich die '{tour_name}' Tour?",
            "What can I expect on the tour?": "Was kann ich auf der '{tour_name}' Tour erwarten?",
            "What is the terrain like on the tour?": "Wie ist das Gelände auf der '{tour_name}' Tour?",
            "Are there any accommodations recommended?": "Gibt es Empfehlungen für Unterkünfte für die '{tour_name}' Tour?",
            "Where can I stay if I take the tour?": "Wo kann ich übernachten, wenn ich die '{tour_name}' Tour mache?",
            "Can you recommend a tour near": "Können Sie eine Tour in der Nähe von {location} empfehlen?",
            "I am looking for a tour around": "Ich suche eine Tour in der Nähe von {location}, haben Sie Vorschläge?",
            "What are some good hiking tours in the": "Was sind einige gute Wandertouren in der {region}?",
            "Are there any easy hikes in the": "Gibt es einfache Wanderungen in der {region}?",
        },
        "it": {
            "Can you tell me about the tour?": "Puoi parlarmi del tour '{tour_name}'?",
            "What kind of tour is it?": "Che tipo di tour è '{tour_name}'?",
            "Where is the tour located?": "Dove si trova il tour '{tour_name}'?",
            "What can I expect on the tour?": "Cosa posso aspettarmi dal tour '{tour_name}'?",
            "What is the terrain like on the tour?": "Com'è il terreno durante il tour '{tour_name}'?",
            "Are there any accommodations recommended?": "Ci sono alloggi raccomandati per il tour '{tour_name}'?",
            "Where can I stay if I take the tour?": "Dove posso soggiornare se faccio il tour '{tour_name}'?",
            "Can you recommend a tour near": "Puoi consigliarmi un tour vicino a {location}?",
            "I am looking for a tour around": "Sto cercando un tour vicino a {location}, qualche suggerimento?",
            "What are some good hiking tours in the": "Quali sono alcuni buoni tour escursionistici nella {region}?",
            "Are there any easy hikes in the": "Ci sono escursioni facili nella {region}?",
        },
    }

    def parse_literal(value, default):
        """Turn a cell into a Python container without executing code."""
        if isinstance(value, str):
            # literal_eval only accepts literals, so an expression smuggled
            # into the data file is rejected instead of being run.
            return ast.literal_eval(value)
        if isinstance(value, (dict, list, tuple)):
            # Already parsed, e.g. when the caller passed a DataFrame.
            return value
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            # Empty cell: fall back so the defaults used below apply.
            return default
        raise TypeError(
            f"Cannot interpret cell of type {type(value).__name__} as a literal"
        )

    def format_row(row, templates):
        """Return the eleven question/answer dicts for one tour row."""
        tour_name = row['Tour Name']
        tour_info = parse_literal(row['Tour Info'], {})
        housing = parse_literal(row['Housing Recommendations'], [])

        location = tour_info.get('Location', 'Unknown')
        region = tour_info.get('Gebirgsgruppe', 'region')
        accommodation = housing[0]['Title'] if housing else 'not specified'

        def ask(stem, **fields):
            return templates[stem].format(**fields)

        # NOTE(review): only the questions are localized; every answer below
        # is English (or the raw row text) regardless of the language, and the
        # terrain answer is the same fixed sentence for every tour.
        return [
            {
                "question": ask("Can you tell me about the tour?", tour_name=tour_name),
                "answer": row['Description'],
            },
            {
                "question": ask("What kind of tour is it?", tour_name=tour_name),
                "answer": f"{tour_name} is a {tour_info.get('Tourenkategorie', 'tour')}.",
            },
            {
                "question": ask("Where is the tour located?", tour_name=tour_name),
                "answer": f"The '{tour_name}' tour is located in the {region}.",
            },
            {
                "question": ask("What can I expect on the tour?", tour_name=tour_name),
                "answer": row['Character'],
            },
            {
                "question": ask("What is the terrain like on the tour?", tour_name=tour_name),
                "answer": "The terrain is gentle, suitable for a comfortable hike.",
            },
            {
                "question": ask("Are there any accommodations recommended?", tour_name=tour_name),
                "answer": f"Yes, one recommended accommodation is {accommodation}.",
            },
            {
                "question": ask("Where can I stay if I take the tour?", tour_name=tour_name),
                "answer": f"You can stay at {accommodation} if you take the '{tour_name}' tour.",
            },
            {
                "question": ask("Can you recommend a tour near", location=location),
                "answer": f"You can consider the '{tour_name}' tour which is located near {location}.",
            },
            {
                "question": ask("I am looking for a tour around", location=location),
                "answer": f"You can try the '{tour_name}' tour which is near {location}.",
            },
            {
                "question": ask("What are some good hiking tours in the", region=region),
                "answer": f"One good hiking tour in the {region} is the '{tour_name}' tour.",
            },
            {
                "question": ask("Are there any easy hikes in the", region=region),
                "answer": f"Yes, the '{tour_name}' is an easy hike in the {region}.",
            },
        ]

    formatted_data = []
    for lang in languages:
        try:
            templates = translations[lang]
        except KeyError:
            # Fail before touching any rows, with a message naming the options.
            raise KeyError(
                f"Unsupported language {lang!r}; available: {sorted(translations)}"
            ) from None
        for _, row in tours.iterrows():
            formatted_data.extend(format_row(row, templates))

    # Explicit columns keep the schema stable even when no rows were produced.
    return pd.DataFrame(formatted_data, columns=["question", "answer"])

# Generate the multilingual question/answer frame from the tour CSV.
languages = ['en', 'de', 'it']  # language codes to generate questions for
file_path = '../Data/tour_info.csv'
formatted_df = create_multilingual_fine_tuning_dataframe(file_path, languages)

# Optionally persist the result as CSV (row index omitted).
formatted_df.to_csv(
    '../Data/multilingual_formatted_tour_info_for_fine_tuning.csv',
    index=False,
)

# Notebook preview of the first rows.
formatted_df.head()


Unnamed: 0,question,answer
0,"Can you tell me about the '""Weg der Sinne"" am ...",Gemütliche Wanderung in den Tuxer Alpen mit sc...
1,"What kind of tour is '""Weg der Sinne"" am Hochp...","""Weg der Sinne"" am Hochpillberg in Tirol is a ..."
2,"Where is the '""Weg der Sinne"" am Hochpillberg ...","The '""Weg der Sinne"" am Hochpillberg in Tirol'..."
3,"What can I expect on the '""Weg der Sinne"" am H...",Charakter: Auf dieser gemütliche Wanderung gib...
4,"What is the terrain like on the '""Weg der Sinn...","The terrain is gentle, suitable for a comforta..."
