In [1]:
import os
import pandas as pd
import ast

# Whitelist of tag values that survive filtering. The entries and their order
# are exactly those of the original one-per-line listing; only the layout is
# more compact.
travel_tags = [
    "viewpoint", "attraction", "castle", "fort", "tomb", "ruins",
    "archaeological_site", "monument", "memorial", "artwork",
    "museum", "gallery",
    "park", "garden", "nature_reserve", "playground", "theme_park",
    "water_park", "miniature_golf", "swimming_pool", "sauna", "zoo",
    "aquarium", "cinema", "theatre",
    "cafe", "bar", "pub", "ice_cream", "restaurant", "fast_food",
    "hotel", "hostel", "guest_house", "chalet", "camp_site", "picnic_site",
    "fountain", "city_gate", "tower", "clock", "citywalls",
    "marina", "ferry_terminal", "ship", "aqueduct",
    "sports_centre", "stadium", "fitness_centre", "golf_course",
    "track", "pitch",
]

# Path to the input CSV file
input_file = os.path.join("datatest", "split", "clean_values.csv")

# Path to the output CSV file; its parent folder is created up front so the
# later `to_csv` call cannot fail on a missing directory.
output_file = os.path.join("great_data", "filtered_tags.csv")
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# Load the CSV data.
# On failure we report which file was involved and then re-raise. The previous
# `exit(1)` is an interactive-session helper: inside a notebook it asks the
# kernel to shut down rather than just failing this cell, and it hides the
# traceback. Re-raising stops the cell (and "Run All") while keeping the kernel
# and the full error visible.
try:
    df = pd.read_csv(input_file)
except Exception as e:
    print(f"Error loading file {input_file}: {e}")
    raise

# Without a 'tags' column there is nothing to filter.
if 'tags' not in df.columns:
    print("'tags' column not found in the CSV file.")
else:
    def filter_tags(tag_str):
        """Keep only the whitelisted entries of a stringified tag list.

        `tag_str` is parsed with `ast.literal_eval`. Returns the list of
        entries that also appear in `travel_tags` (input order preserved),
        or None when parsing fails, the parsed value is not a list, or no
        entry survives.
        """
        try:
            parsed = ast.literal_eval(tag_str)
            if not isinstance(parsed, list):
                return None
            kept = []
            for candidate in parsed:
                if candidate in travel_tags:
                    kept.append(candidate)
        except Exception:
            # Best effort: any unparsable cell is treated as "no tags".
            return None
        # An empty result is reported as None so the row can be dropped below.
        return kept or None

    # Replace each cell of the tags column with its filtered list (or None)
    df['tags'] = df['tags'].apply(filter_tags)

    # Keep only the rows that still carry at least one tag
    has_tags = df['tags'].notna()
    filtered_df = df[has_tags]

    # Persist the surviving rows
    filtered_df.to_csv(output_file, index=False)
    print(f"Filtered tags saved to {output_file}.")

Filtered tags saved to great_data\filtered_tags.csv.


In [None]:
import os
import pandas as pd
import ast

# Location of the filtered dataset (great_data/filtered_tags.csv)
data_file = os.path.join("great_data", "filtered_tags.csv")

if os.path.exists(data_file):
    # Read the CSV file
    data = pd.read_csv(data_file)
    df = pd.DataFrame(data)

    if 'tags' not in df.columns:
        print("'tags' column not found in the CSV file.")
    else:
        # Every cell is a stringified list: parse it, then explode so that
        # each individual tag sits on its own row.
        parsed_tags = df['tags'].apply(ast.literal_eval)
        all_tags = parsed_tags.explode()

        # Distinct tags, in order of first appearance
        unique_tags = all_tags.unique()
        unique_tags_df = pd.DataFrame({'tags': unique_tags})

        # Make sure the destination folder exists
        output_dir = os.path.join("great_data")
        os.makedirs(output_dir, exist_ok=True)

        # Write the distinct tags as a single-column CSV
        tags_file = os.path.join(output_dir, "tags.csv")
        unique_tags_df.to_csv(tags_file, index=False)

        print(f"Unique tags saved to: {tags_file}")
else:
    print(f"File not found: {data_file}")

Unique tags saved to: great_data\tags.csv


In [5]:
import json
import re
from llamaapi import LlamaAPI

# Initialize the LlamaAPI SDK.
# The API token is read from the LLAMA_API_TOKEN environment variable so that
# it is never stored in the notebook (or its version history).
# NOTE(review): the token that used to be hardcoded here has been committed in
# plain text and should be treated as compromised — revoke and rotate it.
llama_api_token = os.environ.get("LLAMA_API_TOKEN")
if not llama_api_token:
    raise RuntimeError(
        "LLAMA_API_TOKEN is not set; export it before running this cell."
    )
llama = LlamaAPI(llama_api_token)

# CSV holding the list of known tags (single 'tags' column expected)
tags_file = os.path.join("great_data", "tags.csv")

if os.path.exists(tags_file):
    # Load the file and pull the 'tags' column out as a plain Python list
    tags_df = pd.read_csv(tags_file)

    if 'tags' not in tags_df.columns:
        print("'tags' column not found in the CSV file.")
        existing_tags = []
    else:
        existing_tags = tags_df['tags'].tolist()
else:
    # A missing file is not fatal: continue with no known tags
    print(f"File not found: {tags_file}")
    existing_tags = []

# Hardcoded prompt for now
prompt = "I want a place that i could chill and watch the sun rise"

# Render the known tags as one comma-separated string; the literal text "None"
# is sent to the model when there are no known tags.
if existing_tags:
    existing_tags_str = ", ".join(existing_tags)
else:
    existing_tags_str = "None"

# System message: tells the model which tags already exist and what to return.
# NOTE(review): the instructions and the schema below ask for a 'new_tag' key,
# but the recorded output of this cell shows the model answering with
# 'new_tags' — confirm which key downstream code expects.
system_instructions = (
    "You are an AI specialized in location tagging. "
    f"Here are the existing tags: {existing_tags_str}. "
    "Your task is to analyze the user's prompt and see if any of the existing tags match. "
    "If they don't fully capture the essence of the prompt, generate new tags that better fit the user's request. "
    "Return the result strictly in JSON format with two arrays: 'existed_tags' for matched tags and 'new_tag' for newly generated ones."
)

# Description attached to the function the model is asked to call
function_description = (
    "Extract the most relevant tags based on the user's prompt. "
    "First, compare the prompt with the existing tags. If any existing tag (or its synonym) matches the prompt, return that tag. "
    "If no perfect match exists, generate new tags that are unique and non-redundant. "
    "Do not create synonyms or variations of existing tags when generating new tags."
)

# JSON-schema style declaration of the function's two string-array arguments
tag_function = {
    "name": "extract_location_tags",
    "description": function_description,
    "parameters": {
        "type": "object",
        "properties": {
            "existed_tags": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of tags that match the existing tags"
            },
            "new_tag": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of new tags generated from the prompt"
            }
        },
        "required": ["existed_tags", "new_tag"]
    }
}

# Full request payload: model, conversation, function spec and sampling settings
api_request_json = {
    "model": "llama3.1-70b",
    "messages": [
        {"role": "system", "content": system_instructions},
        {"role": "user", "content": f"Analyze this prompt and extract tags: '{prompt}'"},
    ],
    "functions": [tag_function],
    "function_call": "extract_location_tags",
    "max_tokens": 200,
    "temperature": 0.2,
    "top_p": 0.9,
    "frequency_penalty": 0.8,
    "presence_penalty": 0.3,
    "stream": False
}


# Execute the request
response = llama.run(api_request_json)

# `response` is used as a Response-like object: .json() yields the decoded body
response_data = response.json()

# Extract the message text of the first choice. `or ""` guards against a
# missing/null 'content' field so the regex search below never receives None.
# NOTE(review): if the API answers with a structured function call instead of
# text, the payload would not be under 'content' — confirm against the
# LlamaAPI response format.
content = response_data['choices'][0]['message'].get('content') or ""

# Displaying the content
print("Response Content START:")
print(content)
print("Response Content END:")

# Locate the JSON payload.
# 1) Prefer a fenced code block. The non-greedy `.*?` stops at the first
#    closing fence, so explanatory text or further fenced blocks after the
#    JSON are not swallowed (the greedy form ran to the last `}` + fence).
match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', content, re.DOTALL)
if match:
    json_str = match.group(1)
else:
    # 2) No fence: fall back to the outermost {...} span of the raw text.
    first_brace = content.find('{')
    last_brace = content.rfind('}')
    json_str = content[first_brace:last_brace + 1] if 0 <= first_brace < last_brace else None

# Parse defensively: malformed JSON is reported like "nothing found" instead
# of aborting the cell with a JSONDecodeError. `tags_data` stays None then.
tags_data = None
if json_str is not None:
    try:
        tags_data = json.loads(json_str)
    except json.JSONDecodeError:
        tags_data = None

if tags_data is not None:
    print("Extracted Tags Data:")
    print(json.dumps(tags_data, indent=2))
else:
    print("No valid JSON found in the response.")

Response Content START:
```json
{
  "existed_tags": ["viewpoint"],
  "new_tags": ["sunrise_spot", "relaxation_area"]
}
```

In this analysis, the existing tag 'viewpoint' is matched as it implies a location with a scenic view, which could be suitable for watching the sunrise. However, to better capture the essence of the prompt, two new tags are generated: 'sunrise_spot' to specifically denote a location known for its sunrise views and 'relaxation_area' to convey a place where one can unwind and chill.
Response Content END:
Extracted Tags Data:
{
  "existed_tags": [
    "viewpoint"
  ],
  "new_tags": [
    "sunrise_spot",
    "relaxation_area"
  ]
}
