In [7]:
# this code is for the retrieval of the data from the api bikereg
import requests
from datetime import datetime, timezone, timedelta
import pandas as pd
from bs4 import BeautifulSoup

# Configuration
BASE_URL = "http://www.BikeReg.com/api/search"  # Replace with the actual API endpoint


def parse_event_notes(event_notes):
    """
    Parse and clean the HTML content of EventNotes.

    Args:
        event_notes (str): The raw HTML content of EventNotes.

    Returns:
        str: Cleaned and readable text extracted from the HTML.
    """
    # Parse the HTML content
    if not event_notes:
        return ""
    soup = BeautifulSoup(event_notes, "html.parser")

    # Extract text while preserving meaningful structure
    cleaned_text = soup.get_text(separator="\n").strip()

    return cleaned_text

def convert_date_predefined(date_string):
    # Extract the timestamp and timezone offset
    import re
    match = re.match(r"/Date\((\d+)([+-]\d{4})\)/", date_string)
    if not match:
        raise ValueError("Invalid date format")

    timestamp_ms = int(match.group(1))  # Extract the Unix timestamp in milliseconds
    timezone_offset = match.group(2)   # Extract the timezone offset, e.g., -0500

    # Convert timestamp to seconds
    timestamp_s = timestamp_ms / 1000

    # Parse the timezone offset
    offset_hours = int(timezone_offset[:3])  # First 3 characters, e.g., -05
    offset_minutes = int(timezone_offset[0] + timezone_offset[3:])  # Sign + last 2 digits
    offset = timedelta(hours=offset_hours, minutes=offset_minutes)

    # Create a timezone-aware datetime
    utc_time = datetime.fromtimestamp(timestamp_s, tz=timezone.utc)
    local_time = utc_time + offset

    # Return as a readable string
    return local_time.strftime("%Y-%m-%d %H:%M:%S")

def get_events_near_location(latitude, longitude, radius=50):
    """
    Retrieve cycling events near a specific location without requiring an API key.

    Parameters:
        latitude (float): Latitude of the user's location.
        longitude (float): Longitude of the user's location.
        radius (int): Search radius in miles. Default is 50 miles.

    Returns:
        list: A list of cycling events matching the criteria.
    """
    all_events = []
    startpage = 1
    page_size = 100  # Assuming each page retrieves 100 results
    while True:
        params = {
            "startpage": startpage,
            "Latitude": latitude,
            "Longitude": longitude,
            "Distance": radius
        }
        try:
            response = requests.get(BASE_URL, params=params)
            response.raise_for_status()

            data = response.json()
            events = data.get("MatchingEvents", [])

            # Add events from the current page to the aggregated list
            all_events.extend(events)

            # Move to the next page
            startpage += 1

        except requests.exceptions.RequestException as e:
            print(f"Error fetching events on page {startpage}: {e}")
            break

    return all_events

def get_full_data():
    """
    Retrieve all cycling events without requiring an API key.

    Returns:
        list: A list of all cycling events.
    """
    all_events = []
    startpage = 1
    page_size = 100  # Assuming each page retrieves 100 results
    while True:
        params = {
            "startpage": startpage
        }
        try:
            response = requests.get(BASE_URL, params=params)
            response.raise_for_status()

            data = response.json()
            events = data.get("MatchingEvents", [])

            # Add events from the current page to the aggregated list
            all_events.extend(events)

            # Check if the number of events is less than the page size (last page)
            if len(events) < page_size:
                break

            # Move to the next page
            startpage += 1

        except requests.exceptions.RequestException as e:
            print(f"Error fetching events on page {startpage}: {e}")
            break

    return all_events

# Example Usage
if __name__ == "__main__":

    events = get_full_data()
    if events:
        transformed_events = []
        for event in events:
                readable_date = convert_date_predefined(event["EventDate"])
                transformed_events.append({
                    "Name": event["EventName"],
                    "Start_Date": convert_date_predefined(event['EventDate']),
                    "End_Date": convert_date_predefined(event['EventEndDate']),
                    "Location": event["EventAddress"],
                    "City": event["EventCity"],
                    "State": event["EventState"],
                    "Zip": event["EventZip"],
                    "Details": event["EventUrl"],
                    "Notes": parse_event_notes(event["EventNotes"]),
                    "Type": ", ".join(event.get("EventTypes", []))
                })

        # Create a DataFrame
        df = pd.DataFrame(transformed_events)
        df.fillna("", inplace=True)
        # optional
        df.to_csv("events.csv", index=False)

In [8]:
# this code is the code to ingest the data into vectara
import pandas as pd
import requests
import json

# Configuration
df = pd.read_csv("Events.csv")  # Path to the DataFrame file
vectara_key = "zut__oZQMNcHTl6aadKpWhXfw_236BWI5bEBAg_rfg"
corpus_key = "Events"

# Function to send the data to Vectara
def send_to_vectara():
    url = f"https://api.vectara.io/v2/corpora/{corpus_key}/documents"
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'x-api-key': vectara_key
    }

    # Iterate through each row in the DataFrame
    for index, row in df.iterrows():
        # Construct the payload
        payload = {
            "id": f"event_{index + 1}",  # Unique ID for the document
            "type": "core",
            "metadata": {
                "EventName": row['Name'],
                "Start_Date": str(row['Start_Date']),
                "End_Date": str(row['End_Date']),
                "Location": row['Location'],
                "City": row['City'],
                "State": row['State'],
                "Zip": str(row['Zip']),
                "Longitude": row['Longitude'],
                "Latitude": row['Latitude'],
                "Details": row['Details'],
                "Notes": row['Notes'],
                "Type": row['Type']
            },
            "document_parts": [
                {
                    "text": (
                        f"EventName: {row['Name']}\n"
                        f"Start_Date: {str(row['Start_Date'])}\n"
                        f"End_Date: {str(row['End_Date'])}\n"
                        f"Location: {row['Location']}\n"
                        f"City: {row['City']}\n"
                        f"State: {row['State']}\n"
                        f"Zip: {str(row['Zip'])}\n"
                        f"Longitude: {row['Longitude']}\n"
                        f"Latitude: {row['Latitude']}\n"
                        f"Details: {row['Details']}\n"
                        f"Notes: {row['Notes']}\n"
                        f"Type: {row['Type']}"
                    ),
                    "metadata": {},
                    "context": "string",
                    "custom_dimensions": {}
                }
            ]
        }

        try:
            # Send the POST request to Vectara
            response = requests.post(url, headers=headers, json=payload)
            response.raise_for_status()
            print(f"Document {row['Name']} added successfully.")
        except requests.exceptions.RequestException as e:
            print(f"Error adding document {row['Name']}: {e}")

# Execute the function to send data to Vectara
send_to_vectara()


Error adding document Skylands Cycling 2024 Membership: Out of range float values are not JSON compliant
Document 2024 Galbraith Gravity Racing Team Spring Session Membership Drive added successfully.
Document NVP 2024 Membership and Waiver added successfully.
Document River Valley Cycling 2024 Membership added successfully.
Document Capital Bicycle Racing Club Membership 2024 added successfully.
Document Kids Bike League - 2024 Effingham Events added successfully.
Error adding document 2024 State 9 Racing Team Membership: Out of range float values are not JSON compliant
Document Dry Hill MicroShuttle 2024 added successfully.
Error adding document Mohawk Valley Bicycling Club (MVBC) 2024 Membership: Out of range float values are not JSON compliant
Error adding document 2024 Minuteman Road Club Membership: Out of range float values are not JSON compliant
Error adding document Spring City Spinners Membership: Out of range float values are not JSON compliant
Error adding document 2024 Mon

In [64]:
import requests
import json
from urllib.parse import quote

def text_to_url_query(text):
    """
    Convert plain text into URL query text.
    """
    # Convert text to URL-encoded format
    query_text = quote(text)
    return query_text

def ask_query(query):
    q = text_to_url_query(query)

    payload=json.dumps(
        {
        "query": query,
        "search": {              
          "metadata_filter": "",
          "lexical_interpolation": 0.005,
          "custom_dimensions": {},
          "offset": 0,
          "limit": 25,
          "context_configuration": {
            "sentences_before": 2,
            "sentences_after": 2,
            "start_tag": "%START_SNIPPET%",
            "end_tag": "%END_SNIPPET%"
          },
          "reranker": {
            "type": "customer_reranker",
            "reranker_id": "rnk_272725719"
          }
        },
        "stream_response": False,
        "generation": {
          "generation_preset_name": "mockingbird-1.0-2024-07-16",
          "max_used_search_results": 5,
          # "max_response_characters": 4000,
          "response_language": "eng",
          "enable_factual_consistency_score": False,
          # "model_parameters": {
          #     "max_tokens": 4000,
          # }
        },
      }
    )
    headers = {
      'Accept': 'application/json',
      'x-api-key': vectara_key
    }

    url = f"https://api.vectara.io/v2/corpora/{corpus_key}/query?query={q}"
    
    response = requests.request("POST", url, headers=headers, data=payload)

    # print(response.text)

    output = json.loads(response.text)

    return output["summary"]

    # output = json.loads(response)

    # print(output["summary"])

In [65]:
prompt = "which events are best for beginners?"

ask_query(prompt)

"Based on the provided sources, the events that are best for beginners are:\n\n* Mountain Cross #1 [1] - This event has a 30-minute beginner race, which is shorter than the other races, and it's a great training race for those who have their eye on other races coming in the Spring/Summer.\n* The Lit Whizzy [3] - This event has a beginner and family-friendly 18-mile option, which is suitable for beginners.\n* Insomniac Night Rides-Series Races [5] - This event has a juniors category loop for ages 8-14, which is suitable for beginners.\n\nThese events are suitable for beginners because they have shorter distances, beginner-friendly options, and a more relaxed atmosphere."

In [37]:
p = "Tell me more about Mountain Cross #1"

ask_query(p)

{"summary":"Mountain Cross #1 is a mountain bike and cyclocross event held on January 11, 2025, at 1257 County Farm Rd, Bedford, VA 24523 [1]. The event features three races: a 30-minute beginner race, a 45-minute sport race, and a 60-minute expert race [1]. The course is approximately 2 miles long and includes single track, rocks, roots, grass paths, gravel, and pavement [1]. The event also offers food, awards, and discounts, and is suitable for riders of all ages and skill levels [1]. Registration is required, and riders can check in before the race and receive a free t-shirt [1].","response_language":"eng","search_results":[{"text":"%START_SNIPPET%EventName: Mountain Cross #1\nStart_Date: 1/11/25 0:00\nEnd_Date: 1/11/25 0:00\nLocation: 1257 County Farm Rd\nCity: Bedford\nState: VA\nZip: 24523\nLongitude: -79.5399003\nLatitude: 37.3123905\nDetails: http://www.BikeReg.com/mtx-1\nNotes: WHAT IS Mountain Cross / MTX?\n \nThe \"Mountain\" part refers to many of the same aspects that you 