In [3]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Define the group levels and the tags
group_tags = {
    "1": ["beauty", "business", "entrepreneurship"],
    "2": ["diy", "crafts", "travel"],
    "3": ["fashion", "gaming"],
    "4": ["education", "learning", "art"],
    "5": ["music", "fitness"],
    "6": ["cars", "automobiles", "parenting", "family"],
    "7": ["nature", "travel"]
}

# Dictionary to map tags to fancy words for event names
fancy_name_components = {
    "beauty": ["Glamour", "Radiance", "Elegance", "Chic"],
    "business": ["Summit", "Venture", "Innovate", "Enterprise"],
    "entrepreneurship": ["Startup", "Visionary", "Pioneer", "Forge"],
    "diy": ["Craft", "Maker", "Artisan", "Create"],
    "crafts": ["Handmade", "Artistry", "Crafted", "Workshop"],
    "travel": ["Odyssey", "Journey", "Explorer", "Wanderlust"],
    "fashion": ["Couture", "Runway", "Style", "Vogue"],
    "gaming": ["Quest", "Arena", "Epic", "Virtual"],
    "education": ["Academy", "Insight", "Knowledge", "Learn"],
    "learning": ["Discovery", "Enlighten", "Scholar", "Mind"],
    "art": ["Canvas", "Gallery", "Masterpiece", "Creative"],
    "music": ["Harmony", "Rhythm", "Melody", "Concert"],
    "fitness": ["Vitality", "Pulse", "Energy", "FitFest"],
    "cars": ["Motor", "Drive", "Auto", "Speed"],
    "automobiles": ["Cruise", "Rally", "Gear", "Road"],
    "parenting": ["Family", "Nurture", "Kin", "Together"],
    "family": ["Legacy", "Bond", "Clan", "Gathering"],
    "nature": ["Wild", "Serenity", "Eco", "Bloom"]
}

# Generate mock venue data (inline for self-contained script)
def generate_mock_venues(num_venues=20):
    # Define cities and realistic street names for addresses
    city_streets = {
        "Manchester": [
            "Oxford Road", "Deansgate", "Piccadilly", "Whitworth Street", "Portland Street",
            "Princess Street", "Canal Street", "Mosley Street", "Market Street", "King Street"
        ],
        "Stockport": [
            "Merseyway", "Great Underbank", "Stockport Road", "Wellington Road", "Bramhall Lane",
            "Heaton Lane", "Shaw Heath", "Greek Street", "Churchgate", "Tiviot Dale"
        ],
        "Bolton": [
            "Deane Road", "Bradshawgate", "Chorley Old Road", "Hallwell Road", "Derby Street",
            "Blackburn Road", "St. Georges Road", "Victoria Square", "Newport Street", "Mawdsley Street"
        ],
        "Liverpool": [
            "Bold Street", "Lark Lane", "Sefton Street", "Hope Street", "Renshaw Street",
            "Berry Street", "London Road", "Dale Street", "Castle Street", "Mathew Street"
        ],
        "Preston": [
            "Fishergate", "Church Street", "Friargate", "Lancaster Road", "Deepdale Road",
            "Ribbleton Lane", "Blackpool Road", "Guildhall Street", "Avenham Street", "Winckley Street"
        ]
    }
    venue_types = ["Online", "In-Person", "Hybrid"]
    venue_name_components = [
        "Starlight", "Unity", "Horizon", "Pinnacle", "Crest", "Nexus", "Beacon", "Vanguard",
        "Serenity", "Empyrean", "Oasis", "Summit", "Arcadia", "Vista", "Harmony"
    ]
    venue_suffixes = ["Hall", "Centre", "Lodge", "Studio", "Arena", "Pavilion", "Hub", "Venue"]

    def generate_venue_name():
        return f"{random.choice(venue_name_components)} {random.choice(venue_suffixes)}"

    def generate_address(city):
        street = random.choice(city_streets[city])
        street_number = random.randint(1, 100)
        postal_codes = {
            "Manchester": f"M{random.randint(1, 60)} {random.randint(1, 9)}{chr(random.randint(65, 90))}{chr(random.randint(65, 90))}",
            "Stockport": f"SK{random.randint(1, 8)} {random.randint(1, 9)}{chr(random.randint(65, 90))}{chr(random.randint(65, 90))}",
            "Bolton": f"BL{random.randint(1, 7)} {random.randint(1, 9)}{chr(random.randint(65, 90))}{chr(random.randint(65, 90))}",
            "Liverpool": f"L{random.randint(1, 36)} {random.randint(1, 9)}{chr(random.randint(65, 90))}{chr(random.randint(65, 90))}",
            "Preston": f"PR{random.randint(1, 8)} {random.randint(1, 9)}{chr(random.randint(65, 90))}{chr(random.randint(65, 90))}"
        }
        return f"{street_number} {street}, {city}, {postal_codes[city]}"

    venues = {"venue_id": [], "name": [], "type": [], "city": [], "address": []}
    cities = ["Manchester", "Stockport", "Bolton", "Liverpool", "Preston"]
    for i in range(num_venues):
        venues["venue_id"].append(i + 1)
        venues["name"].append(generate_venue_name())
        venues["type"].append(random.choice(venue_types))
        city = random.choice(cities)
        venues["city"].append(city)
        venues["address"].append("Online" if venues["type"][-1] == "Online" else generate_address(city))
    
    return pd.DataFrame(venues)

# Generate fancy event name based on tags
def generate_fancy_event_name(tags):
    if not tags:
        return "Generic Event"
    selected_tags = random.sample(tags, min(len(tags), 2))
    name_parts = [random.choice(fancy_name_components[tag]) for tag in selected_tags]
    suffixes = ["Festival", "Expo", "Gala", "Summit", "Experience", "Showcase", "Retreat", "Conclave"]
    return " ".join(name_parts) + " " + random.choice(suffixes)

# Generate ticket price based on tags
def generate_ticket_price(tags):
    high_price_tags = ["travel", "fashion", "education", "cars", "automobiles"]
    has_high_price_tag = any(tag in high_price_tags for tag in tags)
    return round(random.uniform(50.00, 200.00), 2) if has_high_price_tag else round(random.uniform(10.00, 50.00), 2)

# Generate random tags for an event
def generate_tags():
    num_groups = random.randint(1, 3)
    selected_groups = random.sample(list(group_tags.keys()), num_groups)
    tags = []
    for group in selected_groups:
        num_tags = random.randint(1, 2)
        tags.extend(random.sample(group_tags[group], min(num_tags, len(group_tags[group]))))
    return tags

# Generate mock event data
def generate_mock_events(num_events=100, venues_df=None):
    if venues_df is None:
        venues_df = generate_mock_venues()  # Generate venues if not provided
    
    events = {
        "event_id": [],
        "event_name": [],
        "event_date": [],
        "location": [],
        "ticket_price": [],
        "venue_id": [],
        "tags": []
    }
    
    for i in range(num_events):
        events["event_id"].append(i + 1)
        tags = generate_tags()
        events["event_name"].append(generate_fancy_event_name(tags))
        events["event_date"].append(
            (datetime.now() + timedelta(days=random.randint(1, 30))).strftime("%Y-%m-%d")
        )
        # Sample a venue and align location with venue's city
        venue = venues_df.sample(1).iloc[0]
        events["venue_id"].append(venue["venue_id"])
        events["location"].append(venue["city"])
        events["ticket_price"].append(generate_ticket_price(tags))
        events["tags"].append(tags)
    
    return pd.DataFrame(events)

# Generate venue data
mock_venues = generate_mock_venues(35)

# Generate 100 mock events, passing the venue data
mock_events = generate_mock_events(100, mock_venues)

In [9]:
mock_events.head()

Unnamed: 0,event_id,event_name,event_date,location,ticket_price,venue_id,tags
0,1,Auto Innovate Showcase,2025-06-03,Bolton,106.96,3,"[business, cars, family]"
1,2,Maker Journey Experience,2025-06-03,Manchester,131.52,32,"[travel, diy, nature, travel, cars]"
2,3,Bond Mind Showcase,2025-05-29,Preston,41.78,8,"[learning, nature, family]"
3,4,Innovate Auto Festival,2025-06-20,Bolton,106.27,26,"[travel, business, entrepreneurship, cars]"
4,5,Crafted Forge Conclave,2025-06-01,Manchester,47.55,25,"[beauty, entrepreneurship, fitness, crafts]"


In [11]:
mock_events.to_csv('mock_events.csv')

In [8]:
# Display the first 5 rows for preview
# print("First 5 rows of the mock event data:")
mock_events.head()

# Display the last 5 rows for preview
print("\nLast 5 rows of the mock event data:")
# print(mock_events.tail(5))


Last 5 rows of the mock event data:


In [12]:
mock_venues.head()

Unnamed: 0,venue_id,name,type,city,address
0,1,Oasis Lodge,In-Person,Stockport,"93 Merseyway, Stockport, SK4 4PG"
1,2,Crest Pavilion,Hybrid,Liverpool,"41 Lark Lane, Liverpool, L12 9SQ"
2,3,Horizon Lodge,Hybrid,Bolton,"29 Victoria Square, Bolton, BL1 6YR"
3,4,Unity Pavilion,In-Person,Bolton,"62 Chorley Old Road, Bolton, BL4 4TO"
4,5,Pinnacle Pavilion,In-Person,Manchester,"31 Oxford Road, Manchester, M44 4VZ"


In [7]:
# Display the first 5 rows for preview
print("First 5 rows of the mock event data:")
print(mock_venues.head())

# Display the last 5 rows for preview
print("\nLast 5 rows of the mock event data:")
print(mock_venues.tail())

First 5 rows of the mock event data:
   venue_id               name       type        city  \
0         1        Crest Lodge  In-Person   Liverpool   
1         2          Vista Hub  In-Person   Stockport   
2         3  Vanguard Pavilion     Hybrid  Manchester   
3         4       Pinnacle Hub     Online     Preston   
4         5   Harmony Pavilion     Hybrid     Preston   

                               address  
0    10 London Road, Liverpool, L9 2VU  
1   99 Heaton Lane, Stockport, SK5 7YH  
2   1 King Street, Manchester, M28 3GE  
3                               Online  
4  29 Lancaster Road, Preston, PR5 4KE  

Last 5 rows of the mock event data:
    venue_id            name       type        city  \
30        31   Vanguard Hall     Hybrid  Manchester   
31        32  Oasis Pavilion  In-Person     Preston   
32        33     Crest Arena  In-Person  Manchester   
33        34  Empyrean Lodge  In-Person     Preston   
34        35   Horizon Arena     Hybrid   Liverpool   

      

In [9]:
# Save to CSV 
mock_events.to_csv("mock_events.csv", index=False)
mock_venues.to_csv("mock_venues.csv", index=False)
