Generates daily CSV snapshot files organized into year/month folders, e.g.
resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220121.csv

In [8]:
import random
import pandas as pd
from datetime import datetime, timedelta
import os

# Simulation settings: how many properties exist and how likely each
# attribute is to change on any given day.
num_properties = 100
value_change_probability = 0.04  # property value: 4% chance per day
label_change_probability = 0.002  # energy label: 0.2% chance per day

# First and last snapshot day, parsed straight into datetime objects so they
# can be compared and advanced with timedelta.
start_date, end_date = (
    datetime.strptime(day_text, '%Y-%m-%d')
    for day_text in ('2022-01-01', '2025-03-31')
)

# Pools of candidate values; property attributes are drawn from these lists.

# City names.
cities = [
    "Charlotte", "Raleigh", "Greensboro", "Durham",
    "Winston-Salem", "Fayetteville", "Cary", "Wilmington",
    "High Point", "Asheville", "Concord", "Gastonia",
    "Chapel Hill", "Rocky Mount", "Burlington", "Huntersville",
    "Wilson", "Kannapolis", "Hickory", "Apex",
    "Goldsboro", "Indian Trail", "Mooresville", "Monroe",
    "Sanford",
]

# Street names (house numbers are not part of these entries).
street_names = [
    "Main Street", "Elm Street", "Oak Street", "Maple Street",
    "Pine Street", "Cedar Street", "Church Street", "High Street",
    "Park Avenue", "Broad Street", "Walnut Street", "Chestnut Street",
    "Hickory Street", "Willow Street", "Ash Street", "Birch Street",
    "Magnolia Street", "Sycamore Street", "Poplar Street", "Spruce Street",
    "River Road", "Lakeview Drive", "Sunset Drive", "Meadow Lane",
    "Hillcrest Drive", "Forest Drive", "Ridge Road", "Valley Road",
    "Spring Street", "Summer Street", "Autumn Lane", "Winter Street",
    "Carolina Avenue", "Blue Ridge Road", "Appalachian Drive", "Cape Fear Road",
    "Tar Heel Drive", "Wilmington Street", "Raleigh Road", "Durham Street",
    "Charlotte Avenue", "Greensboro Street", "Asheville Road", "Fayetteville Street",
    "Chapel Hill Road", "Goldsboro Drive", "Hickory Lane", "Rocky Mount Road",
    "Gastonia Street", "Concord Avenue",
]

# Property categories.
categories = [
    "Shop",
    "Office",
    "Warehouse",
    "Workshop",
]

# Energy labels, one single-letter entry per label.
energy_labels = list("ABCDEF")

# Build the fixed roster of properties. Every attribute is drawn at random
# once here; only the value and the energy label are modified afterwards.
# Dict values are evaluated top to bottom, so the random draws happen in the
# order: street, street number, city, zip code, category, value, label.
properties = [
    {
        "property_id": f"P{sequence_number:03d}",  # P001, P002, ...
        "street": random.choice(street_names),
        "street_number": random.randint(1, 999),
        "city": random.choice(cities),
        # Stored as text; drawn from 27000-28999 (North Carolina zip code range)
        "zip_code": str(random.randint(27000, 28999)),
        "category": random.choice(categories),
        "initial_value": round(random.uniform(50000, 500000), 2),
        "energy_label": random.choice(energy_labels),
    }
    for sequence_number in range(1, num_properties + 1)
]

# Generate one CSV snapshot per day, from start_date through end_date
# (inclusive). Property state is mutated in place, so a change made on one day
# carries forward into every later snapshot.
current_date = start_date
while current_date <= end_date:
    data = []
    for prop in properties:
        # With probability value_change_probability, scale the value by a
        # factor drawn from [0.98, 1.06], i.e. between -2% and +6%.
        # NOTE: despite its name, "initial_value" holds the running value.
        if random.random() < value_change_probability:
            change_factor = random.uniform(0.98, 1.06)
            prop["initial_value"] = round(prop["initial_value"] * change_factor, 2)

        # With probability label_change_probability, switch to a *different*
        # energy label. Drawing from the full list could re-pick the current
        # label, which would silently turn the "change" into a no-op and make
        # the effective change rate lower than the configured one.
        if random.random() < label_change_probability:
            other_labels = [
                label for label in energy_labels if label != prop["energy_label"]
            ]
            if other_labels:  # nothing to switch to if only one label exists
                prop["energy_label"] = random.choice(other_labels)

        # Record today's state of the property
        data.append({
            "property_id": prop["property_id"],
            "street": prop["street"],
            "street_number": prop["street_number"],
            "city": prop["city"],
            "zip_code": prop["zip_code"],
            "category": prop["category"],
            "property_value": prop["initial_value"],
            "energy_label": prop["energy_label"]
        })

    # Target: <root>/<YYYY>/<MM>/commercial_property_snapshot_<YYYYMMDD>.csv
    file_path = (
        "../resources/generated/commercial_property/"
        f"{current_date:%Y}/{current_date:%m}/"
        f"commercial_property_snapshot_{current_date:%Y%m%d}.csv"
    )
    print(file_path)

    # Create the year/month folder (and any missing parents) on first use.
    directory = os.path.dirname(file_path)
    if not os.path.isdir(directory):
        # exist_ok=True keeps this from failing if the folder appears between
        # the check above and the call below.
        os.makedirs(directory, exist_ok=True)
        print(f"Directory '{directory}' created.")

    # Save the daily snapshot as CSV (without the DataFrame index column)
    pd.DataFrame(data).to_csv(file_path, index=False)

    # Move to the next day
    current_date += timedelta(days=1)


../resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220101.csv
../resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220102.csv
../resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220103.csv
../resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220104.csv
../resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220105.csv
../resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220106.csv
../resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220107.csv
../resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220108.csv
../resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220109.csv
../resources/generated/commercial_property/2022/01/commercial_property_snapshot_20220110.csv
../resources/generated/commercial_property/2022/01/commercial_property