In [2]:
import pandas as pd
import random
from faker import Faker

# Initialize Faker with Australian localization
fake = Faker("en_AU")

# Function to generate a realistic Australian vehicle registration number
def generate_vehicle_rego():
    """Return a random six-character registration string.

    The result is one uppercase ASCII letter, a number between 1000 and
    9999 (inclusive), and one more uppercase ASCII letter, e.g. ``D5869Z``.
    """
    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # The draws happen in output order (letter, number, letter) so that a
    # seeded ``random`` produces a reproducible plate.
    leading_letter = random.choice(alphabet)
    digits = random.randint(1000, 9999)
    trailing_letter = random.choice(alphabet)
    return leading_letter + str(digits) + trailing_letter

# Function to create a single EV user record
def generate_ev_user():
    """Build one synthetic EV-user record.

    Identity and contact fields come from the module-level Faker instance
    ``fake``; the remaining fields are drawn with ``random``. Related fields
    are kept mutually consistent: the vehicle model always belongs to the
    chosen make, and the last charge date is never earlier than the account
    creation date.

    Returns:
        dict: A flat mapping of field name to value. ``DiscountCodesUsed``
        and ``FrequentChargingStations`` hold lists of strings; the two date
        fields hold the values returned by Faker's date helpers.
    """
    first_name = fake.first_name()
    surname = fake.last_name()

    # Make and model are drawn together so a record can never combine a make
    # with another manufacturer's model (e.g. "Tesla" with "Leaf").
    vehicle_make, vehicle_model = random.choice([
        ("Tesla", "Model 3"),
        ("Nissan", "Leaf"),
        ("Hyundai", "Ioniq"),
        ("BMW", "i3"),
        ("Mercedes", "EQC"),
    ])

    account_created = fake.date_this_decade()
    # A date drawn from the current year can fall before an account that was
    # created later in the same year; clamp it so the timeline stays valid.
    last_charge = max(account_created, fake.date_this_year())

    return {
        "UserFirstName": first_name,
        "UserSurname": surname,
        "UserFullName": f"{first_name} {surname}",
        "UserEmailAddress": fake.email(),
        "UserPassword": fake.password(length=12),
        "UserRole": "user",
        "UserHomePostCode": fake.postcode(),
        "UserMobilePhoneNumber": fake.phone_number(),
        "UserAuthenticated": fake.boolean(),
        "UserCurrentVehicleRego": generate_vehicle_rego(),
        "LoyaltyMemberNumber": f"LM{random.randint(10000, 99999)}",
        "SessionBookingNumber": f"SB{random.randint(100000, 999999)}",
        "ChargeSessionId": f"CS{random.randint(100000, 999999)}",
        "AccountCreatedDate": account_created,
        "VehicleMake": vehicle_make,
        "VehicleModel": vehicle_model,
        "BatteryCapacity": random.choice([40, 50, 75, 100]),  # in kWh
        "VehicleType": "Car",
        "PreferredChargingTime": random.choice(["Morning", "Afternoon", "Night"]),
        "AverageChargingDuration": random.choice([30, 45, 60, 90]),  # in minutes
        "ChargingFrequency": random.randint(1, 7),  # sessions per week
        "PaymentMethod": random.choice(["Credit Card", "PayPal"]),
        "RewardsEarned": random.randint(0, 5000),  # Reward points
        # random.sample draws without replacement, so codes/stations never repeat.
        "DiscountCodesUsed": random.sample(
            ["DISCOUNT20", "SUMMER50", "WINTER10", "NEWUSER25"],
            k=random.randint(0, 2),
        ),
        "FrequentChargingStations": random.sample(
            ["Station A", "Station B", "Station C", "Station D"],
            k=random.randint(1, 3),
        ),
        "AppUsageFrequency": random.randint(1, 30),  # Interactions per month
        "WorkplacePostcode": fake.postcode(),
        "TravelDistance": random.randint(50, 500),  # in km per week
        "LastChargeDate": last_charge,
        # Rounded like CarbonEmissionsSaved so the CSV has no 15-digit floats.
        "TotalEnergyConsumed": round(random.uniform(100, 1500), 2),  # in kWh
        "CarbonEmissionsSaved": round(random.uniform(0.1, 2.0), 2),  # in tons
    }

# Generate a dataset of EV users
def generate_ev_users_dataset(num_users):
    """Return a DataFrame holding ``num_users`` generated EV-user records.

    Each row is the dict produced by one call to ``generate_ev_user``; the
    dict keys become the DataFrame columns.
    """
    return pd.DataFrame([generate_ev_user() for _row in range(num_users)])

# Build a 100-row sample dataset and preview its first five rows
num_users = 100
ev_users_df = generate_ev_users_dataset(num_users)
print(ev_users_df.head(5))

# Write the dataset to CSV without the DataFrame index column
ev_users_df.to_csv("ev_users_test_data.csv", index=False)


  UserFirstName UserSurname   UserFullName           UserEmailAddress  \
0        Morgan       Blair   Morgan Blair   adamsrichard@example.com   
1       Kristin       Stone  Kristin Stone      kylelewis@example.org   
2         Duane     Nichols  Duane Nichols  lestermelissa@example.com   
3         Traci     Sanchez  Traci Sanchez   lucaswiggins@example.org   
4        Austin      Duncan  Austin Duncan        david58@example.org   

   UserPassword UserRole UserHomePostCode UserMobilePhoneNumber  \
0  SQ0gU3!bQ@jW     user             2427             7753-3742   
1  Qm6yFm8F)5z9     user             2245       +61-8-4735-0434   
2  &28yThgkU9Vv     user             2652             8926 8255   
3  +509$jnSW2Mf     user             2984        (08).1880.9968   
4  P^)3MhiL55%o     user             2619          03-8976-2182   

   UserAuthenticated UserCurrentVehicleRego  ... PaymentMethod RewardsEarned  \
0               True                 D5869Z  ...   Credit Card           375  