In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the sales export; its VIN and Date columns drive the usage records built below
sales_df = pd.read_csv(filepath_or_buffer='tesla_sales_data_with_vin.csv')

In [None]:
# Generate synthetic vehicle usage data: one record per row of sales_df.
np.random.seed(42)  # fixed seed so a fresh run reproduces the same synthetic data
usage_data = []

# Ownership duration is measured up to this fixed date. It is loop-invariant,
# so parse it once rather than once per vehicle.
reference_date = pd.Timestamp('2023-12-31')

# Only the VIN and Date columns are needed, so iterate over them directly
# instead of materialising a full row Series per vehicle with iterrows().
for vin, sale_date in zip(sales_df['VIN'], sales_df['Date']):
    # Calculate days since purchase
    days_since_purchase = (reference_date - pd.Timestamp(sale_date)).days
    # A sale dated after the reference date would give a negative duration and
    # therefore a negative TotalMileage; treat it as zero days of ownership.
    # (A NaN duration compares False here and is passed through unchanged.)
    if days_since_purchase < 0:
        days_since_purchase = 0

    # Generate usage data with more variation
    # NOTE(review): AverageDailyMileage has mean shape*scale = 60 per day
    # (~21,900 per year) while AnnualMileage has mean 15,000 — the two columns
    # are sampled independently; confirm whether they are meant to agree.
    record = {
        'VIN': vin,
        'AverageDailyMileage': np.random.gamma(shape=2, scale=30),  # More realistic distribution
        'ChargingFrequency': np.random.gamma(shape=2, scale=1.5),  # times per week
        'SuperchargerUsage': np.random.gamma(shape=1, scale=3),  # times per month
        'AutopilotUsage': np.random.beta(2, 5) * 100,  # percentage of total drive time, skewed towards lower usage
        'AnnualMileage': np.random.gamma(shape=5, scale=3000),
    }
    # Total mileage based on car age. It is derived from this vehicle's own
    # AnnualMileage rather than from a second independent draw, so the two
    # columns are consistent with each other.
    record['TotalMileage'] = record['AnnualMileage'] * (days_since_purchase / 365)
    usage_data.append(record)

usage_df = pd.DataFrame(usage_data)

In [None]:
# Round each numeric column to its number of decimal places
# (0 means round to the nearest whole number)
rounding_precision = {
    'AverageDailyMileage': 1,
    'ChargingFrequency': 1,
    'SuperchargerUsage': 1,
    'AutopilotUsage': 1,
    'AnnualMileage': 0,
    'TotalMileage': 0,
}
for column_name, places in rounding_precision.items():
    usage_df[column_name] = usage_df[column_name].round(places)

# Write the rounded table to disk without the index column, then confirm on stdout
usage_df.to_csv('tesla_vehicle_usage_data.csv', index=False)
print("Vehicle usage data saved to tesla_vehicle_usage_data.csv")