In [7]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [8]:
# Setup for the synthetic dataset: a daily calendar covering four years
# (2020-01-01 through 2023-12-31) plus the region/product dimensions.
np.random.seed(42)  # seed NumPy's global generator so np.random draws are repeatable

start_date = datetime(year=2020, month=1, day=1)
end_date = datetime(year=2023, month=12, day=31)

# One entry per calendar day between the two endpoints
date_range = pd.date_range(start_date, end_date, freq='D')

# Sales regions covered by the dataset
regions = [
    'North Europe',
    'South Europe',
    'Central Europe',
    'UK',
]

# Vehicle models covered by the dataset
products = [
    'Model S',
    'Model 3',
    'Model X',
    'Model Y',
]

In [9]:
# Generate the synthetic sales table: one row per individual vehicle sold.
#
# For every (date, region, product) combination a Poisson-distributed base
# volume is scaled by seasonal, yearly, regional and product multipliers and
# truncated to an integer; that many rows are then emitted, each with its own
# sequential VIN. All rows of one combination share the same price and the
# same satisfaction score.
#
# Reads `date_range`, `regions`, `products` and `start_date`, and draws from
# NumPy's global random generator (two draws per combination: Poisson first,
# then normal), so the result depends on the generator state when this runs.

# Base prices for each model
base_prices = {'Model S': 80000, 'Model 3': 45000, 'Model X': 90000, 'Model Y': 55000}

# Demand multipliers; constant, so built once instead of on every iteration
region_factors = {'North Europe': 0.9, 'South Europe': 1.1, 'Central Europe': 1.0, 'UK': 1.05}
product_factors = {'Model S': 0.8, 'Model 3': 1.5, 'Model X': 0.7, 'Model Y': 1.2}

inflation_rate = 0.02  # 2% annual inflation, compounded continuously over elapsed time

data = []
vin_counter = 1

for date in date_range:
    # --- Factors that depend only on the date (computed once per day) ---
    month = date.month
    year = date.year

    # Sinusoidal seasonality with a 12-month period: the multiplier is highest
    # in March (1.2) and lowest in September (0.8).
    # NOTE(review): the previous comment claimed "peak in summer and winter",
    # which this formula does not produce; the formula is left unchanged so the
    # generated data stays the same. Confirm which behavior is intended.
    season_factor = 1 + 0.2 * np.sin(np.pi * month / 6)

    # Linear growth: +10 percentage points of the baseline per calendar year
    # elapsed since the first year of the range.
    year_factor = 1 + 0.1 * (year - start_date.year)

    # Price inflation based on fractional years since the start date
    years_since_start = (date - start_date).days / 365.25
    inflation_factor = (1 + inflation_rate) ** years_since_start

    for region in regions:
        region_factor = region_factors[region]

        for product in products:
            product_factor = product_factors[product]

            # Calculate sales; the multiplication order is kept as-is because
            # int() truncates and floating-point rounding must not shift.
            base_sales = np.random.poisson(10)  # Base daily sales
            sales = int(base_sales * season_factor * year_factor * region_factor * product_factor)

            price = base_prices[product] * inflation_factor

            # One satisfaction score is drawn per (date, region, product) and
            # shared by every sale in that group, clipped to the 1-5 scale.
            satisfaction = np.clip(np.random.normal(4.2, 0.5), 1, 5)

            # Round once per group rather than once per emitted row
            rounded_price = round(price, 2)
            rounded_satisfaction = round(satisfaction, 2)

            for _ in range(sales):
                data.append({
                    'Date': date,
                    'Region': region,
                    'Product': product,
                    'VIN': f'VIN{vin_counter:07d}',
                    'Price': rounded_price,
                    'CustomerSatisfaction': rounded_satisfaction
                })
                vin_counter += 1

# Pin the column schema so the frame has the expected columns even if no
# sales were generated.
df = pd.DataFrame(
    data,
    columns=['Date', 'Region', 'Product', 'VIN', 'Price', 'CustomerSatisfaction'],
)


In [6]:
# Persist the generated sales frame as a CSV file in the working directory,
# leaving out the DataFrame index, then confirm on stdout.
df.to_csv(
    'tesla_sales_data_with_vin.csv',
    index=False,
)
print("Updated sales data saved to tesla_sales_data_with_vin.csv")

Updated sales data saved to tesla_sales_data_with_vin.csv


In [None]:
# Print a short overview of the sales frame: covered date span, number of
# rows (one per individual sale), and pandas' summary statistics.
print("\nUpdated Sales Dataset Overview:")

earliest_sale = df['Date'].min()
latest_sale = df['Date'].max()
print(f"Date Range: {earliest_sale} to {latest_sale}")

record_count = len(df)
print(f"Total Records (Individual Sales): {record_count}")

print("\nSummary Statistics:")
summary_stats = df.describe()
print(summary_stats)