# Japan Gasoline Price Data Generator

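This notebook generates synthetic sample gasoline price data (placeholder values, not real measurements) for all 47 Japanese prefectures, with one record per prefecture per year from 2005 through 2025, and saves it as a JSON file.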

In [None]:
# Install required packages
!pip install pandas requests beautifulsoup4

In [None]:
import json
import os
import zlib
from datetime import datetime, timedelta

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
def fetch_meti_data(start_year: int = 2005, end_year: int = 2025) -> list:
    """
    Generate synthetic yearly gasoline prices for all 47 Japanese prefectures.

    Despite its name, this function performs no network access: it is a
    placeholder for fetching real figures from METI (Ministry of Economy,
    Trade and Industry) and only fabricates sample values.

    Each price is the sum of:
      * a base price of 150 yen,
      * 2 yen for every year elapsed since ``start_year``,
      * a fixed regional surcharge (+10 for Tokyo/Osaka/Kanagawa,
        +15 for Hokkaido/Okinawa, 0 elsewhere),
      * a pseudo-random offset between -10 and 9 derived from a CRC32 of the
        prefecture name and year.

    The offset deliberately avoids the built-in ``hash()``: string hashes are
    salted per interpreter process, so they would produce different data after
    every kernel restart.

    Args:
        start_year: First year to generate (inclusive). Also the year at which
            the upward price trend starts from the base price.
        end_year: Last year to generate (inclusive).

    Returns:
        A list of dicts with keys ``"year"`` (int), ``"prefecture"`` (str, using
        TopoJSON-compatible names) and ``"price"`` (int, in yen), ordered by year
        and then by prefecture in the order listed below.

    Raises:
        ValueError: If ``start_year`` is later than ``end_year``.
    """
    if start_year > end_year:
        raise ValueError(
            f"start_year ({start_year}) must not be later than end_year ({end_year})"
        )

    # TopoJSON-compatible prefecture names
    prefectures = [
        "Hokkaido", "Aomori Ken", "Iwate Ken", "Miyagi Ken", "Akita Ken", "Yamagata Ken", "Fukushima Ken",
        "Ibaraki Ken", "Tochigi Ken", "Gunma Ken", "Saitama Ken", "Chiba Ken", "Tokyo To", "Kanagawa Ken",
        "Niigata Ken", "Toyama Ken", "Ishikawa Ken", "Fukui Ken", "Yamanashi Ken", "Nagano Ken", "Gifu Ken",
        "Shizuoka Ken", "Aichi Ken", "Mie Ken", "Shiga Ken", "Kyoto Fu", "Osaka Fu", "Hyogo Ken", "Nara Ken",
        "Wakayama Ken", "Tottori Ken", "Shimane Ken", "Okayama Ken", "Hiroshima Ken", "Yamaguchi Ken",
        "Tokushima Ken", "Kagawa Ken", "Ehime Ken", "Kochi Ken", "Fukuoka Ken", "Saga Ken", "Nagasaki Ken",
        "Kumamoto Ken", "Oita Ken", "Miyazaki Ken", "Kagoshima Ken", "Okinawa Ken"
    ]

    base_price = 150  # Base price in yen
    yearly_increase = 2  # Slight increase over years, in yen

    # Regional variations in yen; prefectures not listed get no surcharge
    regional_surcharge = {
        "Tokyo To": 10,
        "Osaka Fu": 10,
        "Kanagawa Ken": 10,
        "Hokkaido": 15,
        "Okinawa Ken": 15,
    }

    data = []
    for year in range(start_year, end_year + 1):
        trend = (year - start_year) * yearly_increase
        for prefecture in prefectures:
            # CRC32 is stable across processes, unlike hash() on str
            offset = (zlib.crc32(f"{prefecture}{year}".encode("utf-8")) % 20) - 10
            data.append({
                "year": year,
                "prefecture": prefecture,
                "price": base_price + trend + regional_surcharge.get(prefecture, 0) + offset
            })

    return data

In [None]:
# Generate the data
data = fetch_meti_data()

# Preview the first rows to verify the record structure
print("Sample of generated data:")
df = pd.DataFrame(data)
print(df.head())

# Save as JSON. exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs('data', exist_ok=True)

# ensure_ascii=False keeps any non-ASCII text readable in the output file
with open('data/gasoline_prices.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

print("\nData has been saved to 'data/gasoline_prices.json'")

## Download the Generated JSON File

When running in Google Colab, run the following cell to download the generated JSON file to your computer. You can then use this file with the HTML visualization.

In [None]:
# The google.colab package only exists inside Google Colab. Guard the import so
# that "Run All" does not fail with ImportError in other Jupyter environments.
try:
    from google.colab import files
except ImportError:
    print("Not running in Google Colab; the file is available at 'data/gasoline_prices.json'")
else:
    # Triggers a browser download of the generated file
    files.download('data/gasoline_prices.json')