In [4]:
import pandas as pd
import numpy as np
import random
from faker import Faker
from datetime import datetime, timedelta
import math
import json

# Seed both random sources up front so every run produces the same data set.
random.seed(0)
fake = Faker()
Faker.seed(0)

# -------------------------------------------
# Great-circle distance helper (Haversine)
# -------------------------------------------
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance in kilometers on a sphere of radius 6371 km.
    """
    R = 6371  # Earth radius in kilometers
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` a hair outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

#########################################
# 1. Build the Airport table (25 entries taken from real-world data)
#########################################
# Column order shared by every row tuple below.
_airport_columns = ("name", "iata_code", "country", "city", "latitude", "longitude", "timezone")
_airport_rows = [
    ("Suvarnabhumi Airport", "BKK", "Thailand", "Bangkok", 13.6900, 100.7501, "Asia/Bangkok"),
    ("Don Mueang International Airport", "DMK", "Thailand", "Bangkok", 13.9126, 100.6078, "Asia/Bangkok"),
    ("Phuket International Airport", "HKT", "Thailand", "Phuket", 8.1132, 98.3167, "Asia/Bangkok"),
    ("Chiang Mai International Airport", "CNX", "Thailand", "Chiang Mai", 18.7883, 98.9861, "Asia/Bangkok"),
    ("Haneda Airport", "HND", "Japan", "Tokyo", 35.5494, 139.7798, "Asia/Tokyo"),
    ("Narita International Airport", "NRT", "Japan", "Tokyo", 35.7767, 140.3189, "Asia/Tokyo"),
    ("Changi Airport", "SIN", "Singapore", "Singapore", 1.3644, 103.9915, "Asia/Singapore"),
    ("Heathrow Airport", "LHR", "UK", "London", 51.4700, -0.4543, "Europe/London"),
    ("Gatwick Airport", "LGW", "UK", "London", 51.1537, -0.1821, "Europe/London"),
    ("Los Angeles International Airport", "LAX", "USA", "Los Angeles", 33.9416, -118.4085, "America/Los_Angeles"),
    ("John F. Kennedy International Airport", "JFK", "USA", "New York", 40.6413, -73.7781, "America/New_York"),
    ("San Francisco International Airport", "SFO", "USA", "San Francisco", 37.6213, -122.3790, "America/Los_Angeles"),
    ("Incheon International Airport", "ICN", "South Korea", "Seoul", 37.4602, 126.4407, "Asia/Seoul"),
    ("Dubai International Airport", "DXB", "UAE", "Dubai", 25.2532, 55.3657, "Asia/Dubai"),
    ("Frankfurt Airport", "FRA", "Germany", "Frankfurt", 50.1109, 8.6821, "Europe/Berlin"),
    ("Charles de Gaulle Airport", "CDG", "France", "Paris", 49.0097, 2.5479, "Europe/Paris"),
    ("Amsterdam Schiphol Airport", "AMS", "Netherlands", "Amsterdam", 52.3105, 4.7683, "Europe/Amsterdam"),
    ("Madrid-Barajas Airport", "MAD", "Spain", "Madrid", 40.4936, -3.5668, "Europe/Madrid"),
    ("Beijing Capital International Airport", "PEK", "China", "Beijing", 40.0801, 116.5846, "Asia/Shanghai"),
    ("Shanghai Pudong International Airport", "PVG", "China", "Shanghai", 31.1443, 121.8083, "Asia/Shanghai"),
    ("Sydney Airport", "SYD", "Australia", "Sydney", -33.9399, 151.1753, "Australia/Sydney"),
    ("Auckland International Airport", "AKL", "New Zealand", "Auckland", -37.0082, 174.7922, "Pacific/Auckland"),
    ("Istanbul Airport", "IST", "Turkey", "Istanbul", 41.2753, 28.7519, "Europe/Istanbul"),
    ("Sheremetyevo International Airport", "SVO", "Russia", "Moscow", 55.9726, 37.4146, "Europe/Moscow"),
    ("São Paulo/Guarulhos International Airport", "GRU", "Brazil", "São Paulo", -23.4356, -46.4731, "America/Sao_Paulo"),
]
# Expand each positional row into the keyed record used to build the DataFrame.
airport_list = [dict(zip(_airport_columns, row)) for row in _airport_rows]

df_airport = pd.DataFrame(airport_list)
print("Total airports:", len(airport_list))
# Keep a standalone copy of the hand-entered airport data.
df_airport.to_csv("real_airports.csv", index=False)
print("Real airport data saved to real_airports.csv")

#########################################
# 2. Build the Aircraft table (150 entries)
#########################################
# Each model carries the inclusive seat-count range used for the capacity draw.
aircraft_models = [
    {"model": "Airbus A320", "capacity_range": (150, 180)},
    {"model": "Boeing 737", "capacity_range": (130, 215)},
    {"model": "Airbus A330", "capacity_range": (250, 300)},
    {"model": "Boeing 777", "capacity_range": (300, 400)},
    {"model": "Boeing 787 Dreamliner", "capacity_range": (242, 335)},
    {"model": "Airbus A350", "capacity_range": (280, 350)}
]
airlines = ["Thai Airways", "Singapore Airlines", "Emirates", "American Airlines", "Lufthansa", "Air France", "Qantas", "British Airways", "Delta Air Lines"]

aircraft_records = []
for aircraft_id in range(1, 151):
    spec = random.choice(aircraft_models)
    # Capacity is drawn before the dict is built so the sequence of random
    # draws stays: model, capacity, year, owner, status.
    seats = random.randint(*spec["capacity_range"])
    aircraft_records.append({
        "aircraft_id": aircraft_id,
        "model": spec["model"],
        "manufacture_year": random.randint(2000, 2024),
        "capacity": seats,
        "airline_owner": random.choice(airlines),
        "maintenance_status": random.choice(["Active", "Maintenance", "Retired"]),
        "aircraft_history": fake.text(max_nb_chars=100),
    })
df_aircraft = pd.DataFrame(aircraft_records)

#########################################
# 3. Build the Route table (200 entries)
#########################################
route_records = []
num_routes = 200
# Airport ids are the 1-based row positions of df_airport (index + 1).
airport_ids = df_airport.index + 1
# Materialize the candidate ids once instead of rebuilding the list for
# every random draw inside the loop.
airport_id_choices = list(airport_ids)
for i in range(num_routes):
    from_airport = random.choice(airport_id_choices)
    to_airport = random.choice(airport_id_choices)
    # Re-draw until the destination differs from the origin.
    while to_airport == from_airport:
        to_airport = random.choice(airport_id_choices)
    airport_from = df_airport.iloc[from_airport - 1]
    airport_to = df_airport.iloc[to_airport - 1]
    distance = haversine(airport_from['latitude'], airport_from['longitude'],
                         airport_to['latitude'], airport_to['longitude'])
    duration_hours = distance / 900  # assume a cruising speed of 900 km/h
    estimated_duration = timedelta(hours=duration_hours)
    route_records.append({
        "route_id": i + 1,
        "from_airport": from_airport,
        "to_airport": to_airport,
        "distance": round(distance, 2),
        # Stored as text, i.e. whatever str(timedelta) renders.
        "estimated_duration": str(estimated_duration)
    })
df_route = pd.DataFrame(route_records)

#########################################
# 4. Build the Flight table (2000 entries)
#########################################
flight_records = []
num_flights = 2000
flight_status_options = ["Scheduled", "Departed", "Arrived", "Cancelled"]

# Parse every route's "H:MM:SS[.ffffff]" duration text once, up front, instead
# of scanning df_route and re-parsing the string for each generated flight.
route_durations = {}
for rid, duration_text in zip(df_route['route_id'], df_route['estimated_duration']):
    h, m, s = map(float, duration_text.split(':'))
    route_durations[rid] = timedelta(hours=h, minutes=m, seconds=s)

for i in range(num_flights):
    aircraft_id = random.randint(1, 150)
    route_id = random.randint(1, num_routes)
    departure_time = fake.date_time_between(start_date="-90d", end_date="+90d")
    duration_td = route_durations[route_id]
    # Arrival drifts between 10 minutes early and 30 minutes late.
    delay = timedelta(minutes=random.randint(-10, 30))
    arrival_time = departure_time + duration_td + delay
    status = random.choices(flight_status_options, weights=[70, 10, 15, 5])[0]
    # Only cancelled flights carry a reason.
    cancellation_reason = fake.sentence() if status == "Cancelled" else None
    flight_records.append({
        "flight_id": i + 1,
        "aircraft_id": aircraft_id,
        "route_id": route_id,
        "departure_time": departure_time,
        "arrival_time": arrival_time,
        "flight_status": status,
        "cancellation_reason": cancellation_reason
    })
df_flight = pd.DataFrame(flight_records)

#########################################
# 5. Build the Crew table (500 entries)
#########################################
crew_roles = ["Pilot", "Co-Pilot", "Cabin Crew"]
crew_records = []
for crew_id in range(1, 501):
    # Dict values are evaluated top to bottom, so the entry order below also
    # fixes the order in which the seeded generators are consumed.
    crew_records.append({
        "crew_id": crew_id,
        "name": fake.name(),
        "role": random.choice(crew_roles),
        "license_expiry_date": fake.date_between(start_date="today", end_date="+5y"),
        "passport_expiry_date": fake.date_between(start_date="today", end_date="+10y"),
        "flight_hours": round(random.uniform(0, 20000), 2),
    })
df_crew = pd.DataFrame(crew_records)

#########################################
# 6. Build the Flight Crew Assignment table (many-to-many)
#########################################
# For every flight, pick 3-6 distinct crew ids and give each a random
# on-board role. The inner iterable is evaluated once per flight, before any
# role is drawn for that flight.
flight_crew_records = [
    {
        "flight_id": flight_no,
        "crew_id": member_id,
        "role_in_flight": random.choice(crew_roles),
    }
    for flight_no in range(1, num_flights + 1)
    for member_id in random.sample(range(1, 501), random.randint(3, 6))
]
df_flight_crew = pd.DataFrame(flight_crew_records)

#########################################
# 7. Build the Passenger table (20,000 entries)
#########################################
seat_letters = ["A", "B", "C", "D", "E", "F"]
passenger_records = []
for passenger_no in range(1, 20001):
    # Entry order matches the order in which the random sources are consumed.
    passenger_records.append({
        "passenger_id": passenger_no,
        "name": fake.name(),
        "passport_number": fake.bothify(text='??######', letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
        "nationality": fake.country(),
        "flight_id": random.randint(1, num_flights),
        # Row 1-40 followed by a seat letter, e.g. "17C".
        "seat_number": f"{random.randint(1, 40)}{random.choice(seat_letters)}",
        # Roughly one passenger in ten has a special request.
        "special_requests": fake.sentence() if random.random() < 0.1 else None,
    })
df_passenger = pd.DataFrame(passenger_records)

#########################################
# 8. Build the Ticket table (20,000 entries, one per passenger)
#########################################
ticket_records = []
ticket_status_options = ["Confirmed", "Cancelled", "Pending"]
check_in_status_options = ["Checked-in", "Not Checked-in"]
# Walk the passenger columns once, in row order. Filtering df_passenger by id
# twice for every ticket made this loop quadratic in the number of passengers.
passenger_rows = zip(
    df_passenger['passenger_id'],
    df_passenger['flight_id'],
    df_passenger['seat_number'],
)
for i, (passenger_id, flight_id, seat_number) in enumerate(passenger_rows):
    ticket_status = random.choices(ticket_status_options, weights=[80, 5, 15])[0]
    check_in_status = random.choice(check_in_status_options)
    ticket_records.append({
        "ticket_id": i + 1,
        "passenger_id": passenger_id,
        # Flight and seat are copied from the passenger row.
        "flight_id": flight_id,
        "seat_number": seat_number,
        "ticket_status": ticket_status,
        "check_in_status": check_in_status
    })
df_ticket = pd.DataFrame(ticket_records)

#########################################
# 9. Build the Payment table (about 18,000 entries, 90% of tickets)
#########################################
payment_methods = ["Credit Card", "PayPal", "QR Code", "Bank Transfer"]
ticket_ids = df_ticket['ticket_id'].tolist()
# Sample 90% of the tickets without replacement; each gets one payment.
selected_ticket_ids = random.sample(ticket_ids, k=int(len(ticket_ids) * 0.9))
payment_records = []
for payment_no, ticket_id in enumerate(selected_ticket_ids, start=1):
    payment_records.append({
        "payment_id": payment_no,
        "ticket_id": ticket_id,
        "payment_method": random.choice(payment_methods),
        "amount": round(random.uniform(100.0, 1000.0), 2),
        "payment_date": fake.date_time_between(start_date="-90d", end_date="+90d"),
        "payment_status": random.choice(["Paid", "Pending", "Failed"]),
    })
df_payment = pd.DataFrame(payment_records)

#########################################
# 10. Build the Maintenance Log table (50 entries)
#########################################
# One record per log id; the aircraft is picked at random from ids 1-150 and
# the maintenance date falls within the past year.
maintenance_records = [
    {
        "log_id": log_no,
        "aircraft_id": random.randint(1, 150),
        "date_of_maintenance": fake.date_time_between(start_date="-1y", end_date="now"),
        "details": fake.paragraph(nb_sentences=3),
        "maintenance_location": fake.city(),
    }
    for log_no in range(1, 51)
]
df_maintenance_log = pd.DataFrame(maintenance_records)

#########################################
# 11. Build the Users table (1,000 entries)
#########################################
user_roles = ["admin", "crew", "passenger", "maintenance"]
user_records = []
for user_no in range(1, 1001):
    account = {
        "user_id": user_no,
        # The zero-based counter is appended to the generated user name.
        "username": fake.user_name() + str(user_no - 1),
        "hashed_password": fake.sha256(raw_output=False),
        "role": random.choice(user_roles),
        "created_at": fake.date_time_between(start_date="-2y", end_date="now"),
    }
    # The update timestamp is drawn from the window starting at creation time.
    account["updated_at"] = fake.date_time_between(start_date=account["created_at"], end_date="now")
    user_records.append(account)
df_users = pd.DataFrame(user_records)

#########################################
# 12. Build the Audit Log table (5,000 entries)
#########################################
audit_actions = ["INSERT", "UPDATE", "DELETE"]
entities = ["airport", "aircraft", "route", "flight", "crew", "passenger", "ticket", "payment", "users"]
# Highest id that may be referenced for each audited entity.
entity_max_id = {
    "airport": len(df_airport),
    "aircraft": 150,
    "route": num_routes,
    "flight": num_flights,
    "crew": 500,
    "passenger": 20000,
    "ticket": 20000,
    "payment": len(df_payment),
    "users": 1000,
}
audit_records = []
for audit_no in range(1, 5001):
    acting_user = random.randint(1, 1000)
    action = random.choice(audit_actions)
    entity = random.choice(entities)
    # Entities missing from the table fall back to ids 1..100.
    entity_id = random.randint(1, entity_max_id.get(entity, 100))
    # Before/after snapshots are placeholder JSON built from random words.
    before = json.dumps({"field": fake.word(), "value": fake.word()})
    after = json.dumps({"field": fake.word(), "value": fake.word()})
    audit_records.append({
        "id": audit_no,
        "user_id": acting_user,
        "action": action,
        "entity": entity,
        "entity_id": entity_id,
        "old_data": before,
        "new_data": after,
        "created_at": fake.date_time_between(start_date="-1y", end_date="now"),
    })
df_audit_log = pd.DataFrame(audit_records)

#########################################
# Save each table's DataFrame as a CSV file using relative paths
#########################################
csv_exports = [
    ("mock_airport.csv", df_airport),
    ("mock_aircraft.csv", df_aircraft),
    ("mock_route.csv", df_route),
    ("mock_flight.csv", df_flight),
    ("mock_crew.csv", df_crew),
    ("mock_flight_crew_assignment.csv", df_flight_crew),
    ("mock_passenger.csv", df_passenger),
    ("mock_ticket.csv", df_ticket),
    ("mock_payment.csv", df_payment),
    ("mock_maintenance_log.csv", df_maintenance_log),
    ("mock_users.csv", df_users),
    ("mock_audit_log.csv", df_audit_log),
]
# Files are written in the order listed, without the DataFrame index column.
for csv_name, frame in csv_exports:
    frame.to_csv(csv_name, index=False)

print("Mock data generated and saved as CSV files in the current directory.")


Total airports: 25
Real airport data saved to real_airports.csv
Mock data generated and saved as CSV files in the current directory.


In [5]:
import pandas as pd
import numpy as np
import random
from faker import Faker
from datetime import datetime, timedelta
import math
import json

# Seed both random sources up front so every run produces the same data set.
random.seed(0)
fake = Faker()
Faker.seed(0)

# -------------------------------------------
# Great-circle distance helper (Haversine)
# -------------------------------------------
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.

    Returns:
        The distance in kilometers on a sphere of radius 6371 km.
    """
    R = 6371  # Earth radius in kilometers
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push `a` a hair outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

#########################################
# Convert a DataFrame into a SQL INSERT statement
#########################################
def dataframe_to_insert_sql(df, table_name):
    """Render *df* as a single multi-row ``INSERT INTO ... VALUES`` statement.

    Missing values become ``NULL``, numbers and booleans are written bare,
    and every other value (strings, dates, timestamps, durations, ...) is
    written as a single-quoted literal with embedded quotes doubled.

    The table and column names are interpolated verbatim and values are only
    quote-escaped, so this is meant for trusted, generated data rather than
    untrusted input.

    Args:
        df: DataFrame whose columns name the target table's columns.
        table_name: Name of the table to insert into.

    Returns:
        The INSERT statement as a string, or an empty string when *df* has
        no rows (an INSERT with an empty VALUES list is not valid SQL).
    """
    import numbers  # local import: only needed for the numeric-literal check

    def _literal(val):
        # Render one cell as a SQL literal.
        if pd.isnull(val):
            return "NULL"
        if isinstance(val, (numbers.Number, np.bool_)):
            return str(val)
        # Anything non-numeric must be quoted: an unquoted date or timestamp
        # such as 2024-01-02 03:04:05 is not a valid SQL literal.
        escaped = str(val).replace("'", "''")
        return f"'{escaped}'"

    if len(df) == 0:
        return ""

    # itertuples keeps each column's own type; iterrows would upcast the ints
    # of an all-numeric frame to floats and emit ids as 1.0 instead of 1.
    values = [
        f"({', '.join(_literal(val) for val in row)})"
        for row in df.itertuples(index=False, name=None)
    ]
    columns = ", ".join(df.columns)
    insert_sql = f"INSERT INTO {table_name} ({columns}) VALUES\n" + ",\n".join(values) + ";"
    return insert_sql

#########################################
# 1. Build the Airport table (25 entries taken from real-world data)
#########################################
# Column order shared by every row tuple below.
_airport_columns = ("name", "iata_code", "country", "city", "latitude", "longitude", "timezone")
_airport_rows = [
    ("Suvarnabhumi Airport", "BKK", "Thailand", "Bangkok", 13.6900, 100.7501, "Asia/Bangkok"),
    ("Don Mueang International Airport", "DMK", "Thailand", "Bangkok", 13.9126, 100.6078, "Asia/Bangkok"),
    ("Phuket International Airport", "HKT", "Thailand", "Phuket", 8.1132, 98.3167, "Asia/Bangkok"),
    ("Chiang Mai International Airport", "CNX", "Thailand", "Chiang Mai", 18.7883, 98.9861, "Asia/Bangkok"),
    ("Haneda Airport", "HND", "Japan", "Tokyo", 35.5494, 139.7798, "Asia/Tokyo"),
    ("Narita International Airport", "NRT", "Japan", "Tokyo", 35.7767, 140.3189, "Asia/Tokyo"),
    ("Changi Airport", "SIN", "Singapore", "Singapore", 1.3644, 103.9915, "Asia/Singapore"),
    ("Heathrow Airport", "LHR", "UK", "London", 51.4700, -0.4543, "Europe/London"),
    ("Gatwick Airport", "LGW", "UK", "London", 51.1537, -0.1821, "Europe/London"),
    ("Los Angeles International Airport", "LAX", "USA", "Los Angeles", 33.9416, -118.4085, "America/Los_Angeles"),
    ("John F. Kennedy International Airport", "JFK", "USA", "New York", 40.6413, -73.7781, "America/New_York"),
    ("San Francisco International Airport", "SFO", "USA", "San Francisco", 37.6213, -122.3790, "America/Los_Angeles"),
    ("Incheon International Airport", "ICN", "South Korea", "Seoul", 37.4602, 126.4407, "Asia/Seoul"),
    ("Dubai International Airport", "DXB", "UAE", "Dubai", 25.2532, 55.3657, "Asia/Dubai"),
    ("Frankfurt Airport", "FRA", "Germany", "Frankfurt", 50.1109, 8.6821, "Europe/Berlin"),
    ("Charles de Gaulle Airport", "CDG", "France", "Paris", 49.0097, 2.5479, "Europe/Paris"),
    ("Amsterdam Schiphol Airport", "AMS", "Netherlands", "Amsterdam", 52.3105, 4.7683, "Europe/Amsterdam"),
    ("Madrid-Barajas Airport", "MAD", "Spain", "Madrid", 40.4936, -3.5668, "Europe/Madrid"),
    ("Beijing Capital International Airport", "PEK", "China", "Beijing", 40.0801, 116.5846, "Asia/Shanghai"),
    ("Shanghai Pudong International Airport", "PVG", "China", "Shanghai", 31.1443, 121.8083, "Asia/Shanghai"),
    ("Sydney Airport", "SYD", "Australia", "Sydney", -33.9399, 151.1753, "Australia/Sydney"),
    ("Auckland International Airport", "AKL", "New Zealand", "Auckland", -37.0082, 174.7922, "Pacific/Auckland"),
    ("Istanbul Airport", "IST", "Turkey", "Istanbul", 41.2753, 28.7519, "Europe/Istanbul"),
    ("Sheremetyevo International Airport", "SVO", "Russia", "Moscow", 55.9726, 37.4146, "Europe/Moscow"),
    ("São Paulo/Guarulhos International Airport", "GRU", "Brazil", "São Paulo", -23.4356, -46.4731, "America/Sao_Paulo"),
]
# Expand each positional row into the keyed record used to build the DataFrame.
airport_list = [dict(zip(_airport_columns, row)) for row in _airport_rows]
df_airport = pd.DataFrame(airport_list)

#########################################
# 2. Build the Aircraft table (150 entries)
#########################################
# Each model carries the inclusive seat-count range used for the capacity draw.
aircraft_models = [
    {"model": "Airbus A320", "capacity_range": (150, 180)},
    {"model": "Boeing 737", "capacity_range": (130, 215)},
    {"model": "Airbus A330", "capacity_range": (250, 300)},
    {"model": "Boeing 777", "capacity_range": (300, 400)},
    {"model": "Boeing 787 Dreamliner", "capacity_range": (242, 335)},
    {"model": "Airbus A350", "capacity_range": (280, 350)}
]
airlines = ["Thai Airways", "Singapore Airlines", "Emirates", "American Airlines", "Lufthansa", "Air France", "Qantas", "British Airways", "Delta Air Lines"]

aircraft_records = []
for aircraft_id in range(1, 151):
    spec = random.choice(aircraft_models)
    # Capacity is drawn before the dict is built so the sequence of random
    # draws stays: model, capacity, year, owner, status.
    seats = random.randint(*spec["capacity_range"])
    aircraft_records.append({
        "aircraft_id": aircraft_id,
        "model": spec["model"],
        "manufacture_year": random.randint(2000, 2024),
        "capacity": seats,
        "airline_owner": random.choice(airlines),
        "maintenance_status": random.choice(["Active", "Maintenance", "Retired"]),
        "aircraft_history": fake.text(max_nb_chars=100),
    })
df_aircraft = pd.DataFrame(aircraft_records)

#########################################
# 3. Build the Route table (200 entries)
#########################################
route_records = []
num_routes = 200
airport_ids = df_airport.index + 1  # airport_id assumed 1-indexed
# Materialize the candidate ids once instead of rebuilding the list for
# every random draw inside the loop.
airport_id_choices = list(airport_ids)
for i in range(num_routes):
    from_airport = random.choice(airport_id_choices)
    to_airport = random.choice(airport_id_choices)
    # Re-draw until the destination differs from the origin.
    while to_airport == from_airport:
        to_airport = random.choice(airport_id_choices)
    airport_from = df_airport.iloc[from_airport - 1]
    airport_to = df_airport.iloc[to_airport - 1]
    distance = haversine(airport_from['latitude'], airport_from['longitude'],
                         airport_to['latitude'], airport_to['longitude'])
    duration_hours = distance / 900  # assume a cruising speed of 900 km/h
    estimated_duration = timedelta(hours=duration_hours)
    route_records.append({
        "route_id": i + 1,
        "from_airport": from_airport,
        "to_airport": to_airport,
        "distance": round(distance, 2),
        # Stored as text, i.e. whatever str(timedelta) renders.
        "estimated_duration": str(estimated_duration)
    })
df_route = pd.DataFrame(route_records)

#########################################
# 4. Build the Flight table (2000 entries)
#########################################
flight_records = []
num_flights = 2000
flight_status_options = ["Scheduled", "Departed", "Arrived", "Cancelled"]

# Parse every route's "H:MM:SS[.ffffff]" duration text once, up front, instead
# of scanning df_route and re-parsing the string for each generated flight.
route_durations = {}
for rid, duration_text in zip(df_route['route_id'], df_route['estimated_duration']):
    h, m, s = map(float, duration_text.split(':'))
    route_durations[rid] = timedelta(hours=h, minutes=m, seconds=s)

for i in range(num_flights):
    aircraft_id = random.randint(1, 150)
    route_id = random.randint(1, num_routes)
    departure_time = fake.date_time_between(start_date="-90d", end_date="+90d")
    duration_td = route_durations[route_id]
    # Arrival drifts between 10 minutes early and 30 minutes late.
    delay = timedelta(minutes=random.randint(-10, 30))
    arrival_time = departure_time + duration_td + delay
    status = random.choices(flight_status_options, weights=[70, 10, 15, 5])[0]
    # Only cancelled flights carry a reason.
    cancellation_reason = fake.sentence() if status == "Cancelled" else None
    flight_records.append({
        "flight_id": i + 1,
        "aircraft_id": aircraft_id,
        "route_id": route_id,
        "departure_time": departure_time,
        "arrival_time": arrival_time,
        "flight_status": status,
        "cancellation_reason": cancellation_reason
    })
df_flight = pd.DataFrame(flight_records)

#########################################
# 5. Build the Crew table (500 entries)
#########################################
crew_roles = ["Pilot", "Co-Pilot", "Cabin Crew"]
crew_records = []
for crew_id in range(1, 501):
    # Dict values are evaluated top to bottom, so the entry order below also
    # fixes the order in which the seeded generators are consumed.
    crew_records.append({
        "crew_id": crew_id,
        "name": fake.name(),
        "role": random.choice(crew_roles),
        "license_expiry_date": fake.date_between(start_date="today", end_date="+5y"),
        "passport_expiry_date": fake.date_between(start_date="today", end_date="+10y"),
        "flight_hours": round(random.uniform(0, 20000), 2),
    })
df_crew = pd.DataFrame(crew_records)

#########################################
# 6. Build the Flight Crew Assignment table (many-to-many)
#########################################
# For every flight, pick 3-6 distinct crew ids and give each a random
# on-board role. The inner iterable is evaluated once per flight, before any
# role is drawn for that flight.
flight_crew_records = [
    {
        "flight_id": flight_no,
        "crew_id": member_id,
        "role_in_flight": random.choice(crew_roles),
    }
    for flight_no in range(1, num_flights + 1)
    for member_id in random.sample(range(1, 501), random.randint(3, 6))
]
df_flight_crew = pd.DataFrame(flight_crew_records)

#########################################
# 7. Build the Passenger table (20,000 entries)
#########################################
seat_letters = ["A", "B", "C", "D", "E", "F"]
passenger_records = []
for passenger_no in range(1, 20001):
    # Entry order matches the order in which the random sources are consumed.
    passenger_records.append({
        "passenger_id": passenger_no,
        "name": fake.name(),
        "passport_number": fake.bothify(text='??######', letters='ABCDEFGHIJKLMNOPQRSTUVWXYZ'),
        "nationality": fake.country(),
        "flight_id": random.randint(1, num_flights),
        # Row 1-40 followed by a seat letter, e.g. "17C".
        "seat_number": f"{random.randint(1, 40)}{random.choice(seat_letters)}",
        # Roughly one passenger in ten has a special request.
        "special_requests": fake.sentence() if random.random() < 0.1 else None,
    })
df_passenger = pd.DataFrame(passenger_records)

#########################################
# 8. Build the Ticket table (20,000 entries, one per passenger)
#########################################
ticket_records = []
ticket_status_options = ["Confirmed", "Cancelled", "Pending"]
check_in_status_options = ["Checked-in", "Not Checked-in"]
# Walk the passenger columns once, in row order. Filtering df_passenger by id
# twice for every ticket made this loop quadratic in the number of passengers.
passenger_rows = zip(
    df_passenger['passenger_id'],
    df_passenger['flight_id'],
    df_passenger['seat_number'],
)
for i, (passenger_id, flight_id, seat_number) in enumerate(passenger_rows):
    ticket_status = random.choices(ticket_status_options, weights=[80, 5, 15])[0]
    check_in_status = random.choice(check_in_status_options)
    ticket_records.append({
        "ticket_id": i + 1,
        "passenger_id": passenger_id,
        # Flight and seat are copied from the passenger row.
        "flight_id": flight_id,
        "seat_number": seat_number,
        "ticket_status": ticket_status,
        "check_in_status": check_in_status
    })
df_ticket = pd.DataFrame(ticket_records)

#########################################
# 9. Build the Payment table (about 18,000 entries, 90% of tickets)
#########################################
payment_methods = ["Credit Card", "PayPal", "QR Code", "Bank Transfer"]
ticket_ids = df_ticket['ticket_id'].tolist()
# Sample 90% of the tickets without replacement; each gets one payment.
selected_ticket_ids = random.sample(ticket_ids, k=int(len(ticket_ids) * 0.9))
payment_records = []
for payment_no, ticket_id in enumerate(selected_ticket_ids, start=1):
    payment_records.append({
        "payment_id": payment_no,
        "ticket_id": ticket_id,
        "payment_method": random.choice(payment_methods),
        "amount": round(random.uniform(100.0, 1000.0), 2),
        "payment_date": fake.date_time_between(start_date="-90d", end_date="+90d"),
        "payment_status": random.choice(["Paid", "Pending", "Failed"]),
    })
df_payment = pd.DataFrame(payment_records)

#########################################
# 10. Build the Maintenance Log table (50 entries)
#########################################
# One record per log id; the aircraft is picked at random from ids 1-150 and
# the maintenance date falls within the past year.
maintenance_records = [
    {
        "log_id": log_no,
        "aircraft_id": random.randint(1, 150),
        "date_of_maintenance": fake.date_time_between(start_date="-1y", end_date="now"),
        "details": fake.paragraph(nb_sentences=3),
        "maintenance_location": fake.city(),
    }
    for log_no in range(1, 51)
]
df_maintenance_log = pd.DataFrame(maintenance_records)

#########################################
# 11. Build the Users table (1,000 rows)
#########################################
user_roles = ["admin", "crew", "passenger", "maintenance"]


def _build_user(index):
    """Return one user row for the zero-based position *index*.

    The username is a Faker user name with the zero-based index appended;
    the user_id is the one-based position. updated_at is drawn from the
    interval between created_at and now, so it never precedes created_at.
    """
    username = fake.user_name() + str(index)
    hashed_password = fake.sha256(raw_output=False)
    role = random.choice(user_roles)
    created = fake.date_time_between(start_date="-2y", end_date="now")
    updated = fake.date_time_between(start_date=created, end_date="now")
    return {
        "user_id": index + 1,
        "username": username,
        "hashed_password": hashed_password,
        "role": role,
        "created_at": created,
        "updated_at": updated,
    }


user_records = [_build_user(index) for index in range(1000)]
df_users = pd.DataFrame(user_records)

#########################################
# 12. Build the Audit Log table (5,000 rows)
#########################################
audit_actions = ["INSERT", "UPDATE", "DELETE"]
entities = ["airport", "aircraft", "route", "flight", "crew", "passenger", "ticket", "payment", "users"]

# Largest id that may be referenced for each entity; ids are drawn from
# 1..bound inclusive. Entities not listed here fall back to a bound of 100.
entity_id_upper_bound = {
    "airport": len(df_airport),
    "aircraft": 150,
    "route": num_routes,
    "flight": num_flights,
    "crew": 500,
    "passenger": 20000,
    "ticket": 20000,
    "payment": len(df_payment),
    "users": 1000,
}

audit_records = []
for audit_id in range(1, 5001):
    # Draw order: acting user, action, entity, then the entity's id.
    acting_user = random.randint(1, 1000)
    chosen_action = random.choice(audit_actions)
    chosen_entity = random.choice(entities)
    target_id = random.randint(1, entity_id_upper_bound.get(chosen_entity, 100))

    # Before/after snapshots are placeholder JSON objects of random words;
    # the old snapshot is generated first, then the new one.
    before_json = json.dumps({"field": fake.word(), "value": fake.word()})
    after_json = json.dumps({"field": fake.word(), "value": fake.word()})
    logged_at = fake.date_time_between(start_date="-1y", end_date="now")

    audit_records.append({
        "id": audit_id,
        "user_id": acting_user,
        "action": chosen_action,
        "entity": chosen_entity,
        "entity_id": target_id,
        "old_data": before_json,
        "new_data": after_json,
        "created_at": logged_at,
    })
df_audit_log = pd.DataFrame(audit_records)

#########################################
# Generate the SQL INSERT scripts and save each one as a .sql file
#########################################
# Table name -> DataFrame holding its rows. Files are written in this
# (insertion) order, one file per table.
tables = {
    "airport": df_airport,
    "aircraft": df_aircraft,
    "route": df_route,
    "flight": df_flight,
    "crew": df_crew,
    "flight_crew_assignment": df_flight_crew,
    "passenger": df_passenger,
    "ticket": df_ticket,
    "payment": df_payment,
    "maintenance_log": df_maintenance_log,
    "users": df_users,
    "audit_log": df_audit_log,
}

for table_name, frame in tables.items():
    # Each script is written to insert_<table>.sql in the current directory.
    out_path = f"insert_{table_name}.sql"
    with open(out_path, "w", encoding="utf-8") as sql_file:
        sql_file.write(dataframe_to_insert_sql(frame, table_name))
    print(f"SQL INSERT script for table '{table_name}' saved to {out_path}")

print("✅ All SQL INSERT scripts have been generated and saved in the current directory.")


SQL INSERT script for table 'airport' saved to insert_airport.sql
SQL INSERT script for table 'aircraft' saved to insert_aircraft.sql
SQL INSERT script for table 'route' saved to insert_route.sql
SQL INSERT script for table 'flight' saved to insert_flight.sql
SQL INSERT script for table 'crew' saved to insert_crew.sql
SQL INSERT script for table 'flight_crew_assignment' saved to insert_flight_crew_assignment.sql
SQL INSERT script for table 'passenger' saved to insert_passenger.sql
SQL INSERT script for table 'ticket' saved to insert_ticket.sql
SQL INSERT script for table 'payment' saved to insert_payment.sql
SQL INSERT script for table 'maintenance_log' saved to insert_maintenance_log.sql
SQL INSERT script for table 'users' saved to insert_users.sql
SQL INSERT script for table 'audit_log' saved to insert_audit_log.sql
✅ All SQL INSERT scripts have been generated and saved in the current directory.
