In [4]:
import pandas as pd
import numpy as np

# Dimensional Tables

# Airport dimension: one row per airport, keyed by the surrogate airport_id.
# Rows are written record-by-record so each airport's attributes read together.
airports = pd.DataFrame(
    [
        (1, 'JFK', "John F. Kennedy International", 'New York', 'NY', 'USA'),
        (2, 'LAX', "Los Angeles International", 'Los Angeles', 'CA', 'USA'),
        (3, 'ORD', "O'Hare International", 'Chicago', 'IL', 'USA'),
        (4, 'DFW', "Dallas/Fort Worth International", 'Dallas', 'TX', 'USA'),
        (5, 'SFO', "San Francisco International", 'San Francisco', 'CA', 'USA'),
    ],
    columns=['airport_id', 'airport_code', 'airport_name', 'city', 'state', 'country'],
)

# Aircraft type dimension: manufacturer/model pairs with their seat capacity,
# keyed by the surrogate aircraft_type_id.
aircraft_types = pd.DataFrame(
    [
        (1, 'Boeing', '737', 180),
        (2, 'Airbus', 'A320', 150),
        (3, 'Embraer', 'E175', 76),
        (4, 'Bombardier', 'CRJ900', 90),
    ],
    columns=['aircraft_type_id', 'manufacturer', 'model', 'capacity'],
)

# Date dimension: one row per calendar day of 2023. date_id is a 1-based
# surrogate key assigned in chronological order; the remaining columns are
# calendar parts derived from the 'date' column (day_of_week: Monday=0 ... Sunday=6).
date_range = pd.date_range(start='2023-01-01', end='2023-12-31')
date_dim = pd.DataFrame(
    {'date_id': range(1, len(date_range) + 1), 'date': date_range}
).assign(
    day=lambda frame: frame['date'].dt.day,
    month=lambda frame: frame['date'].dt.month,
    year=lambda frame: frame['date'].dt.year,
    quarter=lambda frame: frame['date'].dt.quarter,
    day_of_week=lambda frame: frame['date'].dt.dayofweek,
)

# Passenger dimension: 1000 placeholder passengers. Every text attribute is
# derived from passenger_id, so row i carries First<i>, Last<i>,
# passenger<i>@example.com and a phone of '+1555' plus the id zero-padded to
# seven digits.
passengers = pd.DataFrame({'passenger_id': range(1, 1001)}).assign(
    first_name=lambda frame: 'First' + frame['passenger_id'].astype(str),
    last_name=lambda frame: 'Last' + frame['passenger_id'].astype(str),
    email=lambda frame: 'passenger' + frame['passenger_id'].astype(str) + '@example.com',
    phone=lambda frame: '+1555' + frame['passenger_id'].astype(str).str.zfill(7),
)

# Fact Tables

# Aircraft Flight Information Fact Table
#
# One synthetic row per flight, with foreign keys into aircraft_types,
# airports and date_dim. Unlike fully independent draws, the columns are
# generated so that each row is internally consistent:
#   * the arrival airport always differs from the departure airport;
#   * scheduled arrival is the scheduled departure day or the day after;
#   * actual departure is the scheduled day or one day late, and the actual
#     arrival keeps the same day offset as the schedule.
# Date arithmetic is done directly on date_id, which relies on date_dim
# assigning consecutive integer ids in chronological order. Results past the
# end of the calendar are clipped to the last date_id.
_NUM_FLIGHTS = 1000
_flight_airport_ids = airports['airport_id'].to_numpy()
_flight_last_date_id = int(date_dim['date_id'].max())

# Choose the departure airport by position, then shift by a non-zero offset
# (modulo the airport count): the arrival is uniform over the *other* airports.
_dep_pos = np.random.randint(0, len(_flight_airport_ids), _NUM_FLIGHTS)
_arr_pos = (
    _dep_pos + np.random.randint(1, len(_flight_airport_ids), _NUM_FLIGHTS)
) % len(_flight_airport_ids)

_sched_dep = np.random.choice(date_dim['date_id'], _NUM_FLIGHTS)
# 0 = same-day arrival, 1 = overnight arrival.
_sched_arr = np.minimum(
    _sched_dep + np.random.randint(0, 2, _NUM_FLIGHTS), _flight_last_date_id
)
# 0 = on the scheduled day, 1 = delayed to the next day.
_actual_dep = np.minimum(
    _sched_dep + np.random.randint(0, 2, _NUM_FLIGHTS), _flight_last_date_id
)
_actual_arr = np.minimum(
    _actual_dep + (_sched_arr - _sched_dep), _flight_last_date_id
)

flight_info = pd.DataFrame({
    'flight_id': range(1, _NUM_FLIGHTS + 1),
    'aircraft_type_id': np.random.choice(aircraft_types['aircraft_type_id'], _NUM_FLIGHTS),
    'departure_airport_id': _flight_airport_ids[_dep_pos],
    'arrival_airport_id': _flight_airport_ids[_arr_pos],
    'scheduled_departure_date_id': _sched_dep,
    'scheduled_arrival_date_id': _sched_arr,
    'actual_departure_date_id': _actual_dep,
    'actual_arrival_date_id': _actual_arr,
    'flight_duration_minutes': np.random.randint(30, 600, _NUM_FLIGHTS),
    'distance_miles': np.random.randint(100, 3000, _NUM_FLIGHTS)
})

# Passenger Reservations Fact Table
#
# One synthetic row per reservation, with foreign keys into passengers,
# flight_info and date_dim.
#   * booking_date_id is drawn uniformly between the first calendar date_id
#     and the booked flight's scheduled departure date_id (inclusive), so a
#     reservation is never booked after its flight departs. This relies on
#     date_dim assigning integer ids in chronological order.
#   * seat_number is a seat letter A-F followed by a row number 1-30; letters
#     and rows are drawn as two vectorized arrays instead of two RNG calls
#     per reservation.
_NUM_RESERVATIONS = 2000
_res_flight_ids = np.random.choice(flight_info['flight_id'], _NUM_RESERVATIONS)

# Look up each reserved flight's scheduled departure day (flight_id is unique).
_res_departure_ids = (
    flight_info.set_index('flight_id')['scheduled_departure_date_id']
    .loc[_res_flight_ids]
    .to_numpy()
)
# randint's upper bound is exclusive, hence the +1 to allow same-day booking.
_res_booking_ids = np.random.randint(
    int(date_dim['date_id'].min()), _res_departure_ids + 1
)

_seat_letters = np.random.choice(list("ABCDEF"), _NUM_RESERVATIONS)
_seat_rows = np.random.randint(1, 31, _NUM_RESERVATIONS)

reservations = pd.DataFrame({
    'reservation_id': range(1, _NUM_RESERVATIONS + 1),
    'passenger_id': np.random.choice(passengers['passenger_id'], _NUM_RESERVATIONS),
    'flight_id': _res_flight_ids,
    'booking_date_id': _res_booking_ids,
    'seat_number': [f'{letter}{row}' for letter, row in zip(_seat_letters, _seat_rows)],
    'fare_amount': np.round(np.random.uniform(50, 1000, _NUM_RESERVATIONS), 2),
    'reservation_status': np.random.choice(['Confirmed', 'Cancelled', 'Checked-in'], _NUM_RESERVATIONS)
})
# Aircraft Details Fact Table
#
# One synthetic row per physical aircraft, with foreign keys into
# aircraft_types and date_dim. Columns are generated so each row is
# internally consistent:
#   * registration_number is unique: the numeric part is sampled without
#     replacement (independent draws of 100 values from 900 collide almost
#     always);
#   * seating_capacity is the capacity of the aircraft's own type in
#     aircraft_types rather than an unrelated random value;
#   * last_maintenance_date_id is never earlier than manufacture_date_id
#     (relies on date_dim assigning integer ids in chronological order);
#   * current_fuel_level is drawn directly from [1000, max_fuel_capacity], so
#     it cannot exceed the tank size and no after-the-fact clamp is needed.
_NUM_AIRCRAFT = 100
_fleet_type_ids = np.random.choice(aircraft_types['aircraft_type_id'], _NUM_AIRCRAFT)
_fleet_capacity = (
    aircraft_types.set_index('aircraft_type_id')['capacity']
    .loc[_fleet_type_ids]
    .to_numpy()
)
_fleet_tail_numbers = np.random.choice(np.arange(100, 1000), _NUM_AIRCRAFT, replace=False)

_fleet_manufactured = np.random.choice(date_dim['date_id'], _NUM_AIRCRAFT)
# randint's upper bound is exclusive, hence the +1 on the last calendar id.
_fleet_last_maintenance = np.random.randint(
    _fleet_manufactured, int(date_dim['date_id'].max()) + 1
)

_fleet_max_fuel = np.random.randint(15000, 50000, _NUM_AIRCRAFT)
_fleet_current_fuel = np.random.randint(1000, _fleet_max_fuel + 1)

aircraft_details = pd.DataFrame({
    'aircraft_id': range(1, _NUM_AIRCRAFT + 1),
    'aircraft_type_id': _fleet_type_ids,
    'registration_number': [f'N{number}AA' for number in _fleet_tail_numbers],
    'manufacture_date_id': _fleet_manufactured,
    'last_maintenance_date_id': _fleet_last_maintenance,
    'total_flight_hours': np.random.randint(1000, 50000, _NUM_AIRCRAFT),
    'seating_capacity': _fleet_capacity,
    'max_fuel_capacity': _fleet_max_fuel,  # in gallons
    'current_fuel_level': _fleet_current_fuel,  # in gallons
    'average_travel_range': np.random.randint(2000, 8000, _NUM_AIRCRAFT)  # in miles
})




# Preview the first five rows of every table. Each call prints the heading
# and the frame separated by a newline; headings after the first start with
# a blank line to separate the tables in the output.
print("Airports:", airports.head(), sep="\n")
print("\nAircraft Types:", aircraft_types.head(), sep="\n")
print("\nDate Dimension:", date_dim.head(), sep="\n")
print("\nPassengers:", passengers.head(), sep="\n")
print("\nFlight Information:", flight_info.head(), sep="\n")
print("\nReservations:", reservations.head(), sep="\n")
print("\nAircraft Details:", aircraft_details.head(), sep="\n")

Airports:
   airport_id airport_code                     airport_name           city  \
0           1          JFK    John F. Kennedy International       New York   
1           2          LAX        Los Angeles International    Los Angeles   
2           3          ORD             O'Hare International        Chicago   
3           4          DFW  Dallas/Fort Worth International         Dallas   
4           5          SFO      San Francisco International  San Francisco   

  state country  
0    NY     USA  
1    CA     USA  
2    IL     USA  
3    TX     USA  
4    CA     USA  

Aircraft Types:
   aircraft_type_id manufacturer   model  capacity
0                 1       Boeing     737       180
1                 2       Airbus    A320       150
2                 3      Embraer    E175        76
3                 4   Bombardier  CRJ900        90

Date Dimension:
   date_id       date  day  month  year  quarter  day_of_week
0        1 2023-01-01    1      1  2023        1            6
