In [3]:
import os
import json
import random
from datetime import datetime, timedelta

# Parameters
N = 5000  # Number of JSON files
M_min = 50  # Minimum number of flights per file
M_max = 100  # Maximum number of flights per file
K_min = 100  # Minimum number of cities
K_max = 200  # Maximum number of cities
L_min = 0.001  # Minimum probability of NULL value (0.1%)
L_max = 0.005  # Maximum probability of NULL value (0.5%)

# Function to generate random dates
def random_date():
    """Return a random day between 2022-01-01 and 2023-12-31, inclusive.

    The result is a ``datetime`` at midnight; the day offset from the first
    day of the window is drawn uniformly with ``random.randint``.
    """
    first_day = datetime(2022, 1, 1)
    last_day = datetime(2023, 12, 31)
    # Whole days in the window; randint is inclusive at both ends, so the
    # last day itself can be returned.
    span_days = (last_day - first_day).days
    offset = timedelta(days=random.randint(0, span_days))
    return first_day + offset

# Generate random flight data
def generate_flight(num_cities=None, null_probability=None):
    """Build one synthetic flight record.

    Args:
        num_cities: Number of cities to draw from (names are ``City1`` ..
            ``City<num_cities>``). Defaults to the module-level ``K`` so
            existing ``generate_flight()`` calls keep working.
        null_probability: Independent per-field probability of replacing a
            value with ``None``. Defaults to the module-level ``L``.

    Returns:
        dict with keys ``date`` (``'YYYY-MM-DD'`` string), ``origin_city``,
        ``destination_city``, ``flight_duration_secs`` and
        ``passengers_on_board``; any value may be ``None``.
    """
    # Fall back to the globals set by the generation loop when the caller
    # does not pass explicit values.
    if num_cities is None:
        num_cities = K
    if null_probability is None:
        null_probability = L

    date = random_date().strftime('%Y-%m-%d')

    origin_index = random.randint(1, num_cities)
    if num_cities > 1:
        # Draw the destination from the remaining cities so a flight never
        # departs from and arrives at the same city.
        destination_index = random.randint(1, num_cities - 1)
        if destination_index >= origin_index:
            destination_index += 1
    else:
        # Only one city exists; a distinct destination is impossible.
        destination_index = origin_index

    flight = {
        'date': date,
        'origin_city': f"City{origin_index}",
        'destination_city': f"City{destination_index}",
        'flight_duration_secs': random.randint(3600, 14400),  # 1 to 4 hours
        'passengers_on_board': random.randint(50, 300),
    }

    # Introduce null values: one independent draw per field, in key order.
    return {
        key: (None if random.random() < null_probability else value)
        for key, value in flight.items()
    }

# Generate JSON files
# Create the output directory once instead of on every iteration.
output_dir = "/tmp/flights"
os.makedirs(output_dir, exist_ok=True)

# Capture the month stamp once so every file of a run shares the same prefix,
# even if the run happens to cross a month boundary.
month_year = datetime.now().strftime('%m-%Y')

# NOTE(review): the file name only encodes the month stamp and a city index
# (at most K_max distinct values), so with N much larger than K_max most
# iterations overwrite a file written earlier in the same run. The naming
# scheme is kept as-is because downstream readers may depend on it; the
# overwrites are counted and reported below instead of passing silently.
written_paths = set()
overwrite_count = 0

for i in range(N):
    M = random.randint(M_min, M_max)  # Number of flights in this file
    K = random.randint(K_min, K_max)  # Total number of cities
    L = random.uniform(L_min, L_max)  # Probability of NULL value

    flights = [generate_flight() for _ in range(M)]

    # Prepare file path and name
    origin_city = f"City{random.randint(1, K)}"
    filename = f"{output_dir}/{month_year}-{origin_city}-flights.json"

    if filename in written_paths:
        overwrite_count += 1
    written_paths.add(filename)

    # Write flights to JSON file
    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(flights, file, indent=2)

    print(f"Generated file {filename} with {M} flights.")

# Make the real outcome visible: how many files exist after the run.
print(
    f"Wrote {len(written_paths)} distinct files in {N} iterations "
    f"({overwrite_count} overwrote a file from earlier in this run)."
)


Generated file /tmp/flights/07-2024-City87-flights.json with 51 flights.
Generated file /tmp/flights/07-2024-City45-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City6-flights.json with 58 flights.
Generated file /tmp/flights/07-2024-City114-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City35-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City112-flights.json with 70 flights.
Generated file /tmp/flights/07-2024-City20-flights.json with 91 flights.
Generated file /tmp/flights/07-2024-City63-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City105-flights.json with 86 flights.
Generated file /tmp/flights/07-2024-City186-flights.json with 78 flights.
Generated file /tmp/flights/07-2024-City67-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City77-flights.json with 94 flights.
Generated file /tmp/flights/07-2024-City4-flights.json with 53 flights.
Generated file /tmp/flights/07-2024-City58-flight

Generated file /tmp/flights/07-2024-City105-flights.json with 56 flights.
Generated file /tmp/flights/07-2024-City9-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City51-flights.json with 77 flights.
Generated file /tmp/flights/07-2024-City20-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City10-flights.json with 67 flights.
Generated file /tmp/flights/07-2024-City106-flights.json with 60 flights.
Generated file /tmp/flights/07-2024-City75-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City110-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City153-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City134-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City102-flights.json with 52 flights.
Generated file /tmp/flights/07-2024-City117-flights.json with 63 flights.
Generated file /tmp/flights/07-2024-City73-flights.json with 54 flights.
Generated file /tmp/flights/07-2024-City81-fl

Generated file /tmp/flights/07-2024-City52-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City27-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City72-flights.json with 99 flights.
Generated file /tmp/flights/07-2024-City129-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City99-flights.json with 75 flights.
Generated file /tmp/flights/07-2024-City8-flights.json with 56 flights.
Generated file /tmp/flights/07-2024-City33-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City5-flights.json with 54 flights.
Generated file /tmp/flights/07-2024-City98-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City75-flights.json with 52 flights.
Generated file /tmp/flights/07-2024-City119-flights.json with 80 flights.
Generated file /tmp/flights/07-2024-City148-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City150-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City90-flight

Generated file /tmp/flights/07-2024-City110-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City34-flights.json with 71 flights.
Generated file /tmp/flights/07-2024-City1-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City87-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City6-flights.json with 86 flights.
Generated file /tmp/flights/07-2024-City127-flights.json with 83 flights.
Generated file /tmp/flights/07-2024-City129-flights.json with 74 flights.
Generated file /tmp/flights/07-2024-City176-flights.json with 77 flights.
Generated file /tmp/flights/07-2024-City155-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City6-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City53-flights.json with 51 flights.
Generated file /tmp/flights/07-2024-City72-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City102-flights.json with 55 flights.
Generated file /tmp/flights/07-2024-City100-flig

Generated file /tmp/flights/07-2024-City89-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City70-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City21-flights.json with 52 flights.
Generated file /tmp/flights/07-2024-City71-flights.json with 59 flights.
Generated file /tmp/flights/07-2024-City189-flights.json with 59 flights.
Generated file /tmp/flights/07-2024-City19-flights.json with 87 flights.
Generated file /tmp/flights/07-2024-City20-flights.json with 64 flights.
Generated file /tmp/flights/07-2024-City62-flights.json with 54 flights.
Generated file /tmp/flights/07-2024-City83-flights.json with 71 flights.
Generated file /tmp/flights/07-2024-City157-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City37-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City82-flights.json with 68 flights.
Generated file /tmp/flights/07-2024-City62-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City34-flight

Generated file /tmp/flights/07-2024-City27-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City2-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City88-flights.json with 54 flights.
Generated file /tmp/flights/07-2024-City41-flights.json with 99 flights.
Generated file /tmp/flights/07-2024-City166-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City12-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City81-flights.json with 87 flights.
Generated file /tmp/flights/07-2024-City128-flights.json with 71 flights.
Generated file /tmp/flights/07-2024-City66-flights.json with 52 flights.
Generated file /tmp/flights/07-2024-City4-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City138-flights.json with 59 flights.
Generated file /tmp/flights/07-2024-City58-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City79-flights.json with 54 flights.
Generated file /tmp/flights/07-2024-City35-flights

Generated file /tmp/flights/07-2024-City103-flights.json with 100 flights.
Generated file /tmp/flights/07-2024-City164-flights.json with 87 flights.
Generated file /tmp/flights/07-2024-City41-flights.json with 77 flights.
Generated file /tmp/flights/07-2024-City5-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City29-flights.json with 60 flights.
Generated file /tmp/flights/07-2024-City110-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City49-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City143-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City68-flights.json with 91 flights.
Generated file /tmp/flights/07-2024-City51-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City174-flights.json with 68 flights.
Generated file /tmp/flights/07-2024-City78-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City4-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City1-fligh

Generated file /tmp/flights/07-2024-City110-flights.json with 55 flights.
Generated file /tmp/flights/07-2024-City79-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City24-flights.json with 80 flights.
Generated file /tmp/flights/07-2024-City175-flights.json with 71 flights.
Generated file /tmp/flights/07-2024-City96-flights.json with 89 flights.
Generated file /tmp/flights/07-2024-City112-flights.json with 74 flights.
Generated file /tmp/flights/07-2024-City24-flights.json with 88 flights.
Generated file /tmp/flights/07-2024-City52-flights.json with 52 flights.
Generated file /tmp/flights/07-2024-City16-flights.json with 89 flights.
Generated file /tmp/flights/07-2024-City99-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City27-flights.json with 56 flights.
Generated file /tmp/flights/07-2024-City96-flights.json with 58 flights.
Generated file /tmp/flights/07-2024-City16-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City15-fligh

Generated file /tmp/flights/07-2024-City78-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City66-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City42-flights.json with 53 flights.
Generated file /tmp/flights/07-2024-City57-flights.json with 59 flights.
Generated file /tmp/flights/07-2024-City36-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City120-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City91-flights.json with 60 flights.
Generated file /tmp/flights/07-2024-City51-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City80-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City194-flights.json with 89 flights.
Generated file /tmp/flights/07-2024-City110-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City96-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City110-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City107-fli

Generated file /tmp/flights/07-2024-City79-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City142-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City12-flights.json with 55 flights.
Generated file /tmp/flights/07-2024-City65-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City7-flights.json with 57 flights.
Generated file /tmp/flights/07-2024-City27-flights.json with 76 flights.
Generated file /tmp/flights/07-2024-City70-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City96-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City102-flights.json with 57 flights.
Generated file /tmp/flights/07-2024-City7-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City64-flights.json with 58 flights.
Generated file /tmp/flights/07-2024-City19-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City70-flights.json with 58 flights.
Generated file /tmp/flights/07-2024-City11-flights.

Generated file /tmp/flights/07-2024-City108-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City115-flights.json with 80 flights.
Generated file /tmp/flights/07-2024-City102-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City106-flights.json with 86 flights.
Generated file /tmp/flights/07-2024-City30-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City51-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City161-flights.json with 55 flights.
Generated file /tmp/flights/07-2024-City42-flights.json with 64 flights.
Generated file /tmp/flights/07-2024-City139-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City106-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City67-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City102-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City41-flights.json with 91 flights.
Generated file /tmp/flights/07-2024-City56-

Generated file /tmp/flights/07-2024-City67-flights.json with 55 flights.
Generated file /tmp/flights/07-2024-City10-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City63-flights.json with 53 flights.
Generated file /tmp/flights/07-2024-City101-flights.json with 53 flights.
Generated file /tmp/flights/07-2024-City119-flights.json with 59 flights.
Generated file /tmp/flights/07-2024-City26-flights.json with 76 flights.
Generated file /tmp/flights/07-2024-City71-flights.json with 100 flights.
Generated file /tmp/flights/07-2024-City11-flights.json with 79 flights.
Generated file /tmp/flights/07-2024-City20-flights.json with 54 flights.
Generated file /tmp/flights/07-2024-City22-flights.json with 54 flights.
Generated file /tmp/flights/07-2024-City117-flights.json with 75 flights.
Generated file /tmp/flights/07-2024-City87-flights.json with 80 flights.
Generated file /tmp/flights/07-2024-City51-flights.json with 52 flights.
Generated file /tmp/flights/07-2024-City130-fli

Generated file /tmp/flights/07-2024-City8-flights.json with 54 flights.
Generated file /tmp/flights/07-2024-City63-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City109-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City43-flights.json with 79 flights.
Generated file /tmp/flights/07-2024-City70-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City78-flights.json with 57 flights.
Generated file /tmp/flights/07-2024-City77-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City58-flights.json with 70 flights.
Generated file /tmp/flights/07-2024-City68-flights.json with 67 flights.
Generated file /tmp/flights/07-2024-City108-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City31-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City106-flights.json with 64 flights.
Generated file /tmp/flights/07-2024-City40-flights.json with 63 flights.
Generated file /tmp/flights/07-2024-City9-flights

Generated file /tmp/flights/07-2024-City32-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City82-flights.json with 99 flights.
Generated file /tmp/flights/07-2024-City81-flights.json with 67 flights.
Generated file /tmp/flights/07-2024-City82-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City49-flights.json with 91 flights.
Generated file /tmp/flights/07-2024-City89-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City71-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City81-flights.json with 51 flights.
Generated file /tmp/flights/07-2024-City24-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City30-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City71-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City66-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City59-flights.json with 79 flights.
Generated file /tmp/flights/07-2024-City141-flights

Generated file /tmp/flights/07-2024-City119-flights.json with 68 flights.
Generated file /tmp/flights/07-2024-City191-flights.json with 78 flights.
Generated file /tmp/flights/07-2024-City33-flights.json with 94 flights.
Generated file /tmp/flights/07-2024-City96-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City92-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City4-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City43-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City116-flights.json with 99 flights.
Generated file /tmp/flights/07-2024-City144-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City89-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City26-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City8-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City2-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City122-flight

Generated file /tmp/flights/07-2024-City50-flights.json with 79 flights.
Generated file /tmp/flights/07-2024-City66-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City77-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City115-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City146-flights.json with 91 flights.
Generated file /tmp/flights/07-2024-City71-flights.json with 94 flights.
Generated file /tmp/flights/07-2024-City144-flights.json with 55 flights.
Generated file /tmp/flights/07-2024-City124-flights.json with 63 flights.
Generated file /tmp/flights/07-2024-City9-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City64-flights.json with 68 flights.
Generated file /tmp/flights/07-2024-City23-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City43-flights.json with 67 flights.
Generated file /tmp/flights/07-2024-City44-flights.json with 57 flights.
Generated file /tmp/flights/07-2024-City54-fligh

Generated file /tmp/flights/07-2024-City93-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City76-flights.json with 51 flights.
Generated file /tmp/flights/07-2024-City54-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City26-flights.json with 60 flights.
Generated file /tmp/flights/07-2024-City53-flights.json with 89 flights.
Generated file /tmp/flights/07-2024-City147-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City133-flights.json with 75 flights.
Generated file /tmp/flights/07-2024-City32-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City59-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City44-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City85-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City12-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City49-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City108-fligh

Generated file /tmp/flights/07-2024-City73-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City78-flights.json with 77 flights.
Generated file /tmp/flights/07-2024-City50-flights.json with 75 flights.
Generated file /tmp/flights/07-2024-City35-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City165-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City165-flights.json with 71 flights.
Generated file /tmp/flights/07-2024-City47-flights.json with 58 flights.
Generated file /tmp/flights/07-2024-City113-flights.json with 57 flights.
Generated file /tmp/flights/07-2024-City15-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City17-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City42-flights.json with 51 flights.
Generated file /tmp/flights/07-2024-City42-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City32-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City20-fligh

Generated file /tmp/flights/07-2024-City56-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City99-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City21-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City68-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City70-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City13-flights.json with 70 flights.
Generated file /tmp/flights/07-2024-City84-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City79-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City8-flights.json with 78 flights.
Generated file /tmp/flights/07-2024-City28-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City31-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City47-flights.json with 79 flights.
Generated file /tmp/flights/07-2024-City121-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City123-flights

Generated file /tmp/flights/07-2024-City50-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City41-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City21-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City28-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City148-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City60-flights.json with 63 flights.
Generated file /tmp/flights/07-2024-City56-flights.json with 67 flights.
Generated file /tmp/flights/07-2024-City2-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City2-flights.json with 79 flights.
Generated file /tmp/flights/07-2024-City66-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City75-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City136-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City167-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City68-flights

Generated file /tmp/flights/07-2024-City54-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City55-flights.json with 78 flights.
Generated file /tmp/flights/07-2024-City62-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City129-flights.json with 88 flights.
Generated file /tmp/flights/07-2024-City85-flights.json with 60 flights.
Generated file /tmp/flights/07-2024-City20-flights.json with 76 flights.
Generated file /tmp/flights/07-2024-City3-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City97-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City69-flights.json with 94 flights.
Generated file /tmp/flights/07-2024-City125-flights.json with 79 flights.
Generated file /tmp/flights/07-2024-City26-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City148-flights.json with 86 flights.
Generated file /tmp/flights/07-2024-City84-flights.json with 67 flights.
Generated file /tmp/flights/07-2024-City128-fligh

Generated file /tmp/flights/07-2024-City112-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City152-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City128-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City94-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City39-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City156-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City53-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City32-flights.json with 63 flights.
Generated file /tmp/flights/07-2024-City109-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City16-flights.json with 71 flights.
Generated file /tmp/flights/07-2024-City50-flights.json with 100 flights.
Generated file /tmp/flights/07-2024-City20-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City59-flights.json with 58 flights.
Generated file /tmp/flights/07-2024-City71-fl

Generated file /tmp/flights/07-2024-City69-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City123-flights.json with 83 flights.
Generated file /tmp/flights/07-2024-City70-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City13-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City58-flights.json with 59 flights.
Generated file /tmp/flights/07-2024-City40-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City75-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City15-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City125-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City157-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City11-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City92-flights.json with 88 flights.
Generated file /tmp/flights/07-2024-City28-flights.json with 75 flights.
Generated file /tmp/flights/07-2024-City132-flig

Generated file /tmp/flights/07-2024-City61-flights.json with 83 flights.
Generated file /tmp/flights/07-2024-City79-flights.json with 91 flights.
Generated file /tmp/flights/07-2024-City103-flights.json with 57 flights.
Generated file /tmp/flights/07-2024-City59-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City104-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City87-flights.json with 68 flights.
Generated file /tmp/flights/07-2024-City26-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City44-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City44-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City73-flights.json with 83 flights.
Generated file /tmp/flights/07-2024-City28-flights.json with 77 flights.
Generated file /tmp/flights/07-2024-City110-flights.json with 83 flights.
Generated file /tmp/flights/07-2024-City68-flights.json with 63 flights.
Generated file /tmp/flights/07-2024-City17-fligh

Generated file /tmp/flights/07-2024-City30-flights.json with 68 flights.
Generated file /tmp/flights/07-2024-City4-flights.json with 100 flights.
Generated file /tmp/flights/07-2024-City57-flights.json with 67 flights.
Generated file /tmp/flights/07-2024-City111-flights.json with 78 flights.
Generated file /tmp/flights/07-2024-City63-flights.json with 64 flights.
Generated file /tmp/flights/07-2024-City120-flights.json with 74 flights.
Generated file /tmp/flights/07-2024-City113-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City82-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City53-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City78-flights.json with 60 flights.
Generated file /tmp/flights/07-2024-City141-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City178-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City85-flights.json with 74 flights.
Generated file /tmp/flights/07-2024-City78-fli

Generated file /tmp/flights/07-2024-City106-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City166-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City170-flights.json with 74 flights.
Generated file /tmp/flights/07-2024-City44-flights.json with 58 flights.
Generated file /tmp/flights/07-2024-City32-flights.json with 78 flights.
Generated file /tmp/flights/07-2024-City159-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City141-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City86-flights.json with 87 flights.
Generated file /tmp/flights/07-2024-City45-flights.json with 87 flights.
Generated file /tmp/flights/07-2024-City68-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City102-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City48-flights.json with 54 flights.
Generated file /tmp/flights/07-2024-City27-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City93-fl

Generated file /tmp/flights/07-2024-City132-flights.json with 99 flights.
Generated file /tmp/flights/07-2024-City48-flights.json with 58 flights.
Generated file /tmp/flights/07-2024-City133-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City57-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City11-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City71-flights.json with 52 flights.
Generated file /tmp/flights/07-2024-City117-flights.json with 59 flights.
Generated file /tmp/flights/07-2024-City65-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City37-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City100-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City104-flights.json with 56 flights.
Generated file /tmp/flights/07-2024-City70-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City35-flights.json with 53 flights.
Generated file /tmp/flights/07-2024-City84-fli

Generated file /tmp/flights/07-2024-City96-flights.json with 68 flights.
Generated file /tmp/flights/07-2024-City60-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City104-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City92-flights.json with 60 flights.
Generated file /tmp/flights/07-2024-City62-flights.json with 79 flights.
Generated file /tmp/flights/07-2024-City92-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City82-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City56-flights.json with 90 flights.
Generated file /tmp/flights/07-2024-City2-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City24-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City32-flights.json with 54 flights.
Generated file /tmp/flights/07-2024-City147-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City21-flights.json with 59 flights.
Generated file /tmp/flights/07-2024-City9-flights.

Generated file /tmp/flights/07-2024-City133-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City83-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City40-flights.json with 51 flights.
Generated file /tmp/flights/07-2024-City12-flights.json with 89 flights.
Generated file /tmp/flights/07-2024-City136-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City23-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City49-flights.json with 52 flights.
Generated file /tmp/flights/07-2024-City41-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City142-flights.json with 57 flights.
Generated file /tmp/flights/07-2024-City107-flights.json with 56 flights.
Generated file /tmp/flights/07-2024-City19-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City46-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City57-flights.json with 86 flights.
Generated file /tmp/flights/07-2024-City171-fli

Generated file /tmp/flights/07-2024-City24-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City183-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City164-flights.json with 68 flights.
Generated file /tmp/flights/07-2024-City39-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City16-flights.json with 77 flights.
Generated file /tmp/flights/07-2024-City119-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City45-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City81-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City109-flights.json with 75 flights.
Generated file /tmp/flights/07-2024-City129-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City34-flights.json with 69 flights.
Generated file /tmp/flights/07-2024-City71-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City56-flights.json with 53 flights.
Generated file /tmp/flights/07-2024-City85-fli

Generated file /tmp/flights/07-2024-City84-flights.json with 67 flights.
Generated file /tmp/flights/07-2024-City65-flights.json with 89 flights.
Generated file /tmp/flights/07-2024-City165-flights.json with 99 flights.
Generated file /tmp/flights/07-2024-City119-flights.json with 77 flights.
Generated file /tmp/flights/07-2024-City84-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City90-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City123-flights.json with 91 flights.
Generated file /tmp/flights/07-2024-City68-flights.json with 70 flights.
Generated file /tmp/flights/07-2024-City143-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City20-flights.json with 52 flights.
Generated file /tmp/flights/07-2024-City50-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City89-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City22-flights.json with 63 flights.
Generated file /tmp/flights/07-2024-City165-fli

Generated file /tmp/flights/07-2024-City158-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City66-flights.json with 64 flights.
Generated file /tmp/flights/07-2024-City103-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City49-flights.json with 63 flights.
Generated file /tmp/flights/07-2024-City61-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City109-flights.json with 83 flights.
Generated file /tmp/flights/07-2024-City13-flights.json with 91 flights.
Generated file /tmp/flights/07-2024-City33-flights.json with 52 flights.
Generated file /tmp/flights/07-2024-City38-flights.json with 64 flights.
Generated file /tmp/flights/07-2024-City74-flights.json with 83 flights.
Generated file /tmp/flights/07-2024-City110-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City91-flights.json with 72 flights.
Generated file /tmp/flights/07-2024-City50-flights.json with 75 flights.
Generated file /tmp/flights/07-2024-City78-flig

Generated file /tmp/flights/07-2024-City44-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City14-flights.json with 79 flights.
Generated file /tmp/flights/07-2024-City55-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City130-flights.json with 77 flights.
Generated file /tmp/flights/07-2024-City29-flights.json with 65 flights.
Generated file /tmp/flights/07-2024-City69-flights.json with 94 flights.
Generated file /tmp/flights/07-2024-City126-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City146-flights.json with 88 flights.
Generated file /tmp/flights/07-2024-City31-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City50-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City137-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City117-flights.json with 53 flights.
Generated file /tmp/flights/07-2024-City55-flights.json with 73 flights.
Generated file /tmp/flights/07-2024-City4-flig

Generated file /tmp/flights/07-2024-City96-flights.json with 74 flights.
Generated file /tmp/flights/07-2024-City15-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City131-flights.json with 53 flights.
Generated file /tmp/flights/07-2024-City117-flights.json with 89 flights.
Generated file /tmp/flights/07-2024-City77-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City98-flights.json with 70 flights.
Generated file /tmp/flights/07-2024-City6-flights.json with 85 flights.
Generated file /tmp/flights/07-2024-City73-flights.json with 59 flights.
Generated file /tmp/flights/07-2024-City60-flights.json with 81 flights.
Generated file /tmp/flights/07-2024-City15-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City60-flights.json with 77 flights.
Generated file /tmp/flights/07-2024-City97-flights.json with 76 flights.
Generated file /tmp/flights/07-2024-City165-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City48-flight

Generated file /tmp/flights/07-2024-City80-flights.json with 64 flights.
Generated file /tmp/flights/07-2024-City116-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City61-flights.json with 75 flights.
Generated file /tmp/flights/07-2024-City91-flights.json with 80 flights.
Generated file /tmp/flights/07-2024-City70-flights.json with 98 flights.
Generated file /tmp/flights/07-2024-City124-flights.json with 84 flights.
Generated file /tmp/flights/07-2024-City69-flights.json with 75 flights.
Generated file /tmp/flights/07-2024-City68-flights.json with 63 flights.
Generated file /tmp/flights/07-2024-City141-flights.json with 66 flights.
Generated file /tmp/flights/07-2024-City18-flights.json with 82 flights.
Generated file /tmp/flights/07-2024-City121-flights.json with 96 flights.
Generated file /tmp/flights/07-2024-City75-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City21-flights.json with 93 flights.
Generated file /tmp/flights/07-2024-City45-flig

Generated file /tmp/flights/07-2024-City95-flights.json with 92 flights.
Generated file /tmp/flights/07-2024-City103-flights.json with 59 flights.
Generated file /tmp/flights/07-2024-City85-flights.json with 88 flights.
Generated file /tmp/flights/07-2024-City89-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City117-flights.json with 63 flights.
Generated file /tmp/flights/07-2024-City158-flights.json with 97 flights.
Generated file /tmp/flights/07-2024-City70-flights.json with 68 flights.
Generated file /tmp/flights/07-2024-City18-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City115-flights.json with 74 flights.
Generated file /tmp/flights/07-2024-City20-flights.json with 100 flights.
Generated file /tmp/flights/07-2024-City59-flights.json with 50 flights.
Generated file /tmp/flights/07-2024-City103-flights.json with 62 flights.
Generated file /tmp/flights/07-2024-City93-flights.json with 55 flights.
Generated file /tmp/flights/07-2024-City119-f

Generated file /tmp/flights/07-2024-City17-flights.json with 67 flights.
Generated file /tmp/flights/07-2024-City105-flights.json with 83 flights.
Generated file /tmp/flights/07-2024-City4-flights.json with 61 flights.
Generated file /tmp/flights/07-2024-City27-flights.json with 99 flights.
Generated file /tmp/flights/07-2024-City126-flights.json with 94 flights.
Generated file /tmp/flights/07-2024-City99-flights.json with 86 flights.
Generated file /tmp/flights/07-2024-City104-flights.json with 95 flights.
Generated file /tmp/flights/07-2024-City30-flights.json with 76 flights.


In [4]:
import os
import json
import glob
import statistics
from collections import defaultdict
import time

# Function to process JSON files
def process_json_files(directory):
    """Aggregate flight statistics from the ``*.json`` files in *directory*.

    Each file is expected to contain a JSON array of flight objects that
    provide the keys ``origin_city``, ``destination_city``,
    ``flight_duration_secs`` and ``passengers_on_board``; any value may be
    ``null``. Files that cannot be decoded as JSON are reported on stdout
    and skipped.

    The report printed to stdout contains:

    * the total number of records and the number of "dirty" records
      (records in which at least one value is ``null``);
    * the time spent reading and aggregating the files;
    * the average and 95th-percentile flight duration for the (up to) 25
      destination cities with the most recorded durations;
    * the cities with the largest passenger totals arrived and left.

    Dirty records still contribute whatever non-null fields they have to
    the aggregates; a ``null`` passenger count is counted as 0.

    Args:
        directory: Path of the directory to scan (non-recursive).

    Returns:
        None. All results are printed.
    """
    total_records = 0
    dirty_records = 0
    start_time = time.time()

    # Passenger totals per city, keyed by city name.
    passengers_arrived = defaultdict(int)
    passengers_left = defaultdict(int)

    # Recorded flight durations (seconds) per destination city.
    flight_durations = defaultdict(list)

    # glob returns files in arbitrary order; sorting keeps tie-breaking in the
    # "top 25" and "max passengers" selections reproducible between runs.
    for file_path in sorted(glob.glob(os.path.join(directory, '*.json'))):
        # Only the load itself can raise JSONDecodeError, so keep the try
        # body minimal and skip the file on failure.
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                flights = json.load(file)
        except json.JSONDecodeError:
            print(f"Error decoding JSON in file: {file_path}")
            continue

        total_records += len(flights)

        for flight in flights:
            if any(value is None for value in flight.values()):
                dirty_records += 1

            origin = flight['origin_city']
            destination = flight['destination_city']
            duration = flight['flight_duration_secs']
            # A null passenger count contributes nothing to the totals.
            passengers = flight['passengers_on_board'] or 0

            if origin is not None:
                passengers_left[origin] += passengers
            if destination is not None:
                passengers_arrived[destination] += passengers
                if duration is not None:
                    flight_durations[destination].append(duration)

    end_time = time.time()
    total_duration = end_time - start_time

    # Rank destination cities by how many durations were recorded for them.
    top_25_cities = sorted(
        flight_durations,
        key=lambda city: len(flight_durations[city]),
        reverse=True,
    )[:25]

    avg_flight_durations = {}
    p95_flight_durations = {}

    for city in top_25_cities:
        durations = flight_durations[city]
        if not durations:
            avg_flight_durations[city] = None
            p95_flight_durations[city] = None
            continue

        avg_flight_durations[city] = statistics.mean(durations)
        if len(durations) >= 2:
            # n=20 yields 19 cut points; index 18 is the 95th percentile.
            p95_flight_durations[city] = statistics.quantiles(durations, n=20)[18]
        else:
            # statistics.quantiles() raises StatisticsError for a single data
            # point before Python 3.13; from 3.13 on it returns that value
            # for every cut point, so do the same here on all versions.
            p95_flight_durations[city] = durations[0]

    # Cities with the largest passenger totals (None when nothing was seen).
    max_arrived_city = max(passengers_arrived, key=passengers_arrived.get) if passengers_arrived else None
    max_left_city = max(passengers_left, key=passengers_left.get) if passengers_left else None

    # Print results
    print(f"Total records processed: {total_records}")
    print(f"Dirty records (with NULL values): {dirty_records}")
    print(f"Total run duration: {total_duration:.2f} seconds\n")

    print("Average and P95 flight duration for Top 25 destination cities:")
    for city in top_25_cities:
        print(f"{city}:")
        print(f"  AVG duration: {avg_flight_durations[city]:.2f} seconds" if avg_flight_durations[city] is not None else "  AVG duration: N/A")
        print(f"  P95 duration: {p95_flight_durations[city]:.2f} seconds" if p95_flight_durations[city] is not None else "  P95 duration: N/A")
    print()

    print("Cities with maximum passengers arrived and left:")
    print(f"Max passengers arrived: {max_arrived_city} ({passengers_arrived[max_arrived_city]} passengers)" if max_arrived_city else "No data available")
    print(f"Max passengers left: {max_left_city} ({passengers_left[max_left_city]} passengers)" if max_left_city else "No data available")

# Main function
def main():
    """Print the flight report for the JSON files under /tmp/flights."""
    process_json_files('/tmp/flights')

# Run the report only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Total records processed: 14652
Dirty records (with NULL values): 241
Total run duration: 1.12 seconds

Average and P95 flight duration for Top 25 destination cities:
City38:
  AVG duration: 9036.63 seconds
  P95 duration: 13923.15 seconds
City87:
  AVG duration: 9424.66 seconds
  P95 duration: 14083.75 seconds
City81:
  AVG duration: 8544.85 seconds
  P95 duration: 13957.40 seconds
City97:
  AVG duration: 8482.71 seconds
  P95 duration: 13812.15 seconds
City62:
  AVG duration: 9096.78 seconds
  P95 duration: 13911.20 seconds
City50:
  AVG duration: 9319.42 seconds
  P95 duration: 13627.40 seconds
City8:
  AVG duration: 8479.40 seconds
  P95 duration: 13810.40 seconds
City88:
  AVG duration: 9247.82 seconds
  P95 duration: 13784.50 seconds
City14:
  AVG duration: 8687.16 seconds
  P95 duration: 14115.00 seconds
City96:
  AVG duration: 9114.72 seconds
  P95 duration: 13660.20 seconds
City10:
  AVG duration: 9089.16 seconds
  P95 duration: 13639.80 seconds
City5:
  AVG duration: 9277.29 s