In [300]:
import pandas as pd
import numpy as np
from datetime import datetime

## Pilots

In [301]:
# Seed NumPy's global RNG so that every table generated below is identical
# after "Restart Kernel -> Run All"; all random draws in this notebook go
# through np.random.
SEED = 42
np.random.seed(SEED)

# Number of synthetic pilots to generate.
n_pilots = 50

# Columns of the pilots table:
#   sex      - 0/1 flag
#   bd       - birth date
#   p9..p14  - per-pilot baseline values of the six features
#   trend    - integer multiplier applied to the day-of-year drift of the features
pilot_fields = ['sex', 'bd', 'p9', 'p10', 'p11', 'p12', 'p13', 'p14', 'trend']

In [302]:
# Zero-filled float placeholder table: one row per pilot, one column per field.
# The real values are written column by column in the cells that follow.
pilots = pd.DataFrame(0.0, index=pd.RangeIndex(n_pilots), columns=pilot_fields)

In [303]:
# Random 0/1 flag per pilot (the upper bound of randint is exclusive).
pilots['sex'] = np.random.randint(low=0, high=2, size=n_pilots)

In [304]:
# Birth dates: a uniformly random whole number of days (up to 35 * 365)
# after 1960-01-01.
base_date = np.datetime64('1960-01-01', 'D')
pilots['bd'] = base_date + np.random.randint(
    low=0, high=365 * 35, size=n_pilots
).astype('timedelta64[D]')

In [305]:
# Columns 2..7 are p9..p14: give each pilot a random integer baseline in [50, 70].
pilots.iloc[:, 2:8] = np.random.randint(low=50, high=71, size=(n_pilots, 6))

In [306]:
# Per-pilot trend: a random integer in [-3, 3].
pilots['trend'] = np.random.randint(low=-3, high=4, size=n_pilots)

In [307]:
#pilots

In [308]:
# Persist the pilots table; the row index is written as the first column.
pilots.to_excel(excel_writer='Pilots.xlsx')

## Aircraft

In [309]:
# Size of the aircraft table and its single column.
n_aircrafts, aircrafts_fields = 2, ['Name']

In [310]:
# Zero-filled float placeholder table, one row per aircraft; filled in below.
aircrafts = pd.DataFrame(0.0, index=pd.RangeIndex(n_aircrafts), columns=aircrafts_fields)

In [311]:
# Candidate names are the three-digit integers 100..999; draw distinct ones
# (sampling without replacement) for the aircraft table.
aircrafts_names = np.arange(100, 1000, 1)
aircrafts['Name'] = np.random.choice(a=aircrafts_names, size=n_aircrafts, replace=False)

In [312]:
# aircrafts

In [313]:
# Persist the aircraft table; the row index is written as the first column.
aircrafts.to_excel(excel_writer='Aircrafts.xlsx')

## Flights

In [314]:
# Columns of one flight record, in output order.
flight_fields = [
    # which pilot flew which aircraft, and on what date
    'pilot_id', 'aircraft_id', 'date',
    # flight conditions
    'weather', 'time', 'duration',
    # pilot attributes copied/derived at flight time
    'sex', 'age',
    # per-flight feature values
    'p9', 'p10', 'p11', 'p12', 'p13', 'p14',
]

In [315]:
# Empty flight record keyed by field name. The dtype is given explicitly
# because the implicit default for an empty Series differs between pandas
# versions (float64 with a deprecation warning before 2.0, object from 2.0),
# and the record mixes strings (date) with numbers.
f = pd.Series(index=flight_fields, dtype=object)

In [316]:
def rand(low, high):
    """Draw one float uniformly at random from the interval [low, high).

    Uses NumPy's global random state, so results follow np.random.seed().
    """
    span = high - low
    unit_sample = np.random.random()
    return unit_sample * span + low

In [327]:
base_date = np.datetime64('2017-01-01')

# Derive the number of simulated days from base_date. The previous hard-coded
# range(0, 366) produced one extra day dated 2018-01-01, because 2017 is not a
# leap year.
days_in_year = 366 if pd.Timestamp(base_date).is_leap_year else 365


def _clamp_score(value):
    """Turn a raw feature value into its final integer value.

    Values below 20 are replaced by a random integer in [10, 20], values above
    95 by a random integer in [95, 100]; anything in between is truncated to
    an int.
    """
    if value < 20:
        return np.random.randint(10, 21)
    if value > 95:
        return np.random.randint(95, 101)
    return int(value)


# Flights are collected as plain dicts and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and growing a frame row by
# row is quadratic.
flight_records = []

for day_of_year in range(days_in_year):
    # Between 20 and 29 flights are generated for each calendar day.
    flights_today = np.random.randint(20, 30)
    curdate = base_date + day_of_year

    for _ in range(flights_today):
        pilot_id = np.random.choice(pilots.index.values)
        pilot = pilots.loc[pilot_id, :]

        flight = {
            'pilot_id': pilot_id,
            'aircraft_id': np.random.choice(aircrafts.index.values),
            'date': str(curdate),
            'weather': np.random.randint(0, 2),
            'time': np.random.randint(0, 2),
            'duration': np.random.randint(90, 601),
            'sex': int(pilot['sex']),
        }

        # Age in whole years on the day of the flight.
        birthdate = pd.Timestamp(pilot['bd']).to_datetime64().astype('datetime64[D]')
        flight['age'] = int((curdate - birthdate).astype('timedelta64[Y]').astype(int))

        trend = pilot['trend']

        # p9..p11: pilot baseline plus a trend-driven drift that grows with
        # the day of the year.
        for feature in ['p9', 'p10', 'p11']:
            raw_value = (pilot[feature]
                         + trend * day_of_year * rand(-0.005, 0.01))
            flight[feature] = _clamp_score(raw_value)

        # p12..p14: baseline plus drift, plus random contributions scaled by
        # the flight conditions, the pilot's sex and age, and the p9..p11
        # values computed above.
        for feature in ['p12', 'p13', 'p14']:
            raw_value = (pilot[feature]
                         + trend * day_of_year * rand(-0.03, 0.15)
                         + flight['weather'] * rand(-20, 1)
                         + flight['time'] * rand(-20, 1)
                         + flight['duration'] * rand(-0.002, 0)
                         + flight['sex'] * rand(-2, 5)
                         + flight['age'] * rand(-0.05, 0.1)
                         + flight['p9'] * rand(0, 0.03)
                         + flight['p10'] * rand(0, 0.03)
                         + flight['p11'] * rand(0, 0.03))
            flight[feature] = _clamp_score(raw_value)

        flight_records.append(flight)

# Build the table in one step, with columns in the declared order.
flights = pd.DataFrame(flight_records, columns=flight_fields)

        

In [328]:
# Persist the generated flights; the row index is written as the first column.
flights.to_excel(excel_writer='Flights.xlsx')