In [7]:
# import necessary packages
import numpy as np
import pandas as pd
import datetime
# import MySQLdb

libraries = {'numpy': np, 'pandas': pd, 'datetime': datetime}

In [140]:
tm = pd.DataFrame(np.array([['2013-01-01', 108.614], ['2013-02-01', 108.034], ['2013-03-01', 108.769], ['2013-04-01', 110.209]]),
                  columns=['Three_Month_Start_Date', 'Average_Flying_Hours'])

tm['Three_Month_Start_Date'] = tm['Three_Month_Start_Date'].astype('datetime64')
tm['Average_Flying_Hours'] = tm['Average_Flying_Hours'].astype('float')

# print(tm.head())
# print(tm.dtypes)

In [141]:
wq = pd.DataFrame(np.array([[.001, .35, 0.24, 0.52], [.01, 7.29, 5.61, 9.38], [.02, 18.2, 14.56, 22.55], [.03, 31.17, 25.51, 37.77]]),
                  columns=['quantile', 'time', 'lcl', 'ucl'])

wq['quantile'] = wq['quantile'].astype('float') 
wq['time'] = wq['time'].astype('float') 
wq['lcl'] = wq['lcl'].astype('float') 
wq['ucl'] = wq['ucl'].astype('float') 

# print(wq.head())
# print(wq.dtypes)

In [142]:
def domain_check(tm, wq, X, q_cut, libraries):
    """ Calculates window value columns to determine what periods a flight record could be grouped into.
    Example: '2016-01-04' would return window values '2015-11-01', '2015-12-01', '2016-01-01'.
    Args:
        tm: date frame from previous function
        wq: currently only works where period = 3. Long-term, will try and make dynamic based on this value
        X: quantile for domain check (note: converted to percentage)
        libraries: dictionary of libraries; access by name
    Returns:
       windowed: data frame with window values where record is active
    """
    pd = libraries['pandas']

    # initialize output data frame
    out = pd.DataFrame(index=[0], columns={'domain_check'})

    # compute fleet90_75 NOTE: this may require debug due to weird dict issue
    fleet90_75 = tm['Average_Flying_Hours'].quantile(X * .01)

    # keep only relevant columns from weibull quantiles
    wq = wq[['quantile', 'time']]

    # sort by quantile ascending
    wq_sort = wq.sort_values(by=['quantile'], axis=0).reset_index(drop=True)
    
    # cut off low end of quantile values
    try:
        wq_sort = wq_sort.iloc[q_cut:]
    
        # create an incremental time column & fillna's with time for missing (first) value
        wq_sort['incremental_time'] = wq_sort['time'] - wq_sort['time'].shift(periods=1)
        wq_sort['incremental_time'] = wq_sort['incremental_time'].fillna(wq_sort['time'])

        # compute min_inc_time
        min_inc_time = min(wq_sort['incremental_time'])
     
    except ValueError:
        print("Cutoff value out-of-bounds. Resetting to 0 for this test.")
        
        wq_sort = wq.sort_values(by=['quantile'], axis=0).reset_index(drop=True)
        
        # create an incremental time column & fillna's with time for missing (first) value
        wq_sort['incremental_time'] = wq_sort['time'] - wq_sort['time'].shift(periods=1)
        wq_sort['incremental_time'] = wq_sort['incremental_time'].fillna(wq_sort['time'])

        # compute min_inc_time
        min_inc_time = min(wq_sort['incremental_time'])
        
    
    # determine if it fails the domain check
    if min_inc_time > fleet90_75:
        out.iloc[0]['domain_check'] = 'Fail'
        print("{} percentile of flight hours accrued in 90 days, {}, is more than the minimum time change between 0.01 quantile increments, {}. Domain Check fails.".format(X, fleet90_75, min_inc_time))

    else:
        out.iloc[0]['domain_check'] = 'Pass'
        print("{} percentile of flight hours accrued in 90 days, {}, is less than the minimum time change between 0.01 quantile increments, {}. Domain Check passes.".format(X, fleet90_75, min_inc_time))

    return wq_sort, out

In [143]:
X = 5
quant_cut = 10
wq, df_out = domain_check(tm, wq, X, quant_cut, libraries)
print(wq.head(10))
# print(df_out.head())

Cutoff value out-of-bounds. Resetting to 0 for this test.
5 percentile of flight hours accrued in 90 days, 108.121, is less than the minimum time change between 0.01 quantile increments, 0.35. Domain Check passes.
   quantile   time  incremental_time
0     0.001   0.35              0.35
1     0.010   7.29              6.94
2     0.020  18.20             10.91
3     0.030  31.17             12.97


In [95]:
# database credentials
dsn_database = "ercm_kc135"
dsn_hostname = "localhost"
dsn_port = 3306
dsn_uid = "root"
dsn_pwd = "root"

# create the database connection
conn = MySQLdb.connect(host = dsn_hostname, port = dsn_port, user = dsn_uid, passwd = dsn_pwd, db = dsn_database)

NameError: name 'MySQLdb' is not defined

In [None]:
# load data from three month flight history collector
query1 = "SELECT * FROM %s WHERE type='tow_ci'" # edit this sql
tm = pd.read_sql(sql = query2, con = conn)

# load data from single weibull quantities
query2 = "SELECT * FROM %s WHERE type='tow_ci'" # edit this sql
wq = pd.read_sql(sql = query2, con = conn)

In [None]:
def fn(conn, libraries, params, predecessors):
    pd = libraries["pandas"]

    # load data from three month flight history collector
    query1 = "SELECT * FROM %s WHERE type='tow_ci'" % predecessors[0]
    three_month_flight_history = pd.read_sql(sql = query2, con = conn)
    
    # load data from single weibull quantities
    query2 = "SELECT * FROM %s WHERE type='tow_ci'" % predecessors[1]
    single_weibull_quantiles = pd.read_sql(sql = query2, con = conn)
    

    # grab parameter values
    X = params['X'] #  default is 75
    df_out = domain_check(three_month_flight_history, single_weibull_quantiles, X, libraries):

    return df_out

In [58]:
# compute fleet90_75
X = 75
fleet90_75 = tm['Average_Flying_Hours'].quantile(.75)
print(fleet90_75)

109.129


In [111]:
# keep only relevant columns from weibull quantiles
wq = wq[['quantile', 'time', 'lcl', 'ucl']]

# sort by quantile ascending and reset the index for later iteration
wq_sort = wq.sort_values(by = ['quantile'], axis=0).reset_index()

time_list = list(wq_sort['time'])
print(time_list)

[0.35, 7.29, 18.2, 31.17]
