#Objective

#Notebook Initialization

##Import Packages

In [0]:
from pyspark.sql.functions import col,isnan,when,count,lit, to_date,lpad,date_format,rpad,regexp_replace,concat,to_utc_timestamp,to_timestamp, countDistinct,unix_timestamp, row_number, when
from pyspark.sql.types import IntegerType,BooleanType,DateType,StringType,TimestampType
from pyspark.sql import DataFrameNaFunctions
from pyspark import StorageLevel
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from pytz import timezone
import datetime
from pyspark.ml import Pipeline, PipelineModel
from pyspark.ml.feature import StringIndexer, VectorAssembler, OneHotEncoder, StandardScaler, PCA, VectorSlicer, Imputer
from pyspark.ml.linalg import Vectors
from pyspark.sql.functions import percent_rank
from pyspark.sql import Window
from pyspark.ml.classification import LogisticRegression, RandomForestClassifier, GBTClassifier, DecisionTreeClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.sql import functions as f

##Cloud Storage Parameters

In [0]:
blob_container = "tm30container" # The name of your container created in https://portal.azure.com
storage_account = "w261tm30" # The name of your Storage account created in https://portal.azure.com
secret_scope = "w261tm30" # The name of the scope created in your local computer using the Databricks CLI
secret_key = "tm30key" # The name of the secret key created in your local computer using the Databricks CLI 
# WASB URL of the container; used as the prefix of the parquet path read below.
blob_url = f"wasbs://{blob_container}@{storage_account}.blob.core.windows.net"
# NOTE(review): secret_scope, secret_key and mount_path are not referenced again in the
# visible part of this notebook - confirm whether they are still needed.
mount_path = "/mnt/mids-w261"

# Load the checkpointed dataset that every later cell filters from.
# `spark` is not defined in this notebook; presumably it is the session injected by the runtime.
test_pq = spark.read.parquet(f"{blob_url}/2022-03-24_data_chkpt_PQ_full")

##Define Functions

In [0]:
def data_pull(df, time_window = 'full', date_col='FLIGHT_UTC_DATE'):
    """Filter ``df`` to a named time window, print its row count and return it.

    Parameters:
        df:          DataFrame holding the processed dataset.
        time_window: '2016', '2017', '2018' or '2019' keeps rows whose
                     ``date_col`` falls in that calendar year; '6m' / '3m'
                     keep rows dated before 2015-07-01 / 2015-04-01 (upper
                     bound only). Any other value, including the default
                     'full', leaves ``df`` unfiltered.
        date_col:    name of the date/timestamp column to filter on.
    Returns: the (possibly filtered) DataFrame.
    """
    if time_window in ('2016', '2017', '2018', '2019'):
        # Single-year windows: the window name is the year itself.
        df = df.filter(f.year(col(date_col)) == int(time_window))
    elif time_window == '6m':
        # The 6m / 3m windows are meant for the 2015 data.
        df = df.filter(col(date_col) < "2015-07-01T00:00:00.000")
    elif time_window == '3m':
        df = df.filter(col(date_col) < "2015-04-01T00:00:00.000")

    # count() triggers a full Spark job; comment this out if it takes too long
    print(f'{df.count():,} total records imported for the {time_window} dataset')
    return df

In [0]:
def pre_pipeline(index_cols, cont_cols, cat_cols, pred_cols):
    ''' Build the (unfitted) pre-processing pipeline used before cross-validation
        and model training.

        Stages, in order:
          1. StringIndexer   - every column in cat_cols -> "<col>_idx"
                               (handleInvalid="keep" gives unseen/null values their own index)
          2. OneHotEncoder   - "<col>_idx" -> "<col>_OHE"
          3. VectorAssembler - all "<col>_OHE" columns -> "cat_features"
          4. Imputer         - fills missing values of cont_cols in place
          5. StringIndexer   - target column -> "label"

        Parameters:
            index_cols: identifier columns; accepted for interface symmetry, not used here
            cont_cols:  list of continuous column names to impute
            cat_cols:   list of categorical column names to index and one-hot encode
            pred_cols:  list whose first element is the target column
                        (a bare string is also accepted; falls back to 'DEP_DEL15' when empty)
        Returns: an unfitted pyspark.ml Pipeline
    '''
    idx_cols = [c + "_idx" for c in cat_cols]
    ohe_cols = [c + "_OHE" for c in cat_cols]

    # Resolve the target column from pred_cols instead of hard-coding it.
    if isinstance(pred_cols, str):
        label_col = pred_cols
    elif pred_cols:
        label_col = pred_cols[0]
    else:
        label_col = 'DEP_DEL15'

    #Convert string to index
    indexer = StringIndexer(inputCols=cat_cols, outputCols=idx_cols).setHandleInvalid("keep")

    #Convert index columns to one-hot vectors
    encoder = OneHotEncoder(inputCols=idx_cols, outputCols=ohe_cols)

    #Vector assembler for categorical
    assembler_cat = VectorAssembler(inputCols=ohe_cols, outputCol="cat_features")

    #Ensure continuous variables have values
    imputer = Imputer(inputCols=cont_cols, outputCols=cont_cols)

    # The default ordering (frequencyDesc) gives index 0 to the most frequent value, so the
    # meaning of label 1 would flip on any data slice where delays are the majority.
    # alphabetAsc always maps 0 -> 0.0 and 1 -> 1.0, which downstream code relies on
    # (metricLabel=1, downsampling on label == 1).
    label_indexer = StringIndexer(inputCol=label_col, outputCol="label",
                                  stringOrderType="alphabetAsc")

    return Pipeline(stages=[indexer, encoder, assembler_cat, imputer, label_indexer])

In [0]:
def scaled_pipeline(model, param_grid=None):
    ''' Build an (unfitted) pipeline that scales the continuous features, joins them
        with the one-hot encoded categorical features and trains a classifier.

        The input DataFrame must already contain "cat_features" and "label"
        (both produced by pre_pipeline). The continuous columns are read from the
        notebook-level `cont_cols` list.

        Parameters:
            model:       lr = Logistic Regression (decision threshold 0.7);
                         rf = Random Forest
                         dt = Decision Trees
            param_grid:  optional dict of classifier parameter name -> single value,
                         e.g. {'maxIter': 1, 'regParam': 0, 'elasticNetParam': .5};
                         applied on top of the defaults above
        Returns: an unfitted pyspark.ml Pipeline
        Raises: ValueError if `model` is not one of 'lr', 'rf', 'dt'
    '''
    #Assemble cont variables
    assembler_num = VectorAssembler(inputCols=cont_cols, outputCol="scale_nums")

    #Scale the values
    scaler = StandardScaler(inputCol="scale_nums", outputCol="scaledFeatures", withStd=True, withMean=True)

    #Vector assembler combined
    assembler = VectorAssembler(inputCols=["scaledFeatures", "cat_features"], outputCol="features")

    #Models for the pipeline
    if model == 'lr':
        class_model = LogisticRegression(featuresCol = 'features', labelCol = 'label', threshold=0.7)
    elif model == 'rf':
        class_model = RandomForestClassifier(featuresCol = 'features', labelCol = 'label')
    elif model == 'dt':
        class_model = DecisionTreeClassifier(featuresCol = 'features', labelCol = 'label')
    else:
        # Previously an unknown key fell through to an unbound `class_model`.
        raise ValueError(f"Unknown model {model!r}; expected one of 'lr', 'rf', 'dt'")

    # Only the supplied parameters are overridden; everything else keeps the values set above.
    if param_grid:
        class_model.setParams(**param_grid)

    return Pipeline(stages=[assembler_num, scaler, assembler, class_model])

In [0]:
def custom_CV(df_rank, pre_pipeline, class_model, sample, kfolds):
    """Time-ordered cross-validation with an expanding training window.

    The `rank` column of `df_rank` is cut into kfolds + 1 equal slices. Fold i
    trains on every row with rank <= i/(kfolds+1) and tests on the next slice,
    so the training data always precedes the test data.

    Parameters:
        df_rank:      DataFrame with a numeric `rank` column in [0, 1] (in this notebook,
                      percent_rank ordered by FLIGHT_UTC_DATE) plus the model columns.
        pre_pipeline: unfitted pre-processing Pipeline (see pre_pipeline()).
        class_model:  classifier key forwarded to scaled_pipeline ('lr', 'rf' or 'dt').
        sample:       'down' downsamples each training fold with downsample();
                      any other value trains on the fold as is.
        kfolds:       number of folds, >= 1.
    Returns:
        (predict, mean_f) - the prediction DataFrame of the LAST fold (lazy; it is
        recomputed when used) and the mean F0.5 score for label 1 across all folds.
    Raises:
        ValueError if kfolds < 1.
    """
    if kfolds < 1:
        raise ValueError(f"kfolds must be at least 1, got {kfolds}")

    # Width of one rank slice
    splits = 1.0 / (kfolds + 1)

    #Preprocess Pipeline
    # NOTE(review): the indexers and the imputer are fitted on the whole of df_rank, so
    # statistics from the test folds leak into pre-processing - confirm this is acceptable.
    fit_df = pre_pipeline.fit(df_rank)
    transform_df = fit_df.transform(df_rank).persist(StorageLevel.MEMORY_AND_DISK)

    # Build the estimator and the evaluator once; both are reused for every fold.
    scaled_pipelines = scaled_pipeline(class_model)
    evaluatorf_5 = MulticlassClassificationEvaluator(metricName='fMeasureByLabel', metricLabel=1, beta=0.5)

    f_5_score_list = []
    predict = None
    try:
        for fold in range(1, kfolds + 1):
            cutoff = fold * splits
            # Pin the last upper bound to exactly 1.0 so float rounding cannot
            # exclude the rows with the highest rank.
            upper = 1.0 if fold == kfolds else cutoff + splits

            train_df = transform_df.where(f"rank <= {cutoff}").cache()
            test_df = transform_df.where(f"rank > {cutoff} and rank <= {upper}").cache()

            # Keep train_df bound to the cached frame so it can be released below.
            fit_input = downsample(train_df) if sample == 'down' else train_df

            #Generate model
            model = scaled_pipelines.fit(fit_input)
            predict = model.transform(test_df)

            #Calculate evaluation metrics
            f_5_score_list.append(evaluatorf_5.evaluate(predict))

            # Release the per-fold caches; the last test fold stays cached because the
            # returned predictions are derived from it.
            train_df.unpersist()
            if fold < kfolds:
                test_df.unpersist()
    finally:
        transform_df.unpersist()

    return predict, np.mean(f_5_score_list)

In [0]:
def downsample(train_df):
    '''Balance the classes by randomly downsampling the on-time rows (label 0)
    to roughly the number of delayed rows (label 1).

    Sampling is without replacement with a fixed seed (2022); the resulting
    on-time count is approximate because Spark samples row by row.
    If on-time rows do not outnumber delayed rows (including when there are no
    on-time rows at all) nothing is sampled. Rows whose label is neither 0 nor 1
    are dropped in every case.

    Parameters:
        train_df: DataFrame with a numeric `label` column
    Returns: DataFrame holding all label-1 rows plus the sampled label-0 rows
    '''
    train_delay = train_df.filter(col('label') == 1)
    train_on_time = train_df.filter(col('label') == 0)

    delay_count = train_delay.count()
    on_time_count = train_on_time.count()

    # Only sample when label 0 really is the majority: this avoids a division by
    # zero and a sampling fraction above 1, which Spark rejects without replacement.
    if on_time_count > delay_count:
        keep_fraction = delay_count / on_time_count
        train_on_time = train_on_time.sample(withReplacement=False, fraction=keep_fraction, seed=2022)

    return train_delay.union(train_on_time)

#Pipeline Initialization

In [0]:
#Last minute data curation...
test_pq = (
    test_pq
    # Treat empty strings in these categorical weather fields as missing values.
    .na.replace('', None, 'wnd_type')
    .na.replace('', None, 'ga1_cld')
    .na.replace('', None, 'ga1_cov')
    .withColumn('wnd_dir_angle', col('wnd_dir_angle').cast(IntegerType()))
    # Bucket ka1_temp into three categories: '0' = missing, '-1' = negative, '1' = otherwise.
    # Every branch is a string literal so the column type does not depend on implicit coercion.
    .withColumn('ka1_temp', when(f.isnull('ka1_temp'), '0').when(f.col('ka1_temp') < 0, '-1').otherwise('1'))
    # Route feature, e.g. "MSP-DFW"
    .withColumn('FLIGHT_ROUTE', concat(col('ORIGIN'), lit("-"), col('DEST')))
)

# Everything dated before 2019
df_2015_2018 = test_pq.filter(col('FLIGHT_UTC_DATE') < "2019-01-01T00:00:00.000")

# Note: each data_pull call runs a count() job to print the subset size.
df_6m = data_pull(test_pq, time_window='6m', date_col='FLIGHT_UTC_DATE')

df_3m = data_pull(test_pq, time_window='3m', date_col='FLIGHT_UTC_DATE')

# Small development subset: rows dated before 2015-02-01
df_small_test = test_pq.filter(col('FLIGHT_UTC_DATE') < "2015-02-01T00:00:00.000")

df_2019 = data_pull(test_pq, time_window='2019', date_col='FLIGHT_UTC_DATE')

In [0]:
# Number of distinct FLIGHT_ROUTE values in the pre-2019 subset.
df_2015_2018.select('FLIGHT_ROUTE').distinct().count()

In [0]:
# Rebind df_small_test to rows dated before 2015-03-01 (the curation cell above used 2015-02-01).
df_small_test = test_pq.filter(col('FLIGHT_UTC_DATE') < "2015-03-01T00:00:00.000")

In [0]:
# (row count, column count) of the small test subset.
(df_small_test.count(), len(df_small_test.columns))

##Chosen Model Columns

In [0]:
# Identifier columns. They are passed to pre_pipeline(), which does not use them.
# NOTE(review): 'rank' only exists after the percent_rank cell further down has run.
index_cols = ['UNIQUE_ID','FLIGHT_UTC_DATE', 'rank']

# Categorical features: string-indexed and one-hot encoded by pre_pipeline().
cat_cols = ['TIME_OF_DAY', 'MONTH', 'DAY_OF_WEEK', 'OP_UNIQUE_CARRIER', 'wnd_type', 'cig_ceil_is_qual', 'tmp_air_is_qual',  'slp_prs_is_qual', 'ga1_cov','ga1_cld', 'ga1_bs_ht_is_qual', 'wnd_spd_is_qual', 'ga1_cld_qual', 'dew_pnt_is_qual', 'ga1_cov_is_qual', 'aa1_is_qual', 'vis_dist_is_qual', 'ka1_temp', 'FLIGHT_ROUTE']


# Continuous features: imputed by pre_pipeline() and standardised by scaled_pipeline(),
# which reads this list as a notebook-level global.
cont_cols = ['ELEVATION', 'wnd_dir_angle', 'wnd_spd_rate', 'cig_ceil_ht', 'vis_dist', 'tmp_air', 'dew_pnt_tmp','slp_prs', 'aa1_prd_quant_hr', 'aa1_dp', 'ga1_bs_ht']

# Target column(s), passed to pre_pipeline().
pred_cols = ['DEP_DEL15']

##Test Pipeline Function

In [0]:
# Rows of the 6-month subset dated before 2015-02-01; used below to fit the test pipeline.
df_test = df_6m.filter(col('FLIGHT_UTC_DATE') < "2015-02-01T00:00:00.000")
df_test.count()

In [0]:
# Validation window: 2015-02-01 up to (not including) 2015-04-01, directly after df_test.
# `>=` keeps rows stamped exactly at the boundary, which df_test's `<` filter excludes;
# with a strict `>` those rows would belong to neither set.
df_validate = df_6m.filter((col('FLIGHT_UTC_DATE') >= "2015-02-01T00:00:00.000") &
                           (col('FLIGHT_UTC_DATE') < "2015-04-01T00:00:00.000"))

df_validate.count()

In [0]:
# Render df_test with the notebook's display helper.
display(df_test)

In [0]:
# Build the pre-processing pipeline and fit it on the training window only.
pre_pipe_test = pre_pipeline(index_cols, cont_cols, cat_cols, pred_cols)

# fit_test is reused later to transform the validation window with the same fitted stages.
fit_test = pre_pipe_test.fit(df_test)
transform_test = fit_test.transform(df_test)

# Balance the classes of the transformed training data.
df_test_ds = downsample(transform_test)
# display(df_test_ds)

In [0]:
# Single hyperparameter setting for this smoke test (one value per parameter).
param_grid = {'maxIter': 1, 'regParam': 0, 'elasticNetParam': .5}
scaled_pipelines = scaled_pipeline('lr')
# Set the hyperparameters on the classifier, which is the last stage of the pipeline.
scaled_pipelines.getStages()[-1].setParams(**param_grid)

test_model = scaled_pipelines.fit(df_test_ds)

In [0]:
# Apply the pre-processing stages fitted on df_test to the validation window.
transform_validate = fit_test.transform(df_validate)

# Score the validation rows with the trained model.
transform = test_model.transform(transform_validate)

In [0]:
#calculate F score (F-beta with beta=0.5 for label 1)
evaluatorf_5 = MulticlassClassificationEvaluator(metricName='fMeasureByLabel', metricLabel=1, beta=0.5)
f_5 = evaluatorf_5.evaluate(transform)

# '.3f' prints three decimals; the previous '3f' was only a minimum field width.
print('F Score: {:.3f}\nParam Grid: {}'.format(f_5, param_grid.items()))

In [0]:
#calculate F score (F-beta with beta=0.5 for label 1)
# NOTE(review): this cell repeats the previous one.
evaluatorf_5 = MulticlassClassificationEvaluator(metricName='fMeasureByLabel', metricLabel=1, beta=0.5)
f_5 = evaluatorf_5.evaluate(transform)

# '.3f' prints three decimals; the previous '3f' was only a minimum field width.
print('F Score: {:.3f}\nParam Grid: {}'.format(f_5, param_grid.items()))

##Test Crossfold Pipeline

In [0]:
# Create Time Splits
# Manual version of the first fold built by custom_CV: 5 folds -> 6 equal rank slices.
splits = 1/(5 + 1)
# Start the cutoff at the first slice boundary (`cutoff` is not defined before this cell).
cutoff = splits

# NOTE(review): train_test_window is assigned in a later cell ("Create Baseline Models"),
# so that cell has to be run before this one.
train_df_split = train_test_window.where(f"rank <= {cutoff}").cache()
test_df = train_test_window.where(f"rank > {cutoff} and rank <= {cutoff+splits}").cache()

In [0]:
#Initialize Pre-processing Pipeline
# pre_pipe is also the pipeline handed to custom_CV in the Logistic Regression cell below.
pre_pipe = pre_pipeline(index_cols, cont_cols, cat_cols, pred_cols)

In [0]:
# Fit the pre-processing stages on the training slice only, then transform it.
fit_test = pre_pipe.fit(train_df_split)
transform_test = fit_test.transform(train_df_split)

# Balance the classes of the transformed training slice.
df_test_ds = downsample(transform_test)

In [0]:
# Single hyperparameter setting for this smoke test (one value per parameter).
param_grid = {'maxIter': 1, 'regParam': 0, 'elasticNetParam': .5}
scaled_pipelines = scaled_pipeline('lr')
# Set the hyperparameters on the classifier, which is the last stage of the pipeline.
scaled_pipelines.getStages()[-1].setParams(**param_grid)

test_model = scaled_pipelines.fit(df_test_ds)

In [0]:
# Apply the pre-processing stages fitted on the training slice to the test slice.
transform_validate = fit_test.transform(test_df)

# Score the test slice with the trained model.
transform = test_model.transform(transform_validate)

In [0]:
#calculate F score (F-beta with beta=0.5 for label 1)
evaluatorf_5 = MulticlassClassificationEvaluator(metricName='fMeasureByLabel', metricLabel=1, beta=0.5)
f_5 = evaluatorf_5.evaluate(transform)

# '.3f' prints three decimals; the previous '3f' was only a minimum field width.
print('F Score: {:.3f}\nParam Grid: {}'.format(f_5, param_grid.items()))

#Create Baseline Models

In [0]:
#Modify to switch from test to scaled up model
# df_model is the dataset the rank column and the cross-validation below are built from.
df_model = df_small_test

In [0]:
#Add rank to allow for custom crossvalidation and windowing
# percent_rank over FLIGHT_UTC_DATE gives each row its relative position in time (0 to 1).
# NOTE(review): partitionBy() with no columns defines one window over the whole dataset;
# this presumably funnels every row through a single partition - check before scaling up.
train_test_window = df_model.withColumn("rank", percent_rank().over(Window.partitionBy().orderBy("FLIGHT_UTC_DATE")))

In [0]:
# #Create pipeline for Logistic Regression
# lr_model = scaled_pipeline('lr')

# #Create pipeline for Random Forest Classification
# rf_model = scaled_pipeline('rf')

# #Create pipeline for Decision Tree Classification
# dt_model = scaled_pipeline('dt')

##Logistic Regression Pipeline

In [0]:
# 5-fold time-ordered CV of logistic regression with downsampled training folds.
# df_predict holds the last fold's predictions; f_5 is the mean F0.5 score across folds.
df_predict, f_5 = custom_CV(train_test_window, pre_pipe, 'lr', 'down', 5)

In [0]:
# Show the mean F0.5 score returned by custom_CV.
f_5

In [0]:
# Render the last fold's predictions with the notebook's display helper.
display(df_predict)

UNIQUE_ID,FLIGHT_UTC_DATE,WEATHER_UTC_DATE,TIME_OF_DAY,STATION,NAME,MONTH,DAY_OF_WEEK,OP_UNIQUE_CARRIER,TAIL_NUM,ORIGIN,DEST,DEP_DEL15,DEP_DELAY_NEW,ARR_DELAY_NEW,CRS_ELAPSED_TIME,SOURCE,LATITUDE,LONGITUDE,ELEVATION,CALL_SIGN,wnd_dir_angle,wnd_dir_qual,wnd_type,wnd_spd_rate,wnd_spd_qual,wnd_ex,wnd_dir_is_qual,wnd_spd_is_qual,cig_ceil_ht,cig_ceil_qual,cig_ceil_det,cig_cavok,cig_ex,cig_cavok_bool,cig_ceil_is_qual,vis_dist,vis_dist_qual,vis_dist_var,vis_dist_qual_var,vis_ex,vis_dist_var_bool,vis_dist_is_qual,vis_dist_is_qual_var,tmp_air,tmp_air_qual,tmp_ex,tmp_air_is_qual,dew_pnt_tmp,dew_pnt_qual,dew_ex,dew_pnt_is_qual,slp_prs,slp_prs_qual,slp_ex,slp_prs_is_qual,aa1_prd_quant_hr,aa1_dp,aa1_cond,aa1_qual,aa1_ex,aa1_is_qual,aj1_dim,aj1_cond,aj1_qual,aj1_eq_wtr_dp,aj1_eq_wtr_cond,aj1_eq_wtr_cond_qual,aj1_ex,aj1_is_qual,aj1_eq_wtr_cond_is_qual,ga1_cov,ga1_cov_qual,ga1_bs_ht,ga1_bs_ht_qual,ga1_cld,ga1_cld_qual,ga1_ex,ga1_cov_is_qual,ga1_bs_ht_is_qual,ka1_prd_quant,ka1_code,ka1_temp,ka1_temp_qual,ka1_ex,ka1_temp_is_qual,at1_src_elem,at1_wthr,at1_wthr_abrv,at1_qual,at1_ex,at1_is_qual,ax1_atm,ax1_qual,ax1_prd_quant,ax1_prd_qual,ax1_ex,ax1_is_qual,ax1_prd_is_qual,FLIGHT_ROUTE,rank,TIME_OF_DAY_idx,MONTH_idx,DAY_OF_WEEK_idx,OP_UNIQUE_CARRIER_idx,wnd_type_idx,cig_ceil_is_qual_idx,tmp_air_is_qual_idx,slp_prs_is_qual_idx,ga1_cov_idx,ga1_cld_idx,ga1_bs_ht_is_qual_idx,wnd_spd_is_qual_idx,ga1_cld_qual_idx,dew_pnt_is_qual_idx,ga1_cov_is_qual_idx,aa1_is_qual_idx,vis_dist_is_qual_idx,ka1_temp_idx,FLIGHT_ROUTE_idx,TIME_OF_DAY_OHE,MONTH_OHE,DAY_OF_WEEK_OHE,OP_UNIQUE_CARRIER_OHE,wnd_type_OHE,cig_ceil_is_qual_OHE,tmp_air_is_qual_OHE,slp_prs_is_qual_OHE,ga1_cov_OHE,ga1_cld_OHE,ga1_bs_ht_is_qual_OHE,wnd_spd_is_qual_OHE,ga1_cld_qual_OHE,dew_pnt_is_qual_OHE,ga1_cov_is_qual_OHE,aa1_is_qual_OHE,vis_dist_is_qual_OHE,ka1_temp_OHE,FLIGHT_ROUTE_OHE,cat_features,label,scale_nums,scaledFeatures,features,rawPrediction,probability,prediction
AA-1301-2015-01-26 08:21:00,2015-01-26T14:21:00.000+0000,2015-01-26T11:53:00.000+0000,Morning,72658014922,"MINNEAPOLIS ST PAUL INTERNATIONAL AIRPORT, MN US",JAN,MONDAY,AA,N528AA,MSP,DFW,0,0,0,164,7,44.8831,-93.2289,265.8,KMSP,150,5,N,46,5,1,1,1,335,5,M,N,1,1,1,16093,5,N,5,1,1,1,1,-28,5,1,1,-50,5,1,1,10084,5,1,1,1,0,,5.0,1,1,3.0,,5.0,250.0,,9.0,1,1,1,8,5,335,5,,1,1,1,1,60.0,M,-1,1.0,1,1,,,,,1,1,,,,,1,1,1,MSP-DFW,0.8333819545738695,0.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,408.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(2), values -> 
List(1.0))","Map(vectorType -> sparse, length -> 3870, indices -> List(408), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 16, 25, 28, 30, 32, 36, 44, 46, 48, 49, 51, 53, 55, 59, 468), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",0.0,"Map(vectorType -> dense, length -> 11, values -> List(265.8, 150.0, 46.0, 335.0, 16093.0, -28.0, -50.0, 10084.0, 1.0, 0.0, 335.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.013887385846916109, -0.482164165889995, 0.2967675033175611, -0.8162237275132836, 0.7111340197335272, -0.7213405350600057, -0.37227050723632843, 0.550674592157361, -0.009607333673014263, -0.16855228591298274, -0.39800155197083564))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 27, 36, 39, 41, 43, 47, 55, 57, 59, 60, 62, 64, 66, 70, 479), values -> List(-0.013887385846916109, -0.482164165889995, 0.2967675033175611, -0.8162237275132836, 0.7111340197335272, -0.7213405350600057, -0.37227050723632843, 0.550674592157361, -0.009607333673014263, -0.16855228591298274, -0.39800155197083564, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.5265636472951348, -0.5265636472951348))","Map(vectorType -> dense, length -> 2, values -> List(0.6286812812702535, 0.37131871872974653))",0.0
AA-1390-2015-01-26 06:22:00,2015-01-26T14:22:00.000+0000,2015-01-26T11:51:00.000+0000,Morning,72290023188,"SAN DIEGO INTERNATIONAL AIRPORT, CA US",JAN,MONDAY,AA,N471AA,SAN,DFW,0,0,0,183,7,32.7336,-117.1831,4.6,KSAN,201,9,C,0,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,172,5,1,1,6,5,1,1,10149,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,2,5,4572,5,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,SAN-DFW,0.8333837665455666,0.0,0.0,2.0,4.0,1.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,354.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 
3870, indices -> List(354), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 16, 26, 28, 30, 32, 34, 44, 46, 48, 49, 51, 53, 55, 58, 414), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",0.0,"Map(vectorType -> dense, length -> 11, values -> List(4.6, 201.0, 0.0, 22000.0, 16093.0, 172.0, 6.0, 10149.0, 1.0, 0.0, 4572.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.6229740154820835, 0.020211930374550338, -1.442631875362315, 1.367860018044355, 0.7111340197335272, 1.3136162593391945, 0.18964481341467032, 0.5652828205868823, -0.009607333673014263, -0.16855228591298274, -0.25846183249998955))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 27, 37, 39, 41, 43, 45, 55, 57, 59, 60, 62, 64, 66, 69, 425), values -> List(-0.6229740154820835, 0.020211930374550338, -1.442631875362315, 1.367860018044355, 0.7111340197335272, 1.3136162593391945, 0.18964481341467032, 0.5652828205868823, -0.009607333673014263, -0.16855228591298274, -0.25846183249998955, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(1.0917112616038072, -1.0917112616038072))","Map(vectorType -> dense, length -> 2, values -> List(0.7487038263371574, 0.2512961736628426))",0.0
MQ-3430-2015-01-26 08:22:00,2015-01-26T14:22:00.000+0000,2015-01-26T11:52:00.000+0000,Morning,72546014933,"DES MOINES INTERNATIONAL AIRPORT, IA US",JAN,MONDAY,MQ,N900MQ,DSM,ORD,0,0,0,88,6,41.5338,-93.653,291.7,KDSM,220,5,N,41,5,1,1,1,427,5,M,N,1,1,1,16093,5,N,5,1,1,1,1,0,5,1,1,-30,5,1,1,0,9,1,1,1,1,,,1,1,,,,,,,1,1,1,7,5,427,5,,1,1,1,1,,,0,,1,1,,,,,1,1,,,,,1,1,1,DSM-ORD,0.8333837665455666,0.0,0.0,2.0,7.0,0.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,436.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(7), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, indices -> 
List(436), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 19, 25, 28, 30, 32, 35, 44, 46, 48, 49, 51, 53, 55, 57, 496), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",0.0,"Map(vectorType -> dense, length -> 11, values -> List(291.7, 220.0, 41.0, 427.0, 16093.0, 0.0, -30.0, 0.0, 1.0, 1.0, 427.0))","Map(vectorType -> dense, length -> 11, values -> List(0.04650826387571338, 0.20737165251232215, 0.1077023534610528, -0.8069490584806825, 0.7111340197335272, -0.4364465838441178, -0.17158646414668602, -1.715623492200991, -0.009607333673014263, -0.06976860237201812, -0.39497165954899993))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 30, 36, 39, 41, 43, 46, 55, 57, 59, 60, 62, 64, 66, 68, 507), values -> List(0.04650826387571338, 0.20737165251232215, 0.1077023534610528, -0.8069490584806825, 0.7111340197335272, -0.4364465838441178, -0.17158646414668602, -1.715623492200991, -0.009607333673014263, -0.06976860237201812, -0.39497165954899993, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.13959352926166715, -0.13959352926166715))","Map(vectorType -> dense, length -> 2, values -> List(0.5348418223433464, 0.46515817765665357))",0.0
MQ-3430-2015-01-26 08:22:00,2015-01-26T14:22:00.000+0000,2015-01-26T11:54:00.000+0000,Morning,72546014933,"DES MOINES INTERNATIONAL AIRPORT, IA US",JAN,MONDAY,MQ,N900MQ,DSM,ORD,0,0,0,88,7,41.5338,-93.653,291.7,KDSM,230,5,N,41,5,1,1,1,427,5,M,N,1,1,1,16093,5,N,5,1,1,1,1,6,5,1,1,-33,5,1,1,10147,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,7,5,427,5,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,DSM-ORD,0.8333837665455666,0.0,0.0,2.0,7.0,0.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,436.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(7), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, 
indices -> List(436), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 19, 25, 28, 30, 32, 35, 44, 46, 48, 49, 51, 53, 55, 58, 496), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",0.0,"Map(vectorType -> dense, length -> 11, values -> List(291.7, 230.0, 41.0, 427.0, 16093.0, 6.0, -33.0, 10147.0, 1.0, 0.0, 427.0))","Map(vectorType -> dense, length -> 11, values -> List(0.04650826387571338, 0.30587676942693887, 0.1077023534610528, -0.8069490584806825, 0.7111340197335272, -0.37539788001214175, -0.2016890706101324, 0.5648333366352047, -0.009607333673014263, -0.16855228591298274, -0.39497165954899993))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 30, 36, 39, 41, 43, 46, 55, 57, 59, 60, 62, 64, 66, 69, 507), values -> List(0.04650826387571338, 0.30587676942693887, 0.1077023534610528, -0.8069490584806825, 0.7111340197335272, -0.37539788001214175, -0.2016890706101324, 0.5648333366352047, -0.009607333673014263, -0.16855228591298274, -0.39497165954899993, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.359586094792114, -0.359586094792114))","Map(vectorType -> dense, length -> 2, values -> List(0.5889402355881924, 0.4110597644118076))",0.0
DL-1767-2015-01-26 09:23:00,2015-01-26T14:23:00.000+0000,2015-01-26T11:52:00.000+0000,Morning,72219013874,"ATLANTA HARTSFIELD INTERNATIONAL AIRPORT, GA US",JAN,MONDAY,DL,N957DL,ATL,FNT,0,0,0,115,7,33.6301,-84.4418,307.8,KATL,290,5,N,62,5,1,1,1,518,5,M,N,1,1,1,16093,5,N,5,1,1,1,1,67,5,1,1,39,5,1,1,10082,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,7,5,518,5,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,ATL-FNT,0.8333892024606576,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1751.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, 
length -> 3870, indices -> List(1751), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 13, 25, 28, 30, 32, 35, 44, 46, 48, 49, 51, 53, 55, 58, 1811), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",0.0,"Map(vectorType -> dense, length -> 11, values -> List(307.8, 290.0, 62.0, 518.0, 16093.0, 67.0, 39.0, 10082.0, 1.0, 0.0, 518.0))","Map(vectorType -> dense, length -> 11, values -> List(0.08405150559518586, 0.8969074709146393, 0.9017759828583876, -0.797775201068001, 0.7111340197335272, 0.2452639422796143, 0.5207734845125803, 0.5502251082056834, -0.009607333673014263, -0.16855228591298274, -0.39197470074044505))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 24, 36, 39, 41, 43, 46, 55, 57, 59, 60, 62, 64, 66, 69, 1822), values -> List(0.08405150559518586, 0.8969074709146393, 0.9017759828583876, -0.797775201068001, 0.7111340197335272, 0.2452639422796143, 0.5207734845125803, 0.5502251082056834, -0.009607333673014263, -0.16855228591298274, -0.39197470074044505, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(1.160091424330185, -1.160091424330185))","Map(vectorType -> dense, length -> 2, values -> List(0.7613493267233172, 0.23865067327668277))",0.0
UA-635-2015-01-26 06:23:00,2015-01-26T14:23:00.000+0000,2015-01-26T11:56:00.000+0000,Morning,72386023169,"LAS VEGAS MCCARRAN INTERNATIONAL AIRPORT, NV US",JAN,MONDAY,UA,N496UA,LAS,LAX,0,0,0,75,7,36.0719,-115.1634,664.5,KLAS,210,5,N,41,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,106,5,1,1,0,5,1,1,10192,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,4,5,7620,5,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,LAS-LAX,0.8333892024606576,0.0,0.0,2.0,5.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(5), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(3), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length 
-> 3870, indices -> List(6), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 17, 25, 28, 30, 32, 37, 44, 46, 48, 49, 51, 53, 55, 58, 66), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",0.0,"Map(vectorType -> dense, length -> 11, values -> List(664.5, 210.0, 41.0, 22000.0, 16093.0, 106.0, 0.0, 10192.0, 1.0, 0.0, 7620.0))","Map(vectorType -> dense, length -> 11, values -> List(0.9158325193427518, 0.1088665355977054, 0.1077023534610528, 1.367860018044355, 0.7111340197335272, 0.6420805171874583, 0.1294396004877776, 0.5749467255479502, -0.009607333673014263, -0.16855228591298274, -0.15808017922004172))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 28, 36, 39, 41, 43, 48, 55, 57, 59, 60, 62, 64, 66, 69, 77), values -> List(0.9158325193427518, 0.1088665355977054, 0.1077023534610528, 1.367860018044355, 0.7111340197335272, 0.6420805171874583, 0.1294396004877776, 0.5749467255479502, -0.009607333673014263, -0.16855228591298274, -0.15808017922004172, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.8315861463180845, -0.8315861463180845))","Map(vectorType -> dense, length -> 2, values -> List(0.6966902076303192, 0.3033097923696808))",0.0
UA-672-2015-01-26 08:24:00,2015-01-26T14:24:00.000+0000,2015-01-26T11:53:00.000+0000,Morning,72243012960,"HOUSTON INTERCONTINENTAL AIRPORT, TX US",JAN,MONDAY,UA,N813UA,IAH,ORD,0,0,4,160,7,29.98,-95.36,29.0,KIAH,310,5,N,21,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,89,5,1,1,-17,5,1,1,10224,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,4,5,7620,5,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,IAH-ORD,0.8333928264040515,0.0,0.0,2.0,5.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,179.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(5), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(3), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, 
indices -> List(179), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 17, 25, 28, 30, 32, 37, 44, 46, 48, 49, 51, 53, 55, 58, 239), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",0.0,"Map(vectorType -> dense, length -> 11, values -> List(29.0, 310.0, 21.0, 22000.0, 16093.0, 89.0, -17.0, 10224.0, 1.0, 0.0, 7620.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.5660761833109577, 1.0939177047438728, -0.6485582459649804, 1.367860018044355, 0.7111340197335272, 0.4691091896635263, -0.041141836138418444, 0.5821384687747915, -0.009607333673014263, -0.16855228591298274, -0.15808017922004172))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 28, 36, 39, 41, 43, 48, 55, 57, 59, 60, 62, 64, 66, 69, 250), values -> List(-0.5660761833109577, 1.0939177047438728, -0.6485582459649804, 1.367860018044355, 0.7111340197335272, 0.4691091896635263, -0.041141836138418444, 0.5821384687747915, -0.009607333673014263, -0.16855228591298274, -0.15808017922004172, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.6918224261350767, -0.6918224261350767))","Map(vectorType -> dense, length -> 2, values -> List(0.6663722118241444, 0.3336277881758556))",0.0
AA-1296-2015-01-26 08:24:00,2015-01-26T14:24:00.000+0000,2015-01-26T11:53:00.000+0000,Morning,72254013904,"AUSTIN BERGSTROM INTERNATIONAL AIRPORT, TX US",JAN,MONDAY,AA,N505AA,AUS,DFW,0,0,0,66,7,30.1831,-97.6799,146.3,KAUS,201,9,C,0,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,33,5,1,1,0,5,1,1,10241,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,0,5,99999,9,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,AUS-DFW,0.8333928264040515,0.0,0.0,2.0,4.0,1.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,114.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length 
-> 3870, indices -> List(114), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 16, 26, 28, 30, 32, 38, 44, 46, 48, 49, 51, 53, 55, 58, 174), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",0.0,"Map(vectorType -> dense, length -> 11, values -> List(146.3, 201.0, 0.0, 22000.0, 16093.0, 33.0, 0.0, 10241.0, 1.0, 0.0, 99999.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.2925468507833729, 0.020211930374550338, -1.442631875362315, 1.367860018044355, 0.7111340197335272, -0.10067871276824973, 0.1294396004877776, 0.5859590823640509, -0.009607333673014263, -0.16855228591298274, 2.8842940820490854))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 27, 37, 39, 41, 43, 49, 55, 57, 59, 60, 62, 64, 66, 69, 185), values -> List(-0.2925468507833729, 0.020211930374550338, -1.442631875362315, 1.367860018044355, 0.7111340197335272, -0.10067871276824973, 0.1294396004877776, 0.5859590823640509, -0.009607333673014263, -0.16855228591298274, 2.8842940820490854, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(1.4183340202863703, -1.4183340202863703))","Map(vectorType -> dense, length -> 2, values -> List(0.805077110485897, 0.19492288951410297))",0.0
AA-1184-2015-01-26 07:24:00,2015-01-26T14:24:00.000+0000,2015-01-26T11:52:00.000+0000,Morning,72365023050,"ALBUQUERQUE INTERNATIONAL AIRPORT, NM US",JAN,MONDAY,AA,N4XVAA,ABQ,DFW,0,0,3,101,7,35.0419,-106.6155,1618.5,KABQ,360,5,N,57,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,6,5,1,1,-44,5,1,1,10262,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,0,5,99999,9,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,ABQ-DFW,0.8333928264040515,0.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1244.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length 
-> 3870, indices -> List(1244), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 16, 25, 28, 30, 32, 38, 44, 46, 48, 49, 51, 53, 55, 58, 1304), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",0.0,"Map(vectorType -> dense, length -> 11, values -> List(1618.5, 360.0, 57.0, 22000.0, 16093.0, 6.0, -44.0, 10262.0, 1.0, 0.0, 99999.0))","Map(vectorType -> dense, length -> 11, values -> List(3.140444482099068, 1.5864432893169564, 0.7127108330018793, 1.367860018044355, 0.7111340197335272, -0.37539788001214175, -0.3120652943094357, 0.5906786638566655, -0.009607333673014263, -0.16855228591298274, 2.8842940820490854))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 27, 36, 39, 41, 43, 49, 55, 57, 59, 60, 62, 64, 66, 69, 1315), values -> List(3.140444482099068, 1.5864432893169564, 0.7127108330018793, 1.367860018044355, 0.7111340197335272, -0.37539788001214175, -0.3120652943094357, 0.5906786638566655, -0.009607333673014263, -0.16855228591298274, 2.8842940820490854, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(2.232945193255058, -2.232945193255058))","Map(vectorType -> dense, length -> 2, values -> List(0.9031692356348547, 0.09683076436514526))",0.0
OO-4624-2015-01-26 06:24:00,2015-01-26T14:24:00.000+0000,2015-01-26T11:49:00.000+0000,Morning,72389093193,"FRESNO YOSEMITE INTERNATIONAL, CA US",JAN,MONDAY,OO,N161PQ,FAT,SLC,0,0,0,105,7,36.78,-119.7194,101.5,KFAT,201,9,C,0,5,1,1,1,30,5,W,N,1,1,1,402,5,N,5,1,1,1,1,30,5,1,1,20,5,1,1,0,9,1,1,1,1,,,1,1,,,,,,,1,1,1,9,5,30,5,,1,1,1,1,,,0,,1,1,,,,,1,1,,,,,1,1,1,FAT-SLC,0.8333928264040515,0.0,0.0,2.0,3.0,1.0,0.0,0.0,0.0,5.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1808.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(3), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(5), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, indices -> 
List(1808), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 15, 26, 28, 30, 32, 39, 44, 46, 48, 49, 51, 53, 55, 57, 1868), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",0.0,"Map(vectorType -> dense, length -> 11, values -> List(101.5, 201.0, 0.0, 30.0, 402.0, 30.0, 20.0, 0.0, 1.0, 1.0, 30.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.39701500165494835, 0.020211930374550338, -1.442631875362315, -0.8469712715887546, -2.0183778182229957, -0.13120306468423773, 0.33012364357742, -1.715623492200991, -0.009607333673014263, -0.06976860237201812, -0.40804630402148656))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 26, 37, 39, 41, 43, 50, 55, 57, 59, 60, 62, 64, 66, 68, 1879), values -> List(-0.39701500165494835, 0.020211930374550338, -1.442631875362315, -0.8469712715887546, -2.0183778182229957, -0.13120306468423773, 0.33012364357742, -1.715623492200991, -0.009607333673014263, -0.06976860237201812, -0.40804630402148656, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.9721498116213775, -0.9721498116213775))","Map(vectorType -> dense, length -> 2, values -> List(0.7255477934023233, 0.27445220659767666))",0.0


In [0]:
# Tally the rows for every (label, prediction) combination and print the
# resulting count table to the cell output.
(
    df_predict
    .groupBy('label', 'prediction')
    .count()
    .show()
)

In [0]:
# Keep only the rows predicted as class 0 whose label disagrees with that
# prediction (presumably the false negatives, going by the name FN).
FN = df_predict.where(
    'prediction = 0 AND label <> prediction'
)

In [0]:
# Render the FN DataFrame as a notebook table for manual inspection.
display(FN)

UNIQUE_ID,FLIGHT_UTC_DATE,WEATHER_UTC_DATE,TIME_OF_DAY,STATION,NAME,MONTH,DAY_OF_WEEK,OP_UNIQUE_CARRIER,TAIL_NUM,ORIGIN,DEST,DEP_DEL15,DEP_DELAY_NEW,ARR_DELAY_NEW,CRS_ELAPSED_TIME,SOURCE,LATITUDE,LONGITUDE,ELEVATION,CALL_SIGN,wnd_dir_angle,wnd_dir_qual,wnd_type,wnd_spd_rate,wnd_spd_qual,wnd_ex,wnd_dir_is_qual,wnd_spd_is_qual,cig_ceil_ht,cig_ceil_qual,cig_ceil_det,cig_cavok,cig_ex,cig_cavok_bool,cig_ceil_is_qual,vis_dist,vis_dist_qual,vis_dist_var,vis_dist_qual_var,vis_ex,vis_dist_var_bool,vis_dist_is_qual,vis_dist_is_qual_var,tmp_air,tmp_air_qual,tmp_ex,tmp_air_is_qual,dew_pnt_tmp,dew_pnt_qual,dew_ex,dew_pnt_is_qual,slp_prs,slp_prs_qual,slp_ex,slp_prs_is_qual,aa1_prd_quant_hr,aa1_dp,aa1_cond,aa1_qual,aa1_ex,aa1_is_qual,aj1_dim,aj1_cond,aj1_qual,aj1_eq_wtr_dp,aj1_eq_wtr_cond,aj1_eq_wtr_cond_qual,aj1_ex,aj1_is_qual,aj1_eq_wtr_cond_is_qual,ga1_cov,ga1_cov_qual,ga1_bs_ht,ga1_bs_ht_qual,ga1_cld,ga1_cld_qual,ga1_ex,ga1_cov_is_qual,ga1_bs_ht_is_qual,ka1_prd_quant,ka1_code,ka1_temp,ka1_temp_qual,ka1_ex,ka1_temp_is_qual,at1_src_elem,at1_wthr,at1_wthr_abrv,at1_qual,at1_ex,at1_is_qual,ax1_atm,ax1_qual,ax1_prd_quant,ax1_prd_qual,ax1_ex,ax1_is_qual,ax1_prd_is_qual,FLIGHT_ROUTE,rank,TIME_OF_DAY_idx,MONTH_idx,DAY_OF_WEEK_idx,OP_UNIQUE_CARRIER_idx,wnd_type_idx,cig_ceil_is_qual_idx,tmp_air_is_qual_idx,slp_prs_is_qual_idx,ga1_cov_idx,ga1_cld_idx,ga1_bs_ht_is_qual_idx,wnd_spd_is_qual_idx,ga1_cld_qual_idx,dew_pnt_is_qual_idx,ga1_cov_is_qual_idx,aa1_is_qual_idx,vis_dist_is_qual_idx,ka1_temp_idx,FLIGHT_ROUTE_idx,TIME_OF_DAY_OHE,MONTH_OHE,DAY_OF_WEEK_OHE,OP_UNIQUE_CARRIER_OHE,wnd_type_OHE,cig_ceil_is_qual_OHE,tmp_air_is_qual_OHE,slp_prs_is_qual_OHE,ga1_cov_OHE,ga1_cld_OHE,ga1_bs_ht_is_qual_OHE,wnd_spd_is_qual_OHE,ga1_cld_qual_OHE,dew_pnt_is_qual_OHE,ga1_cov_is_qual_OHE,aa1_is_qual_OHE,vis_dist_is_qual_OHE,ka1_temp_OHE,FLIGHT_ROUTE_OHE,cat_features,label,scale_nums,scaledFeatures,features,rawPrediction,probability,prediction
US-1931-2015-01-26 09:25:00,2015-01-26T14:25:00.000+0000,2015-01-26T11:52:00.000+0000,Morning,72314013881,"CHARLOTTE DOUGLAS AIRPORT, NC US",JAN,MONDAY,US,N754UW,CLT,MEM,1,15,7,112,7,35.2236,-80.9552,221.9,KCLT,150,5,N,21,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,44,5,1,1,0,5,1,1,10037,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,4,5,2591,5,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,CLT-MEM,0.8334109461210216,0.0,0.0,2.0,6.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1917.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(6), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(3), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, 
indices -> List(1917), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 18, 25, 28, 30, 32, 37, 44, 46, 48, 49, 51, 53, 55, 58, 1977), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",1.0,"Map(vectorType -> dense, length -> 11, values -> List(221.9, 150.0, 21.0, 22000.0, 16093.0, 44.0, 0.0, 10037.0, 1.0, 0.0, 2591.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.11530733610254852, -0.4848676957561217, -0.6486253108991549, 1.3673899145858197, 0.7100846408177613, 0.009751321085943588, 0.12913890403886272, 0.5394009378440807, -0.010108098709965605, -0.16631886394289552, -0.32458724676103007))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 29, 36, 39, 41, 43, 48, 55, 57, 59, 60, 62, 64, 66, 69, 1988), values -> List(-0.11530733610254852, -0.4848676957561217, -0.6486253108991549, 1.3673899145858197, 0.7100846408177613, 0.009751321085943588, 0.12913890403886272, 0.5394009378440807, -0.010108098709965605, -0.16631886394289552, -0.32458724676103007, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(1.4642485479259433, -1.4642485479259433))","Map(vectorType -> dense, length -> 2, values -> List(0.8121816192195411, 0.18781838078045887))",0.0
MQ-2967-2015-01-26 08:25:00,2015-01-26T14:25:00.000+0000,2015-01-26T11:53:00.000+0000,Morning,72259003927,"DAL FTW WSCMO AIRPORT, TX US",JAN,MONDAY,MQ,N923MQ,DFW,SAV,1,18,23,133,7,32.8978,-97.0189,170.7,KDFW,10,5,N,21,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,28,5,1,1,-33,5,1,1,10223,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,0,5,99999,9,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,DFW-SAV,0.8334109461210216,0.0,0.0,2.0,7.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1600.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(7), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, 
indices -> List(1600), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 19, 25, 28, 30, 32, 38, 44, 46, 48, 49, 51, 53, 55, 58, 1660), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",1.0,"Map(vectorType -> dense, length -> 11, values -> List(170.7, 10.0, 21.0, 22000.0, 16093.0, 28.0, -33.0, 10223.0, 1.0, 0.0, 99999.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.23474977002909184, -1.863816065454715, -0.6486253108991549, 1.3673899145858197, 0.7100846408177613, -0.1527737517331995, -0.20160550534602295, 0.5812284792363897, -0.010108098709965605, -0.16631886394289552, 2.878951261840257))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 30, 36, 39, 41, 43, 49, 55, 57, 59, 60, 62, 64, 66, 69, 1671), values -> List(-0.23474977002909184, -1.863816065454715, -0.6486253108991549, 1.3673899145858197, 0.7100846408177613, -0.1527737517331995, -0.20160550534602295, 0.5812284792363897, -0.010108098709965605, -0.16631886394289552, 2.878951261840257, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.9581793527115964, -0.9581793527115964))","Map(vectorType -> dense, length -> 2, values -> List(0.7227571331665881, 0.2772428668334119))",0.0
AA-1667-2015-01-26 08:25:00,2015-01-26T14:25:00.000+0000,2015-01-26T11:53:00.000+0000,Morning,72259003927,"DAL FTW WSCMO AIRPORT, TX US",JAN,MONDAY,AA,N424AA,DFW,MSY,1,22,17,80,7,32.8978,-97.0189,170.7,KDFW,10,5,N,21,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,28,5,1,1,-33,5,1,1,10223,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,0,5,99999,9,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,DFW-MSY,0.8334109461210216,0.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,389.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, 
indices -> List(389), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 16, 25, 28, 30, 32, 38, 44, 46, 48, 49, 51, 53, 55, 58, 449), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",1.0,"Map(vectorType -> dense, length -> 11, values -> List(170.7, 10.0, 21.0, 22000.0, 16093.0, 28.0, -33.0, 10223.0, 1.0, 0.0, 99999.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.23474977002909184, -1.863816065454715, -0.6486253108991549, 1.3673899145858197, 0.7100846408177613, -0.1527737517331995, -0.20160550534602295, 0.5812284792363897, -0.010108098709965605, -0.16631886394289552, 2.878951261840257))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 27, 36, 39, 41, 43, 49, 55, 57, 59, 60, 62, 64, 66, 69, 460), values -> List(-0.23474977002909184, -1.863816065454715, -0.6486253108991549, 1.3673899145858197, 0.7100846408177613, -0.1527737517331995, -0.20160550534602295, 0.5812284792363897, -0.010108098709965605, -0.16631886394289552, 2.878951261840257, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(1.4539338969440887, -1.4539338969440887))","Map(vectorType -> dense, length -> 2, values -> List(0.8106031263406086, 0.1893968736593914))",0.0
WN-452-2015-01-26 08:25:00,2015-01-26T14:25:00.000+0000,2015-01-26T11:51:00.000+0000,Morning,72253012921,"SAN ANTONIO INTERNATIONAL AIRPORT, TX US",JAN,MONDAY,WN,N7705A,SAT,BWI,1,27,49,180,7,29.5443,-98.4839,240.5,KSAT,320,5,N,26,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,44,5,1,1,6,5,1,1,10232,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,4,5,7620,5,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,SAT-BWI,0.8334109461210216,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2593.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(3), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 
3870, indices -> List(2593), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 12, 25, 28, 30, 32, 37, 44, 46, 48, 49, 51, 53, 55, 58, 2653), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",1.0,"Map(vectorType -> dense, length -> 11, values -> List(240.5, 320.0, 26.0, 22000.0, 16093.0, 44.0, 6.0, 10232.0, 1.0, 0.0, 7620.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.07191613940267148, 1.1895696103064557, -0.45920347598269057, 1.3673899145858197, 0.7100846408177613, 0.009751321085943588, 0.18927425119975105, 0.5832523925295658, -0.010108098709965605, -0.16631886394289552, -0.15919431024908165))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 23, 36, 39, 41, 43, 48, 55, 57, 59, 60, 62, 64, 66, 69, 2664), values -> List(-0.07191613940267148, 1.1895696103064557, -0.45920347598269057, 1.3673899145858197, 0.7100846408177613, 0.009751321085943588, 0.18927425119975105, 0.5832523925295658, -0.010108098709965605, -0.16631886394289552, -0.15919431024908165, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.4964667053971591, -0.4964667053971591))","Map(vectorType -> dense, length -> 2, values -> List(0.6216286352885242, 0.3783713647114758))",0.0
DL-2467-2015-01-26 09:30:00,2015-01-26T14:30:00.000+0000,2015-01-26T11:52:00.000+0000,Morning,72219013874,"ATLANTA HARTSFIELD INTERNATIONAL AIRPORT, GA US",JAN,MONDAY,DL,N950DN,ATL,DAY,1,16,12,96,7,33.6301,-84.4418,307.8,KATL,290,5,N,62,5,1,1,1,518,5,M,N,1,1,1,16093,5,N,5,1,1,1,1,67,5,1,1,39,5,1,1,10082,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,7,5,518,5,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,ATL-DAY,0.8335377841398117,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1251.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, 
length -> 3870, indices -> List(1251), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 13, 25, 28, 30, 32, 35, 44, 46, 48, 49, 51, 53, 55, 58, 1311), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",1.0,"Map(vectorType -> dense, length -> 11, values -> List(307.8, 290.0, 62.0, 518.0, 16093.0, 67.0, 39.0, 10082.0, 1.0, 0.0, 518.0))","Map(vectorType -> dense, length -> 11, values -> List(0.08508534112967937, 0.8940806739424715, 0.9046337354158529, -0.7981046385041479, 0.7100846408177613, 0.24338111326346176, 0.5200186605846367, 0.5495205043099619, -0.010108098709965605, -0.16631886394289552, -0.39276373460936354))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 24, 36, 39, 41, 43, 46, 55, 57, 59, 60, 62, 64, 66, 69, 1322), values -> List(0.08508534112967937, 0.8940806739424715, 0.9046337354158529, -0.7981046385041479, 0.7100846408177613, 0.24338111326346176, 0.5200186605846367, 0.5495205043099619, -0.010108098709965605, -0.16631886394289552, -0.39276373460936354, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(1.9662276112130979, -1.9662276112130979))","Map(vectorType -> dense, length -> 2, values -> List(0.877205343938328, 0.12279465606167195))",0.0
US-413-2015-01-26 09:30:00,2015-01-26T14:30:00.000+0000,2015-01-26T11:52:00.000+0000,Morning,72314013881,"CHARLOTTE DOUGLAS AIRPORT, NC US",JAN,MONDAY,US,N558UW,CLT,ATL,1,19,8,77,7,35.2236,-80.9552,221.9,KCLT,150,5,N,21,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,44,5,1,1,0,5,1,1,10037,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,4,5,2591,5,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,CLT-ATL,0.8335377841398117,0.0,0.0,2.0,6.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,47.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(6), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(3), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, 
indices -> List(47), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 18, 25, 28, 30, 32, 37, 44, 46, 48, 49, 51, 53, 55, 58, 107), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",1.0,"Map(vectorType -> dense, length -> 11, values -> List(221.9, 150.0, 21.0, 22000.0, 16093.0, 44.0, 0.0, 10037.0, 1.0, 0.0, 2591.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.11530733610254852, -0.4848676957561217, -0.6486253108991549, 1.3673899145858197, 0.7100846408177613, 0.009751321085943588, 0.12913890403886272, 0.5394009378440807, -0.010108098709965605, -0.16631886394289552, -0.32458724676103007))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 29, 36, 39, 41, 43, 48, 55, 57, 59, 60, 62, 64, 66, 69, 118), values -> List(-0.11530733610254852, -0.4848676957561217, -0.6486253108991549, 1.3673899145858197, 0.7100846408177613, 0.009751321085943588, 0.12913890403886272, 0.5394009378440807, -0.010108098709965605, -0.16631886394289552, -0.32458724676103007, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(1.411405949310274, -1.411405949310274))","Map(vectorType -> dense, length -> 2, values -> List(0.8039876040455779, 0.1960123959544221))",0.0
DL-442-2015-01-26 09:30:00,2015-01-26T14:30:00.000+0000,2015-01-26T11:51:00.000+0000,Morning,74486094789,"JFK INTERNATIONAL AIRPORT, NY US",JAN,MONDAY,DL,N702TW,JFK,SLC,1,45,38,334,7,40.6386,-73.7622,3.4,KJFK,50,5,N,62,5,1,1,1,1006,5,M,N,1,1,1,14484,5,N,5,1,1,1,1,-28,5,1,1,-117,5,1,1,10164,5,1,1,1,0,2.0,5.0,1,1,,,,,,,1,1,1,8,5,1006,5,,1,1,1,1,60.0,M,-1,1.0,1,1,,,,,1,1,,,,,1,1,1,JFK-SLC,0.8335377841398117,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,948.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 
3870, indices -> List(948), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 13, 25, 28, 30, 32, 36, 44, 46, 48, 49, 51, 53, 55, 59, 1008), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",1.0,"Map(vectorType -> dense, length -> 11, values -> List(3.4, 50.0, 62.0, 1006.0, 14484.0, -28.0, -117.0, 10164.0, 1.0, 0.0, 1006.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.6250372543242225, -1.4698308169694025, 0.9046337354158529, -0.7489117634502468, 0.42998690095205516, -0.7216115066002003, -1.0435003655984594, 0.5679606032033454, -0.010108098709965605, -0.16631886394289552, -0.37671446974202794))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 24, 36, 39, 41, 43, 47, 55, 57, 59, 60, 62, 64, 66, 70, 1019), values -> List(-0.6250372543242225, -1.4698308169694025, 0.9046337354158529, -0.7489117634502468, 0.42998690095205516, -0.7216115066002003, -1.0435003655984594, 0.5679606032033454, -0.010108098709965605, -0.16631886394289552, -0.37671446974202794, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.028018145594610046, -0.028018145594610046))","Map(vectorType -> dense, length -> 2, values -> List(0.5070040782115771, 0.4929959217884229))",0.0
DL-422-2015-01-26 09:30:00,2015-01-26T14:30:00.000+0000,2015-01-26T11:51:00.000+0000,Morning,74486094789,"JFK INTERNATIONAL AIRPORT, NY US",JAN,MONDAY,DL,N712TW,JFK,LAX,1,46,19,395,7,40.6386,-73.7622,3.4,KJFK,50,5,N,62,5,1,1,1,1006,5,M,N,1,1,1,14484,5,N,5,1,1,1,1,-28,5,1,1,-117,5,1,1,10164,5,1,1,1,0,2.0,5.0,1,1,,,,,,,1,1,1,8,5,1006,5,,1,1,1,1,60.0,M,-1,1.0,1,1,,,,,1,1,,,,,1,1,1,JFK-LAX,0.8335377841398117,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, 
indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 13, 25, 28, 30, 32, 36, 44, 46, 48, 49, 51, 53, 55, 59, 60), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",1.0,"Map(vectorType -> dense, length -> 11, values -> List(3.4, 50.0, 62.0, 1006.0, 14484.0, -28.0, -117.0, 10164.0, 1.0, 0.0, 1006.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.6250372543242225, -1.4698308169694025, 0.9046337354158529, -0.7489117634502468, 0.42998690095205516, -0.7216115066002003, -1.0435003655984594, 0.5679606032033454, -0.010108098709965605, -0.16631886394289552, -0.37671446974202794))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 24, 36, 39, 41, 43, 47, 55, 57, 59, 60, 62, 64, 66, 70, 71), values -> List(-0.6250372543242225, -1.4698308169694025, 0.9046337354158529, -0.7489117634502468, 0.42998690095205516, -0.7216115066002003, -1.0435003655984594, 0.5679606032033454, -0.010108098709965605, -0.16631886394289552, -0.37671446974202794, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.8321702657273431, -0.8321702657273431))","Map(vectorType -> dense, length -> 2, values -> List(0.6968136254499286, 0.30318637455007136))",0.0
US-1809-2015-01-26 09:30:00,2015-01-26T14:30:00.000+0000,2015-01-26T11:54:00.000+0000,Morning,72509014739,"BOSTON, MA US",JAN,MONDAY,US,N956UW,BOS,PHL,1,54,47,96,7,42.3606,-71.0097,3.7,KBOS,330,5,N,41,5,1,1,1,3962,5,M,N,1,1,1,16093,5,N,5,1,1,1,1,-117,5,1,1,-189,5,1,1,10208,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,2,5,2134,5,,1,1,1,1,60.0,M,-1,1.0,1,1,,,,,1,1,,,,,1,1,1,BOS-PHL,0.8335377841398117,0.0,0.0,2.0,6.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,119.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(6), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, indices -> 
List(119), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 18, 25, 28, 30, 32, 34, 44, 46, 48, 49, 51, 53, 55, 59, 179), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",1.0,"Map(vectorType -> dense, length -> 11, values -> List(3.7, 330.0, 41.0, 3962.0, 16093.0, -117.0, -189.0, 10208.0, 1.0, 0.0, 2134.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.6243373963129342, 1.288065922427784, 0.10906202876670255, -0.4509319711155506, 0.7100846408177613, -1.6256572241566838, -1.765124531529119, 0.5778552904144293, -0.010108098709965605, -0.16631886394289552, -0.33961698865523576))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 29, 36, 39, 41, 43, 45, 55, 57, 59, 60, 62, 64, 66, 70, 190), values -> List(-0.6243373963129342, 1.288065922427784, 0.10906202876670255, -0.4509319711155506, 0.7100846408177613, -1.6256572241566838, -1.765124531529119, 0.5778552904144293, -0.010108098709965605, -0.16631886394289552, -0.33961698865523576, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(0.22341376903977217, -0.22341376903977217))","Map(vectorType -> dense, length -> 2, values -> List(0.5556222749796159, 0.4443777250203841))",0.0
AA-2385-2015-01-26 08:30:00,2015-01-26T14:30:00.000+0000,2015-01-26T11:53:00.000+0000,Morning,72259003927,"DAL FTW WSCMO AIRPORT, TX US",JAN,MONDAY,AA,N548AA,DFW,JAX,1,62,60,130,7,32.8978,-97.0189,170.7,KDFW,10,5,N,21,5,1,1,1,22000,5,,N,1,1,1,16093,5,N,5,1,1,1,1,28,5,1,1,-33,5,1,1,10223,5,1,1,1,0,,5.0,1,1,,,,,,,1,1,1,0,5,99999,9,,1,1,1,1,60.0,M,1,1.0,1,1,,,,,1,1,,,,,1,1,1,DFW-JAX,0.8335377841398117,0.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1268.0,"Map(vectorType -> sparse, length -> 4, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(2), values -> List(1.0))","Map(vectorType -> sparse, length -> 13, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 7, indices -> List(4), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(), values -> List())","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 1, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 2, indices -> List(0), values -> List(1.0))","Map(vectorType -> sparse, length -> 3, indices -> List(1), values -> List(1.0))","Map(vectorType -> sparse, length -> 3870, 
indices -> List(1268), values -> List(1.0))","Map(vectorType -> sparse, length -> 3930, indices -> List(0, 4, 7, 16, 25, 28, 30, 32, 38, 44, 46, 48, 49, 51, 53, 55, 58, 1328), values -> List(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))",1.0,"Map(vectorType -> dense, length -> 11, values -> List(170.7, 10.0, 21.0, 22000.0, 16093.0, 28.0, -33.0, 10223.0, 1.0, 0.0, 99999.0))","Map(vectorType -> dense, length -> 11, values -> List(-0.23474977002909184, -1.863816065454715, -0.6486253108991549, 1.3673899145858197, 0.7100846408177613, -0.1527737517331995, -0.20160550534602295, 0.5812284792363897, -0.010108098709965605, -0.16631886394289552, 2.878951261840257))","Map(vectorType -> sparse, length -> 3941, indices -> List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 18, 27, 36, 39, 41, 43, 49, 55, 57, 59, 60, 62, 64, 66, 69, 1339), values -> List(-0.23474977002909184, -1.863816065454715, -0.6486253108991549, 1.3673899145858197, 0.7100846408177613, -0.1527737517331995, -0.20160550534602295, 0.5812284792363897, -0.010108098709965605, -0.16631886394289552, 2.878951261840257, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))","Map(vectorType -> dense, length -> 2, values -> List(1.2072026254195873, -1.2072026254195873))","Map(vectorType -> dense, length -> 2, values -> List(0.7698036116303606, 0.2301963883696394))",0.0


##Decision Tree Pipeline

In [0]:
# Run custom_CV (defined outside this section) with the decision tree model
# and keep its three results: the evaluation table, a metric, and a best model.
# NOTE(review): 'down' presumably selects majority-class downsampling and 5 the
# number of folds (the results table shows 5 folds) — confirm against custom_CV.
Eval_dt, metric_dt, best_model_dt = custom_CV(train_test_window, pre_pipe, dt_model, 'down', 5)

In [0]:
# Bare expression: the notebook renders the decision tree evaluation table
# (per-fold F_0.5 score, recall and precision) as this cell's output.
Eval_dt

Unnamed: 0,K-Fold,F_0.5 Score,Recall,Precision
0,1,0.266504,0.657183,0.232021
1,2,0.311281,0.653145,0.275262
2,3,0.317414,0.67554,0.280269
3,4,0.301232,0.659018,0.265233
4,5,0.321497,0.689491,0.28365


##Random Forest Pipeline

In [0]:
# Run custom_CV (defined outside this section) with the random forest model
# and keep its three results: the evaluation table, a metric, and a best model.
# NOTE(review): 'down' presumably selects majority-class downsampling and 5 the
# number of folds (the results table shows 5 folds) — confirm against custom_CV.
Eval_rf, metric_rf, best_model_rf = custom_CV(train_test_window, pre_pipe, rf_model, 'down', 5)

In [0]:
# Bare expression: the notebook renders the random forest evaluation table
# (per-fold F_0.5 score, recall and precision) as this cell's output.
Eval_rf

Unnamed: 0,K-Fold,F_0.5 Score,Recall,Precision
0,1,0.262586,0.617124,0.229608
1,2,0.304598,0.652695,0.268764
2,3,0.313122,0.642113,0.277569
3,4,0.302325,0.594569,0.26924
4,5,0.319489,0.69461,0.281485


##Test on unseen data

In [0]:
# Fit the preprocessing pipeline on df_model only, then apply the fitted
# stages to df_2019 so the test data does not influence the fit.
# NOTE(review): df_2019 is presumably the held-out 2019 flights — confirm
# where df_model and df_2019 are built.
pre_pipeline = pre_pipe.fit(df_model)
transform_2019 = pre_pipeline.transform(df_2019)

In [0]:
# Score the preprocessed 2019 data with best_model_lr.
# NOTE(review): best_model_lr is created in a cell outside this section,
# presumably by custom_CV like the dt/rf models — confirm.
lr_pred_2019 = best_model_lr.transform(transform_2019)

In [0]:
# Calculate F-score, recall and precision for label 1 on lr_pred_2019.
# beta=0.5 makes the F-measure weight precision more heavily than recall.
lr_evaluatorf_5 = MulticlassClassificationEvaluator(
    metricName='fMeasureByLabel', metricLabel=1, beta=0.5
)
lr_f_5 = lr_evaluatorf_5.evaluate(lr_pred_2019)

lr_evaluator_recall = MulticlassClassificationEvaluator(
    metricName='recallByLabel', metricLabel=1
)
lr_recall = lr_evaluator_recall.evaluate(lr_pred_2019)

lr_evaluator_precision = MulticlassClassificationEvaluator(
    metricName='precisionByLabel', metricLabel=1
)
lr_precision = lr_evaluator_precision.evaluate(lr_pred_2019)

# '.3f' prints three decimal places; a bare '3f' would only set a minimum
# field width of 3 and fall back to the default six decimals.
print('F Score: {:.3f}\nRecall: {:.3f}\nPrecision: {:.3f}'.format(lr_f_5, lr_recall, lr_precision))

In [0]:
# Score the preprocessed 2019 data with the decision tree model returned by
# the custom_CV call in the Decision Tree Pipeline section.
dt_pred_2019 = best_model_dt.transform(transform_2019)

In [0]:
# Calculate F-score, recall and precision for label 1 on dt_pred_2019.
# beta=0.5 makes the F-measure weight precision more heavily than recall.
dt_evaluatorf_5 = MulticlassClassificationEvaluator(
    metricName='fMeasureByLabel', metricLabel=1, beta=0.5
)
dt_f_5 = dt_evaluatorf_5.evaluate(dt_pred_2019)

dt_evaluator_recall = MulticlassClassificationEvaluator(
    metricName='recallByLabel', metricLabel=1
)
dt_recall = dt_evaluator_recall.evaluate(dt_pred_2019)

dt_evaluator_precision = MulticlassClassificationEvaluator(
    metricName='precisionByLabel', metricLabel=1
)
dt_precision = dt_evaluator_precision.evaluate(dt_pred_2019)

# '.3f' prints three decimal places; a bare '3f' would only set a minimum
# field width of 3 and fall back to the default six decimals.
print('F Score: {:.3f}\nRecall: {:.3f}\nPrecision: {:.3f}'.format(dt_f_5, dt_recall, dt_precision))

In [0]:
# Score the preprocessed 2019 data with the random forest model returned by
# the custom_CV call in the Random Forest Pipeline section.
rf_pred_2019 = best_model_rf.transform(transform_2019)

In [0]:
# Calculate F-score, recall and precision for label 1 on rf_pred_2019.
# beta=0.5 makes the F-measure weight precision more heavily than recall.
rf_evaluatorf_5 = MulticlassClassificationEvaluator(
    metricName='fMeasureByLabel', metricLabel=1, beta=0.5
)
rf_f_5 = rf_evaluatorf_5.evaluate(rf_pred_2019)

rf_evaluator_recall = MulticlassClassificationEvaluator(
    metricName='recallByLabel', metricLabel=1
)
rf_recall = rf_evaluator_recall.evaluate(rf_pred_2019)

rf_evaluator_precision = MulticlassClassificationEvaluator(
    metricName='precisionByLabel', metricLabel=1
)
rf_precision = rf_evaluator_precision.evaluate(rf_pred_2019)

# '.3f' prints three decimal places; a bare '3f' would only set a minimum
# field width of 3 and fall back to the default six decimals.
print('F Score: {:.3f}\nRecall: {:.3f}\nPrecision: {:.3f}'.format(rf_f_5, rf_recall, rf_precision))

#References

 - https://machinelearningmastery.com/k-fold-cross-validation/
 - https://www.analyticsvidhya.com/blog/2019/11/build-machine-learning-pipelines-pyspark/
 - https://medium.com/@junwan01/oversampling-and-undersampling-with-pyspark-5dbc25cdf253