# Data Load

## Procedure

In [1]:
import re
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Shared timing state: every checkpoint taken during the run, in order, plus the
# index of the checkpoint that opened the current notebook section.
runtime_log = []
section_flag = 0

def log_time():
  """Record the current wall-clock time in runtime_log and return it."""
  stamp = time.time()
  runtime_log.append(stamp)
  return stamp

def time_flag(note = 'Process complete!', frum = -2, to = -1, save_flag = False):
  """Take a checkpoint, then print the time elapsed between two checkpoints.

  The indices are resolved after the new checkpoint has been appended, so the
  defaults measure from the previous checkpoint to the one taken by this call.

  Args:
    note: Message printed in front of the elapsed time.
    frum: Index into runtime_log of the starting checkpoint.
    to: Index into runtime_log of the ending checkpoint.
    save_flag: When true, return the index of the checkpoint taken by this call.

  Returns:
    The index of the newest runtime_log entry if save_flag is true, else None.
  """
  log_time()
  elapsed = runtime_log[to] - runtime_log[frum]
  whole_minutes = np.floor(elapsed / 60)
  leftover_seconds = elapsed % 60
  print(f'\n{note} ({whole_minutes} minutes and {leftover_seconds} seconds)')
  return len(runtime_log) - 1 if save_flag else None

In [3]:
# Install the Kaggle command-line client and download the flight-prices
# dataset archive; both shell steps are timed together.
log_time()
!pip install kaggle
!kaggle datasets download -d dilwong/flightprices
time_flag()

Dataset URL: https://www.kaggle.com/datasets/dilwong/flightprices
License(s): Attribution 4.0 International (CC BY 4.0)
Downloading flightprices.zip to /content
100% 5.51G/5.51G [02:25<00:00, 45.4MB/s]
100% 5.51G/5.51G [02:25<00:00, 40.7MB/s]

Process complete! (2.0 minutes and 35.344008684158325 seconds)


In [4]:
# Extract the downloaded archive. `-n` tells unzip never to overwrite files
# that already exist, so re-running this cell does not extract again.
log_time()
!unzip -n flightprices.zip
time_flag()

Archive:  flightprices.zip
  inflating: itineraries.csv         

Process complete! (6.0 minutes and 21.258015871047974 seconds)


In [5]:
# Install PySpark into the runtime (timed).
log_time()
!pip install pyspark
time_flag()


Process complete! (0.0 minutes and 6.2746641635894775 seconds)


In [6]:
log_time()
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
# NOTE: this wildcard import brings Spark column functions (col, udf, datediff,
# dayofweek, month, ...) into the notebook namespace; it also rebinds some
# Python built-in names such as `sum`, `max` and `min` to their Spark versions.
from pyspark.sql.functions import *

# Stop a SparkContext left over from an earlier run of this cell so a fresh one
# can be created below. On a new kernel `sc` does not exist yet, which is the
# expected case and is ignored silently; any other failure is still treated as
# best-effort but is reported instead of being hidden.
try:
  sc.stop()
except NameError:
  pass
except Exception as err:
  print(f'Ignoring error while stopping the previous SparkContext: {err!r}')
sc = SparkContext()
sqlContext = SQLContext(sc)
time_flag()




Process complete! (0.0 minutes and 18.824759006500244 seconds)


In [7]:
# Obtain a SparkSession via the builder's getOrCreate() (timed).
# NOTE(review): `ss` is not referenced by any other cell visible in this
# notebook; the CSV is read through `sqlContext` instead.
log_time()
ss = SparkSession.builder.getOrCreate()
time_flag()


Process complete! (0.0 minutes and 0.06793665885925293 seconds)


In [8]:
# Load the extracted itineraries file, taking column names from its header row.
# No schema is supplied here; numeric columns are cast explicitly in a later
# preprocessing cell.
log_time()
df = sqlContext.read.csv('itineraries.csv', header = True)
time_flag()


Process complete! (0.0 minutes and 28.551556825637817 seconds)


In [9]:
# Print a preview of the raw itineraries table (timed).
log_time()
df.show()
time_flag()

+--------------------+----------+----------+---------------+------------------+-------------+--------------+-----------+--------------+------------+---------+--------+---------+--------------+-------------------+---------------------------------+------------------------+-------------------------------+----------------------+--------------------------+----------------------------+--------------------+-------------------+----------------------------+-------------------------+----------------+-----------------+
|               legId|searchDate|flightDate|startingAirport|destinationAirport|fareBasisCode|travelDuration|elapsedDays|isBasicEconomy|isRefundable|isNonStop|baseFare|totalFare|seatsRemaining|totalTravelDistance|segmentsDepartureTimeEpochSeconds|segmentsDepartureTimeRaw|segmentsArrivalTimeEpochSeconds|segmentsArrivalTimeRaw|segmentsArrivalAirportCode|segmentsDepartureAirportCode| segmentsAirlineName|segmentsAirlineCode|segmentsEquipmentDescription|segmentsDurationInSeconds|segments

## Summary
(View runtime of procedure following execution)

In [10]:
# Report the time elapsed since the checkpoint indexed by section_flag, then
# store the index of the new checkpoint as the start of the next section.
section_flag = time_flag(note = 'Data Load Complete!', frum = section_flag, save_flag = True)


Data Load Complete! (9.0 minutes and 52.26051378250122 seconds)


# Data Partitioning/Preprocessing

New approach: begin by grouping itineraries by their number of flight legs. Because the number of legs determines how many per-leg features an itinerary has, this grouping implicitly partitions the data by feature-space dimension, so an intuitive next step is to train one model per partition. When employing this strategy, one must check that the distribution of partition sizes is appropriately balanced.

## Procedure

In [11]:
# Spark ML building blocks for the rest of the notebook (import time is logged).
# The wildcard import from pyspark.ml.feature supplies the transformers used in
# later cells: Imputer, StringIndexer, OneHotEncoder, VectorAssembler,
# StandardScaler, PCA and PCAModel.
log_time()
from pyspark.ml import Pipeline, PipelineModel
from pyspark.ml.tuning import TrainValidationSplit
from pyspark.ml.regression import *
from pyspark.ml.feature import *
time_flag()


Process complete! (0.0 minutes and 0.8046152591705322 seconds)


In [12]:
log_time()
# Count flight legs per entry.
def nL(column):
  """Return a Column with the number of '||'-separated segments in `column`.

  Uses Spark's built-in split/size instead of a Python UDF, so rows are not
  shipped to Python workers. `column` may be a column name or a Column. The
  split pattern is a regular expression, hence the escaped pipes.

  Note: unlike the previous lambda-based UDF, a null input does not raise; it
  yields whatever Spark's size() returns for null (configuration dependent).
  """
  return size(split(column, r'\|\|'))

# Quoted column names. Not needed for the select below, but kept because later
# cells build their select lists from this variable.
qcols = ['"' + c + '"' for c in df.columns]

# Every original column plus the derived leg count, without building and
# eval()-ing source code from the column names.
flights_w_legs = df.select('*', nL('segmentsDistance').cast('int').alias('legs'))
per_leg_stats = flights_w_legs.groupby('legs').count()
per_leg_stats.show()
time_flag()

+----+--------+
|legs|   count|
+----+--------+
|   1|22066888|
|   3| 7586488|
|   4|  199812|
|   2|52285467|
|   5|      98|
+----+--------+


Process complete! (11.0 minutes and 23.38756775856018 seconds)


In [13]:
log_time()
# Only interested in flights that are not incredibly rare with respect to number
# of legs. Given the extensive bulk of our dataset, we can use a relatively
# generous threshold, e.g., 0.05 * df.count() / (per_leg_stats.count() - 1)

# per_leg_stats holds one row per distinct leg count, so collect it once and do
# the arithmetic locally. The group counts add up to df.count() and the number
# of collected rows is per_leg_stats.count(), so the threshold is unchanged,
# but the three separate full passes over the data are reduced to one.
leg_counts = per_leg_stats.toPandas()
n_rows = int(leg_counts['count'].sum())
n_groups = len(leg_counts)

# With a single group the original denominator would be zero; in that case
# there is nothing to compare against, so keep the only group.
thresh = 0.05 * n_rows / (n_groups - 1) if n_groups > 1 else 0.0

# Drop a possible null leg count and store plain Python ints so the values can
# be used directly as dictionary keys and range() bounds in later cells.
kept_legs = leg_counts.loc[leg_counts['count'] > thresh, 'legs'].dropna()
common_wrt_legs = [int(v) for v in kept_legs]
time_flag()


Process complete! (26.0 minutes and 3.4636456966400146 seconds)


In [14]:
log_time()
# Train-test split: 75% / 25% of the raw itineraries, with a fixed seed.
train_df, test_df = df.randomSplit(weights = [0.75, 0.25], seed = 42)
time_flag()


Process complete! (0.0 minutes and 0.06395244598388672 seconds)


In [15]:
log_time()
# Get leg-based partitions for train/test sets...

# Attach the leg count to each split directly instead of eval()-ing a select
# statement assembled from column-name strings. The same column expression is
# applied to both splits.
legs_col = nL('segmentsDistance').cast('int').alias('legs')
flights_w_legs_train = train_df.select('*', legs_col)
flights_w_legs_test = test_df.select('*', legs_col)

# One filtered (lazy) DataFrame per retained leg count, keyed by that count.
lbp_train = {i: flights_w_legs_train.where(col('legs') == i) for i in common_wrt_legs}
lbp_test = {i: flights_w_legs_test.where(col('legs') == i) for i in common_wrt_legs}

# Data has now been officially partitioned/filtered!
time_flag()


Process complete! (0.0 minutes and 0.48276281356811523 seconds)


In [16]:
log_time()
# Define abbreviations for the leg-based features.
feat_abbs = dict(zip(['segmentsDepartureTimeRaw', 'segmentsArrivalTimeRaw',
                      'segmentsArrivalAirportCode', 'segmentsDepartureAirportCode',
                     'segmentsAirlineCode', 'segmentsDurationInSeconds', 'segmentsDistance'],
                      ['sDTR_', 'sATR_', 'sAAC_', 'sDAC_', 'sAC_', 'sDIS_', 'sD_']))
feat_others = ['searchDate', 'flightDate', 'isBasicEconomy', 'seatsRemaining']
targets = ['baseFare', 'totalFare']

def leg_breaker_maker(delim, n):
  """Build a UDF returning the n-th (0-based) piece of a delim-separated string."""
  return udf(lambda x: x.split(delim)[n])

def _per_leg_columns(frame, n_legs):
  """Split every segment feature of `frame` into one column per leg.

  The output columns are named <abbreviation><leg number> with 1-based leg
  numbers (e.g. sDTR_1), ordered feature by feature and leg by leg, and are
  followed by the non-segment features and the targets. Each UDF result is
  aliased directly, so no positional rename (and no eval) is needed afterwards.
  """
  leg_cols = [leg_breaker_maker('||', k)(col(name)).alias(f'{abbr}{k + 1}')
              for name, abbr in feat_abbs.items()
              for k in range(n_legs)]
  return frame.select(leg_cols + feat_others + targets)

# Same transformation for the train and test partition of every leg count.
plc_train = {i: _per_leg_columns(lbp_train[i], i) for i in common_wrt_legs}
plc_test = {i: _per_leg_columns(lbp_test[i], i) for i in common_wrt_legs}
time_flag()


Process complete! (0.0 minutes and 2.614377737045288 seconds)


In [17]:
log_time()
# Preview the per-leg columns of every retained training partition.
for legs in common_wrt_legs:
  plc_train[legs].show()
time_flag()

+--------------------+--------------------+------+------+-----+------+----+----------+----------+--------------+--------------+--------+---------+
|              sDTR_1|              sATR_1|sAAC_1|sDAC_1|sAC_1|sDIS_1|sD_1|searchDate|flightDate|isBasicEconomy|seatsRemaining|baseFare|totalFare|
+--------------------+--------------------+------+------+-----+------+----+----------+----------+--------------+--------------+--------+---------+
|2022-05-16T18:08:...|2022-05-16T20:00:...|   JFK|   CLT|   B6|  6720| 545|2022-04-17|2022-05-16|         False|             7|  143.26|   167.11|
|2022-05-05T09:00:...|2022-05-05T10:57:...|   DTW|   ATL|   F9|  7020| 604|2022-04-17|2022-05-05|         False|             4|   70.12|    89.98|
|2022-04-20T08:00:...|2022-04-20T09:34:...|   LGA|   BOS|   DL|  5640| 185|2022-04-16|2022-04-20|         False|             5|  591.63|   650.60|
|2022-04-21T11:00:...|2022-04-21T12:13:...|   CLT|   ATL|   DL|  4380| 228|2022-04-17|2022-04-21|         False|      

In [18]:
log_time()
# Upon preliminary preprocessing, we can make the following observations:

# sAAC, sDAC, sAC, isBasicEconomy are categorical variables--these are all to be
# one-hot encoded.

# sDIS, sD, seatsRemaining are numerical variables--these are only to be cast to
# numerical data types.

# sDTR/sATR, searchDate, flightDate are date variables--from these, we can
# extract more nuanced features such as times of day (TOD) for
# departures/arrivals, days till flight (DTF) between search and flight dates,
# and days of weeks (DOW) and months (MOY) of flight dates.

rp_train = {}
rp_test = {}
cat_feats = {}
num_feats = {}
date_feats = {}

# Prepare lists of names for one-hot encoding of categorical features.
cat_feats_si = {}
cat_feats_ohe = {}

def _recast_and_extract(frame, legs):
  """Select the model inputs for one partition.

  Keeps the categorical columns as they are, casts the numerical features and
  the targets to float, and derives DTF (days between search and flight), DOW
  (day of week of the flight) and MOY (month of the flight). Reads the
  cat_feats/num_feats entries for `legs`, so those must be filled in first.
  """
  return frame.select(cat_feats[legs] +
                      [frame[f].cast('float') for f in num_feats[legs] + targets] +
                      [datediff('flightDate', 'searchDate').alias('DTF'),
                       dayofweek('flightDate').alias('DOW'),
                       month('flightDate').alias('MOY')])

for i in common_wrt_legs:
  part_cols = plc_train[i].columns
  cat_feats[i] = ['isBasicEconomy'] + [c for c in part_cols if c.startswith(('sAAC_', 'sDAC_', 'sAC_'))]
  num_feats[i] = ['seatsRemaining'] + [c for c in part_cols if c.startswith(('sDIS_', 'sD_'))]
  date_feats[i] = ['searchDate', 'flightDate'] + [c for c in part_cols if c.startswith(('sDTR_', 'sATR_'))]

  # Fill lists of names for one-hot encoding of categorical features.
  cat_feats_si[i] = [f + '_si' for f in cat_feats[i]]
  cat_feats_ohe[i] = [f + '_ohe' for f in cat_feats[i]]

  # Cast numerical features to numerical data types, and extract select
  # date-based features (TOD will be forgone due to the ambiguity of timezones
  # in the dataset). Train and test go through the same helper so the two
  # selections cannot drift apart.
  rp_train[i] = _recast_and_extract(plc_train[i], i)
  rp_test[i] = _recast_and_extract(plc_test[i], i)

time_flag()


Process complete! (0.0 minutes and 0.8649418354034424 seconds)


## Summary
(View runtime of procedure following execution)

In [19]:
# Report the time elapsed since the checkpoint indexed by section_flag, then
# store the index of the new checkpoint as the start of the next section.
section_flag = time_flag(note = 'Data Partitioning Complete!', frum = section_flag, save_flag = True)


Data Partitioning Complete! (37.0 minutes and 54.93595838546753 seconds)


# Partition Selection
It is recommended to evaluate a small number of partitions (e.g., one or two) at a time due to computational constraints.

## Procedure

In [20]:
# Select whichever partitions are desired for evaluation.
# pnos lists the leg counts to keep; the selected train/test partitions are
# re-keyed by those same leg counts.
pnos = [1]
fp_train = {p: rp_train[p] for p in pnos}
fp_test = {p: rp_test[p] for p in pnos}

## Summary
(View runtime of procedure following execution)

In [21]:
# Report the time elapsed since the checkpoint indexed by section_flag, then
# store the index of the new checkpoint as the start of the next section.
section_flag = time_flag(note = 'Partition Selection Complete!', frum = section_flag, save_flag = True)


Partition Selection Complete! (0.0 minutes and 0.030663251876831055 seconds)


# Data Pipeline

## Procedure

In [22]:
# Pre-PCA pipeline.
fe_date_feats = ['DTF', 'DOW', 'MOY']

success = True
!mkdir -p PrePCA
pipeline_prefs = {'new': True, 'load_paths': None, 'save_new': True,
                  'save_paths': dict(zip(common_wrt_legs,
                   [f'/content/PrePCA/pipeline_{i}' for i in common_wrt_legs]))}
pre_pipelines = {}
train_fes = {}
test_fes = {}

for i, j in enumerate(fp_train):
  log_time()
  if pipeline_prefs['new']:
    print(f'Fitting pipeline to data partition {i + 1} of {len(fp_train)}...')
    pre_pipelines[j] = Pipeline(stages = [Imputer(strategy = 'median',
                                                  inputCols = num_feats[j],
                                                  outputCols = num_feats[j]),
                                          StringIndexer(inputCols = cat_feats[j],
                                                        outputCols = cat_feats_si[j],
                                                        handleInvalid = 'keep'),
                                          OneHotEncoder(inputCols = cat_feats_si[j],
                                                        outputCols = cat_feats_ohe[j],
                                                        handleInvalid = 'keep'),
                                          VectorAssembler(inputCols = num_feats[j] +
                                                          cat_feats_ohe[j] +
                                                          fe_date_feats,
                                                          outputCol = 'vaf'),
                                          StandardScaler(inputCol = 'vaf',
                                                         outputCol = 'ssf')]).fit(fp_train[j])
    if pipeline_prefs['save_new']:
      pre_pipelines[j].write().overwrite().save(pipeline_prefs['save_paths'][j])
      !tar czf prePCA_part_{j}.tar.gz {pipeline_prefs['save_paths'][j][9:]}
  else:
    try:
      print(f'Retrieving previously fitted pipeline stored in {pipeline_prefs["load_paths"]}...')
      !tar xf prePCA_part_{j}.tar.gz
      pre_pipelines[j] = PipelineModel.load(pipeline_prefs['load_paths'][j])
    except:
      print(f'Failed to retrieve pipeline from {pipeline_prefs["load_paths"]}!')
      success = False
      break

  time_flag()

  print(f'Running data partition {i + 1} of {len(fp_train)} through pipeline...')
  log_time()
  train_fes[j] = pre_pipelines[j].transform(fp_train[j])
  test_fes[j] = pre_pipelines[j].transform(fp_test[j])
  time_flag(note = f'Successfully ran data partition {i + 1} of {len(fp_train)} through pipeline!')

if success:
  cell_flag = time_flag(note = 'ALL DATA PARTITIONS have been run through pipeline!!!',
                        frum = section_flag, save_flag = True)

Fitting pipeline to data partition 1 of 1...

Process complete! (80.0 minutes and 26.226526498794556 seconds)
Running data partition 1 of 1 through pipeline...

Successfully ran data partition 1 of 1 through pipeline! (0.0 minutes and 0.8586592674255371 seconds)

ALL DATA PARTITIONS have been run through pipeline!!! (80.0 minutes and 27.48966693878174 seconds)


In [23]:
# Generate PCA stats.
success = True
!mkdir -p PCA
pca_stats_prefs = {'new': True, 'load_paths': None, 'save_new': True,
                   'save_paths': dict(zip(common_wrt_legs,
                    [f'/content/PCA/stats_{i}.npy' for i in common_wrt_legs]))}
vecLen = udf(lambda x: len(x))
raw_feat_space_dims = {}
pca_stats = {}

for i, j in enumerate(train_fes):
  log_time()
  if pca_stats_prefs['new']:
    print(f'Verifying raw feature space dimension for data partition {i + 1} of {len(train_fes)}...')

    # Get length of assembled vector for evaluation of PCA.
    tmp_ssf_len = train_fes[j].select(vecLen('ssf').cast('float')).distinct().toPandas()
    assert(tmp_ssf_len.size == 1)
    raw_feat_space_dims[j] = list(tmp_ssf_len.iloc[0])[0]
    time_flag(note = 'Raw feature space dimension verified.')

    print('Proceeding to generate PCA stats...')
    log_time()

    # Now evaluate PCA.
    pca_stats[j] = PCA(k = raw_feat_space_dims[j], inputCol = 'ssf',
                       outputCol = 'pcf').fit(train_fes[j]).explainedVariance.values

    if pca_stats_prefs['save_new']:
      np.save(pca_stats_prefs['save_paths'][j], pca_stats[j])
  else:
    try:
      print(f'Retrieving previously generated PCA stats stored in {pca_stats_prefs["load_paths"]}...')
      pca_stats[j] = np.load(pca_stats_prefs['load_paths'][j])
    except:
      print(f'Failed to retrieve PCA stats from {pca_stats_prefs["load_paths"]}!')
      success = False
      break

  time_flag(note = f'PCA stats obtained for {i + 1} / {len(train_fes)} of the data partitions...')

if success:
  cell_flag = time_flag(note = 'PCA stats generated for ALL DATA PARTITIONS!!!',
                        frum = cell_flag, save_flag = True)

Verifying raw feature space dimension for data partition 1 of 1...

Raw feature space dimension verified. (43.0 minutes and 49.59289503097534 seconds)
Proceeding to generate PCA stats...

PCA stats obtained for 1 / 1 of the data partitions... (86.0 minutes and 52.279377698898315 seconds)

PCA stats generated for ALL DATA PARTITIONS!!! (130.0 minutes and 42.163208961486816 seconds)


In [24]:
log_time()
# Impose an arbitrary threshold on the minimum fraction of variance to be
# preserved in the reduced-dimension space, and compute the corresponding
# minimum reduced dimension.
rds_var_thresh = 0.95

def _min_components(explained_variance, var_thresh):
  """Return the smallest component count whose cumulative variance exceeds var_thresh.

  If the cumulative explained variance never exceeds the threshold (for example
  a threshold of 1.0, or rounding that leaves the total just below it), every
  component is kept.
  """
  cumulative = np.cumsum(explained_variance)
  exceeds = cumulative > var_thresh
  if not exceeds.any():
    # np.argmax of an all-False mask is 0, which would silently select k = 1.
    return len(cumulative)
  return int(np.argmax(exceeds)) + 1

# One reduced dimension per partition, keyed like pca_stats.
pca_ks = {j: _min_components(pca_stats[j], rds_var_thresh) for j in pca_stats}

cell_flag = time_flag(save_flag = True)


Process complete! (0.0 minutes and 0.008893489837646484 seconds)


In [25]:
# Proceed to perform the appropriate PCA.

success = True
pca_prefs = {'new': True, 'load_paths': None, 'save_new': True,
             'save_paths': dict(zip(common_wrt_legs,
              [f'/content/PCA/pipeline_{i}' for i in common_wrt_legs]))}
pca_mods = {}
train_pca = {}
test_pca = {}
for i, j in enumerate(pca_ks):
  log_time()
  if pca_prefs['new']:
    print(f'Generating PCA model for data partition {i + 1} of {len(pca_ks)}...')
    pca_mods[j] = PCA(k = pca_ks[j], inputCol = 'ssf', outputCol = 'pcf').fit(train_fes[j])
    if pca_prefs['save_new']:
      pca_mods[j].save(pca_prefs['save_paths'][j])
      !tar czf PCA_part_{j}.tar.gz {pca_prefs['save_paths'][j][9:]}
  else:
    try:
      print(f'Retrieving previously trained PCA model stored in {pca_prefs["load_paths"]}...')
      !tar xf PCA_part_{j}.tar.gz
      pca_mods[j] = PCAModel.load(pca_prefs['load_paths'][j])
    except:
      print(f'Failed to retrieve PCA model from {pca_prefs["load_paths"]}!')
      success = False
      break
  time_flag()

  print(f'Proceeding to reduce dimension associated with data partition {i + 1} of {len(pca_ks)}...')

  log_time()
  train_pca[j] = pca_mods[j].transform(train_fes[j])
  test_pca[j] = pca_mods[j].transform(test_fes[j])
  time_flag(f'Successfully reduced dimension associated with data partition {i + 1} of {len(pca_ks)}!')

if success:
  time_flag(note = 'PCA performed on ALL DATA PARTITIONS!!!', frum = cell_flag)

Generating PCA model for data partition 1 of 1...

Process complete! (85.0 minutes and 50.04234719276428 seconds)
Proceeding to reduce dimension associated with data partition 1 of 1...

Successfully reduced dimension associated with data partition 1 of 1! (0.0 minutes and 0.15296530723571777 seconds)

PCA performed on ALL DATA PARTITIONS!!! (85.0 minutes and 50.28396487236023 seconds)


## Summary
(View runtime of procedure following execution)

In [26]:
# Report the time elapsed since the checkpoint indexed by section_flag, then
# store the index of the new checkpoint as the start of the next section.
section_flag = time_flag(note = 'Data Pipeline Fitting Complete!', frum = section_flag, save_flag = True)


Data Pipeline Fitting Complete! (297.0 minutes and 0.060181617736816406 seconds)


# Model Training

## Procedure

In [39]:
log_time()
# Choose target: either baseFare or totalFare
# The chosen name is used as the label column during training and as the
# reference column when squared errors are computed.
target_choice = 'baseFare'
time_flag()


Process complete! (0.0 minutes and 8.916854858398438e-05 seconds)


In [42]:
from xgboost.spark import *

In [45]:
# Train using a PySpark-compatible model class of choice.
success = True
mod_prefs = {'new': True, 'load_paths': None, 'save_new': False, 'save_paths': None}
mod_class = {'name': 'XGB Regressor', 'class': SparkXGBRegressor, 'mod': SparkXGBRegressorModel}
mods = {}
train_res = {}
test_res = {}
for i, j in enumerate(train_pca):
  log_time()
  if mod_prefs['new']:
    print(f'Training a {mod_class["name"]} on data partition {i + 1} of {len(train_pca)}...')
    mods[j] = mod_class['class'](features_col = 'pcf', label_col = target_choice).fit(train_pca[j])
    if mod_prefs['save_new']:
      mods[j].save(mod_prefs['save_paths'][j])
      !tar czf Models_part_{j}.tar.gz {mod_prefs['save_paths'][j][9:]}
  else:
    try:
      print(f'Retrieving previously trained model stored in {mod_prefs["load_paths"]}...')
      !tar xf Models_part_{j}.tar.gz
      mods[j] = mod_class['mod'].load(mod_prefs['load_paths'][j])
    except:
      print(f'Failed to retrieve model from {mod_prefs["load_paths"]}!')
      success = False
      break
  time_flag()

  print(f'Proceeding to generate predictions for data partition {i + 1} of {len(train_pca)}...')

  log_time()
  train_res[j] = mods[j].transform(train_pca[j])
  test_res[j] = mods[j].transform(test_pca[j])
  time_flag(note = f'Predictions generated for data partition {i + 1} of {len(train_pca)}...')

if success:
  cell_flag = time_flag(note = 'Model training/prediction generation complete for ALL DATA PARTITIONS!!!',
                        frum = section_flag, save_flag = True)

Training a XGB Regressor on data partition 1 of 1...


INFO:XGBoost-PySpark:Running xgboost-2.1.4 on 1 workers with
	booster params: {'objective': 'reg:squarederror', 'device': 'cpu', 'nthread': 1}
	train_call_kwargs_params: {'verbose_eval': True, 'num_boost_round': 100}
	dmatrix_kwargs: {'nthread': 1, 'missing': nan}
INFO:XGBoost-PySpark:Finished xgboost training!



Process complete! (41.0 minutes and 36.56788372993469 seconds)
Proceeding to generate predictions for data partition 1 of 1...

Predictions generated for data partition 1 of 1... (0.0 minutes and 1.2910025119781494 seconds)

Model training/prediction generation complete for ALL DATA PARTITIONS!!! (65.0 minutes and 27.712190866470337 seconds)


In [46]:
# Retrieve/assess results.

def _squared_error(results):
  """Return a one-column frame 'SE' with (target_choice - prediction) squared per row."""
  residual = results[target_choice] - results['prediction']
  return results.select((residual ** 2).alias('SE'))

train_SE = {}
test_SE = {}

for i, j in enumerate(pnos):
  print(f'Evaluating model associated with data partition {i + 1} of {len(pnos)}...')
  log_time()

  # Summary statistics of the squared error: training set first, then test set.
  train_SE[j] = _squared_error(train_res[j])
  train_SE[j].describe().show()

  test_SE[j] = _squared_error(test_res[j])
  test_SE[j].describe().show()

  time_flag()

time_flag(note = 'Model evaluation complete for ALL DATA PARTITIONS!!!', frum = cell_flag)

Evaluating model associated with data partition 1 of 1...
+-------+-------------------+
|summary|                 SE|
+-------+-------------------+
|  count|           16551316|
|   mean|  9571.781258383819|
| stddev| 114263.15222369383|
|    min|                0.0|
|    max|1.783867559357557E7|
+-------+-------------------+

+-------+--------------------+
|summary|                  SE|
+-------+--------------------+
|  count|             5515572|
|   mean|   9612.353169563714|
| stddev|  113724.72872101783|
|    min|1.455191522836685...|
|    max| 1.814538582437755E7|
+-------+--------------------+


Process complete! (57.0 minutes and 18.67183518409729 seconds)

Model evaluation complete for ALL DATA PARTITIONS!!! (57.0 minutes and 34.15929555892944 seconds)


## Summary
(View runtime of procedure following execution)

In [47]:
# Report the time elapsed since the checkpoint indexed by section_flag, then
# store the index of the new checkpoint as the start of the next section.
section_flag = time_flag(note = 'Model Training Complete!', frum = section_flag, save_flag = True)


Model Training Complete! (123.0 minutes and 1.9470362663269043 seconds)


# Notebook Summary: Total Runtime

In [48]:
# Report the time elapsed since the very first checkpoint in runtime_log
# (index 0), i.e. the total runtime of the logged cells.
time_flag(note = 'ENTIRE NOTEBOOK EXECUTION COMPLETE!!!!!', frum = 0)


ENTIRE NOTEBOOK EXECUTION COMPLETE!!!!! (687.0 minutes and 44.43928790092468 seconds)


In [None]:
# Block the kernel for 12 hours (12 * 60 * 60 seconds).
# NOTE(review): presumably meant to keep the hosted runtime alive after the run
# so results can be inspected -- confirm, and remove if not needed.
time.sleep(12 * 60 * 60)