In [5]:
import plotly.express as px
import plotly.graph_objs as go
import pandas as pd
import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
import plotly.offline as py


In [6]:
# Start (or reuse) the Spark session for this notebook.
spark = (
    SparkSession.builder
    .appName("flights")
    .getOrCreate()
)
import json
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, DateType, TimestampType

# Rebuild the DataFrame schema from its JSON description on disk.
with open("../matrix/schema.json", "r") as schema_file:
    schema_json = json.load(schema_file)
schema = StructType.fromJson(schema_json)

In [7]:
# Read the flights CSV using the explicit schema loaded from schema.json;
# the first line of the file is treated as a header row.
df = spark.read.csv(
    "../../data.nosync/cleaned/cleaned_flights.csv",
    schema=schema,
    header=True,
)

In [10]:
# Binary target: 1 when the flight arrived late (ArrDelay > 0), 0 otherwise.
# NOTE(review): any row whose ArrDelay is null would fall through to
# otherwise(0) and be labelled "not delayed" - confirm the cleaned file has none.
is_delayed = df["ArrDelay"] > 0
df = df.withColumn("label", when(is_delayed, 1).otherwise(0))

In [14]:
# Columns kept for modelling, plus the target column 'label'.
features = [
    # calendar
    'Quarter', 'Month', 'DayofMonth', 'DayOfWeek',
    # carrier and route
    'Reporting_Airline', 'Origin', 'Dest',
    # delay, schedule and flight measurements
    'DepDelay', 'CRSDepTime', 'CRSArrTime', 'CRSElapsedTime', 'AirTime', 'Distance',
    # geography
    'ORIGIN_STATE', 'DEST_STATE',
    # target
    'label',
]

# maintain only the columns named in the features list
df = df.select(*features)

In [16]:
# Print the first 10 rows of the selected columns.
df.show(n=10)

+-------+-----+----------+---------+-----------------+------+----+--------+----------+----------+--------------+-------+--------+------------+----------+-----+
|Quarter|Month|DayofMonth|DayOfWeek|Reporting_Airline|Origin|Dest|DepDelay|CRSDepTime|CRSArrTime|CRSElapsedTime|AirTime|Distance|ORIGIN_STATE|DEST_STATE|label|
+-------+-----+----------+---------+-----------------+------+----+--------+----------+----------+--------------+-------+--------+------------+----------+-----+
|      4|   10|         4|        5|               9E|   ATL| OMA|     0.0|       855|      1025|         150.0|  124.0|   821.0|          GA|        NE|    0|
|      4|   10|        26|        6|               9E|   IAH| MSP|    -8.0|       730|      1021|         171.0|  142.0|  1034.0|          TX|        MN|    0|
|      4|   10|        12|        6|               9E|   LGA| CLT|   104.0|      1625|      1839|         134.0|   81.0|   544.0|          NY|        NC|    1|
|      4|   10|         4|        5|    

In [17]:
from pyspark.ml import Pipeline
from pyspark.ml.feature import StringIndexer, VectorAssembler
from pyspark.ml.regression import RandomForestRegressor
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.ml.feature import VectorIndexer
from pyspark.ml.feature import OneHotEncoder

In [18]:
# Inspect each column's Spark dtype as (name, dtype) pairs.
df.dtypes

[('Quarter', 'int'),
 ('Month', 'int'),
 ('DayofMonth', 'int'),
 ('DayOfWeek', 'int'),
 ('Reporting_Airline', 'string'),
 ('Origin', 'string'),
 ('Dest', 'string'),
 ('DepDelay', 'double'),
 ('CRSDepTime', 'int'),
 ('CRSArrTime', 'int'),
 ('CRSElapsedTime', 'double'),
 ('AirTime', 'double'),
 ('Distance', 'double'),
 ('ORIGIN_STATE', 'string'),
 ('DEST_STATE', 'string'),
 ('label', 'int')]

In [19]:
from pyspark.ml.feature import OneHotEncoder, StringIndexer

# Split the columns by Spark dtype: string columns are treated as categorical.
categoricalCols = [field for (field, dataType) in df.dtypes
                   if dataType == "string"]

# Derived column names for the indexed and one-hot-encoded versions.
indexOutputCols = [x + "Index" for x in categoricalCols]
oheOutputCols = [x + "OHE" for x in categoricalCols]

# Stage 1: map every category string to a numeric index.
# handleInvalid="skip" drops rows with invalid values at transform time
# instead of raising.
stringIndexer = StringIndexer(inputCols=categoricalCols,
                              outputCols=indexOutputCols,
                              handleInvalid="skip")

# Stage 2: one-hot encode the category indices.
oheEncoder = OneHotEncoder(inputCols=indexOutputCols,
                           outputCols=oheOutputCols)

# Numeric predictors: every int/double column except the target.
# Uses logical `and` and a membership test rather than bitwise `&` on booleans.
numericCols = [field for (field, dataType) in df.dtypes
               if dataType in ("double", "int") and field != "label"]

# Stage 3: concatenate encoded categoricals and numerics into one vector column.
assemblerInputs = oheOutputCols + numericCols

vecAssembler = VectorAssembler(inputCols=assemblerInputs,
                               outputCol="features")


In [21]:
# Chain the preprocessing stages: index strings -> one-hot encode -> assemble.
preprocessing_stages = [stringIndexer, oheEncoder, vecAssembler]
pipeline = Pipeline(stages=preprocessing_stages)

# Fit the stages on df, then apply the fitted pipeline to the same DataFrame.
pipelineModel = pipeline.fit(df)
df_proc = pipelineModel.transform(df)

                                                                                

In [22]:
# Keep only the assembled feature vector and the target column.
model_columns = ["features", "label"]
df_proc = df_proc.select(*model_columns)


In [23]:
# Preview the first 5 (features, label) rows.
df_proc.show(n=5)


+--------------------+-----+
|            features|label|
+--------------------+-----+
|(762,[8,15,389,64...|    0|
|(762,[8,20,340,64...|    0|
|(762,[8,33,339,64...|    1|
|(762,[8,75,330,67...|    1|
|(762,[8,94,330,66...|    0|
+--------------------+-----+
only showing top 5 rows



In [24]:
# Number of rows labelled 1 in the processed dataset.
df_proc.where(df_proc["label"] == 1).count()

                                                                                

2501251

In [25]:
# Number of rows labelled 0 in the processed dataset.
df_proc.where(df_proc["label"] == 0).count()

                                                                                

3745488

In [26]:
# Ratio of label-1 rows to label-0 rows, later used as the sampling fraction
# when down-sampling label 0.
# The counts are computed from df_proc (one aggregation) instead of being
# pasted in as literals, so the rate stays correct if the input data changes.
# Assumes label 0 is the majority class, i.e. the ratio is <= 1 - it is used
# as a without-replacement sampling fraction.
label_counts = {
    row["label"]: row["count"]
    for row in df_proc.groupBy("label").count().collect()
}
sample_rate = label_counts[1] / label_counts[0]
sample_rate

0.6678037681605175

In [28]:
# 80/20 train/test split with a fixed seed.
train, test = df_proc.randomSplit(weights=[0.8, 0.2], seed=42)


In [29]:

# Balance the training set by down-sampling label 0.
# NOTE: this cell reassigns `train`, so running it a second time would sample
# the already-reduced label-0 rows again.

# Every label-1 training row is kept.
train_1 = train.where(train["label"] == 1)

# Label-0 rows are sampled without replacement at `sample_rate`.
train_0 = (
    train
    .where(train["label"] == 0)
    .sample(withReplacement=False, fraction=sample_rate, seed=42)
)

# Stack the two classes back into a single training DataFrame.
train = train_1.union(train_0)

In [30]:
# Number of rows labelled 1 in the training set.
train.where(train["label"] == 1).count()

                                                                                

2001500

In [31]:
# Number of rows labelled 0 in the training set.
train.where(train["label"] == 0).count()

                                                                                

2002517

In [32]:
# Random-forest classifier reading the assembled vector and the binary target;
# all other hyper-parameters are left at the library defaults.
rf = RandomForestClassifier(
    featuresCol="features",
    labelCol="label",
)


In [33]:
# Train the random forest on the training DataFrame.
# The captured output of this cell is dominated by Spark MemoryStore /
# BlockManager warnings about cached blocks not fitting in memory.
model = rf.fit(train)

[Stage 27:>                                                        (0 + 8) / 36]

22/12/26 21:30:43 WARN MemoryStore: Not enough space to cache rdd_102_5 in memory! (computed 13.7 MiB so far)
22/12/26 21:30:43 WARN MemoryStore: Not enough space to cache rdd_102_3 in memory! (computed 8.7 MiB so far)
22/12/26 21:30:43 WARN MemoryStore: Not enough space to cache rdd_102_7 in memory! (computed 21.0 MiB so far)
22/12/26 21:30:43 WARN MemoryStore: Not enough space to cache rdd_102_2 in memory! (computed 21.0 MiB so far)
22/12/26 21:30:43 WARN BlockManager: Persisting block rdd_102_3 to disk instead.
22/12/26 21:30:43 WARN BlockManager: Persisting block rdd_102_5 to disk instead.
22/12/26 21:30:43 WARN BlockManager: Persisting block rdd_102_7 to disk instead.
22/12/26 21:30:43 WARN BlockManager: Persisting block rdd_102_2 to disk instead.
22/12/26 21:30:43 WARN MemoryStore: Not enough space to cache rdd_102_4 in memory! (computed 31.5 MiB so far)
22/12/26 21:30:43 WARN BlockManager: Persisting block rdd_102_4 to disk instead.
22/12/26 21:30:43 WARN MemoryStore: Not enough



22/12/26 21:30:49 WARN MemoryStore: Not enough space to cache rdd_102_9 in memory! (computed 31.5 MiB so far)
22/12/26 21:30:49 WARN BlockManager: Persisting block rdd_102_9 to disk instead.
22/12/26 21:30:49 WARN MemoryStore: Not enough space to cache rdd_102_8 in memory! (computed 31.5 MiB so far)
22/12/26 21:30:49 WARN BlockManager: Persisting block rdd_102_8 to disk instead.
22/12/26 21:30:49 WARN MemoryStore: Not enough space to cache rdd_102_11 in memory! (computed 31.5 MiB so far)
22/12/26 21:30:49 WARN BlockManager: Persisting block rdd_102_11 to disk instead.
22/12/26 21:30:50 WARN MemoryStore: Not enough space to cache rdd_102_10 in memory! (computed 169.5 MiB so far)
22/12/26 21:30:50 WARN BlockManager: Persisting block rdd_102_10 to disk instead.
22/12/26 21:30:51 WARN MemoryStore: Not enough space to cache rdd_102_9 in memory! (computed 111.0 MiB so far)
22/12/26 21:30:51 WARN MemoryStore: Not enough space to cache rdd_102_10 in memory! (computed 71.5 MiB so far)
22/12/26 



22/12/26 21:30:52 WARN MemoryStore: Not enough space to cache rdd_102_14 in memory! (computed 31.5 MiB so far)
22/12/26 21:30:52 WARN BlockManager: Persisting block rdd_102_14 to disk instead.




22/12/26 21:30:53 WARN MemoryStore: Not enough space to cache rdd_102_12 in memory! (computed 111.0 MiB so far)
22/12/26 21:30:53 WARN BlockManager: Persisting block rdd_102_12 to disk instead.
22/12/26 21:30:53 WARN MemoryStore: Not enough space to cache rdd_102_15 in memory! (computed 254.2 MiB so far)
22/12/26 21:30:53 WARN BlockManager: Persisting block rdd_102_15 to disk instead.
22/12/26 21:30:53 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_102_19 in memory.
22/12/26 21:30:55 WARN MemoryStore: Not enough space to cache rdd_102_14 in memory! (computed 111.0 MiB so far)
22/12/26 21:30:55 WARN MemoryStore: Not enough space to cache rdd_102_15 in memory! (computed 169.5 MiB so far)
22/12/26 21:30:55 WARN MemoryStore: Not enough space to cache rdd_102_13 in memory! (computed 254.2 MiB so far)




22/12/26 21:30:56 WARN MemoryStore: Not enough space to cache rdd_102_19 in memory! (computed 384.0 B so far)
22/12/26 21:30:56 WARN BlockManager: Persisting block rdd_102_19 to disk instead.
22/12/26 21:30:57 WARN MemoryStore: Not enough space to cache rdd_102_12 in memory! (computed 111.0 MiB so far)
22/12/26 21:30:57 WARN MemoryStore: Not enough space to cache rdd_102_18 in memory! (computed 111.0 MiB so far)
22/12/26 21:30:57 WARN BlockManager: Persisting block rdd_102_18 to disk instead.




22/12/26 21:30:58 WARN MemoryStore: Not enough space to cache rdd_102_16 in memory! (computed 71.5 MiB so far)
22/12/26 21:30:58 WARN BlockManager: Persisting block rdd_102_16 to disk instead.




22/12/26 21:30:58 WARN MemoryStore: Not enough space to cache rdd_102_17 in memory! (computed 254.2 MiB so far)
22/12/26 21:30:58 WARN BlockManager: Persisting block rdd_102_17 to disk instead.
22/12/26 21:30:59 WARN MemoryStore: Not enough space to cache rdd_102_18 in memory! (computed 47.3 MiB so far)
22/12/26 21:30:59 WARN MemoryStore: Not enough space to cache rdd_102_19 in memory! (computed 47.3 MiB so far)




22/12/26 21:31:00 WARN MemoryStore: Not enough space to cache rdd_102_17 in memory! (computed 169.5 MiB so far)
22/12/26 21:31:00 WARN MemoryStore: Not enough space to cache rdd_102_16 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:00 WARN MemoryStore: Not enough space to cache rdd_102_21 in memory! (computed 2.5 MiB so far)
22/12/26 21:31:00 WARN BlockManager: Persisting block rdd_102_21 to disk instead.
22/12/26 21:31:00 WARN MemoryStore: Not enough space to cache rdd_102_20 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:00 WARN BlockManager: Persisting block rdd_102_20 to disk instead.




22/12/26 21:31:02 WARN MemoryStore: Not enough space to cache rdd_102_22 in memory! (computed 254.2 MiB so far)
22/12/26 21:31:02 WARN BlockManager: Persisting block rdd_102_22 to disk instead.
22/12/26 21:31:02 WARN MemoryStore: Not enough space to cache rdd_102_23 in memory! (computed 1054.5 KiB so far)
22/12/26 21:31:02 WARN BlockManager: Persisting block rdd_102_23 to disk instead.
22/12/26 21:31:02 WARN MemoryStore: Not enough space to cache rdd_102_20 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:02 WARN MemoryStore: Not enough space to cache rdd_102_21 in memory! (computed 254.2 MiB so far)
22/12/26 21:31:03 WARN MemoryStore: Not enough space to cache rdd_102_22 in memory! (computed 47.3 MiB so far)




22/12/26 21:31:04 WARN MemoryStore: Not enough space to cache rdd_102_25 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:04 WARN BlockManager: Persisting block rdd_102_25 to disk instead.
22/12/26 21:31:04 WARN MemoryStore: Not enough space to cache rdd_102_23 in memory! (computed 8.7 MiB so far)
22/12/26 21:31:04 WARN MemoryStore: Not enough space to cache rdd_102_24 in memory! (computed 169.5 MiB so far)
22/12/26 21:31:04 WARN BlockManager: Persisting block rdd_102_24 to disk instead.




22/12/26 21:31:05 WARN MemoryStore: Not enough space to cache rdd_102_26 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:05 WARN BlockManager: Persisting block rdd_102_26 to disk instead.
22/12/26 21:31:05 WARN MemoryStore: Not enough space to cache rdd_102_24 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:05 WARN MemoryStore: Not enough space to cache rdd_102_27 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:05 WARN BlockManager: Persisting block rdd_102_27 to disk instead.




22/12/26 21:31:06 WARN MemoryStore: Not enough space to cache rdd_102_25 in memory! (computed 254.2 MiB so far)




22/12/26 21:31:07 WARN MemoryStore: Not enough space to cache rdd_102_26 in memory! (computed 254.2 MiB so far)
22/12/26 21:31:07 WARN MemoryStore: Not enough space to cache rdd_102_29 in memory! (computed 8.7 MiB so far)
22/12/26 21:31:07 WARN BlockManager: Persisting block rdd_102_29 to disk instead.
22/12/26 21:31:07 WARN MemoryStore: Not enough space to cache rdd_102_27 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:07 WARN MemoryStore: Not enough space to cache rdd_102_28 in memory! (computed 1054.5 KiB so far)
22/12/26 21:31:07 WARN BlockManager: Persisting block rdd_102_28 to disk instead.




22/12/26 21:31:08 WARN MemoryStore: Not enough space to cache rdd_102_29 in memory! (computed 111.0 MiB so far)




22/12/26 21:31:09 WARN MemoryStore: Not enough space to cache rdd_102_28 in memory! (computed 254.2 MiB so far)




22/12/26 21:31:11 WARN MemoryStore: Not enough space to cache rdd_102_30 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:11 WARN BlockManager: Persisting block rdd_102_30 to disk instead.
22/12/26 21:31:11 WARN MemoryStore: Not enough space to cache rdd_102_31 in memory! (computed 1054.5 KiB so far)
22/12/26 21:31:11 WARN BlockManager: Persisting block rdd_102_31 to disk instead.
22/12/26 21:31:12 WARN MemoryStore: Not enough space to cache rdd_102_33 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:12 WARN BlockManager: Persisting block rdd_102_33 to disk instead.
22/12/26 21:31:12 WARN MemoryStore: Not enough space to cache rdd_102_32 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:12 WARN BlockManager: Persisting block rdd_102_32 to disk instead.
22/12/26 21:31:12 WARN MemoryStore: Not enough space to cache rdd_102_35 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:12 WARN BlockManager: Persisting block rdd_102_35 to disk instead.
22/12/26 21:31:12 WARN MemorySto



22/12/26 21:31:14 WARN MemoryStore: Not enough space to cache rdd_102_32 in memory! (computed 169.5 MiB so far)
22/12/26 21:31:14 WARN MemoryStore: Not enough space to cache rdd_102_34 in memory! (computed 254.2 MiB so far)


                                                                                

22/12/26 21:31:15 WARN MemoryStore: Not enough space to cache rdd_102_7 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:15 WARN MemoryStore: Not enough space to cache rdd_102_4 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:15 WARN MemoryStore: Not enough space to cache rdd_102_6 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:15 WARN MemoryStore: Not enough space to cache rdd_102_1 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:15 WARN MemoryStore: Not enough space to cache rdd_102_3 in memory! (computed 31.5 MiB so far)
22/12/26 21:31:15 WARN MemoryStore: Not enough space to cache rdd_102_5 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:15 WARN MemoryStore: Not enough space to cache rdd_102_2 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:15 WARN MemoryStore: Not enough space to cache rdd_102_0 in memory! (computed 71.5 MiB so far)




22/12/26 21:31:16 WARN MemoryStore: Not enough space to cache rdd_102_8 in memory! (computed 169.5 MiB so far)
22/12/26 21:31:16 WARN MemoryStore: Not enough space to cache rdd_102_9 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:16 WARN MemoryStore: Not enough space to cache rdd_102_10 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:16 WARN MemoryStore: Not enough space to cache rdd_102_13 in memory! (computed 13.7 MiB so far)
22/12/26 21:31:16 WARN MemoryStore: Not enough space to cache rdd_102_12 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:16 WARN MemoryStore: Not enough space to cache rdd_102_11 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:16 WARN MemoryStore: Not enough space to cache rdd_102_15 in memory! (computed 3.8 MiB so far)




22/12/26 21:31:17 WARN MemoryStore: Not enough space to cache rdd_102_14 in memory! (computed 381.3 MiB so far)
22/12/26 21:31:17 WARN MemoryStore: Not enough space to cache rdd_102_16 in memory! (computed 31.5 MiB so far)




22/12/26 21:31:17 WARN MemoryStore: Not enough space to cache rdd_102_17 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:17 WARN MemoryStore: Not enough space to cache rdd_102_18 in memory! (computed 47.3 MiB so far)




22/12/26 21:31:18 WARN MemoryStore: Not enough space to cache rdd_102_19 in memory! (computed 254.2 MiB so far)
22/12/26 21:31:18 WARN MemoryStore: Not enough space to cache rdd_102_21 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:18 WARN MemoryStore: Not enough space to cache rdd_102_22 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:18 WARN MemoryStore: Not enough space to cache rdd_102_20 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:18 WARN MemoryStore: Not enough space to cache rdd_102_23 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:18 WARN MemoryStore: Not enough space to cache rdd_102_25 in memory! (computed 1644.5 KiB so far)
22/12/26 21:31:18 WARN MemoryStore: Not enough space to cache rdd_102_24 in memory! (computed 111.0 MiB so far)




22/12/26 21:31:18 WARN MemoryStore: Not enough space to cache rdd_102_27 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:18 WARN MemoryStore: Not enough space to cache rdd_102_28 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:18 WARN MemoryStore: Not enough space to cache rdd_102_29 in memory! (computed 1644.5 KiB so far)
22/12/26 21:31:19 WARN MemoryStore: Not enough space to cache rdd_102_26 in memory! (computed 254.2 MiB so far)




22/12/26 21:31:19 WARN MemoryStore: Not enough space to cache rdd_102_30 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:19 WARN MemoryStore: Not enough space to cache rdd_102_31 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:19 WARN MemoryStore: Not enough space to cache rdd_102_32 in memory! (computed 21.0 MiB so far)




22/12/26 21:31:19 WARN MemoryStore: Not enough space to cache rdd_102_35 in memory! (computed 13.7 MiB so far)
22/12/26 21:31:19 WARN MemoryStore: Not enough space to cache rdd_102_34 in memory! (computed 169.5 MiB so far)
22/12/26 21:31:19 WARN MemoryStore: Not enough space to cache rdd_102_33 in memory! (computed 254.2 MiB so far)


                                                                                

22/12/26 21:31:20 WARN MemoryStore: Not enough space to cache rdd_102_0 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:20 WARN MemoryStore: Not enough space to cache rdd_102_6 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:20 WARN MemoryStore: Not enough space to cache rdd_102_5 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:20 WARN MemoryStore: Not enough space to cache rdd_102_1 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:20 WARN MemoryStore: Not enough space to cache rdd_102_2 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:20 WARN MemoryStore: Not enough space to cache rdd_102_3 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:20 WARN MemoryStore: Not enough space to cache rdd_102_4 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:20 WARN MemoryStore: Not enough space to cache rdd_102_7 in memory! (computed 71.5 MiB so far)




22/12/26 21:31:21 WARN MemoryStore: Not enough space to cache rdd_102_12 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:21 WARN MemoryStore: Not enough space to cache rdd_102_11 in memory! (computed 31.5 MiB so far)
22/12/26 21:31:21 WARN MemoryStore: Not enough space to cache rdd_102_9 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:21 WARN MemoryStore: Not enough space to cache rdd_102_10 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:21 WARN MemoryStore: Not enough space to cache rdd_102_8 in memory! (computed 169.5 MiB so far)
22/12/26 21:31:21 WARN MemoryStore: Not enough space to cache rdd_102_13 in memory! (computed 31.5 MiB so far)
22/12/26 21:31:21 WARN MemoryStore: Not enough space to cache rdd_102_14 in memory! (computed 13.7 MiB so far)
22/12/26 21:31:21 WARN MemoryStore: Not enough space to cache rdd_102_15 in memory! (computed 8.7 MiB so far)




22/12/26 21:31:22 WARN MemoryStore: Not enough space to cache rdd_102_17 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:22 WARN MemoryStore: Not enough space to cache rdd_102_18 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:22 WARN MemoryStore: Not enough space to cache rdd_102_16 in memory! (computed 254.2 MiB so far)




22/12/26 21:31:23 WARN MemoryStore: Not enough space to cache rdd_102_19 in memory! (computed 169.5 MiB so far)




22/12/26 21:31:23 WARN MemoryStore: Not enough space to cache rdd_102_21 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:23 WARN MemoryStore: Not enough space to cache rdd_102_22 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:23 WARN MemoryStore: Not enough space to cache rdd_102_23 in memory! (computed 1644.5 KiB so far)
22/12/26 21:31:23 WARN MemoryStore: Not enough space to cache rdd_102_20 in memory! (computed 254.2 MiB so far)




22/12/26 21:31:24 WARN MemoryStore: Not enough space to cache rdd_102_25 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:24 WARN MemoryStore: Not enough space to cache rdd_102_26 in memory! (computed 1644.5 KiB so far)
22/12/26 21:31:24 WARN MemoryStore: Not enough space to cache rdd_102_24 in memory! (computed 111.0 MiB so far)




22/12/26 21:31:24 WARN MemoryStore: Not enough space to cache rdd_102_28 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:24 WARN MemoryStore: Not enough space to cache rdd_102_30 in memory! (computed 31.5 MiB so far)
22/12/26 21:31:24 WARN MemoryStore: Not enough space to cache rdd_102_29 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:24 WARN MemoryStore: Not enough space to cache rdd_102_27 in memory! (computed 254.2 MiB so far)




22/12/26 21:31:25 WARN MemoryStore: Not enough space to cache rdd_102_32 in memory! (computed 31.5 MiB so far)
22/12/26 21:31:25 WARN MemoryStore: Not enough space to cache rdd_102_33 in memory! (computed 31.5 MiB so far)
22/12/26 21:31:25 WARN MemoryStore: Not enough space to cache rdd_102_31 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:25 WARN MemoryStore: Not enough space to cache rdd_102_34 in memory! (computed 71.5 MiB so far)




22/12/26 21:31:25 WARN MemoryStore: Not enough space to cache rdd_102_35 in memory! (computed 381.3 MiB so far)


                                                                                

22/12/26 21:31:26 WARN MemoryStore: Not enough space to cache rdd_102_4 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:26 WARN MemoryStore: Not enough space to cache rdd_102_7 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:26 WARN MemoryStore: Not enough space to cache rdd_102_1 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:26 WARN MemoryStore: Not enough space to cache rdd_102_3 in memory! (computed 31.5 MiB so far)
22/12/26 21:31:26 WARN MemoryStore: Not enough space to cache rdd_102_5 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:26 WARN MemoryStore: Not enough space to cache rdd_102_6 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:26 WARN MemoryStore: Not enough space to cache rdd_102_2 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:26 WARN MemoryStore: Not enough space to cache rdd_102_0 in memory! (computed 71.5 MiB so far)




22/12/26 21:31:28 WARN MemoryStore: Not enough space to cache rdd_102_8 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:28 WARN MemoryStore: Not enough space to cache rdd_102_12 in memory! (computed 13.7 MiB so far)
22/12/26 21:31:28 WARN MemoryStore: Not enough space to cache rdd_102_10 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:28 WARN MemoryStore: Not enough space to cache rdd_102_11 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:28 WARN MemoryStore: Not enough space to cache rdd_102_13 in memory! (computed 13.7 MiB so far)
22/12/26 21:31:28 WARN MemoryStore: Not enough space to cache rdd_102_15 in memory! (computed 3.8 MiB so far)
22/12/26 21:31:28 WARN MemoryStore: Not enough space to cache rdd_102_14 in memory! (computed 1644.5 KiB so far)
22/12/26 21:31:28 WARN MemoryStore: Not enough space to cache rdd_102_9 in memory! (computed 169.5 MiB so far)




22/12/26 21:31:29 WARN MemoryStore: Not enough space to cache rdd_102_17 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:29 WARN MemoryStore: Not enough space to cache rdd_102_18 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:29 WARN MemoryStore: Not enough space to cache rdd_102_16 in memory! (computed 254.2 MiB so far)




22/12/26 21:31:29 WARN MemoryStore: Not enough space to cache rdd_102_19 in memory! (computed 1644.5 KiB so far)




22/12/26 21:31:30 WARN MemoryStore: Not enough space to cache rdd_102_22 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:30 WARN MemoryStore: Not enough space to cache rdd_102_21 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:30 WARN MemoryStore: Not enough space to cache rdd_102_20 in memory! (computed 254.2 MiB so far)




22/12/26 21:31:31 WARN MemoryStore: Not enough space to cache rdd_102_23 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:31 WARN MemoryStore: Not enough space to cache rdd_102_24 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:31 WARN MemoryStore: Not enough space to cache rdd_102_25 in memory! (computed 1644.5 KiB so far)
22/12/26 21:31:31 WARN MemoryStore: Failed to reserve initial memory threshold of 1024.0 KiB for computing block rdd_102_26 in memory.
22/12/26 21:31:31 WARN MemoryStore: Not enough space to cache rdd_102_26 in memory! (computed 384.0 B so far)




22/12/26 21:31:31 WARN MemoryStore: Not enough space to cache rdd_102_28 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:31 WARN MemoryStore: Not enough space to cache rdd_102_29 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:31 WARN MemoryStore: Not enough space to cache rdd_102_30 in memory! (computed 21.0 MiB so far)




22/12/26 21:31:32 WARN MemoryStore: Not enough space to cache rdd_102_31 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:32 WARN MemoryStore: Not enough space to cache rdd_102_32 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:32 WARN MemoryStore: Not enough space to cache rdd_102_33 in memory! (computed 8.7 MiB so far)




22/12/26 21:31:32 WARN MemoryStore: Not enough space to cache rdd_102_35 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:32 WARN MemoryStore: Not enough space to cache rdd_102_34 in memory! (computed 111.0 MiB so far)


                                                                                

22/12/26 21:31:34 WARN MemoryStore: Not enough space to cache rdd_102_3 in memory! (computed 13.7 MiB so far)
22/12/26 21:31:34 WARN MemoryStore: Not enough space to cache rdd_102_0 in memory! (computed 5.8 MiB so far)
22/12/26 21:31:34 WARN MemoryStore: Not enough space to cache rdd_102_7 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:34 WARN MemoryStore: Not enough space to cache rdd_102_6 in memory! (computed 31.5 MiB so far)
22/12/26 21:31:34 WARN MemoryStore: Not enough space to cache rdd_102_2 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:34 WARN MemoryStore: Not enough space to cache rdd_102_4 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:34 WARN MemoryStore: Not enough space to cache rdd_102_1 in memory! (computed 3.8 MiB so far)
22/12/26 21:31:34 WARN MemoryStore: Not enough space to cache rdd_102_5 in memory! (computed 21.0 MiB so far)


[Stage 35:>                                                        (0 + 8) / 36]

22/12/26 21:31:35 WARN MemoryStore: Not enough space to cache rdd_102_12 in memory! (computed 3.8 MiB so far)
22/12/26 21:31:35 WARN MemoryStore: Not enough space to cache rdd_102_10 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:35 WARN MemoryStore: Not enough space to cache rdd_102_11 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:35 WARN MemoryStore: Not enough space to cache rdd_102_9 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:35 WARN MemoryStore: Not enough space to cache rdd_102_8 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:35 WARN MemoryStore: Not enough space to cache rdd_102_13 in memory! (computed 3.8 MiB so far)




22/12/26 21:31:35 WARN MemoryStore: Not enough space to cache rdd_102_14 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:35 WARN MemoryStore: Not enough space to cache rdd_102_15 in memory! (computed 111.0 MiB so far)




22/12/26 21:31:36 WARN MemoryStore: Not enough space to cache rdd_102_16 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:36 WARN MemoryStore: Not enough space to cache rdd_102_18 in memory! (computed 8.7 MiB so far)
22/12/26 21:31:36 WARN MemoryStore: Not enough space to cache rdd_102_17 in memory! (computed 21.0 MiB so far)




22/12/26 21:31:37 WARN MemoryStore: Not enough space to cache rdd_102_19 in memory! (computed 111.0 MiB so far)




22/12/26 21:31:37 WARN MemoryStore: Not enough space to cache rdd_102_20 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:37 WARN MemoryStore: Not enough space to cache rdd_102_22 in memory! (computed 8.7 MiB so far)
22/12/26 21:31:37 WARN MemoryStore: Not enough space to cache rdd_102_21 in memory! (computed 21.0 MiB so far)




22/12/26 21:31:38 WARN MemoryStore: Not enough space to cache rdd_102_23 in memory! (computed 111.0 MiB so far)
22/12/26 21:31:38 WARN MemoryStore: Not enough space to cache rdd_102_24 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:38 WARN MemoryStore: Not enough space to cache rdd_102_25 in memory! (computed 8.7 MiB so far)




22/12/26 21:31:38 WARN MemoryStore: Not enough space to cache rdd_102_26 in memory! (computed 111.0 MiB so far)




22/12/26 21:31:38 WARN MemoryStore: Not enough space to cache rdd_102_28 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:38 WARN MemoryStore: Not enough space to cache rdd_102_29 in memory! (computed 47.3 MiB so far)




22/12/26 21:31:39 WARN MemoryStore: Not enough space to cache rdd_102_30 in memory! (computed 71.5 MiB so far)
22/12/26 21:31:39 WARN MemoryStore: Not enough space to cache rdd_102_31 in memory! (computed 47.3 MiB so far)
22/12/26 21:31:39 WARN MemoryStore: Not enough space to cache rdd_102_32 in memory! (computed 13.7 MiB so far)




22/12/26 21:31:39 WARN MemoryStore: Not enough space to cache rdd_102_34 in memory! (computed 13.7 MiB so far)
22/12/26 21:31:39 WARN MemoryStore: Not enough space to cache rdd_102_35 in memory! (computed 21.0 MiB so far)
22/12/26 21:31:39 WARN MemoryStore: Not enough space to cache rdd_102_33 in memory! (computed 111.0 MiB so far)


                                                                                

In [34]:
# Score the held-out test split with the trained model.
predictions = model.transform(test)

# Compute accuracy by comparing the 'prediction' column against 'label'.
evaluator = MulticlassClassificationEvaluator(
    labelCol="label",
    predictionCol="prediction",
    metricName="accuracy",
)
accuracy = evaluator.evaluate(predictions)
test_error = 1.0 - accuracy

# Report the error rate and the accuracy.
print("Test Error = %g " % test_error)
print("Accuracy = %g " % accuracy)





Test Error = 0.213845 
Accuracy = 0.786155 


                                                                                