# Building a Knowledge Graph

In [1]:
import pyspark as sp
import nltk
import numpy as np
from nltk.tokenize import TreebankWordTokenizer
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
import string

### Required for environment variable

In [2]:
import os
os.environ["PYSPARK_PYTHON"]="/usr/local/bin/python3"
os.environ["PYSPARK_DRIVER_PYTHON"]="/usr/local/bin/python3"

### Setup cluster

In [3]:
from pyspark.sql import SparkSession

spark = SparkSession \
    .builder \
    .appName("Python Spark Sentiment Analysis example") \
    .config("spark.some.config.option", "some-value") \
    .getOrCreate()

###  Load dataset

In [4]:
df_keywords = spark.read.csv("./mag_cs_keywords.csv",header=True)
df_arxiv = spark.read.json("./arxiv-metadata-oai-snapshot.json")

In [5]:
abstracts = df_arxiv.select("abstract")
keywords = df_keywords.select("normalizedName")

### Cleaning & Normalization

In [6]:
from pyspark.sql.functions import udf, col
from pyspark.sql import Row
from pyspark.sql.types import ArrayType, StructField, StructType, StringType, IntegerType

# remove non ASCII characters & lowercase
def strip_non_ascii(data_str):
    ''' Returns the string without non ASCII characters'''
    stripped = (c.lower() for c in data_str if 0 < ord(c) < 127)

    return ''.join(stripped)
# setup pyspark udf function
strip_non_ascii_udf = udf(strip_non_ascii, StringType())

In [7]:
abstracts = abstracts.withColumn('normalized', strip_non_ascii_udf(abstracts['abstract']))
abstracts = abstracts.select("normalized")

1. Extract keywords in document
    - https://stackoverflow.com/questions/48869922/how-to-efficiently-check-if-a-list-of-words-is-contained-in-a-spark-dataframe
    - https://stackoverflow.com/questions/46410887/pyspark-string-matching-to-create-new-column

   
2. Build Co-occurence matrix
    - https://stackoverflow.com/questions/48551900/spark-generate-occurrence-matrix
   
   
3. Useful tutorials
   - https://spark.apache.org/docs/1.6.3/ml-features.html

In [9]:
keywords_list = list(keywords.select('normalizedName').toPandas()['normalizedName'])

In [None]:
keywords.filter(keywords.normalizedName.contains('a')).collect()

In [None]:
# data = [('60 ML of paracetomol and 0.5 ML of XYZ',)]
# df = sc.parallelize(data).toDF('str:string')

# Define the function you want to return
def extract(s):
    all_matches = list()
    for keyword in keywords_list:
        all_matches.extend(re.findall(keyword, s))
    return all_matches

# Create the UDF, note that you need to declare the return schema matching the returned type
extract_udf = udf(extract, ArrayType(StringType()))

# Apply it
df = abstracts.withColumn('extracted', extract_udf(abstracts['normalized']))

In [219]:
from pyspark.sql.functions import regexp_extract, col, concat, lit, when
from collections import Counter
from collections import defaultdict

corpus, co_occurence, documents = Counter(), Counter(), defaultdict(set)
df = abstracts.withColumn('extracted_words', regexp_extract('normalized', keywords_list[0], 0))

# keywords_list2 = ["a", "describe", "paper", "algorithm"]

for i, keyword in enumerate(keywords_list[1:],1):
    print("Working on {}".format(i))
#     df_temp = abstracts.withColumn('extracted_temp', regexp_extract('normalized', keywords_list[1], 0))
#     df = df_temp.withColumn('extracted_words2', concat(col('extracted_words'), lit(' '), col('extracted_temp')))
    df_temp = df.withColumn('extracted_temp', regexp_extract('normalized', keyword, 0))
    df = df_temp.withColumn('extracted_words', when(col('extracted_temp') != '', concat(col('extracted_words'), lit(",") ,col('extracted_temp'))).otherwise(col('extracted_words')))

#     df_temp3 = df_temp2.withColumn('extracted_temp', regexp_extract('normalized', "different", 0))
#     df_temp4 = df_temp3.withColumn('extracted_words', concat(col('extracted_words'), lit(' '), col('extracted_temp')))



    
    #     df = df.withColumn('joined_column', sf.concat(sf.col('colname1'),sf.lit('_'), sf.col('colname2')))
#     extracted_temp = list(df.select('extracted_words').toPandas()['extracted_words'])
    
    # update corpus, co_occurence, documents
#     doc_num = 0
#     for v in extracted_temp:
#         if not v:
#             continue
#         corpus[v] += 1
#         documents[doc_num].add(v)
#         doc_num += 1

Working on 1
Working on 2
Working on 3
Working on 4
Working on 5
Working on 6
Working on 7
Working on 8
Working on 9
Working on 10
Working on 11
Working on 12
Working on 13
Working on 14
Working on 15
Working on 16
Working on 17
Working on 18
Working on 19
Working on 20
Working on 21
Working on 22
Working on 23
Working on 24
Working on 25
Working on 26
Working on 27
Working on 28
Working on 29
Working on 30
Working on 31
Working on 32
Working on 33
Working on 34
Working on 35
Working on 36
Working on 37
Working on 38
Working on 39
Working on 40
Working on 41
Working on 42
Working on 43
Working on 44
Working on 45
Working on 46
Working on 47
Working on 48
Working on 49
Working on 50
Working on 51
Working on 52
Working on 53
Working on 54
Working on 55
Working on 56
Working on 57
Working on 58
Working on 59
Working on 60
Working on 61
Working on 62
Working on 63
Working on 64
Working on 65
Working on 66
Working on 67
Working on 68
Working on 69
Working on 70
Working on 71
Working on 72
W

Working on 555
Working on 556
Working on 557
Working on 558
Working on 559
Working on 560
Working on 561
Working on 562
Working on 563
Working on 564
Working on 565
Working on 566
Working on 567
Working on 568
Working on 569
Working on 570
Working on 571
Working on 572
Working on 573
Working on 574
Working on 575
Working on 576
Working on 577
Working on 578
Working on 579
Working on 580
Working on 581
Working on 582
Working on 583
Working on 584
Working on 585
Working on 586
Working on 587
Working on 588
Working on 589
Working on 590
Working on 591
Working on 592
Working on 593
Working on 594
Working on 595
Working on 596
Working on 597
Working on 598
Working on 599
Working on 600
Working on 601
Working on 602
Working on 603
Working on 604
Working on 605
Working on 606
Working on 607
Working on 608
Working on 609
Working on 610
Working on 611
Working on 612
Working on 613
Working on 614
Working on 615
Working on 616
Working on 617
Working on 618
Working on 619
Working on 620
Working on

Working on 1098
Working on 1099
Working on 1100
Working on 1101
Working on 1102
Working on 1103
Working on 1104
Working on 1105
Working on 1106
Working on 1107
Working on 1108
Working on 1109
Working on 1110
Working on 1111
Working on 1112
Working on 1113
Working on 1114
Working on 1115
Working on 1116
Working on 1117
Working on 1118
Working on 1119
Working on 1120
Working on 1121
Working on 1122
Working on 1123
Working on 1124
Working on 1125
Working on 1126
Working on 1127
Working on 1128
Working on 1129
Working on 1130
Working on 1131
Working on 1132
Working on 1133
Working on 1134
Working on 1135
Working on 1136
Working on 1137
Working on 1138
Working on 1139
Working on 1140
Working on 1141
Working on 1142
Working on 1143
Working on 1144
Working on 1145
Working on 1146
Working on 1147
Working on 1148
Working on 1149
Working on 1150
Working on 1151
Working on 1152
Working on 1153
Working on 1154
Working on 1155
Working on 1156
Working on 1157
Working on 1158
Working on 1159
Working 

Working on 1611
Working on 1612
Working on 1613
Working on 1614
Working on 1615
Working on 1616
Working on 1617
Working on 1618
Working on 1619
Working on 1620
Working on 1621
Working on 1622
Working on 1623
Working on 1624
Working on 1625
Working on 1626
Working on 1627
Working on 1628
Working on 1629
Working on 1630
Working on 1631
Working on 1632
Working on 1633
Working on 1634
Working on 1635
Working on 1636
Working on 1637
Working on 1638
Working on 1639
Working on 1640
Working on 1641
Working on 1642
Working on 1643
Working on 1644
Working on 1645
Working on 1646
Working on 1647
Working on 1648
Working on 1649
Working on 1650
Working on 1651
Working on 1652
Working on 1653
Working on 1654
Working on 1655
Working on 1656
Working on 1657
Working on 1658
Working on 1659
Working on 1660
Working on 1661
Working on 1662
Working on 1663
Working on 1664
Working on 1665
Working on 1666
Working on 1667
Working on 1668
Working on 1669
Working on 1670
Working on 1671
Working on 1672
Working 

Working on 2124
Working on 2125
Working on 2126
Working on 2127
Working on 2128
Working on 2129
Working on 2130
Working on 2131
Working on 2132
Working on 2133
Working on 2134
Working on 2135
Working on 2136
Working on 2137
Working on 2138
Working on 2139
Working on 2140
Working on 2141
Working on 2142
Working on 2143
Working on 2144
Working on 2145
Working on 2146
Working on 2147
Working on 2148
Working on 2149
Working on 2150
Working on 2151
Working on 2152
Working on 2153
Working on 2154
Working on 2155
Working on 2156
Working on 2157
Working on 2158
Working on 2159
Working on 2160
Working on 2161
Working on 2162
Working on 2163
Working on 2164
Working on 2165
Working on 2166
Working on 2167
Working on 2168
Working on 2169
Working on 2170
Working on 2171
Working on 2172
Working on 2173
Working on 2174
Working on 2175
Working on 2176
Working on 2177
Working on 2178
Working on 2179
Working on 2180
Working on 2181
Working on 2182
Working on 2183
Working on 2184
Working on 2185
Working 

Working on 2637
Working on 2638
Working on 2639
Working on 2640
Working on 2641
Working on 2642
Working on 2643
Working on 2644
Working on 2645
Working on 2646
Working on 2647
Working on 2648
Working on 2649
Working on 2650
Working on 2651
Working on 2652
Working on 2653
Working on 2654
Working on 2655
Working on 2656
Working on 2657
Working on 2658
Working on 2659
Working on 2660
Working on 2661
Working on 2662
Working on 2663
Working on 2664
Working on 2665
Working on 2666
Working on 2667
Working on 2668
Working on 2669
Working on 2670
Working on 2671
Working on 2672
Working on 2673
Working on 2674
Working on 2675
Working on 2676
Working on 2677
Working on 2678
Working on 2679
Working on 2680
Working on 2681
Working on 2682
Working on 2683
Working on 2684
Working on 2685
Working on 2686
Working on 2687
Working on 2688
Working on 2689
Working on 2690
Working on 2691
Working on 2692
Working on 2693
Working on 2694
Working on 2695
Working on 2696
Working on 2697
Working on 2698
Working 

Working on 3151
Working on 3152
Working on 3153
Working on 3154
Working on 3155
Working on 3156
Working on 3157
Working on 3158
Working on 3159
Working on 3160
Working on 3161
Working on 3162
Working on 3163
Working on 3164
Working on 3165
Working on 3166
Working on 3167
Working on 3168
Working on 3169
Working on 3170
Working on 3171
Working on 3172
Working on 3173
Working on 3174
Working on 3175
Working on 3176
Working on 3177
Working on 3178
Working on 3179
Working on 3180
Working on 3181
Working on 3182
Working on 3183
Working on 3184
Working on 3185
Working on 3186
Working on 3187
Working on 3188
Working on 3189
Working on 3190
Working on 3191
Working on 3192
Working on 3193
Working on 3194
Working on 3195
Working on 3196
Working on 3197
Working on 3198
Working on 3199
Working on 3200
Working on 3201
Working on 3202
Working on 3203
Working on 3204
Working on 3205
Working on 3206
Working on 3207
Working on 3208
Working on 3209
Working on 3210
Working on 3211
Working on 3212
Working 

Working on 3665
Working on 3666
Working on 3667
Working on 3668
Working on 3669
Working on 3670
Working on 3671
Working on 3672
Working on 3673
Working on 3674
Working on 3675
Working on 3676
Working on 3677
Working on 3678
Working on 3679
Working on 3680
Working on 3681
Working on 3682
Working on 3683
Working on 3684
Working on 3685
Working on 3686
Working on 3687
Working on 3688
Working on 3689
Working on 3690
Working on 3691
Working on 3692
Working on 3693
Working on 3694
Working on 3695
Working on 3696
Working on 3697
Working on 3698


Py4JJavaError: An error occurred while calling o73494.withColumn.
: java.lang.StackOverflowError
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:156)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)
	at scala.Option.orElse(Option.scala:447)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$1(TreeNode.scala:159)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:89)
	at org.apache.spark.sql.catalyst.trees.TreeNode.find(TreeNode.scala:159)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$find$2(TreeNode.scala:159)


df.write.csv

In [218]:
df.select('extracted_words').take(20)
# df.show()

[Row(extracted_words='fully,a'),
 Row(extracted_words=',a,describe,algorithm'),
 Row(extracted_words=',a,describe'),
 Row(extracted_words=',a'),
 Row(extracted_words=',a,paper'),
 Row(extracted_words=',a,describe'),
 Row(extracted_words=',a,paper'),
 Row(extracted_words=',a'),
 Row(extracted_words=',a,describe'),
 Row(extracted_words=',a,paper'),
 Row(extracted_words=',a,paper,algorithm'),
 Row(extracted_words=',a,paper'),
 Row(extracted_words=',a,paper'),
 Row(extracted_words=',a'),
 Row(extracted_words=',a,paper'),
 Row(extracted_words='fully,a'),
 Row(extracted_words=',a'),
 Row(extracted_words=',a'),
 Row(extracted_words=',a'),
 Row(extracted_words=',a')]

In [74]:
df_temp = df.withColumn('extracted_temp', regexp_extract('normalized', "fully", 0))
df_temp2 = df_temp.withColumn('extracted_words', concat(col('extracted_words'), lit(' '),col('extracted_temp')))

df_temp3 = df_temp2.withColumn('extracted_temp', regexp_extract('normalized', "different", 0))
df_temp4 = df_temp3.withColumn('extracted_words', concat(col('extracted_words'), lit(' '), col('extracted_temp')))




In [75]:
df_temp4.show()

+--------------------+----------------+--------------+
|          normalized| extracted_words|extracted_temp|
+--------------------+----------------+--------------+
|  a fully differe...| fully different|     different|
|  we describe a n...|                |              |
|  the evolution o...|                |              |
|  we show that a ...|                |              |
|  in this paper w...|                |              |
|  we study the tw...|                |              |
|  a rather non-st...|       different|     different|
|  a general formu...|       different|     different|
|  we discuss the ...|                |              |
|  partial cubes a...|                |              |
|  in this paper w...|                |              |
|  recently, bruin...|                |              |
|  serre obtained ...|                |              |
|  in this article...|       different|     different|
|  the pure spinor...|       different|     different|
|  in this

In [31]:
corpus

Counter({'fully': 70695})

https://towardsdatascience.com/solving-combinatorial-problems-with-pyspark-fad433b1fca0

In [None]:
from flashtext import KeywordProcessor

kp = KeywordProcessor()
keywords = ['abc']
for keyword in keywords:
    kp.add_keyword(keyword)

def extractKeywords(menu_name, kp=kp):
    keywords = kp.extract_keywords(menu_name)
    return ''.join(keywords)
    
extractKeywords_udf = udf(extractKeywords, StringType())

In [None]:
df2 = df.withColumn("extracted_keyword", udf(abstracts["normalized"]))