Skip to content

Conversation

@hiroshi-cl
Copy link
Contributor

各split strategyでexplainを出力するコード例です

sbt "runMain jp.cedretaber.minispark.FlowJoinExplain"

Iterator

== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- Union
   :- ShuffledHashJoin [lang_id#153], [id#165], Inner, BuildRight
   :  :- Exchange hashpartitioning(lang_id#153, 200), ENSURE_REQUIREMENTS, [plan_id=275]
   :  :  +- LocalTableScan [id#150, name#151, user_id#152, lang_id#153]
   :  +- Exchange hashpartitioning(id#165, 200), ENSURE_REQUIREMENTS, [plan_id=276]
   :     +- LocalTableScan [id#165, name#166]
   +- BroadcastHashJoin [lang_id#145], [id#161], Inner, BuildRight, false
      :- LocalTableScan [id#142, name#143, user_id#144, lang_id#145]
      +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=278]
         +- LocalTableScan [id#161, name#162]

GroupBy

== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- Union
   :- ShuffledHashJoin [lang_id#247], [id#256], Inner, BuildRight
   :  :- Exchange hashpartitioning(lang_id#247, 200), ENSURE_REQUIREMENTS, [plan_id=442]
   :  :  +- LocalTableScan [id#244, name#245, user_id#246, lang_id#247]
   :  +- Exchange hashpartitioning(id#256, 200), ENSURE_REQUIREMENTS, [plan_id=443]
   :     +- LocalTableScan [id#256, name#257]
   +- BroadcastHashJoin [lang_id#239], [id#252], Inner, BuildRight, false
      :- LocalTableScan [id#236, name#237, user_id#238, lang_id#239]
      +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=445]
         +- LocalTableScan [id#252, name#253]

Filter

== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- Union
   :- ShuffledHashJoin [lang_id#84], [id#50], Inner, BuildRight
   :  :- Exchange hashpartitioning(lang_id#84, 200), ENSURE_REQUIREMENTS, [plan_id=631]
   :  :  +- Filter isnotnull(lang_id#84)
   :  :     +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8698/0x0000000802b73840@6065abaf.apply
   :  :        +- Union
   :  :           :- ShuffledHashJoin [id#17], [user_id#83], Inner, BuildRight
   :  :           :  :- Exchange hashpartitioning(id#17, 200), ENSURE_REQUIREMENTS, [plan_id=621]
   :  :           :  :  +- Filter isnotnull(id#17)
   :  :           :  :     +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8698/0x0000000802b73840@201f889a.apply
   :  :           :  :        +- FileScan csv [id#17,name#18] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:string,name:string>
   :  :           :  +- Exchange hashpartitioning(user_id#83, 200), ENSURE_REQUIREMENTS, [plan_id=622]
   :  :           :     +- Filter isnotnull(user_id#83)
   :  :           :        +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8698/0x0000000802b73840@7e9cece6.apply
   :  :           :           +- FileScan csv [user_id#83,lang_id#84] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<user_id:string,lang_id:string>
   :  :           +- BroadcastHashJoin [id#306], [user_id#308], Inner, BuildRight, false
   :  :              :- Filter isnotnull(id#306)
   :  :              :  +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8697/0x0000000802b73040@2bd13033.apply
   :  :              :     +- FileScan csv [id#306,name#307] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:string,name:string>
   :  :              +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, false]),false), [plan_id=624]
   :  :                 +- Filter isnotnull(user_id#308)
   :  :                    +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8697/0x0000000802b73040@66a4ce35.apply
   :  :                       +- FileScan csv [user_id#308,lang_id#309] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<user_id:string,lang_id:string>
   :  +- Exchange hashpartitioning(id#50, 200), ENSURE_REQUIREMENTS, [plan_id=632]
   :     +- Filter isnotnull(id#50)
   :        +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8698/0x0000000802b73840@3a23d123.apply
   :           +- FileScan csv [id#50,name#51] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/lan..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:string,name:string>
   +- BroadcastHashJoin [lang_id#343], [id#348], Inner, BuildRight, false
      :- Filter isnotnull(lang_id#343)
      :  +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8697/0x0000000802b73040@2341f15d.apply
      :     +- Union
      :        :- ShuffledHashJoin [id#340], [user_id#342], Inner, BuildRight
      :        :  :- Exchange hashpartitioning(id#340, 200), ENSURE_REQUIREMENTS, [plan_id=635]
      :        :  :  +- Filter isnotnull(id#340)
      :        :  :     +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8698/0x0000000802b73840@201f889a.apply
      :        :  :        +- FileScan csv [id#340,name#341] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:string,name:string>
      :        :  +- Exchange hashpartitioning(user_id#342, 200), ENSURE_REQUIREMENTS, [plan_id=636]
      :        :     +- Filter isnotnull(user_id#342)
      :        :        +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8698/0x0000000802b73840@7e9cece6.apply
      :        :           +- FileScan csv [user_id#342,lang_id#343] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<user_id:string,lang_id:string>
      :        +- BroadcastHashJoin [id#344], [user_id#346], Inner, BuildRight, false
      :           :- Filter isnotnull(id#344)
      :           :  +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8697/0x0000000802b73040@2bd13033.apply
      :           :     +- FileScan csv [id#344,name#345] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:string,name:string>
      :           +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, false]),false), [plan_id=638]
      :              +- Filter isnotnull(user_id#346)
      :                 +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8697/0x0000000802b73040@66a4ce35.apply
      :                    +- FileScan csv [user_id#346,lang_id#347] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<user_id:string,lang_id:string>
      +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, false]),false), [plan_id=644]
         +- Filter isnotnull(id#348)
            +- Filter jp.cedretaber.minispark.flowJoinStrategy.FilterSplitStrategy$$$Lambda$8697/0x0000000802b73040@1331dae.apply
               +- FileScan csv [id#348,name#349] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/lan..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:string,name:string>

Join

== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- Union
   :- ShuffledHashJoin [lang_id#84], [id#50], Inner, BuildRight
   :  :- Exchange hashpartitioning(lang_id#84, 200), ENSURE_REQUIREMENTS, [plan_id=925]
   :  :  +- Union
   :  :     :- ShuffledHashJoin [id#17], [user_id#83], Inner, BuildRight
   :  :     :  :- Exchange hashpartitioning(id#17, 200), ENSURE_REQUIREMENTS, [plan_id=907]
   :  :     :  :  +- BroadcastHashJoin [id#17], [id#361], LeftAnti, BuildRight, false
   :  :     :  :     :- Filter isnotnull(id#17)
   :  :     :  :     :  +- FileScan csv [id#17,name#18] Batched: false, DataFilters: [isnotnull(id#17)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct<id:string,name:string>
   :  :     :  :     +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=898]
   :  :     :  :        +- LocalTableScan [id#361]
   :  :     :  +- Exchange hashpartitioning(user_id#83, 200), ENSURE_REQUIREMENTS, [plan_id=908]
   :  :     :     +- BroadcastHashJoin [lang_id#84], [id#408], LeftAnti, BuildRight, false
   :  :     :        :- BroadcastHashJoin [user_id#83], [id#379], LeftAnti, BuildRight, false
   :  :     :        :  :- Filter (isnotnull(user_id#83) AND isnotnull(lang_id#84))
   :  :     :        :  :  +- FileScan csv [user_id#83,lang_id#84] Batched: false, DataFilters: [isnotnull(user_id#83), isnotnull(lang_id#84)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [IsNotNull(user_id), IsNotNull(lang_id)], ReadSchema: struct<user_id:string,lang_id:string>
   :  :     :        :  +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=900]
   :  :     :        :     +- LocalTableScan [id#379]
   :  :     :        +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=903]
   :  :     :           +- LocalTableScan [id#408]
   :  :     +- BroadcastHashJoin [id#397], [user_id#400], Inner, BuildRight, false
   :  :        :- BroadcastHashJoin [id#397], [id#399], LeftSemi, BuildRight, false
   :  :        :  :- Filter isnotnull(id#397)
   :  :        :  :  +- FileScan csv [id#397,name#398] Batched: false, DataFilters: [isnotnull(id#397)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct<id:string,name:string>
   :  :        :  +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=910]
   :  :        :     +- LocalTableScan [id#399]
   :  :        +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, false]),false), [plan_id=918]
   :  :           +- BroadcastHashJoin [lang_id#401], [id#408], LeftAnti, BuildRight, false
   :  :              :- BroadcastHashJoin [user_id#400], [id#388], LeftSemi, BuildRight, false
   :  :              :  :- Filter (isnotnull(user_id#400) AND isnotnull(lang_id#401))
   :  :              :  :  +- FileScan csv [user_id#400,lang_id#401] Batched: false, DataFilters: [isnotnull(user_id#400), isnotnull(lang_id#401)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [IsNotNull(user_id), IsNotNull(lang_id)], ReadSchema: struct<user_id:string,lang_id:string>
   :  :              :  +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=912]
   :  :              :     +- LocalTableScan [id#388]
   :  :              +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=915]
   :  :                 +- LocalTableScan [id#408]
   :  +- Exchange hashpartitioning(id#50, 200), ENSURE_REQUIREMENTS, [plan_id=926]
   :     +- BroadcastHashJoin [id#50], [id#434], LeftAnti, BuildRight, false
   :        :- Filter isnotnull(id#50)
   :        :  +- FileScan csv [id#50,name#51] Batched: false, DataFilters: [isnotnull(id#50)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/lan..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct<id:string,name:string>
   :        +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=921]
   :           +- LocalTableScan [id#434]
   +- BroadcastHashJoin [lang_id#464], [id#473], Inner, BuildRight, false
      :- Union
      :  :- ShuffledHashJoin [id#460], [user_id#463], Inner, BuildRight
      :  :  :- Exchange hashpartitioning(id#460, 200), ENSURE_REQUIREMENTS, [plan_id=937]
      :  :  :  +- BroadcastHashJoin [id#460], [id#462], LeftAnti, BuildRight, false
      :  :  :     :- Filter isnotnull(id#460)
      :  :  :     :  +- FileScan csv [id#460,name#461] Batched: false, DataFilters: [isnotnull(id#460)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct<id:string,name:string>
      :  :  :     +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=928]
      :  :  :        +- LocalTableScan [id#462]
      :  :  +- Exchange hashpartitioning(user_id#463, 200), ENSURE_REQUIREMENTS, [plan_id=938]
      :  :     +- BroadcastHashJoin [lang_id#464], [id#472], LeftSemi, BuildRight, false
      :  :        :- BroadcastHashJoin [user_id#463], [id#465], LeftAnti, BuildRight, false
      :  :        :  :- Filter (isnotnull(lang_id#464) AND isnotnull(user_id#463))
      :  :        :  :  +- FileScan csv [user_id#463,lang_id#464] Batched: false, DataFilters: [isnotnull(lang_id#464), isnotnull(user_id#463)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [IsNotNull(lang_id), IsNotNull(user_id)], ReadSchema: struct<user_id:string,lang_id:string>
      :  :        :  +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=930]
      :  :        :     +- LocalTableScan [id#465]
      :  :        +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=933]
      :  :           +- LocalTableScan [id#472]
      :  +- BroadcastHashJoin [id#466], [user_id#469], Inner, BuildRight, false
      :     :- BroadcastHashJoin [id#466], [id#468], LeftSemi, BuildRight, false
      :     :  :- Filter isnotnull(id#466)
      :     :  :  +- FileScan csv [id#466,name#467] Batched: false, DataFilters: [isnotnull(id#466)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct<id:string,name:string>
      :     :  +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=940]
      :     :     +- LocalTableScan [id#468]
      :     +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, false]),false), [plan_id=948]
      :        +- BroadcastHashJoin [lang_id#470], [id#472], LeftSemi, BuildRight, false
      :           :- BroadcastHashJoin [user_id#469], [id#471], LeftSemi, BuildRight, false
      :           :  :- Filter (isnotnull(user_id#469) AND isnotnull(lang_id#470))
      :           :  :  +- FileScan csv [user_id#469,lang_id#470] Batched: false, DataFilters: [isnotnull(user_id#469), isnotnull(lang_id#470)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/use..., PartitionFilters: [], PushedFilters: [IsNotNull(user_id), IsNotNull(lang_id)], ReadSchema: struct<user_id:string,lang_id:string>
      :           :  +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=942]
      :           :     +- LocalTableScan [id#471]
      :           +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=945]
      :              +- LocalTableScan [id#472]
      +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, false]),false), [plan_id=954]
         +- BroadcastHashJoin [id#473], [id#447], LeftSemi, BuildRight, false
            :- Filter isnotnull(id#473)
            :  +- FileScan csv [id#473,name#474] Batched: false, DataFilters: [isnotnull(id#473)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/****/spark_sample/src/main/resources/lan..., PartitionFilters: [], PushedFilters: [IsNotNull(id)], ReadSchema: struct<id:string,name:string>
            +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=951]
               +- LocalTableScan [id#447]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant