# Install lib

In [2]:
!pip install kafka-python
%pip install pyspark

Note: you may need to restart the kernel to use updated packages.


In [3]:
from kafka import KafkaConsumer
import json
import pandas as pd

In [4]:
data = []

In [5]:
# use this function to print the json with the indent=4
def jprint(data):
  print(json.dumps(data,indent=4))

# Kafka Consumer

In [6]:
consumer = KafkaConsumer(
    '2020',
    bootstrap_servers=['localhost:9092'],
    auto_offset_reset='earliest',
    value_deserializer=lambda x: json.loads(x.decode('utf-8')),
)

print("Starting the consumer...")

for message in consumer:
    data.append(message.value)
    print(f"Received: {message.value}")

print("get all data from 2020, done")

Starting the consumer...
Received: {'id': '202000143', 'publicDate': '01/12/2020', 'source': 1, 'coAuthorship': 6, 'citationCount': '7', 'refCount': '27', 'Class': ['MEDI', 'HEAL']}
Received: {'id': '202000188', 'publicDate': '01/12/2020', 'source': 1, 'coAuthorship': 23, 'citationCount': '20', 'refCount': '35', 'Class': ['MEDI']}
Received: {'id': '202000144', 'publicDate': '01/12/2020', 'source': 1, 'coAuthorship': 7, 'citationCount': '11', 'refCount': '32', 'Class': ['ENER', 'ENGI', 'ENVI']}
Received: {'id': '202000186', 'publicDate': '01/12/2020', 'source': 1, 'coAuthorship': 5, 'citationCount': '13', 'refCount': '84', 'Class': ['BUSI', 'COMP', 'SOCI', 'ECON']}
Received: {'id': '202000172', 'publicDate': '01/12/2020', 'source': 1, 'coAuthorship': 8, 'citationCount': '11', 'refCount': '35', 'Class': ['MEDI']}
Received: {'id': '202000175', 'publicDate': '01/12/2020', 'source': 1, 'coAuthorship': 10, 'citationCount': '4', 'refCount': '22', 'Class': ['MEDI', 'IMMU', 'VETE']}
Received: {

KeyboardInterrupt: 

# Spark for processing

In [39]:
jprint(data)

[
    {
        "id": "202000143",
        "publicDate": "01/12/2020",
        "source": 1,
        "coAuthorship": 6,
        "citationCount": "7",
        "refCount": "27",
        "Class": [
            "MEDI",
            "HEAL"
        ]
    },
    {
        "id": "202000188",
        "publicDate": "01/12/2020",
        "source": 1,
        "coAuthorship": 23,
        "citationCount": "20",
        "refCount": "35",
        "Class": [
            "MEDI"
        ]
    },
    {
        "id": "202000144",
        "publicDate": "01/12/2020",
        "source": 1,
        "coAuthorship": 7,
        "citationCount": "11",
        "refCount": "32",
        "Class": [
            "ENER",
            "ENGI",
            "ENVI"
        ]
    },
    {
        "id": "202000186",
        "publicDate": "01/12/2020",
        "source": 1,
        "coAuthorship": 5,
        "citationCount": "13",
        "refCount": "84",
        "Class": [
            "BUSI",
            "COMP",
            "SOCI"

In [50]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()

df = spark.createDataFrame(data)
df = df.select("id", "publicDate", "source", "citationCount", "coAuthorship", "refCount", "Class") # reordering
df.show(10)

+---------+----------+------+-------------+------------+--------+--------------------+
|       id|publicDate|source|citationCount|coAuthorship|refCount|               Class|
+---------+----------+------+-------------+------------+--------+--------------------+
|202000143|01/12/2020|     1|            7|           6|      27|        [MEDI, HEAL]|
|202000188|01/12/2020|     1|           20|          23|      35|              [MEDI]|
|202000144|01/12/2020|     1|           11|           7|      32|  [ENER, ENGI, ENVI]|
|202000186|01/12/2020|     1|           13|           5|      84|[BUSI, COMP, SOCI...|
|202000172|01/12/2020|     1|           11|           8|      35|              [MEDI]|
|202000175|01/12/2020|     1|            4|          10|      22|  [MEDI, IMMU, VETE]|
|202000181|01/12/2020|     1|            2|           3|      26|  [VETE, AGRI, BIOC]|
|202000121|01/12/2020|     1|           21|           5|      37|              [MULT]|
|202000119|01/12/2020|     1|            2|

In [51]:
df.count()

301

In [52]:
# cast type accordingly
df = df.withColumn('citationCount', df.citationCount.cast('int'))
df = df.withColumn('coAuthorship', df.coAuthorship.cast('int'))
df = df.withColumn('refCount', df.refCount.cast('int'))

In [53]:
from pyspark.sql.functions import avg, min, max, countDistinct, explode, split, col, round, sum

max_values = df.agg(
    max("citationCount").alias("maxCitation"),
    max("refCount").alias("maxRef"),
    max("coAuthorship").alias("maxCoAuthor")
).collect()[0]

# max value for each feature for normalization
max_citation = max_values["maxCitation"]
max_ref = max_values["maxRef"]
max_coauthor = max_values["maxCoAuthor"]

In [54]:
# find all Class
genre_counts = df.withColumn("Genre", explode(col("Class")))\
                 .groupBy("Genre")\
                 .count()  

print('number of all the class:', genre_counts.count())
genre_counts.show()

number of all the class: 27
+-----+-----+
|Genre|count|
+-----+-----+
| COMP|   19|
| IMMU|   15|
| MATE|   33|
| ARTS|    5|
| HEAL|    2|
| PHYS|   21|
| BIOC|   33|
| NEUR|    5|
| VETE|   10|
| ENGI|   37|
| PHAR|    7|
| MEDI|   85|
| ECON|   10|
| MULT|   48|
| ENVI|   30|
| AGRI|   26|
| ENER|   17|
| NURS|    4|
| CENG|   19|
| SOCI|   19|
+-----+-----+
only showing top 20 rows



In [55]:
# explode the class
exploded_df = df.withColumn("Class", explode(col("Class")))
# exploded_df.show(20)

In [56]:
# drop na for invalid rows
cleaned_df = exploded_df.dropna()
print(cleaned_df.count())
cleaned_df.show(5)

484
+---------+----------+------+-------------+------------+--------+-----+
|       id|publicDate|source|citationCount|coAuthorship|refCount|Class|
+---------+----------+------+-------------+------------+--------+-----+
|202000143|01/12/2020|     1|            7|           6|      27| MEDI|
|202000143|01/12/2020|     1|            7|           6|      27| HEAL|
|202000188|01/12/2020|     1|           20|          23|      35| MEDI|
|202000144|01/12/2020|     1|           11|           7|      32| ENER|
|202000144|01/12/2020|     1|           11|           7|      32| ENGI|
+---------+----------+------+-------------+------------+--------+-----+
only showing top 5 rows



In [57]:
# compute the score for each paper
cleaned_df = cleaned_df.withColumn(
    "Score",
    round(
        col("source") * (
            0.4 * (col("citationCount") / max_citation * 10) +
            0.2 * (col("refCount") / max_ref * 10) +
            0.1 * (col("coAuthorship") / max_coauthor * 10)
        ), 4
    )
)

cleaned_df.show(10)

+---------+----------+------+-------------+------------+--------+-----+------+
|       id|publicDate|source|citationCount|coAuthorship|refCount|Class| Score|
+---------+----------+------+-------------+------------+--------+-----+------+
|202000143|01/12/2020|     1|            7|           6|      27| MEDI|0.1354|
|202000143|01/12/2020|     1|            7|           6|      27| HEAL|0.1354|
|202000188|01/12/2020|     1|           20|          23|      35| MEDI|0.2467|
|202000144|01/12/2020|     1|           11|           7|      32| ENER|0.1764|
|202000144|01/12/2020|     1|           11|           7|      32| ENGI|0.1764|
|202000144|01/12/2020|     1|           11|           7|      32| ENVI|0.1764|
|202000186|01/12/2020|     1|           13|           5|      84| BUSI|0.3634|
|202000186|01/12/2020|     1|           13|           5|      84| COMP|0.3634|
|202000186|01/12/2020|     1|           13|           5|      84| SOCI|0.3634|
|202000186|01/12/2020|     1|           13|         

In [58]:
from pyspark.sql.functions import to_date, year, quarter, col, sum, round, count
from pyspark.sql.window import Window

cleaned_df = cleaned_df.withColumn("publicDate", to_date(col("publicDate"), "dd/MM/yyyy"))
cleaned_df = cleaned_df.withColumn("Year", year(col("publicDate")))

grouped_df = cleaned_df.groupBy("Class", "Year").agg(
    round(sum("Score"), 4).alias("Total Score"),
    count("id").alias("Paper Count")  
)

# Display the result
grouped_df.show()

+-----+----+-----------+-----------+
|Class|Year|Total Score|Paper Count|
+-----+----+-----------+-----------+
| NEUR|2020|      3.303|          5|
| PHYS|2020|    13.2352|         21|
| ENGI|2020|    10.1755|         35|
| MULT|2020|    16.5471|         47|
| ENER|2020|     3.5205|         16|
| MATE|2020|     9.5043|         32|
| HEAL|2020|     0.3753|          2|
| DENT|2020|      0.661|          3|
| CHEM|2020|     6.0035|         24|
| BIOC|2020|     8.1494|         31|
| CENG|2020|     5.0331|         18|
| NURS|2020|      0.924|          4|
| VETE|2020|     1.3842|          7|
| ECON|2020|     2.3307|          7|
| MEDI|2020|    21.9217|         82|
| AGRI|2020|     7.2204|         25|
| PHAR|2020|     4.2157|          7|
| COMP|2020|     4.6723|         19|
| SOCI|2020|     4.4166|         17|
| IMMU|2020|     3.6567|         15|
+-----+----+-----------+-----------+
only showing top 20 rows



In [59]:
# Save the DataFrame to a CSV file
cumulative_df.write.csv(path="output_2020.csv", mode="overwrite", header=True)

spark.stop()

Py4JJavaError: An error occurred while calling o644.csv.
: java.lang.IllegalStateException: Cannot call methods on a stopped SparkContext.
This stopped SparkContext was created at:

org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:75)
java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:53)
java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500)
java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:484)
py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
py4j.Gateway.invoke(Gateway.java:238)
py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
py4j.ClientServerConnection.run(ClientServerConnection.java:106)
java.base/java.lang.Thread.run(Thread.java:1623)

The currently active SparkContext was created at:

org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:75)
java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:53)
java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500)
java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:484)
py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
py4j.Gateway.invoke(Gateway.java:238)
py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
py4j.ClientServerConnection.run(ClientServerConnection.java:106)
java.base/java.lang.Thread.run(Thread.java:1623)
         
	at org.apache.spark.SparkContext.assertNotStopped(SparkContext.scala:122)
	at org.apache.spark.SparkContext.defaultParallelism(SparkContext.scala:2707)
	at org.apache.spark.sql.execution.adaptive.CoalesceShufflePartitions.$anonfun$apply$1(CoalesceShufflePartitions.scala:61)
	at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.execution.adaptive.CoalesceShufflePartitions.apply(CoalesceShufflePartitions.scala:58)
	at org.apache.spark.sql.execution.adaptive.CoalesceShufflePartitions.apply(CoalesceShufflePartitions.scala:34)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$optimizeQueryStage$2(AdaptiveSparkPlanExec.scala:169)
	at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
	at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
	at scala.collection.immutable.List.foldLeft(List.scala:91)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.optimizeQueryStage(AdaptiveSparkPlanExec.scala:168)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.newQueryStage(AdaptiveSparkPlanExec.scala:588)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createQueryStages(AdaptiveSparkPlanExec.scala:538)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createQueryStages$2(AdaptiveSparkPlanExec.scala:577)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
	at scala.collection.Iterator.foreach(Iterator.scala:943)
	at scala.collection.Iterator.foreach$(Iterator.scala:943)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
	at scala.collection.IterableLike.foreach(IterableLike.scala:74)
	at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
	at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
	at scala.collection.TraversableLike.map(TraversableLike.scala:286)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
	at scala.collection.AbstractTraversable.map(Traversable.scala:108)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createQueryStages(AdaptiveSparkPlanExec.scala:577)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createQueryStages(AdaptiveSparkPlanExec.scala:534)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createQueryStages$2(AdaptiveSparkPlanExec.scala:577)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
	at scala.collection.Iterator.foreach(Iterator.scala:943)
	at scala.collection.Iterator.foreach$(Iterator.scala:943)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
	at scala.collection.IterableLike.foreach(IterableLike.scala:74)
	at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
	at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
	at scala.collection.TraversableLike.map(TraversableLike.scala:286)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
	at scala.collection.AbstractTraversable.map(Traversable.scala:108)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createQueryStages(AdaptiveSparkPlanExec.scala:577)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createQueryStages$2(AdaptiveSparkPlanExec.scala:577)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
	at scala.collection.Iterator.foreach(Iterator.scala:943)
	at scala.collection.Iterator.foreach$(Iterator.scala:943)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
	at scala.collection.IterableLike.foreach(IterableLike.scala:74)
	at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
	at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
	at scala.collection.TraversableLike.map(TraversableLike.scala:286)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
	at scala.collection.AbstractTraversable.map(Traversable.scala:108)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createQueryStages(AdaptiveSparkPlanExec.scala:577)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createQueryStages$2(AdaptiveSparkPlanExec.scala:577)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
	at scala.collection.Iterator.foreach(Iterator.scala:943)
	at scala.collection.Iterator.foreach$(Iterator.scala:943)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
	at scala.collection.IterableLike.foreach(IterableLike.scala:74)
	at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
	at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
	at scala.collection.TraversableLike.map(TraversableLike.scala:286)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
	at scala.collection.AbstractTraversable.map(Traversable.scala:108)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createQueryStages(AdaptiveSparkPlanExec.scala:577)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createQueryStages$2(AdaptiveSparkPlanExec.scala:577)
	at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
	at scala.collection.Iterator.foreach(Iterator.scala:943)
	at scala.collection.Iterator.foreach$(Iterator.scala:943)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
	at scala.collection.IterableLike.foreach(IterableLike.scala:74)
	at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
	at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
	at scala.collection.TraversableLike.map(TraversableLike.scala:286)
	at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
	at scala.collection.AbstractTraversable.map(Traversable.scala:108)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createQueryStages(AdaptiveSparkPlanExec.scala:577)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$getFinalPhysicalPlan$1(AdaptiveSparkPlanExec.scala:277)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.getFinalPhysicalPlan(AdaptiveSparkPlanExec.scala:272)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.withFinalPlanUpdate(AdaptiveSparkPlanExec.scala:417)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.executeCollect(AdaptiveSparkPlanExec.scala:390)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:142)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:859)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:388)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:361)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:240)
	at org.apache.spark.sql.DataFrameWriter.csv(DataFrameWriter.scala:850)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:75)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:52)
	at java.base/java.lang.reflect.Method.invoke(Method.java:578)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:1623)
