from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("SHCWriteExample").getOrCreate()

# Load the CSV from Hadoop
df = spark.read.csv("hdfs:///path/to/data.csv", header=True, inferSchema=True)

# Define a catalog to map the DataFrame columns to HBase columns
catalog = "".join("""
{
    "table":{"namespace":"default", "name":"Lyrics"},
    "rowkey":"id",
    "columns":{
        "id":{"cf":"rowkey", "col":"id", "type":"string"},
        "title":{"cf":"info", "col":"title", "type":"string"},
        "lyric":{"cf":"info", "col":"lyric", "type":"string"}
    }
}
""")

# Write the DataFrame to HBase
df.write.options(catalog=catalog).format("org.apache.spark.sql.execution.datasources.hbase").save()


In [1]:
print(sc.version)

3.2.4


In [2]:
# Check the master URL of the SparkContext
print(sc.master)

# Read a CSV file into a DataFrame
#df = spark.read.option("header", "true").option("inferSchema", "true").csv("hdfs:///CA1/lyrics-data.csv")
#df = spark.read.csv("hdfs:///CA1/lyrics-data.csv", header = True)
#df = spark.read.csv("hdfs:///CA1/artists-data.csv", header = True)

df = spark.read.csv("hdfs:///CA1/lyrics-dataQUO20tab.csv", sep='\t', header=True, multiLine = True, escape = "\n")

#df = spark.read.csv("hdfs:///CA1/azlyrics-scraper/azlyrics_lyrics_19.csv", header = True)
df.show(20)

local[*]
+---------------+--------------------+--------------------+--------------------+--------+
|          ALink|               SName|               SLink|               Lyric|language|
+---------------+--------------------+--------------------+--------------------+--------+
|/ivete-sangalo/|               Arerê|/ivete-sangalo/ar...|Tudo o que eu que...|      pt|
|/ivete-sangalo/|Se Eu Não Te Amas...|/ivete-sangalo/se...|Meu coração\nSem ...|      pt|
|/ivete-sangalo/|         Céu da Boca|/ivete-sangalo/ch...|É de babaixá!\nÉ ...|      pt|
|/ivete-sangalo/|Quando A Chuva Pa...|/ivete-sangalo/qu...|Quando a chuva pa...|      pt|
|/ivete-sangalo/|        Sorte Grande|/ivete-sangalo/so...|A minha sorte gra...|      pt|
|/ivete-sangalo/|    A Lua Q Eu T Dei|/ivete-sangalo/a-...|Posso te falar do...|      pt|
|/ivete-sangalo/|Mulheres Não Têm ...|/ivete-sangalo/mu...|Hey, girl\nLevant...|      pt|
|/ivete-sangalo/|Eva / Alô Paixão ...|/ivete-sangalo/ev...|'EVA'\n(Giancarlo...|      pt|
|

In [3]:
# Add a unique ID to the DataFrame as rowkey
df = df.withColumn("id", monotonically_increasing_id())

In [9]:
df.show(20)

+---------------+--------------------+--------------------+--------------------+--------+---+
|          ALink|               SName|               SLink|               Lyric|language| id|
+---------------+--------------------+--------------------+--------------------+--------+---+
|/ivete-sangalo/|               Arerê|/ivete-sangalo/ar...|Tudo o que eu que...|      pt|  0|
|/ivete-sangalo/|Se Eu Não Te Amas...|/ivete-sangalo/se...|Meu coração\nSem ...|      pt|  1|
|/ivete-sangalo/|         Céu da Boca|/ivete-sangalo/ch...|É de babaixá!\nÉ ...|      pt|  2|
|/ivete-sangalo/|Quando A Chuva Pa...|/ivete-sangalo/qu...|Quando a chuva pa...|      pt|  3|
|/ivete-sangalo/|        Sorte Grande|/ivete-sangalo/so...|A minha sorte gra...|      pt|  4|
|/ivete-sangalo/|    A Lua Q Eu T Dei|/ivete-sangalo/a-...|Posso te falar do...|      pt|  5|
|/ivete-sangalo/|Mulheres Não Têm ...|/ivete-sangalo/mu...|Hey, girl\nLevant...|      pt|  6|
|/ivete-sangalo/|Eva / Alô Paixão ...|/ivete-sangalo/ev...|'

In [14]:
from pyspark.sql.functions import regexp_replace

df = df.withColumn("Lyric", regexp_replace(df["Lyric"], "\n", " "))
df.show(20)

+---------------+--------------------+--------------------+--------------------+--------+---+
|          ALink|               SName|               SLink|               Lyric|language| id|
+---------------+--------------------+--------------------+--------------------+--------+---+
|/ivete-sangalo/|               Arerê|/ivete-sangalo/ar...|Tudo o que eu que...|      pt|  0|
|/ivete-sangalo/|Se Eu Não Te Amas...|/ivete-sangalo/se...|Meu coração Sem d...|      pt|  1|
|/ivete-sangalo/|         Céu da Boca|/ivete-sangalo/ch...|É de babaixá! É d...|      pt|  2|
|/ivete-sangalo/|Quando A Chuva Pa...|/ivete-sangalo/qu...|Quando a chuva pa...|      pt|  3|
|/ivete-sangalo/|        Sorte Grande|/ivete-sangalo/so...|A minha sorte gra...|      pt|  4|
|/ivete-sangalo/|    A Lua Q Eu T Dei|/ivete-sangalo/a-...|Posso te falar do...|      pt|  5|
|/ivete-sangalo/|Mulheres Não Têm ...|/ivete-sangalo/mu...|Hey, girl Levanta...|      pt|  6|
|/ivete-sangalo/|Eva / Alô Paixão ...|/ivete-sangalo/ev...|'

In [15]:
# Write the limited DataFrame to a new CSV file with UTF-8 encoding
df.write.option("header", "true") \
        .option("sep", "\t") \
        .option("encoding", "UTF-8") \
        .csv("hdfs:///CA1/lyrics-dataSPACESutf.csv")

[Stage 11:>                                                         (0 + 1) / 1]                                                                                

In [13]:
# Write the limited DataFrame to a new CSV file
df.write.option("header", "true").option("sep", "\t").csv("hdfs:///CA1/lyrics-dataSPACES.csv")

In [7]:
spark = SparkSession.builder \
    .appName("HBase Write") \
    .config("spark.jars", "/usr/local/spark/jars/shc-core-1.1.0.3.1.7.5000-4.jar") \
    .getOrCreate()

In [8]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import monotonically_increasing_id


df = spark.read.csv("hdfs:///CA1/lyrics-dataQUO20tab.csv", sep='\t', header=True, multiLine = True, escape = "\n")

# Add a unique ID to the DataFrame as rowkey
df = df.withColumn("id", monotonically_increasing_id())

# Define the HBase catalog
catalog = "".join("""
{
    "table":{"namespace":"default", "name":"lyricsTEST"},
    "rowkey":"id",
    "columns":{
        "id":{"cf":"rowkey", "col":"id", "type":"long"},
        "ALink":{"cf":"cf", "col":"ALink", "type":"string"},
        "SName":{"cf":"cf", "col":"SName", "type":"string"},
        "SLink":{"cf":"cf", "col":"SLink", "type":"string"},
        "Lyric":{"cf":"cf", "col":"Lyric", "type":"string"},
        "language":{"cf":"cf", "col":"language", "type":"string"}
    }
}
""")

# Write DataFrame to HBase
df.write.options(catalog=catalog).format("org.apache.spark.sql.execution.datasources.hbase").save()


Py4JJavaError: An error occurred while calling o82.save.
: java.lang.ClassNotFoundException: 
Failed to find data source: org.apache.spark.sql.execution.datasources.hbase. Please find packages at
http://spark.apache.org/third-party-projects.html
       
	at org.apache.spark.sql.errors.QueryExecutionErrors$.failedToFindDataSourceError(QueryExecutionErrors.scala:443)
	at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:670)
	at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSourceV2(DataSource.scala:720)
	at org.apache.spark.sql.DataFrameWriter.lookupV2Provider(DataFrameWriter.scala:852)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:256)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:247)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.execution.datasources.hbase.DefaultSource
	at java.net.URLClassLoader.findClass(URLClassLoader.java:387)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
	at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$5(DataSource.scala:656)
	at scala.util.Try$.apply(Try.scala:213)
	at org.apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$4(DataSource.scala:656)
	at scala.util.Failure.orElse(Try.scala:224)
	at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:656)
	... 16 more


In [16]:
#pip install happybase


Defaulting to user installation because normal site-packages is not writeable
Collecting happybase
  Downloading happybase-1.2.0.tar.gz (40 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 KB[0m [31m868.1 kB/s[0m eta [36m0:00:00[0mMB/s[0m eta [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting thriftpy2>=0.4
  Downloading thriftpy2-0.4.17.tar.gz (519 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.4/519.4 KB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m[31m5.6 MB/s[0m eta [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[0m[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: happybase, thriftpy2
  Building wheel for happybase (setup.py) ... [?25ldone
[?25h  Created wheel for happybase: filename=ha

In [18]:
import happybase

# Connect to HBase
connection = happybase.Connection('localhost')  # replace 'localhost' with your HBase host
table = connection.table('lyricsTEST')

# Fetch a row by its row key
row = table.row(b'0')
print(row)

# Scan all rows in the table
for key, data in table.scan():
    print(key, data)


{b'cf:ALink': b'/ivete-sangalo/', b'cf:Lyric': b'Tudo o que eu quero nessa vida, Toda vida, \xc3\xa9 \xc3\x89 amar voc\xc3\xaa Amar voc\xc3\xaa O seu amor \xc3\xa9 como uma chama acesa Queima de prazer De prazer Eu j\xc3\xa1 falei com Deus que n\xc3\xa3o vou te deixar Vou te levar pra onde for Qualquer lugar J\xc3\xa1 fiz de tudo pra n\xc3\xa3o te perder Arer\xc3\xaa, Um lobby, um hobby, um love com voc\xc3\xaa Arer\xc3\xaa, Um lobby, um hobby, um love com voc\xc3\xaa Cai, cai, cai, cai, cai pra c\xc3\xa1 Hey, hey, hey Tu-do,tu-do, vai rolar', b'cf:SLink': b'/ivete-sangalo/arere.html', b'cf:SName': b'Arer\xc3\xaa', b'cf:language': b'pt'}
b'0' {b'cf:ALink': b'/ivete-sangalo/', b'cf:Lyric': b'Tudo o que eu quero nessa vida, Toda vida, \xc3\xa9 \xc3\x89 amar voc\xc3\xaa Amar voc\xc3\xaa O seu amor \xc3\xa9 como uma chama acesa Queima de prazer De prazer Eu j\xc3\xa1 falei com Deus que n\xc3\xa3o vou te deixar Vou te levar pra onde for Qualquer lugar J\xc3\xa1 fiz de tudo pra n\xc3\xa3o te

In [19]:
import happybase

# Connect to HBase
connection = happybase.Connection('localhost')  # replace 'localhost' with your HBase host
table = connection.table('lyricsTEST')

# Fetch a row by its row key
row = table.row(b'0')
for column, value in row.items():
    print(column.decode('utf-8'), ":", value.decode('utf-8'))

print("\nScanning all rows:")
# Scan all rows in the table
for key, data in table.scan():
    decoded_key = key.decode('utf-8')
    decoded_data = {k.decode('utf-8'): v.decode('utf-8') for k, v in data.items()}
    print(decoded_key, decoded_data)


cf:ALink : /ivete-sangalo/
cf:Lyric : Tudo o que eu quero nessa vida, Toda vida, é É amar você Amar você O seu amor é como uma chama acesa Queima de prazer De prazer Eu já falei com Deus que não vou te deixar Vou te levar pra onde for Qualquer lugar Já fiz de tudo pra não te perder Arerê, Um lobby, um hobby, um love com você Arerê, Um lobby, um hobby, um love com você Cai, cai, cai, cai, cai pra cá Hey, hey, hey Tu-do,tu-do, vai rolar
cf:SLink : /ivete-sangalo/arere.html
cf:SName : Arerê
cf:language : pt

Scanning all rows:
0 {'cf:ALink': '/ivete-sangalo/', 'cf:Lyric': 'Tudo o que eu quero nessa vida, Toda vida, é É amar você Amar você O seu amor é como uma chama acesa Queima de prazer De prazer Eu já falei com Deus que não vou te deixar Vou te levar pra onde for Qualquer lugar Já fiz de tudo pra não te perder Arerê, Um lobby, um hobby, um love com você Arerê, Um lobby, um hobby, um love com você Cai, cai, cai, cai, cai pra cá Hey, hey, hey Tu-do,tu-do, vai rolar', 'cf:SLink': '/ivete-

In [5]:
import happybase
import pandas as pd

# Connect to HBase
connection = happybase.Connection('localhost')
table = connection.table('lyricsTEST')

# Extract all rows from HBase into a list
rows = [data for _, data in table.scan()]

# Construct DataFrame
df = pd.DataFrame(rows)

# Convert bytes columns to string
for column in df.columns:
    df[column] = df[column].str.decode('utf-8')

df.head(50)


Unnamed: 0,b'cf:ALink',b'cf:Lyric',b'cf:SLink',b'cf:SName',b'cf:language'
0,/ivete-sangalo/,"Tudo o que eu quero nessa vida, Toda vida, é É...",/ivete-sangalo/arere.html,Arerê,pt
1,/ivete-sangalo/,"Cheiro de pneu queimado, carburador furado, co...",/ivete-sangalo/carro-velho.html,Carro Velho,pt
2,/ivete-sangalo/,"Odô, axé odô, axé odô, axé odô Odô, axé odô, a...",/ivete-sangalo/muito-obrigado-axe.html,Muito Obrigado Axé,pt
3,/ivete-sangalo/,Não precisa mudar Vou me adaptar ao seu jeito ...,/ivete-sangalo/nao-precisa-mudar.html,Não Precisa Mudar,pt
4,/ivete-sangalo/,Toda vez que eu pintar em seu pensamento Se le...,/ivete-sangalo/nada-vai-nos-separar.html,Nada Vai Nos Separar,pt
5,/ivete-sangalo/,É amor É tanto amor que eu sinto esse momento ...,/ivete-sangalo/tempo-de-alegria.html,Tempo de Alegria,pt
6,/ivete-sangalo/,"Duvidava, não entendia Quando alguém me falou ...",/ivete-sangalo/agora-ja-sei.html,Agora Já Sei,pt
7,/ivete-sangalo/,Eu me lembro sempre onde quer que eu vá Só um ...,/ivete-sangalo/deixo.html,Deixo,pt
8,/ivete-sangalo/,Ivete Sangalooooo! Então não me conte seus pr...,/ivete-sangalo/nao-me-conte-seus-problemas.html,Não Me Conte Seus Problemas,pt
9,/ivete-sangalo/,"Moro... Num país tropical, Abençoado por Deus ...",/ivete-sangalo/pais-tropical-arere-taj-mahal.html,País Tropical / Arerê / Taj Mahal,pt


2023-10-31 05:01:57,331 WARN spark.HeartbeatReceiver: Removing executor driver with no recent heartbeats: 6249934 ms exceeds timeout 120000 ms
2023-10-31 08:28:15,404 WARN executor.Executor: Issue communicating with driver in heartbeater
org.apache.spark.rpc.RpcTimeoutException: Futures timed out after [10000 milliseconds]. This timeout is controlled by spark.executor.heartbeatInterval
	at org.apache.spark.rpc.RpcTimeout.org$apache$spark$rpc$RpcTimeout$$createRpcTimeoutException(RpcTimeout.scala:47)
	at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:62)
	at org.apache.spark.rpc.RpcTimeout$$anonfun$addMessageIfTimeout$1.applyOrElse(RpcTimeout.scala:58)
	at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:38)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:76)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:103)
	at org.apache.spark.executor.Executor.reportHeartBeat(Executor.scala:

2023-10-31 08:28:16,504 WARN executor.Executor: Issue communicating with driver in heartbeater
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:103)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:87)
	at org.apache.spark.storage.BlockManagerMaster.registerBlockManager(BlockManagerMaster.scala:79)
	at org.apache.spark.storage.BlockManager.reregister(BlockManager.scala:636)
	at org.apache.spark.executor.Executor.reportHeartBeat(Executor.scala:1009)
	at org.apache.spark.executor.Executor.$anonfun$heartbeater$1(Executor.scala:212)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2048)
	at org.apache.spark.Heartbeater$$anon$1.run(Heartbeater.scala:46)


2023-10-31 08:28:16,627 WARN executor.Executor: Issue communicating with driver in heartbeater
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:103)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:87)
	at org.apache.spark.storage.BlockManagerMaster.registerBlockManager(BlockManagerMaster.scala:79)
	at org.apache.spark.storage.BlockManager.reregister(BlockManager.scala:636)
	at org.apache.spark.executor.Executor.reportHeartBeat(Executor.scala:1009)
	at org.apache.spark.executor.Executor.$anonfun$heartbeater$1(Executor.scala:212)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2048)
	at org.apache.spark.Heartbeater$$anon$1.run(Heartbeater.scala:46)


2023-10-31 08:28:16,628 ERROR netty.Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:113)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:112)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:548)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$1(BlockManagerMasterEndpoint.scala:547)
	at org

	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:113)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:112)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:548)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$1(BlockManagerMasterEndpoint.scala:547)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$register(BlockManagerMasterEndpoint.scala:585)
	at org.apache.spark.storage.BlockManagerMasterEndpoint$$anonfun$receiveAndReply$1.applyOrElse(BlockManagerMasterEndpoint.scala:119)
	at org.apache.spark.rpc.netty.Inbox.$anonfun$process$1(Inbox.scala:103)
	at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbo

2023-10-31 08:28:17,211 ERROR netty.Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:113)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:112)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:548)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$1(BlockManagerMasterEndpoint.scala:547)
	at org

2023-10-31 08:28:17,681 ERROR netty.Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:113)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:112)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:548)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$1(BlockManagerMasterEndpoint.scala:547)
	at org

2023-10-31 08:28:17,948 WARN executor.Executor: Issue communicating with driver in heartbeater
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:103)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:87)
	at org.apache.spark.storage.BlockManagerMaster.registerBlockManager(BlockManagerMaster.scala:79)
	at org.apache.spark.storage.BlockManager.reregister(BlockManager.scala:636)
	at org.apache.spark.executor.Executor.reportHeartBeat(Executor.scala:1009)
	at org.apache.spark.executor.Executor.$anonfun$heartbeater$1(Executor.scala:212)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2048)
	at org.apache.spark.Heartbeater$$anon$1.run(Heartbeater.scala:46)


	at org.apache.spark.rpc.netty.NettyRpcEnv.$anonfun$asyncSetupEndpointRefByURI$1(NettyRpcEnv.scala:148)
	at org.apache.spark.rpc.netty.NettyRpcEnv.$anonfun$asyncSetupEndpointRefByURI$1$adapted(NettyRpcEnv.scala:144)
	at scala.concurrent.Future.$anonfun$flatMap$1(Future.scala:307)
	at scala.concurrent.impl.Promise.$anonfun$transformWith$1(Promise.scala:41)
	at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
	at org.apache.spark.util.ThreadUtils$$anon$1.execute(ThreadUtils.scala:99)
	at scala.concurrent.impl.ExecutionContextImpl$$anon$4.execute(ExecutionContextImpl.scala:138)
	at scala.concurrent.impl.CallbackRunnable.executeWithValue(Promise.scala:72)
	at scala.concurrent.impl.Promise$DefaultPromise.dispatchOrAddCallback(Promise.scala:316)
	at scala.concurrent.impl.Promise$DefaultPromise.onComplete(Promise.scala:307)
	at scala.concurrent.impl.Promise.transformWith(Promise.scala:40)
	at scala.concurrent.impl.Promise.transformWith$(Promise.scala:38)
	at scala.concurrent.impl.

2023-10-31 08:28:20,819 WARN executor.Executor: Issue communicating with driver in heartbeater
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:103)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:87)
	at org.apache.spark.storage.BlockManagerMaster.registerBlockManager(BlockManagerMaster.scala:79)
	at org.apache.spark.storage.BlockManager.reregister(BlockManager.scala:636)
	at org.apache.spark.executor.Executor.reportHeartBeat(Executor.scala:1009)
	at org.apache.spark.executor.Executor.$anonfun$heartbeater$1(Executor.scala:212)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2048)
	at org.apache.spark.Heartbeater$$anon$1.run(Heartbeater.scala:46)


2023-10-31 08:28:21,096 ERROR netty.Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:113)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:112)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:548)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$1(BlockManagerMasterEndpoint.scala:547)
	at org

2023-10-31 08:28:21,332 WARN executor.Executor: Issue communicating with driver in heartbeater
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:103)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:87)
	at org.apache.spark.storage.BlockManagerMaster.registerBlockManager(BlockManagerMaster.scala:79)
	at org.apache.spark.storage.BlockManager.reregister(BlockManager.scala:636)
	at org.apache.spark.executor.Executor.reportHeartBeat(Executor.scala:1009)
	at org.apache.spark.executor.Executor.$anonfun$heartbeater$1(Executor.scala:212)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2048)
	at org.apache.spark.Heartbeater$$anon$1.run(Heartbeater.scala:46)


2023-10-31 08:28:21,443 ERROR netty.Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:113)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:112)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:548)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$1(BlockManagerMasterEndpoint.scala:547)
	at org

2023-10-31 08:28:21,801 WARN executor.Executor: Issue communicating with driver in heartbeater
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:103)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:87)
	at org.apache.spark.storage.BlockManagerMaster.registerBlockManager(BlockManagerMaster.scala:79)
	at org.apache.spark.storage.BlockManager.reregister(BlockManager.scala:636)
	at org.apache.spark.executor.Executor.reportHeartBeat(Executor.scala:1009)
	at org.apache.spark.executor.Executor.$anonfun$heartbeater$1(Executor.scala:212)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2048)
	at org.apache.spark.Heartbeater$$anon$1.run(Heartbeater.scala:46)


2023-10-31 08:28:22,144 ERROR netty.Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:113)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:112)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:548)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$1(BlockManagerMasterEndpoint.scala:547)
	at org

2023-10-31 08:28:22,435 ERROR netty.Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:113)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:112)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:548)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$1(BlockManagerMasterEndpoint.scala:547)
	at org

2023-10-31 08:28:22,648 ERROR netty.Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:113)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:112)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:548)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$1(BlockManagerMasterEndpoint.scala:547)
	at org

2023-10-31 08:28:23,065 ERROR netty.Inbox: Ignoring error
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:113)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:112)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$lzycompute$1(BlockManagerMasterEndpoint.scala:548)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.isExecutorAlive$1(BlockManagerMasterEndpoint.scala:547)
	at org

2023-10-31 08:28:23,121 WARN executor.Executor: Issue communicating with driver in heartbeater
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:103)
	at org.apache.spark.rpc.RpcEndpointRef.askSync(RpcEndpointRef.scala:87)
	at org.apache.spark.storage.BlockManagerMaster.registerBlockManager(BlockManagerMaster.scala:79)
	at org.apache.spark.storage.BlockManager.reregister(BlockManager.scala:636)
	at org.apache.spark.executor.Executor.reportHeartBeat(Executor.scala:1009)
	at org.apache.spark.executor.Executor.$anonfun$heartbeater$1(Executor.scala:212)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2048)
	at org.apache.spark.Heartbeater$$anon$1.run(Heartbeater.scala:46)


2023-10-31 08:28:23,553 ERROR executor.Executor: Exit as unable to send heartbeats to driver more than 60 times
