# Nebula 


https://github.com/vesoft-inc/nebula-algorithm (parou no spark 2.4)

## Spark

In [None]:
from pyspark.sql import SparkSession

spark = SparkSession.builder.config(
    "spark.jars", "/home/jovyan/nebula-spark-connector_3.0-3.6.0.jar"
).config("spark.driver.extraClassPath","/home/jovyan/nebula-spark-connector_3.0-3.6.0.jar"
        ).config("spark.executor.extraClassPath","/home/jovyan/nebula-spark-connector_3.0-3.6.0.jar"
        ).appName(
    "nebula-connector"
).getOrCreate()


In [14]:
df = spark.read.format(
  "com.vesoft.nebula.connector.NebulaDataSource").option(
    "type", "vertex").option(
    "spaceName", "exemplo_grafo").option(
    "label", "pessoa").option(
    "returnCols", "nome,idade,profissao").option("metaAddress", "metad0:9559,metad1:9559,metad2:9559") \
    .option("graphAddress", "graphd:9669,graphd1:9669,graphd2:9669") \
    .option("operateType", "read").option("partitionNumber", 1).load()

In [15]:
df.show()

+---------+------------+-----+----------+
|_vertexId|        nome|idade| profissao|
+---------+------------+-----+----------+
|     p004|   Ana Costa|   31|  Analista|
|     p001|  João Silva|   35|Engenheiro|
|     p002|Maria Santos|   29|  Designer|
|     p003| Carlos Lima|   42|   Gerente|
+---------+------------+-----+----------+



In [16]:
# Criando um DataFrame com dados de jogadores de basquete
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

schema = StructType([
    StructField("_vertexId", StringType(), True),
    StructField("nome", StringType(), True),
    StructField("idade", IntegerType(), True),
    StructField("profissao", StringType(), True)
])

players_data = [
    ("p005", "Lucas Ponce", 35, "Estudante"),
]

df_players = spark.createDataFrame(players_data, schema)

df_players.show()

+---------+-----------+-----+---------+
|_vertexId|       nome|idade|profissao|
+---------+-----------+-----+---------+
|     p005|Lucas Ponce|   35|Estudante|
+---------+-----------+-----+---------+



In [18]:
# Inserindo vértices no NebulaGraph
df_players.write.format("com.vesoft.nebula.connector.NebulaDataSource").mode("append")\
.option(
    "type", "vertex"
).option(
    "operateType", "write"
).option(
    "spaceName", "exemplo_grafo"
).option(
    "label", "pessoa"
).option(
    "vidPolicy", ""
).option(
    "vertexField", "_vertexId"
).option(
    "batch", 100
).option(
    "metaAddress", "metad0:9559,metad1:9559,metad2:9559"
).option(
    "graphAddress", "graphd:9669,graphd1:9669,graphd2:9669"
).option(
    "user", "root"
).option(
    "passwd", ""
).option(
    "writeMode", "insert"
).save()

In [19]:
spark.read.format(
  "com.vesoft.nebula.connector.NebulaDataSource").option(
    "type", "vertex").option(
    "spaceName", "exemplo_grafo").option(
    "label", "pessoa").option(
    "returnCols", "nome,idade,profissao").option("metaAddress", "metad0:9559,metad1:9559,metad2:9559") \
    .option("graphAddress", "graphd:9669,graphd1:9669,graphd2:9669") \
    .option("operateType", "read").option("partitionNumber", 1).load().show()

+---------+------------+-----+----------+
|_vertexId|        nome|idade| profissao|
+---------+------------+-----+----------+
|     p004|   Ana Costa|   31|  Analista|
|     p001|  João Silva|   35|Engenheiro|
|     p002|Maria Santos|   29|  Designer|
|     p005| Lucas Ponce|   35| Estudante|
|     p003| Carlos Lima|   42|   Gerente|
+---------+------------+-----+----------+



In [45]:
# Criando DataFrame com relacionamentos (ex: equipes que jogaram juntos)
from pyspark.sql.types import StructType, StructField, StringType,DateType
import pyspark.sql.functions as F

edge_schema = StructType([
    StructField("srcid", StringType(), True),
    StructField("dstid", StringType(), True),
    StructField("desde", StringType(), True),
    StructField("proximidade", StringType(), True)
])

relationships_data = [
    ("p005", "p003",  "2018-05-20", "amigo_proximo"),
    ("p002", "p005",  "2018-05-20", "amigo_proximo"),
]

df_relationships = spark.createDataFrame(relationships_data, edge_schema)
df_relationships = df_relationships.withColumn("desde", F.from_unixtime(F.unix_timestamp('desde', 'yyyy-MM-dd')).cast("date"))

df_relationships.show()

+-----+-----+----------+-------------+
|srcid|dstid|     desde|  proximidade|
+-----+-----+----------+-------------+
| p005| p003|2018-05-20|amigo_proximo|
| p002| p005|2018-05-20|amigo_proximo|
+-----+-----+----------+-------------+



In [46]:
df_relationships.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
    .mode("overwrite")\
    .option("operateType", "write")\
    .option("srcPolicy", "")\
    .option("dstPolicy", "")\
   .option(
        "metaAddress", "metad0:9559,metad1:9559,metad2:9559"
    ).option(
        "graphAddress", "graphd:9669,graphd1:9669,graphd2:9669"
    )\
    .option("user", "root")\
    .option("passwd", "")\
    .option("type", "edge")\
    .option("spaceName", "exemplo_grafo")\
    .option("label", "conhece")\
    .option("srcVertexField", "srcid")\
    .option("dstVertexField", "dstid")\
    .option("batch", 100)\
    .option("writeMode", "insert")\
    .option("rankField", "")\
    .save()

In [47]:
df_conhece = spark.read.format("com.vesoft.nebula.connector.NebulaDataSource") \
    .option("type", "edge") \
    .option("spaceName", "exemplo_grafo") \
    .option("label", "conhece") \
    .option("returnCols", "desde,proximidade") \
    .option("metaAddress", "metad0:9559,metad1:9559,metad2:9559") \
    .option("graphAddress", "graphd:9669,graphd1:9669,graphd2:9669") \
    .option("user", "root") \
    .option("passwd", "nebula") \
    .option("operateType", "read") \
    .option("rankField", "") \
    .option("partitionNumber", 1) \
    .load()

# Visualizar os dados
df_conhece.show()

+------+------+-----+-----------+-------------+
|_srcId|_dstId|_rank|      desde|  proximidade|
+------+------+-----+-----------+-------------+
|  p001|  p002|    0| 2018-05-20|amigo_proximo|
|  p001|  p004|    0| 2022-08-20|       colega|
|  p002|  p003|    0| 2019-12-10|    conhecido|
|  p002|  p005|    0|17671-01-01|amigo_proximo|
|  p005|  p003|    0|17671-01-01|amigo_proximo|
|  p003|  p004|    0| 2022-09-01|       colega|
+------+------+-----+-----------+-------------+



## Python

In [8]:
! pip install nebula3-python

Collecting nebula3-python
  Downloading nebula3_python-3.8.3-py3-none-any.whl (331 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m331.3/331.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting httpx[http2]>=0.22.0
  Downloading httpx-0.28.1-py3-none-any.whl (73 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.5/73.5 kB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httplib2>=0.20.0
  Downloading httplib2-0.30.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.1/91.1 kB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting future>=0.18.0
  Downloading future-1.0.0-py3-none-any.whl (491 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.3/491.3 kB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.*
  Downloading httpcore-1.0.9-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 

In [11]:
from nebula3.gclient.net import ConnectionPool
from nebula3.Config import Config

# Configuração da conexão
config = Config()
config.max_connection_pool_size = 10

# Inicializar pool de conexões
connection_pool = ConnectionPool()
ok = connection_pool.init([('graphd', 9669)], config)

if not ok:
    print("Erro ao conectar com NebulaGraph")
    exit(1)

In [13]:
with connection_pool.session_context('root', '') as session:
    # Listar todos os espaços
    result = session.execute('SHOW SPACES')
    print("Espaços disponíveis:")
    for row in result:
        print(f"- {row.values}")


Espaços disponíveis:
- <bound method Record.values of "exemplo_grafo">
