# 1. Librerías & Set Up

In [1]:
!pip install pyspark

Collecting pyspark
  Downloading pyspark-3.5.1.tar.gz (317.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.0/317.0 MB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.5.1-py2.py3-none-any.whl size=317488491 sha256=9865a10fb1cce82e868f8feebdeda84793d1f8cc2f07a4bb03c94b10d1c51a6e
  Stored in directory: /root/.cache/pip/wheels/80/1d/60/2c256ed38dddce2fdd93be545214a63e02fbd8d74fb0b7f3a6
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.5.1


In [2]:
from pyspark.sql import SparkSession
from itertools import product

In [3]:
#import neo4j [TO-EDIT]
#from neo4j import GraphDatabase
#import pandas as pd

In [4]:
# Reuse the active Spark session if there is one, otherwise create it.
spark = SparkSession.builder.getOrCreate()

# SparkContext of the session: the object used below to build RDDs.
sc = spark.sparkContext
sc

# 2. Neo4j Graph

In [None]:
# TODO

In [5]:
# Hard-coded sample graph, one tuple per edge. get_keys reads positions 0 and 2
# as the two endpoint nodes; the middle value is presumably an edge label or
# type (TODO confirm once the Neo4j load in section 2 is implemented).
graph = [
    (1, 11, 2),
    (1, 11, 3),
    (2, 11, 3),
    (3, 11, 2),
    (3, 11, 4),
    (4, 11, 1),
    (4, 11, 2),
    (4, 11, 3),
    (4, 12, 5),
    (5, 12, 1),
    (5, 12, 2),
    (5, 12, 6),
]

# 3. MapReduce Algorithm for Triangles

In [30]:
# Distribute the in-memory edge list as an RDD so the map phase can process it.
rdd_graph = sc.parallelize(graph)
# Pull the RDD back to the driver to display its contents (the sample graph is tiny).
rdd_graph.collect()

[(1, 11, 2),
 (1, 11, 3),
 (2, 11, 3),
 (3, 11, 2),
 (3, 11, 4),
 (4, 11, 1),
 (4, 11, 2),
 (4, 11, 3),
 (4, 12, 5),
 (5, 12, 1),
 (5, 12, 2),
 (5, 12, 6)]

In [29]:
def hash(number, num_buckets=2):
  """
  Map a node id to a bucket: returns number mod num_buckets.

  - number: node id to hash.
  - num_buckets: size of the hash image set; defaults to 2, so the output is
    0 or 1 unless a different bucket count is requested.

  NOTE: this intentionally keeps the name `hash` used by the rest of the
  notebook, which shadows Python's builtin hash() from this cell onwards.
  """
  return number % num_buckets

In [44]:
# Size of the hash function's image set (number of buckets).
B = 2
# Number of vertices in the sub-graph pattern (3 = triangle).
L = 3
# Every possible bucket value: 0 .. B-1.
BSET = [bucket for bucket in range(B)]
# One reducer key per L-tuple of bucket values (B ** L keys in total).
REDUCERS = [key for key in product(BSET, repeat=L)]

def get_keys(edge, b_dim, b_set, pattern_dim):
  """
  Return the (reducer_key, edge) pairs for every reducer that must receive `edge`.

  - edge: tuple whose positions 0 and 2 hold the two endpoint node ids (n1, n2).
    The whole tuple is emitted unchanged as the value.
  - b_dim: size of the hash function's image set (|b|).
  - b_set: sequence of candidate reducer keys, each one an iterable of bucket
    values (for example list(product(range(b_dim), repeat=pattern_dim))).
    Only the first b_dim ** pattern_dim entries are examined.
  - pattern_dim: number of vertices in the pattern (l), e.g. 3 for triangles.

  With b1 = hash(n1) and b2 = hash(n2), a reducer key is selected when either:

  1.  the pair (b1, b2) appears at two consecutive positions of the key, or
  2.  the key starts with b2 and holds b1 at position pattern_dim - 1, i.e. the
      edge is the one that wraps around from the last pattern vertex back to
      the first.

  Keys are compared as tuples of ints rather than as concatenated digit
  strings, so bucket values of 10 or more are matched position by position
  and are not split into separate digits in the emitted key.

  Returns a list of (reducer_key, edge) tuples in b_set order, where
  reducer_key is a tuple of ints.
  """

  hash_n1 = hash(edge[0])
  hash_n2 = hash(edge[2])
  endpoints = (hash_n1, hash_n2)
  values = []  # (reducer_key, edge) pairs selected for this edge

  for candidate in b_set[:b_dim ** pattern_dim]:
    reducer_key = tuple(int(bucket) for bucket in candidate)
    # Case 1: (b1, b2) occupies two adjacent slots of the key.
    consecutive = any(
      pair == endpoints for pair in zip(reducer_key, reducer_key[1:])
    )
    # Case 2: wrap-around edge, b2 in the first slot and b1 in the last one.
    wraps_around = (
      reducer_key[0] == hash_n2 and reducer_key[pattern_dim - 1] == hash_n1
    )
    if consecutive or wraps_around:
      values.append((reducer_key, edge))

  return values


def map_phase(rdd, dim):
  """
  Map every edge to the reducer keys it belongs to and group the edges by key.

  - rdd: RDD of edge tuples (the graph).
  - dim: number of vertices in the pattern being searched (3 for triangles).

  The reducer key space is built here as every `dim`-tuple over the B hash
  buckets, and get_keys is called with all four of its arguments. The previous
  call passed only (edge, dim), which does not match get_keys' signature and
  raised a TypeError as soon as the RDD was evaluated.

  Returns an RDD of (reducer_key, [edges]) pairs.
  """
  # Bind plain local values so the lambda shipped to the workers captures them.
  b_dim = B
  reducer_keys = list(product(range(b_dim), repeat=dim))

  mapped_keys = rdd.flatMap(
    lambda edge: get_keys(edge, b_dim, reducer_keys, dim)
  )
  reducers = mapped_keys.groupByKey().mapValues(list)
  return reducers

def reduce_phase(rdd, dim):
  """
  Placeholder for the reduce step: not implemented yet, always returns None.
  """
  # TODO
  return None

# Run the map phase for the triangle pattern (3 vertices) and display the
# edges grouped under each reducer key.
reducers = map_phase(rdd_graph, dim=3)
reducers.collect()




[((0, 0, 1),
  [(1, 11, 2),
   (2, 11, 3),
   (3, 11, 2),
   (3, 11, 4),
   (4, 11, 1),
   (4, 11, 2),
   (4, 11, 3),
   (4, 12, 5),
   (5, 12, 2),
   (5, 12, 6)]),
 ((0, 1, 0),
  [(1, 11, 2),
   (2, 11, 3),
   (3, 11, 2),
   (3, 11, 4),
   (4, 11, 1),
   (4, 11, 2),
   (4, 11, 3),
   (4, 12, 5),
   (5, 12, 2),
   (5, 12, 6)]),
 ((1, 0, 0),
  [(1, 11, 2),
   (2, 11, 3),
   (3, 11, 2),
   (3, 11, 4),
   (4, 11, 1),
   (4, 11, 2),
   (4, 11, 3),
   (4, 12, 5),
   (5, 12, 2),
   (5, 12, 6)]),
 ((1, 1, 1), [(1, 11, 3), (5, 12, 1)]),
 ((0, 1, 1),
  [(1, 11, 2),
   (1, 11, 3),
   (2, 11, 3),
   (3, 11, 2),
   (3, 11, 4),
   (4, 11, 1),
   (4, 11, 3),
   (4, 12, 5),
   (5, 12, 1),
   (5, 12, 2),
   (5, 12, 6)]),
 ((1, 0, 1),
  [(1, 11, 2),
   (1, 11, 3),
   (2, 11, 3),
   (3, 11, 2),
   (3, 11, 4),
   (4, 11, 1),
   (4, 11, 3),
   (4, 12, 5),
   (5, 12, 1),
   (5, 12, 2),
   (5, 12, 6)]),
 ((1, 1, 0),
  [(1, 11, 2),
   (1, 11, 3),
   (2, 11, 3),
   (3, 11, 2),
   (3, 11, 4),
   (4, 11, 1),
  

# 4. MapReduce Algorithm for Squares
