In [1]:
# imports
import socket
from pyspark.sql import SparkSession
from pyspark.streaming import StreamingContext

# Settings
# Both cash registers ("caisses") are reached through this machine's hostname,
# each one on its own port.
listen_to_ip_caisse_1 = socket.gethostname()
listen_to_ip_caisse_2 = socket.gethostname()
listen_to_port_caisse_1 = 30011
listen_to_port_caisse_2 = 30021

# Number of seconds, reported in the message below and reused by later cells.
nb_secondes = 1
checkpoint_dir = "./checkpoint/"

# Announce each endpoint that is going to be listened to.
_message = "Going to listen on %s:%s chaque %s seconde"
for _host, _port in (
    (listen_to_ip_caisse_1, listen_to_port_caisse_1),
    (listen_to_ip_caisse_2, listen_to_port_caisse_2),
):
    print(_message % (_host, _port, nb_secondes))

# Spark initialisation: get the current SparkSession (or create one) and keep
# a handle on its SparkContext, which the streaming context is built from.
spark       = SparkSession.builder.getOrCreate()
sc          = spark.sparkContext

Going to listen on 337b0ed4fa15:30011 chaque 1 seconde
Going to listen on 337b0ed4fa15:30021 chaque 1 seconde


In [2]:
# Streaming context built on the Spark context, with nb_secondes as its second argument
ssc = StreamingContext(sc, nb_secondes)

# One text socket stream per cash register
dstream_caisse_1 = ssc.socketTextStream(
    listen_to_ip_caisse_1,
    listen_to_port_caisse_1,
)
dstream_caisse_2 = ssc.socketTextStream(
    listen_to_ip_caisse_2,
    listen_to_port_caisse_2,
)

# Merge both registers into a single stream
dstream = ssc.union(dstream_caisse_1, dstream_caisse_2)

In [3]:
# Build the DAG
# Each incoming line is a comma-separated record. Field 0 is the client id
# (it is the key of the printed tuples); fields 1 and 2 are multiplied to get
# the amount of the line -- presumably quantity x unit price, TODO confirm
# against the socket producer.
data            = dstream.map(lambda x: x.split(","))
clients_facture = data.map(lambda x: (x[0], float(x[1])*float(x[2])))
# Every 5 seconds, total what each client bought during the last 10 seconds
# (both durations scale with nb_secondes).
limite          = 3000
# func adds the amounts entering the window. invFunc must undo func for the
# amounts leaving the window (invFunc(func(x, y), y) == x), so it has to be a
# subtraction; using x+y here would add the expired amounts a second time
# instead of removing them.
# NOTE: the incremental (invFunc) form needs checkpointing to be enabled on
# the streaming context before it is started.
update_client   = clients_facture.reduceByKeyAndWindow(func           = lambda x,y : x+y,
                                                       invFunc        = lambda x,y : x-y,
                                                       windowDuration = nb_secondes*10,
                                                       slideDuration  = nb_secondes*5 )
# Keep only the clients whose purchases over the window exceed `limite`
client_avec_plus_de_3k_d_achat = update_client.filter(lambda x: x[1]>limite)
client_avec_plus_de_3k_d_achat.pprint()

In [4]:
# Enable checkpointing (needed by the windowed reduce that uses an inverse
# function). Use the directory from the settings cell instead of a second,
# different hard-coded path.
ssc.checkpoint(checkpoint_dir)

In [5]:
# Start the streaming computation; the "Time: ..." batches shown in this
# cell's output come from the pprint() registered on the filtered stream.
# NOTE(review): no ssc.stop() / awaitTermination call is visible in this
# notebook -- confirm how the context is meant to be stopped.
ssc.start()

-------------------------------------------
Time: 2019-04-27 21:01:41
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:01:46
-------------------------------------------
('client_c', 6476.0)
('client_b', 4743.0)
('client_a', 5739.0)
('client_d', 6322.0)

-------------------------------------------
Time: 2019-04-27 21:01:51
-------------------------------------------
('client_c', 6395.0)
('client_b', 4351.0)
('client_a', 7153.0)
('client_d', 5763.0)

-------------------------------------------
Time: 2019-04-27 21:01:56
-------------------------------------------
('client_a', 3415.0)

-------------------------------------------
Time: 2019-04-27 21:02:01
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:02:06
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:02:11
-------------------------------------------

------

-------------------------------------------
Time: 2019-04-27 21:07:31
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:07:36
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:07:41
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:07:46
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:07:51
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:07:56
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:08:01
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:08:06
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:08:11
----------

-------------------------------------------
Time: 2019-04-27 21:13:31
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:13:36
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:13:41
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:13:46
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:13:51
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:13:56
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:14:01
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:14:06
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:14:11
----------

-------------------------------------------
Time: 2019-04-27 21:19:31
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:19:36
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:19:41
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:19:46
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:19:51
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:19:56
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:20:01
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:20:06
-------------------------------------------

-------------------------------------------
Time: 2019-04-27 21:20:11
----------