In [1]:
import json
import threading
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

# Module-level handles shared by start_streaming() and stop_streaming().
# Each one is read back through globals() so that re-running this cell while a
# stream is live keeps the existing handles instead of resetting them to None;
# a reset would orphan the running contexts and leave stop_streaming() with
# nothing to stop. On a fresh kernel all three start out as None.
ssc = globals().get("ssc")  # active StreamingContext, or None
sc = globals().get("sc")  # active SparkContext, or None
streaming_thread = globals().get("streaming_thread")  # awaitTermination thread, or None

# Fields every record must carry for the per-type aggregations below.
_REQUIRED_TX_FIELDS = ("transaction_type", "total_amount")


def _parse_transaction(line):
    """Decode one JSON transaction line into a zero- or one-element list.

    Returns ``[record]`` when ``line`` decodes to a dict containing every field
    in ``_REQUIRED_TX_FIELDS``; otherwise returns ``[]``. The list shape is
    meant for ``flatMap`` so that unusable lines are dropped instead of raising
    inside a batch.
    """
    try:
        record = json.loads(line)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass (blank/partial lines).
        return []
    if isinstance(record, dict) and all(field in record for field in _REQUIRED_TX_FIELDS):
        return [record]
    return []


def start_streaming():
    """Create the Spark contexts, wire up the transaction pipeline, and start it.

    Connects to the TCP text source at 127.0.0.1:9999 with a 5-second batch
    interval and registers three printed outputs per batch: the raw lines, the
    number of transactions per ``transaction_type``, and the summed
    ``total_amount`` per ``transaction_type``. Lines that are not usable
    transaction records are skipped by the two aggregations (they still appear
    in the raw-line output).

    The call returns once the stream has been started; ``awaitTermination`` runs
    on a daemon thread so the notebook stays interactive. If a context is
    already registered in the module globals, a warning is printed and nothing
    is started.

    Raises:
        Exception: whatever was raised while building or starting the stream.
            Before re-raising, any context created by this call is stopped and
            the ``sc`` / ``ssc`` globals are reset to ``None``.
    """
    global sc, ssc, streaming_thread

    if sc is not None or ssc is not None:
        print("⚠️ SparkContext or StreamingContext already exists. Stop it before starting again.")
        return

    try:
        # Create SparkContext and StreamingContext (batch interval 5s)
        sc = SparkContext("local[2]", "FinancialDataStream")
        ssc = StreamingContext(sc, 5)

        # Connect to the TCP server and echo the raw lines of every batch
        lines = ssc.socketTextStream("127.0.0.1", 9999)
        lines.pprint()

        # Parse JSON lines, dropping anything that is not a usable record
        transactions = lines.flatMap(_parse_transaction)

        # Count transactions per type
        counts = transactions.map(lambda tx: (tx["transaction_type"], 1)) \
                             .reduceByKey(lambda a, b: a + b)
        counts.pprint()

        # Sum amounts per transaction type
        amounts = transactions.map(lambda tx: (tx["transaction_type"], tx["total_amount"])) \
                              .reduceByKey(lambda a, b: a + b)
        amounts.pprint()

        # Start streaming
        ssc.start()
    except Exception:
        # Roll back so a half-built context neither keeps running nor blocks
        # the next start_streaming() call behind the "already exists" guard.
        failed_ssc, failed_sc = ssc, sc
        sc = ssc = None
        try:
            if failed_ssc is not None:
                failed_ssc.stop(stopSparkContext=True, stopGraceFully=False)
            elif failed_sc is not None:
                failed_sc.stop()
        except Exception as cleanup_error:
            # Report but do not mask the original failure, which is re-raised.
            print(f"⚠️ Cleanup after failed start also failed: {cleanup_error}")
        raise

    print("✅ Streaming started in background. Run stop_streaming() to stop it.")

    # Bind the context locally: the thread must not look up the global `ssc`,
    # which stop_streaming() may already have reset by the time it runs.
    active_context = ssc

    # Run awaitTermination in a separate thread so Jupyter stays free
    def run_stream():
        try:
            active_context.awaitTermination()
        except Exception as e:
            print(f"Background thread terminated: {e}")

    streaming_thread = threading.Thread(target=run_stream, daemon=True)
    streaming_thread.start()


def stop_streaming():
    """Stop the streaming pipeline and release every handle held in the globals.

    Stops the StreamingContext (gracefully, and asking it to stop the
    SparkContext too), then stops the SparkContext handle directly, then waits
    up to 5 seconds for the background thread to finish. Handles that are
    ``None`` are skipped, so calling this with nothing running is a no-op.

    The ``sc`` / ``ssc`` / ``streaming_thread`` globals are reset to ``None``
    before any stop call is made, and the later steps run even if an earlier
    one raises, so a failed shutdown never leaves the notebook unable to call
    ``start_streaming()`` again.

    Raises:
        Exception: whatever a ``stop()`` call raised, after the remaining
            cleanup steps have been attempted.
    """
    global sc, ssc, streaming_thread

    # Detach the handles first so state is reset no matter how shutdown goes.
    active_ssc, active_sc, worker = ssc, sc, streaming_thread
    ssc = None
    sc = None
    streaming_thread = None

    try:
        if active_ssc is not None:
            print("🛑 Stopping StreamingContext...")
            # `stopGraceFully` (sic) is how PySpark spells this keyword.
            active_ssc.stop(stopSparkContext=True, stopGraceFully=True)
    finally:
        try:
            if active_sc is not None:
                active_sc.stop()
                print("✅ SparkContext stopped.")
        finally:
            if worker is not None:
                worker.join(timeout=5)
                # join() returns silently on timeout, so check before claiming success.
                if worker.is_alive():
                    print("⚠️ Streaming thread is still running after 5s.")
                else:
                    print("🟢 Streaming thread stopped.")




In [None]:
# Start the socket stream; it runs in the background and prints each 5-second
# batch below this cell until stop_streaming() is called.
start_streaming()



✅ Streaming started in background. Run stop_streaming() to stop it.


-------------------------------------------
Time: 2025-09-04 23:11:55
-------------------------------------------

-------------------------------------------
Time: 2025-09-04 23:11:55
-------------------------------------------

-------------------------------------------
Time: 2025-09-04 23:11:55
-------------------------------------------

-------------------------------------------
Time: 2025-09-04 23:12:00
-------------------------------------------
{"transaction_id": "67d8bcc7-78fb-41f1-ab8e-112cf06636ce", "timestamp": "2025-09-04T23:11:55.454871", "account_id": "ACC_25210", "transaction_type": "SELL", "symbol": "SPOT", "quantity": 677, "price": 123.32, "total_amount": 83487.64, "currency": "JPY", "exchange": "NYSE", "customer_segment": "VIP"}
{"transaction_id": "0fb0a888-1811-4780-a0a4-9aa12f54cbd2", "timestamp": "2025-09-04T23:11:56.468510", "account_id": "ACC_46549", "transaction_type": "BUY", "symbol": "META", "quantity": 123, "price": 209.69, "total_amount": 25791.87, "curre

In [3]:
# Stop the StreamingContext and SparkContext, then wait up to 5 seconds for the
# background awaitTermination thread.
stop_streaming()

🛑 Stopping StreamingContext...
✅ SparkContext stopped.
🟢 Streaming thread stopped.
