# Simulated Live Financial Data - Amazon

In [4]:
import yfinance as yf
import pandas as pd

# Fetch historical data
symbol = 'AMZN'
df = yf.download(symbol, start="2020-01-01", end="2025-01-01", progress=False)

# Reset the index to get 'Date' as a column
df.reset_index(inplace=True)

# Add the Ticker column manually
df['Symbol'] = symbol

# Reorder columns
df = df[['Symbol', 'Date', 'Close', 'High', 'Low', 'Open', 'Volume']]
df.columns = ['Symbol', 'Date', 'Close', 'High', 'Low', 'Open', 'Volume']

# Display result
print(df.head())


  Symbol       Date      Close       High        Low       Open    Volume
0   AMZN 2020-01-02  94.900497  94.900497  93.207497  93.750000  80580000
1   AMZN 2020-01-03  93.748497  94.309998  93.224998  93.224998  75288000
2   AMZN 2020-01-06  95.143997  95.184502  93.000000  93.000000  81236000
3   AMZN 2020-01-07  95.343002  95.694504  94.601997  95.224998  80898000
4   AMZN 2020-01-08  94.598503  95.550003  94.321999  94.902000  70160000


In [5]:

from kafka import KafkaProducer
import time
import json

# Define Kafka server and topic
KAFKA_SERVER = '8e35f34f36db:9093'
KAFKA_TOPIC = 'kafka-amazon'

# Initialize Kafka producer
producer = KafkaProducer(
    bootstrap_servers=KAFKA_SERVER,
    value_serializer=lambda v: json.dumps(v).encode('utf-8')  # serialize to JSON
)

# Send each row of the DataFrame to Kafka
try:
    print(f"Producing messages to Kafka topic: {KAFKA_TOPIC}")
    for _, row in df.iterrows():
        message = row.to_dict()
        print(message)
        # Optionally add symbol
        # message['symbol'] = symbol
        # Convert Timestamp to string
        message['Date'] = message['Date'].strftime('%Y-%m-%d')

        producer.send(KAFKA_TOPIC, message)
        print(f"Sent: {message}")
        time.sleep(1)  # Simulate delay like real-time stream

except KeyboardInterrupt:
    print("Stopped producing messages.")
finally:
    producer.close()

Producing messages to Kafka topic: kafka-amazon
{'Symbol': 'AMZN', 'Date': Timestamp('2020-01-02 00:00:00'), 'Close': 94.90049743652344, 'High': 94.90049743652344, 'Low': 93.2074966430664, 'Open': 93.75, 'Volume': 80580000}
Sent: {'Symbol': 'AMZN', 'Date': '2020-01-02', 'Close': 94.90049743652344, 'High': 94.90049743652344, 'Low': 93.2074966430664, 'Open': 93.75, 'Volume': 80580000}
{'Symbol': 'AMZN', 'Date': Timestamp('2020-01-03 00:00:00'), 'Close': 93.74849700927734, 'High': 94.30999755859375, 'Low': 93.2249984741211, 'Open': 93.2249984741211, 'Volume': 75288000}
Sent: {'Symbol': 'AMZN', 'Date': '2020-01-03', 'Close': 93.74849700927734, 'High': 94.30999755859375, 'Low': 93.2249984741211, 'Open': 93.2249984741211, 'Volume': 75288000}
{'Symbol': 'AMZN', 'Date': Timestamp('2020-01-06 00:00:00'), 'Close': 95.14399719238281, 'High': 95.18450164794922, 'Low': 93.0, 'Open': 93.0, 'Volume': 81236000}
Sent: {'Symbol': 'AMZN', 'Date': '2020-01-06', 'Close': 95.14399719238281, 'High': 95.18450