# Simulated Live Financial Data - Google

In [4]:
import yfinance as yf
import pandas as pd

# Ticker whose daily price history is pulled via yfinance
symbol = 'GOOGL'

# Download the history and turn the date index into a regular 'Date' column
df = yf.download(symbol, start="2020-01-01", end="2025-01-01", progress=False).reset_index()

# Tag every row with the ticker it belongs to
df['Symbol'] = symbol

# Pick the columns in their final order, then overwrite the column labels with
# the same plain names.
# NOTE(review): the relabel presumably flattens multi-level column labels that
# some yfinance versions return — confirm before removing it.
ordered_columns = ['Symbol', 'Date', 'Close', 'High', 'Low', 'Open', 'Volume']
df = df[ordered_columns]
df.columns = ordered_columns

# Preview the first rows
print(df.head())


  Symbol       Date      Close       High        Low       Open    Volume
0  GOOGL 2020-01-02  68.108376  68.108376  67.004158  67.099702  27278000
1  GOOGL 2020-01-03  67.752075  68.360669  67.045454  67.079297  23408000
2  GOOGL 2020-01-06  69.557945  69.583321  67.228582  67.259926  46768000
3  GOOGL 2020-01-07  69.423592  69.841098  69.246938  69.689821  34330000
4  GOOGL 2020-01-08  69.917725  70.256604  69.300178  69.409154  35314000


In [5]:

from kafka import KafkaProducer
import time
import json

# Define Kafka server and topic
# NOTE(review): the host part looks like a Docker container ID — confirm it is
# still valid for the current environment before running this cell.
KAFKA_SERVER = '8e35f34f36db:9093'
KAFKA_TOPIC = 'kafka-google'

# Initialize Kafka producer; every message value is serialized to UTF-8 JSON
producer = KafkaProducer(
    bootstrap_servers=KAFKA_SERVER,
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)


def _report_send_failure(exc, record):
    """Print a delivery failure reported by the producer for one record.

    Args:
        exc: The exception passed to the errback by the producer.
        record: The message dict that could not be delivered.
    """
    print(f"Failed to deliver {record}: {exc}")


# Send each row of the DataFrame to Kafka, pausing one second between rows
try:
    print(f"Producing messages to Kafka topic: {KAFKA_TOPIC}")
    for _, row in df.iterrows():
        message = row.to_dict()
        print(message)
        # The 'Date' value is formatted as a 'YYYY-MM-DD' string before sending
        # so the JSON serializer receives only plain values
        message['Date'] = message['Date'].strftime('%Y-%m-%d')

        # send() only queues the record and returns a future; attach an errback
        # so a failed delivery is reported instead of being silently dropped.
        # `record=message` binds the current dict, not the loop variable.
        producer.send(KAFKA_TOPIC, message).add_errback(
            lambda exc, record=message: _report_send_failure(exc, record)
        )
        print(f"Sent: {message}")
        time.sleep(1)  # Simulate delay like real-time stream

except KeyboardInterrupt:
    print("Stopped producing messages.")
finally:
    # Always release the producer, including when the loop is interrupted
    producer.close()

Producing messages to Kafka topic: kafka-google
{'Symbol': 'GOOGL', 'Date': Timestamp('2020-01-02 00:00:00'), 'Close': 68.1083755493164, 'High': 68.1083755493164, 'Low': 67.00415768648521, 'Open': 67.09970156760528, 'Volume': 27278000}
Sent: {'Symbol': 'GOOGL', 'Date': '2020-01-02', 'Close': 68.1083755493164, 'High': 68.1083755493164, 'Low': 67.00415768648521, 'Open': 67.09970156760528, 'Volume': 27278000}
{'Symbol': 'GOOGL', 'Date': Timestamp('2020-01-03 00:00:00'), 'Close': 67.7520751953125, 'High': 68.36066912200879, 'Low': 67.04545444931702, 'Open': 67.07929686090621, 'Volume': 23408000}
Sent: {'Symbol': 'GOOGL', 'Date': '2020-01-03', 'Close': 67.7520751953125, 'High': 68.36066912200879, 'Low': 67.04545444931702, 'Open': 67.07929686090621, 'Volume': 23408000}
{'Symbol': 'GOOGL', 'Date': Timestamp('2020-01-06 00:00:00'), 'Close': 69.55794525146484, 'High': 69.58332136437583, 'Low': 67.22858209701914, 'Open': 67.25992638011208, 'Volume': 46768000}
Sent: {'Symbol': 'GOOGL', 'Date': '2