# Simulated Live Financial Data

In [1]:
import yfinance as yf
import pandas as pd

# Fetch historical data (example: Apple stock)
symbol = 'AAPL'
df = yf.download(symbol, start="2020-01-01", end="2025-01-01", progress=False)

df.reset_index(inplace=True)

df.columns = ['Date', 'Close', 'High', 'Low', 'Open', 'Volume']


# Display the first few rows
print(df.head())
print(df.info())


YF.download() has changed argument auto_adjust default to True
        Date      Close       High        Low       Open     Volume
0 2020-01-02  72.716072  72.776598  71.466812  71.721019  135480400
1 2020-01-03  72.009125  72.771752  71.783969  71.941336  146322800
2 2020-01-06  72.582916  72.621654  70.876083  71.127873  118387200
3 2020-01-07  72.241539  72.849216  72.021223  72.592586  108872000
4 2020-01-08  73.403656  73.706287  71.943766  71.943766  132079200
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    1258 non-null   datetime64[ns]
 1   Close   1258 non-null   float64       
 2   High    1258 non-null   float64       
 3   Low     1258 non-null   float64       
 4   Open    1258 non-null   float64       
 5   Volume  1258 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 59.1 KB
N

In [2]:

from kafka import KafkaProducer
import time
import json

# Define Kafka server and topic
KAFKA_SERVER = '8e35f34f36db:9093'
KAFKA_TOPIC = 'Kafka-train'

# Initialize Kafka producer
producer = KafkaProducer(
    bootstrap_servers=KAFKA_SERVER,
    value_serializer=lambda v: json.dumps(v).encode('utf-8')  # serialize to JSON
)

# Send each row of the DataFrame to Kafka
try:
    print(f"Producing messages to Kafka topic: {KAFKA_TOPIC}")
    for _, row in df.iterrows():
        message = row.to_dict()
        print(message)
        # Optionally add symbol
        message['symbol'] = symbol
        # Convert Timestamp to string
        message['Date'] = message['Date'].strftime('%Y-%m-%d')

        producer.send(KAFKA_TOPIC, message)
        print(f"Sent: {message}")
        time.sleep(2)  # Simulate delay like real-time stream

except KeyboardInterrupt:
    print("Stopped producing messages.")
finally:
    producer.close()

Producing messages to Kafka topic: Kafka-train
{'Date': Timestamp('2020-01-02 00:00:00'), 'Close': 72.71607208251953, 'High': 72.77659819422657, 'Low': 71.46681225027338, 'Open': 71.72101896406637, 'Volume': 135480400}
Sent: {'Date': '2020-01-02', 'Close': 72.71607208251953, 'High': 72.77659819422657, 'Low': 71.46681225027338, 'Open': 71.72101896406637, 'Volume': 135480400, 'symbol': 'AAPL'}
{'Date': Timestamp('2020-01-03 00:00:00'), 'Close': 72.00912475585938, 'High': 72.7717522953066, 'Low': 71.78396939069293, 'Open': 71.94133580542943, 'Volume': 146322800}
Sent: {'Date': '2020-01-03', 'Close': 72.00912475585938, 'High': 72.7717522953066, 'Low': 71.78396939069293, 'Open': 71.94133580542943, 'Volume': 146322800, 'symbol': 'AAPL'}
{'Date': Timestamp('2020-01-06 00:00:00'), 'Close': 72.58291625976562, 'High': 72.62165386110323, 'Low': 70.87608272259153, 'Open': 71.12787343706493, 'Volume': 118387200}
Sent: {'Date': '2020-01-06', 'Close': 72.58291625976562, 'High': 72.62165386110323, 'Lo