In [None]:
import yfinance as yf
from datetime import datetime, timedelta

# Define the ticker symbol and date range
ticker = "AAPL"
end_date = datetime.today()
start_date = end_date - timedelta(days=30)

# Fetch the data
apple_data = yf.download(ticker, start=start_date.strftime("%Y-%m-%d"), end=end_date.strftime("%Y-%m-%d"))

# Display the data
print(apple_data)


In [None]:
ticker = "AAPL"
interval = "1m"  # Smallest interval

# Fetch data for the last 7 days
apple_data = yf.download(tickers=ticker, period="7d", interval=interval)

# Display the first few rows of data
print(apple_data.head())

In [None]:
import yfinance as yf
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

# Fetch Apple's 1-minute data over 7 days
ticker = ["AAPL", "NVDA", "MSFT", "AMZN", "GOOG", "META", "TSLA", "AVGO", "BRK-B", "WMT"]
interval = "1m"
period = "1d"

def predict_stock(ticker):
    data = yf.download(tickers=ticker, period=period, interval=interval)

    # Extract closing prices
    closing_prices = data["Close"].dropna()

    train = closing_prices[:-26]  
    test = closing_prices[-26:]

    # Fit the ARIMA model
    model = ARIMA(train, order=(5, 1, 2))
    model_fit = model.fit()

    # Forecast for the 7th day
    forecast = model_fit.forecast(steps=len(test))

    # Plot the results
    plt.figure(figsize=(14, 6))
    plt.plot(closing_prices.index, closing_prices, label="Actual Data (Last 2 Days)")
    plt.plot(test.index, forecast, label="Predicted Data (7th Day)", linestyle='--')
    plt.title(f"{ticker} Stock Price: Actual vs Predicted (ARIMA)")
    plt.xlabel("Time")
    plt.ylabel("Price (USD)")
    plt.legend()
    plt.show()

plots = [predict_stock(tick) for tick in ticker]

In [None]:
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler

# Ticker details
tickers = ["AAPL", "NVDA", "MSFT", "AMZN", "GOOG", "META", "TSLA", "AVGO", "BRK-B", "WMT"]
interval = "1m"
period = "1d"

results = []  # Store results for each ticker

def predict_future(model, last_data_point, scaler, n_predictions=10):
    # Use the last data point to start predicting the future
    predictions = []
    current_input = last_data_point

    for _ in range(n_predictions):
        
        # Predict the next value
        next_prediction = model.predict(np.array(current_input[-1]).reshape(-1, 1))
        predictions.append(next_prediction[0])
        
        # Update current input by appending the predicted value
        current_input = np.append(current_input[1:], next_prediction)
    
    predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
    return predictions

def plot_future_predictions(predictions, data, future_periods=10):
    # Create future dates (Assuming the last date is continuous)
    last_date = data.index[-1]
    future_dates = [last_date + np.timedelta64(i, 'm') for i in range(1, future_periods + 1)]
    
    # Plot the future predictions
    plt.figure(figsize=(14, 6))
    plt.plot(data.index, data["Close"], label="Actual Data")
    plt.plot(future_dates, predictions, label="Future Predictions", linestyle="--", color="red")
    plt.title("Stock Price: Actual vs Future Predictions")
    plt.xlabel("Time")
    plt.ylabel("Price (USD)")
    plt.legend()
    plt.show()

def predict_stock(ticker):
    # Download stock data
    data = yf.download(tickers=ticker, period=period, interval=interval)

    # Extract and clean the closing prices
    closing_prices = data["Close"].dropna().values.reshape(-1, 1)

    # Normalize the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_prices = scaler.fit_transform(closing_prices)

    # Prepare training and testing data
    train_size = len(scaled_prices) - 10
    train, test = scaled_prices[:train_size], scaled_prices[train_size:]

    # Create input-output datasets
    def create_dataset(data):
        x, y = [], []
        for i in range(len(data) - 1):
            x.append(data[i][0])
            y.append(data[i + 1][0])
        return np.array(x), np.array(y)

    X_train, y_train = create_dataset(train)
    # X_test, y_test = create_dataset(test, time_step)
    X_train = np.array(X_train).reshape(-1, 1)
    print(X_train.shape)
    
    # Build the MLPRegressor model
    model = MLPRegressor(hidden_layer_sizes=(50, 50), max_iter=500, random_state=42)
    model.fit(X_train, y_train)
    predictions = predict_future(model, X_train, scaler, 10)
    plot_future_predictions(predictions, data, 10)

    # Make predictions
    # predictions = model.predict(X_test)
    # predictions = scaler.inverse_transform(predictions.reshape(-1, 1))

    # Inverse transform actual test data
    # y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

    # Record key values
    # last_train_price = closing_prices[train_size - 1][0]
    # last_predicted_price = predictions[-1][0]
    # last_actual_price = y_test_actual[-1][0]

    # results.append({
    #     "Ticker": ticker,
    #     "Last Train Price": last_train_price,
    #     "Predicted Price": last_predicted_price,
    #     "Actual Price": last_actual_price
    # })

    # Plot the results
    # plt.figure(figsize=(14, 6))
    # plt.plot(data.index[-len(y_test_actual):], y_test_actual, label="Actual Data")
    # plt.plot(data.index[-len(predictions):], predictions, label="Predicted Data", linestyle="--")
    # plt.title(f"{ticker} Stock Price: Actual vs Predicted (MLPRegressor)")
    # plt.xlabel("Time")
    # plt.ylabel("Price (USD)")
    # plt.legend()
    # plt.show()

# Generate plots and record results for each ticker
for tick in tickers:
    predict_stock(tick)

# Display results
# for result in results:
#     print(result)


In [None]:
['AAPL', 'MSFT', 'GOOG', 'AMZN', 'TSLA', 'NVDA', 'JPM', 'V', 'PFE', 'KO', 'JNJ', 'DIS', 'BA', 'CAT', 'WMT', 'MCD', 'GE', 'CSCO', 'XOM', 'CVX', 'NKE', 'NFLX', 'UNH', 'PYPL', 'AMD', 'BA', 'IBM', 'INTC', 'GS', 'SPGI', 'T', 'VZ', 'AMGN', 'CVS', 'LMT', 'RTX', 'HD', 'LOW', 'UPS', 'MS', 'BK', 'AXP', 'BLK', 'MO', 'NEE', 'BMY', 'WFC', 'CSX', 'SCHW', 'ZTS', 'MTB', 'PGR', 'DHR', 'TMO', 'ABT', 'SYY', 'SYK', 'HUM', 'MRK', 'GILD', 'MMM', 'USB', 'MTCH', 'AIG', 'UAL']


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from datetime import datetime, timedelta
from core_code.get_fin_info import get_s_p_tickers, get_fin_data
from models.lstm.lstm import get_train_test_for_lstm, build_model, predict_future

# Ticker details
tickers = get_s_p_tickers()
interval = "5m"
period = "1d"
start = (datetime.today() - timedelta(days=4)).strftime('%Y-%m-%d')
end = (datetime.today() - timedelta(days=3)).strftime('%Y-%m-%d')

results = []  # Store results for each ticker

def calc_difference(past, future):
    dif = future-past
    return dif
    if dif<0:
        return dif
    return f"+{dif}"

def predict_stock(ticker):
    # Get stock data
    closing_prices = get_fin_data(ticker, start, end, interval)

    # Normalize the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_prices = scaler.fit_transform(closing_prices)

    X_train, y_train = get_train_test_for_lstm(scaled_prices)
    
    # Build the MLPRegressor model
    model = build_model(X_train, y_train)
    
    predictions = predict_future(model, y_train, scaler) # change y_tain to whole data set when real data

    # Record key values
    last_train_price = scaler.inverse_transform(np.array(X_train[-1][0]).reshape(-1, 1))[0][0]
    last_predicted_price = predictions[-1][0]
    last_actual_price = scaler.inverse_transform(np.array(scaled_prices[-1][0]).reshape(-1, 1))[0][0]

    results.append({
        "Ticker": ticker,
        # "Last Train Price": last_train_price,
        # "Predicted Price": last_predicted_price,
        # "Actual Price": last_actual_price,
        "Predicted Difference": calc_difference(last_train_price, last_predicted_price),
        "Actual Difference": calc_difference(last_train_price, last_actual_price)

    })

# Generate plots and record results for each ticker
for tick in tickers:
    predict_stock(tick)

# Create DataFrame
df = pd.DataFrame(results)

# Round to 2 decimal places
# df["Last Train Price"] = df["Last Train Price"].round(2)
# df["Predicted Price"] = df["Predicted Price"].round(2)
# df["Actual Price"] = df["Actual Price"].round(2)

# Display the table
print(df)


In [None]:
plt.figure(figsize=(8, 5))
plt.scatter(df['Predicted Difference'], df['Actual Difference'], marker='o', linestyle='-', color='blue')
plt.title('Pred vs Act Plot')
plt.xlabel('Pred Axis')
plt.ylabel('Act Axis')
plt.grid(True)
plt.show()

In [None]:
plt.figure(figsize=(10, 6))

# Plot each pair of values from both columns for the same x-axis index
for i in range(len(df)):
    plt.scatter(i + 1, df['Predicted Difference'][i], color='blue')  # Plot for Column1
    plt.scatter(i + 1, df['Actual Difference'][i], color='red')   # Plot for Column2

plt.xlabel('Index')
plt.ylabel('Value')
plt.title('Scatter Plot of Column1 and Column2 at Each Index')
plt.show()

In [None]:
def predict_future(model, last_data_point, scaler, n_predictions=1):
    # Use the last data point to start predicting the future
    predictions = []
    current_input = last_data_point

    for _ in range(n_predictions):
        
        # Predict the next value
        next_prediction = model.predict(np.array(current_input[-1]).reshape(-1, 1))
        predictions.append(next_prediction[0])
        
        # Update current input by appending the predicted value
        current_input = np.append(current_input[1:], next_prediction)
    
    predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
    return predictions

def plot_future_predictions(predictions, data, future_periods=1):
    # Create future dates (Assuming the last date is continuous)
    last_date = data.index[-1]
    future_dates = [last_date + np.timedelta64(i, 'm') for i in range(1, future_periods + 1)]
    
    # Plot the future predictions
    plt.figure(figsize=(14, 6))
    plt.plot(data.index, data["Close"], label="Actual Data")
    plt.plot(future_dates, predictions, label="Future Predictions", linestyle="--", color="red")
    plt.title("Stock Price: Actual vs Future Predictions")
    plt.xlabel("Time")
    plt.ylabel("Price (USD)")
    plt.legend()
    plt.show()

In [None]:

# Initialize counters
both_above_0 = 0
both_below_0 = 0
one_above_one_below = 0

# Loop through each row and count the conditions
for i in range(len(df)):
    col1 = df['Predicted Difference'][i]
    col2 = df['Actual Difference'][i]

    if col1 > 0 and col2 > 0:
        both_above_0 += 1
    elif col1 < 0 and col2 < 0:
        both_below_0 += 1
    elif (col1 > 0 and col2 < 0) or (col1 < 0 and col2 > 0):
        one_above_one_below += 1

# Print the results
print(f"Both dots are above 0: {both_above_0} times")
print(f"Both dots are below 0: {both_below_0} times")
print(f"One dot is above 0 while the other is below 0: {one_above_one_below} times")

In [None]:
14+9+42

In [None]:
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from datetime import datetime, timedelta
from core_code.get_fin_info import get_s_p_tickers, get_fin_data
from models.lstm.lstm import get_train_test_for_lstm

# Ticker details
tickers = get_s_p_tickers()
interval = "5m"
period = "1d"
start = (datetime.today() - timedelta(days=4)).strftime('%Y-%m-%d')
end = (datetime.today() - timedelta(days=3)).strftime('%Y-%m-%d')

closing_prices = get_fin_data("aapl", start, end, interval)

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_prices = scaler.fit_transform(closing_prices)

print(scaled_prices)
train_size = len(scaled_prices) - 1
train = scaled_prices[:train_size]

In [None]:
np.array(data).reshape(-1,1)

In [None]:
data = [0,1,2,3,4,5,6,7,8,9,10]
def create_dataset(data):
    x, y = [], []
    for i in range(len(data) - 1):
        x.append(data[i][0])
        y.append(data[i + 1][0])
    return np.array(x), np.array(y)
x, y = create_dataset(np.array(data).reshape(-1,1))
print(x)
print(y)

In [None]:
data[:-1]

In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from datetime import datetime, timedelta
from core_code.get_fin_info import get_s_p_tickers, get_fin_data
from models.lstm.lstm import get_train_test_for_lstm, build_model, predict_future
from core_code.testing import calc_difference, plot_skatter

# Ticker details
tickers = get_s_p_tickers()

def predict_stock(ticker, start, end, interval):
    # Get stock data
    closing_prices = get_fin_data(ticker, start, end, interval)

    # Normalize the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_prices = scaler.fit_transform(closing_prices)

    X_train, y_train = get_train_test_for_lstm(scaled_prices)
    
    # Build the MLPRegressor model
    model = build_model(X_train, y_train.ravel())
    
    predictions = predict_future(model, y_train, scaler) # change y_tain to whole data set when real data

    # Record key values
    last_train_price = scaler.inverse_transform(np.array(X_train[-1][0]).reshape(-1, 1))[0][0]
    last_predicted_price = predictions[-1][0]
    last_actual_price = scaler.inverse_transform(np.array(scaled_prices[-1][0]).reshape(-1, 1))[0][0]
    return last_train_price, last_predicted_price, last_actual_price

ave_act_dif = []
profits_005 = []
profits_01 = []
profits_015 = []
for i in range(30):
    results = []
    act_dif = []
    interval = "5m"
    start = (datetime.today() - timedelta(days=i+1)).strftime('%Y-%m-%d')
    end = (datetime.today() - timedelta(days=i)).strftime('%Y-%m-%d')

    try:
        # Generate plots and record results for each ticker
        for ticker in tickers:
            last_train_price, last_predicted_price, last_actual_price = predict_stock(ticker, start, end, interval)

            predicted_difference = calc_difference(last_train_price, last_predicted_price)
            actual_difference = calc_difference(last_train_price, last_actual_price)
            act_dif.append(actual_difference)

            if predicted_difference>0.005:
                print(i, last_train_price, last_predicted_price, last_actual_price)

                profits_005.append((i, last_train_price, last_predicted_price, last_actual_price))

                if predicted_difference>0.01:
                    print(i, last_train_price, last_predicted_price, last_actual_price)

                    profits_01.append((i, last_train_price, last_predicted_price, last_actual_price))

                    if predicted_difference>0.015:
                        print(i, last_train_price, last_predicted_price, last_actual_price)

                        profits_015.append((i, last_train_price, last_predicted_price, last_actual_price))

            results.append({
                "Ticker": ticker,
                "Predicted Difference": predicted_difference,
                "Actual Difference": actual_difference

            })
        # Create DataFrame
        df = pd.DataFrame(results)
        plot_skatter(df)

    except Exception as e:
        print(e)
    if len(act_dif)>0:
        ave_act_dif.append((i, sum(act_dif)/len(act_dif)))

In [None]:
print(ave_act_dif)
print(profits_1)
print(profits_2_5)
print(profits_5)

In [None]:
print(ave_act_dif)
print(profits_1)
print(profits_2_5)
print(profits_5)

In [None]:
import pandas as pd
from datetime import datetime, timedelta
from core_code.get_fin_info import get_s_p_tickers
from core_code.testing import calc_difference, plot_skatter, breached_threshold
from core_code.prediction import predict_stock

# Ticker details
tickers = get_s_p_tickers()

ave_act_dif = []
profits_005 = []
profits_01 = []
profits_015 = []
for i in range(3):
    results = []
    act_dif = []
    interval = "5m"
    start = (datetime.today() - timedelta(days=i+1)).strftime('%Y-%m-%d')
    end = (datetime.today() - timedelta(days=i)).strftime('%Y-%m-%d')

    try:
        # Generate plots and record results for each ticker
        for ticker in tickers:
            last_train_price, last_predicted_price, last_actual_price = predict_stock(ticker, start, end, interval)

            predicted_difference = calc_difference(last_train_price, last_predicted_price)
            actual_difference = calc_difference(last_train_price, last_actual_price)
            act_dif.append(actual_difference)

            breached_threshold(0.005, profits_005, predicted_difference, i, last_train_price, last_predicted_price, last_actual_price)
            breached_threshold(0.01, profits_01, predicted_difference, i, last_train_price, last_predicted_price, last_actual_price)
            breached_threshold(0.015, profits_015, predicted_difference, i, last_train_price, last_predicted_price, last_actual_price)

            results.append({
                "Ticker": ticker,
                "Predicted Difference": predicted_difference,
                "Actual Difference": actual_difference
            })

        df = pd.DataFrame(results)
        plot_skatter(df)

    except Exception as e:
        print(e)
    if len(act_dif)>0:
        ave_act_dif.append((i, sum(act_dif)/len(act_dif)))

In [None]:
import pandas as pd
from datetime import datetime, timedelta
from core_code.get_fin_info import get_s_p_tickers
from core_code.testing import calc_difference, plot_skatter, breached_threshold
from core_code.prediction import predict_stock
from core_code.get_fin_info import get_fin_data

# Ticker details
tickers = get_s_p_tickers()

ave_act_dif = {}
profits_005 = {}
profits_0075 = {}
profits_01 = {}
profits_015 = {}
profits_02 = {}
profits_025 = {}
for i in range(10):
    results = []
    act_dif = []
    interval = "5m"
    start = (datetime.today() - timedelta(days=i+1)).strftime('%Y-%m-%d')
    end = (datetime.today() - timedelta(days=i)).strftime('%Y-%m-%d')

    try:
        # Generate plots and record results for each ticker
        for ticker in tickers:
            # Get stock data
            closing_prices = get_fin_data(ticker, start, end, interval)
            last_train_price, last_predicted_price, last_actual_price = predict_stock(closing_prices)

            predicted_difference = calc_difference(last_train_price, last_predicted_price)
            actual_difference = calc_difference(last_train_price, last_actual_price)
            act_dif.append(actual_difference)

            breached_threshold(0.005, profits_005, predicted_difference, i, last_train_price, last_predicted_price, last_actual_price)
            breached_threshold(0.0075, profits_0075, predicted_difference, i, last_train_price, last_predicted_price, last_actual_price)
            breached_threshold(0.01, profits_01, predicted_difference, i, last_train_price, last_predicted_price, last_actual_price)
            breached_threshold(0.015, profits_015, predicted_difference, i, last_train_price, last_predicted_price, last_actual_price)
            breached_threshold(0.02, profits_02, predicted_difference, i, last_train_price, last_predicted_price, last_actual_price)
            breached_threshold(0.025, profits_025, predicted_difference, i, last_train_price, last_predicted_price, last_actual_price)

            results.append({
                "Ticker": ticker,
                "Predicted Difference": predicted_difference,
                "Actual Difference": actual_difference
            })

        # df = pd.DataFrame(results)
        # plot_skatter(df)

    except Exception as e:
        print(e)
    if len(act_dif)>0:
        ave_act_dif[i] = sum(act_dif)/len(act_dif)

In [None]:
print(ave_act_dif)
print(profits_005)
print(profits_01)
print(profits_015)

In [None]:
import numpy as np
dicts = [profits_005, profits_0075, profits_01, profits_015, profits_02, profits_025]
for dic in dicts:
    averages = {key: np.mean(values) for key, values in dic.items()}

    for key, avg in averages.items():
        print(f"Key {key}: S&P_Ave = {ave_act_dif[key]}, Average = {avg}")

In [None]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Connect to SQLite database
conn = sqlite3.connect("data.db")
cursor = conn.cursor()

df = pd.read_sql("SELECT * FROM stocks_data", conn)
conn.close()  # Close connection

def convert_blob(blob):
    return np.frombuffer(blob, dtype=np.float64)  # Adjust dtype if needed

# Apply conversion to all rows
df = df.map(convert_blob)

# Convert DataFrame rows into a list of lists
list_of_lists = df.values.tolist()  # Extracts all rows as lists

# Plot each sublist sequentially
plt.figure(figsize=(10, 5))

colors = ['b', 'g', 'r', 'c', 'm']  # Different colors for each sublist
for i, sublist in enumerate(list_of_lists[:5]):  # Only 5 sublists
    plt.plot(range(len(sublist)), sublist, color=colors[i % len(colors)], label=f"Sublist {i+1}")

    # Plot styling
    plt.xlabel("Index")
    plt.ylabel("Value")
    plt.title("Sequential Plot of 5 Sublists")
    plt.legend()
    plt.grid(True)

    # Show the plot
    plt.show()

import pandas as pd
from datetime import datetime, timedelta
from core_code.get_fin_info import get_s_p_tickers
from core_code.testing import calc_difference, plot_skatter, breached_threshold
from core_code.prediction import predict_stock
from core_code.get_fin_info import get_fin_data
import sqlite3
import itertools

tickers = get_s_p_tickers()[:5]
tickers

In [None]:
import pandas as pd
import re
import yfinance as yf
data = yf.download(
        tickers="MMM", 
        start="2025-02-10", 
        end="2025-02-11", 
        interval="1h"
    )
data

In [None]:
import pandas as pd
from datetime import datetime, timedelta
from core_code.get_fin_info import get_s_p_tickers
from core_code.testing import calc_difference, plot_skatter, breached_threshold
from core_code.prediction import predict_stock
from core_code.get_fin_info import get_fin_data
import sqlite3
import itertools

tickers = get_s_p_tickers()[:2]
data_list = []
n_days = 2
for ticker in tickers:
    dates = []
    close_val = []
    volumn_val = []
    for i in range(n_days):
        interval = "90m"
        start = (datetime.today() - timedelta(days=n_days-i)).strftime('%Y-%m-%d')
        end = (datetime.today() - timedelta(days=n_days-i-1)).strftime('%Y-%m-%d')
        try:
            data = get_fin_data(ticker, start, end, interval)
            dates.append(data.index.tolist())
            close_val.append(data["Close"].to_numpy().flatten().tolist())
            volumn_val.append(data["Volume"].to_numpy().flatten().tolist())
        except Exception as e:
            print(e)
    data_list.append((dates, close_val, volumn_val))

# df = pd.DataFrame(data_list)

# conn = sqlite3.connect("data.db")  # Creates a database file
# df.to_sql("stocks_data", conn, if_exists="replace", index=False)  # Writes DataFrame to a table
# conn.close()




In [None]:
data.index.tolist()

In [None]:
import random
from core_code.get_fin_info import get_s_p_tickers
# print(type(get_s_p_tickers()))
tickers = random.shuffle(get_s_p_tickers())
print(tickers)

In [None]:
print(data_records)

In [None]:
import pickle
import matplotlib.pyplot as plt
import pandas as pd

# Load the data from the pickle file
with open('data_records.pkl', 'rb') as file:
    data_records = pickle.load(file)

# Specify the date for which to plot the data
plot_date = '2025-02-11'

# Number of tickers to plot
tickers = list(data_records[plot_date].keys())

# Create a figure with a separate subplot for each ticker
fig, axes = plt.subplots(len(tickers), 1, figsize=(10, 6 * len(tickers)))

# If only one ticker, axes is a single object, so we make it a list
if len(tickers) == 1:
    axes = [axes]

# Loop through each ticker and plot the 'Close' price against the 'Time' for the specified date
for ax, ticker in zip(axes, tickers):
    stock_data = data_records[plot_date][ticker]
    
    # Ensure the 'Time' column is in datetime format
    stock_data['Time'] = pd.to_datetime(stock_data['Time'])
    
    # Plot the 'Close' price against the 'Time'
    ax.plot(stock_data['Time'], stock_data['Close'], label=ticker)
    
    # Customize each subplot
    ax.set_title(f"Stock Price of {ticker} on {plot_date}")
    ax.set_xlabel('Time')
    ax.set_ylabel('Close Price ($)')
    ax.legend()
    ax.tick_params(axis='x', rotation=45)

# Adjust layout to avoid overlap
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
import torch
print(torch.__version__)

In [None]:
import pickle
import matplotlib.pyplot as plt
import pandas as pd

# Load the data from the pickle file
with open('data_records.pkl', 'rb') as file:
    data_records = pickle.load(file)

In [None]:
data_records

In [None]:
import json
import pickle
from datetime import datetime
import numpy as np
import random

# Load the data
with open('data_records.pkl', 'rb') as file:
    data_records = pickle.load(file)

# Define the output file
output_file_train = "deepar_train_data.jsonl"
output_file_test = "deepar_test_data.jsonl"

# List to hold DeepAR formatted records
deepar_train = []
deepar_test = []

# Process each date and ticker
for date, tickers_data in data_records.items():
    for ticker, time_series in tickers_data.items():
        try:
            # Use varrying lenght datasets
            # The random number is chosen so that there is at least 3 hours in the training set and 30 mins in the test
            rand_num = random.randint(40, 60)

            # Construct DeepAR formatted entry
            deepar_train_entry = {
                "start": time_series["Time"][0].strftime("%Y-%m-%d %H:%M:%S"),
                "target": [entry for entry in time_series["Close"]][:rand_num]
            }

            # Construct DeepAR formatted entry
            deepar_test_entry = {
                "start": time_series["Time"][rand_num].strftime("%Y-%m-%d %H:%M:%S"),
                "target": [entry for entry in time_series["Close"]][rand_num:]
            }

            deepar_train.append(deepar_train_entry)
            deepar_test.append(deepar_test_entry)
        except Exception as e:
            _=1

# Save to JSONL
with open(output_file_train, "w") as f:
    for record in deepar_train:
        f.write(json.dumps(record) + "\n")

with open(output_file_test, "w") as f:
    for record in deepar_test:
        f.write(json.dumps(record) + "\n")



In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from gluonts.torch import DeepAREstimator

# Load data from a CSV file into a PandasDataset
df = pd.read_csv(
    "https://raw.githubusercontent.com/AileenNielsen/"
    "TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv",
    index_col=0,
    parse_dates=True,
)
dataset = PandasDataset(df, target="#Passengers")
display(dataset)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

import numpy as np
np.bool = np.bool_

import mxnet as mx

from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from gluonts.torch import DeepAREstimator

# Load data from a CSV file into a PandasDataset
df = pd.read_csv(
    "https://raw.githubusercontent.com/AileenNielsen/"
    "TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv",
    index_col=0,
    parse_dates=True,
)
dataset = PandasDataset(df, target="#Passengers")

# Split the data for training and testing
training_data, test_gen = split(dataset, offset=-36)
test_data = test_gen.generate_instances(prediction_length=12, windows=3)

# Train the model and make predictions
model = DeepAREstimator(
    prediction_length=12, freq="M", trainer_kwargs={"max_epochs": 1}
).train(training_data)

forecasts = list(model.predict(test_data.input))

# Plot predictions
plt.plot(df["1954":], color="black")
for forecast in forecasts:
  forecast.plot()
plt.legend(["True values"], loc="upper left", fontsize="xx-large")
plt.show()

Unnamed: 0_level_0,#Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121
...,...
1960-08-01,606
1960-09-01,508
1960-10-01,461
1960-11-01,390


In [41]:
import json
import pandas as pd
import matplotlib.pyplot as plt

import numpy as np
np.bool = np.bool_

import mxnet as mx

from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from gluonts.torch import DeepAREstimator
from gluonts.dataset.common import ListDataset
from gluonts.mx import Trainer

# Load JSONL file
jsonl_file = "deepar_train_data.jsonl"

data = []
with open(jsonl_file, "r") as f:
    for line in f:
        data.append(json.loads(line))

# Convert to ListDataset format
train_dataset = ListDataset(
    [{"target": entry["target"], "start": pd.Timestamp(entry["start"])} for entry in data],
    freq="5min"
)

jsonl_file = "deepar_test_data.jsonl"

data = []
with open(jsonl_file, "r") as f:
    for line in f:
        data.append(json.loads(line))
data[:1]
# Convert to ListDataset format
test_dataset = ListDataset(
    [{"target": entry["target"], "start": pd.Timestamp(entry["start"])} for entry in data],
    freq="5min"
)

# Train the model and make predictions
model = DeepAREstimator(
    prediction_length=12, freq="5min", trainer_kwargs={"max_epochs": 1}
).train(train_dataset)

forecasts = model.predict(test_dataset)
forecasts
# # Plot predictions
# plt.plot(df["1954":], color="black")
# for forecast in forecasts:
#   forecast.plot()
# plt.legend(["True values"], loc="upper left", fontsize="xx-large")
# plt.show()


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\olive\anaconda3\envs\algo_env\lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.

  | Name  | Type        | Params | Mode  | In sizes                                                         | Out sizes   
--------------------------------------------------------------------------------------------------------------------------------
0 | model | DeepARModel | 26.8 K | train | [[1, 1], [1, 1], [1, 1164, 6], [1, 1164], [1, 1164], [1, 12, 6]] | [1, 100, 12]
--------------------------------------------------------------------------------------------------------------------------------
26.8 K    Trainable params
0         Non-trainable params
26.8 K    Total params
0.107     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval m

Epoch 0: |          | 50/? [00:03<00:00, 15.96it/s, v_num=37, train_loss=4.950]

Epoch 0, global step 50: 'train_loss' reached 4.94627 (best 4.94627), saving model to 'c:\\Users\\olive\\OneDrive\\Documents\\GitHub\\algo_trading\\lightning_logs\\version_37\\checkpoints\\epoch=0-step=50.ckpt' as top 1
`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: |          | 50/? [00:03<00:00, 15.79it/s, v_num=37, train_loss=4.950]


<generator object PyTorchPredictor.predict at 0x000001EC86FDC970>

In [44]:
forecasts_array = np.array(list(forecasts))
forecasts_array
# Generate x-axis values if timestamps are unavailable
# x_values = range(len(forecasts_array))
# x_values
# plt.figure(figsize=(10, 5))
# plt.plot(x_values, forecasts_array, label="Predicted Values", linestyle="dashed", color="red")
# plt.xlabel("Time Step")
# plt.ylabel("Forecasted Value")
# plt.title("Model Forecasts")
# plt.legend()
# plt.show()

array([], dtype=float64)

In [30]:
data = []
with open(jsonl_file, "r") as f:
    for line in f:
        data.append(json.loads(line))
toset = [type(pd.Timestamp(i["start"])) for i in data]

print(set(toset))

{<class 'pandas._libs.tslibs.timestamps.Timestamp'>}


In [35]:
print(next(iter(train_dataset))["start"].freq)

<5 * Minutes>
