Connected to Python 3.11.4

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error
from math import sqrt
import yfinance as yf
from datetime import datetime

def fetch_data(tickers):
    all_data = {}
    for ticker in tickers:
        all_data[ticker] = yf.download(ticker)
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled, new_row.reshape(1, -1)])
        dummy = np.zeros(shape=(len(predicted_price), data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price[:, 0]
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL', 'MSFT']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}

for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    X_train, y_train, X_test, y_test = X[:int(len(X)*0.8)], y[:int(len(y)*0.8)], X[int(len(X)*0.8):], y[int(len(y)*0.8):]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=30, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test)))

print(f"Stocks to perform better in the future: {max(rmse_scores, key=rmse_scores.get)}")
print(predictions)

KeyboardInterrupt: 

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error
from math import sqrt
import yfinance as yf
from datetime import datetime
def fetch_data(tickers):
    all_data = {}
    for ticker in tickers:
        all_data[ticker] = yf.download(ticker)
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled, new_row.reshape(1, -1)])
        dummy = np.zeros(shape=(len(predicted_price), data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price[:, 0]
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL', 'MSFT']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    X_train, y_train, X_test, y_test = X[:int(len(X)*0.8)], y[:int(len(y)*0.8)], X[int(len(X)*0.8):], y[int(len(y)*0.8):]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=30, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test)))

print(f"Stocks to perform better in the future: {max(rmse_scores, key=rmse_scores.get)}")
print(predictions)

2023-07-31 10:51:45.849859: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Epoch 1/30
Epoch 2/30

KeyboardInterrupt: 

In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error
from math import sqrt
import yfinance as yf
from datetime import datetime
def fetch_data(tickers):
    all_data = {}
    for ticker in tickers:
        all_data[ticker] = yf.download(ticker)
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled, new_row.reshape(1, -1)])
        dummy = np.zeros(shape=(len(predicted_price), data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price[:, 0]
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL', 'MSFT']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    X_train, y_train, X_test, y_test = X[:int(len(X)*0.8)], y[:int(len(y)*0.8)], X[int(len(X)*0.8):], y[int(len(y)*0.8):]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=2, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test)))

print(f"Stocks to perform better in the future: {max(rmse_scores, key=rmse_scores.get)}")
print(predictions)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Epoch 1/2
Epoch 2/2




Epoch 1/2
Epoch 2/2




Epoch 1/2
Epoch 2/2




Stocks to perform better in the future: WMT
                          Date Ticker      Close
0   2023-07-31 11:04:04.217422    WMT  57.904330
1   2023-08-01 11:04:04.217422    WMT  58.196679
2   2023-08-02 11:04:04.217422    WMT  56.774599
3   2023-08-03 11:04:04.217422    WMT  53.870116
4   2023-08-04 11:04:04.217422    WMT  48.396628
..                         ...    ...        ...
175 2023-09-24 11:14:31.209555   MSFT   7.801501
176 2023-09-25 11:14:31.209555   MSFT   7.776650
177 2023-09-26 11:14:31.209555   MSFT   7.754201
178 2023-09-27 11:14:31.209555   MSFT   7.733925
179 2023-09-28 11:14:31.209555   MSFT   7.715606

[180 rows x 3 columns]


In [4]:
print(predictions)

                          Date Ticker      Close
0   2023-07-31 11:04:04.217422    WMT  57.904330
1   2023-08-01 11:04:04.217422    WMT  58.196679
2   2023-08-02 11:04:04.217422    WMT  56.774599
3   2023-08-03 11:04:04.217422    WMT  53.870116
4   2023-08-04 11:04:04.217422    WMT  48.396628
..                         ...    ...        ...
175 2023-09-24 11:14:31.209555   MSFT   7.801501
176 2023-09-25 11:14:31.209555   MSFT   7.776650
177 2023-09-26 11:14:31.209555   MSFT   7.754201
178 2023-09-27 11:14:31.209555   MSFT   7.733925
179 2023-09-28 11:14:31.209555   MSFT   7.715606

[180 rows x 3 columns]


In [5]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        # ... add more attributes here if desired ...
        # Merge all dataframes on 'Date' column
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data], axis=1)
        all_data[ticker] = data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled, new_row.reshape(1, -1)])
        dummy = np.zeros(shape=(len(predicted_price), data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price[:, 0]
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL', 'MSFT']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = len(data) - 60  # Use last 2 months (approx. 60 business days) for testing
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test)))

print(f"Stocks to perform better in the future: {max(rmse_scores, key=rmse_scores.get)}")
print(predictions)

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


 431/3976 [==>...........................] - ETA: 2:08 - loss: nan

KeyboardInterrupt: 

In [6]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        # ... add more attributes here if desired ...
        # Merge all dataframes on 'Date' column
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data], axis=1)
        all_data[ticker] = data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled, new_row.reshape(1, -1)])
        dummy = np.zeros(shape=(len(predicted_price), data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price[:, 0]
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = len(data) - 60  # Use last 2 months (approx. 60 business days) for testing
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test)))

print(f"Stocks to perform better in the future: {max(rmse_scores, key=rmse_scores.get)}")
print(predictions)

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)








ValueError: Unexpected result of `predict_function` (Empty batch_outputs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

In [7]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        # ... add more attributes here if desired ...
        # Merge all dataframes on 'Date' column
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data], axis=1)
        all_data[ticker] = data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error', run_eagerly=True)
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled, new_row.reshape(1, -1)])
        dummy = np.zeros(shape=(len(predicted_price), data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price[:, 0]
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = len(data) - 60  # Use last 2 months (approx. 60 business days) for testing
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test)))

print(f"Stocks to perform better in the future: {max(rmse_scores, key=rmse_scores.get)}")
print(predictions)

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)








ValueError: Input 0 of layer "sequential_6" is incompatible with the layer: expected shape=(None, 60, 10), found shape=(1, 61, 10)

In [8]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        all_data[ticker] = historical_data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)









Stocks to perform better in the future: WMT
                          Date Ticker         Close
0   2023-07-31 12:34:28.279730    WMT  1.368401e+06
1   2023-08-01 12:34:28.279730    WMT  3.418429e+06
2   2023-08-02 12:34:28.279730    WMT  4.674413e+06
3   2023-08-03 12:34:28.279730    WMT  6.969722e+06
4   2023-08-04 12:34:28.279730    WMT  8.835208e+06
..                         ...    ...           ...
115 2023-09-24 12:35:56.380981   AAPL  3.590765e+08
116 2023-09-25 12:35:56.380981   AAPL  3.590974e+08
117 2023-09-26 12:35:56.380981   AAPL  3.591138e+08
118 2023-09-27 12:35:56.380981   AAPL  3.591262e+08
119 2023-09-28 12:35:56.380981   AAPL  3.591355e+08

[120 rows x 3 columns]


In [9]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        # ... add more attributes here if desired ...
        # Merge all dataframes on 'Date' column
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data], axis=1)
        all_data[ticker] = data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)








ValueError: Input contains NaN.

In [10]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        # ... add more attributes here if desired ...
        # Merge all dataframes on 'Date' column
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data], axis=1)
        data.fillna(0, inplace=True)
        all_data[ticker] = data
    return all_data

In [11]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        # ... add more attributes here if desired ...
        # Merge all dataframes on 'Date' column
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data], axis=1)
        data.fillna(0, inplace=True)
        all_data[ticker] = data
    return all_data

print(all_data)

{'WMT':                                  Open        High         Low       Close  \
Date                                                                        
2003-08-05 00:00:00-04:00   37.080105   37.448229   36.973015   37.013172   
2003-08-06 00:00:00-04:00   36.819075   37.642336   36.819075   37.307678   
2003-08-07 00:00:00-04:00   37.802979   38.485682   37.669115   38.151024   
2003-08-08 00:00:00-04:00   38.365174   38.679753   38.258084   38.666367   
2003-08-11 00:00:00-04:00   38.740011   39.021122   38.338419   38.619534   
...                               ...         ...         ...         ...   
2023-07-25 00:00:00-04:00  160.460007  160.509995  158.279999  159.169998   
2023-07-26 00:00:00-04:00  159.240005  160.690002  158.940002  159.740005   
2023-07-27 00:00:00-04:00  160.080002  160.639999  159.130005  159.160004   
2023-07-28 00:00:00-04:00  159.289993  160.940002  159.289993  159.910004   
2023-07-31 00:00:00-04:00  159.960007  160.639999  158.910004  159.1

In [12]:
print(dividends_data)

NameError: name 'dividends_data' is not defined

In [13]:
dividends_data = t.dividends
print(dividends_data)

NameError: name 't' is not defined

In [14]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        print(dividends_data)

In [15]:
dividends_data = t.dividends
print(dividends_data)

NameError: name 't' is not defined

In [16]:
print(t.dividends)

NameError: name 't' is not defined

In [17]:
dividends_data = t.dividends
print(t.dividends)

NameError: name 't' is not defined

In [18]:
ividends_data = dividends
        print(dividends)

IndentationError: unexpected indent (<ipython-input-18-52e83f869699>, line 2)

In [19]:
dividends_data = dividends
print(dividends)

NameError: name 'dividends' is not defined

In [20]:
f fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits

SyntaxError: invalid syntax (<ipython-input-20-62e87a70d533>, line 1)

In [21]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        # ... add more attributes here if desired ...
        # Merge all dataframes on 'Date' column
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data], axis=1)
        data.fillna(0, inplace=True)
        all_data[ticker] = data
    return all_data

print(all_data)

{'WMT':                                  Open        High         Low       Close  \
Date                                                                        
2003-08-05 00:00:00-04:00   37.080105   37.448229   36.973015   37.013172   
2003-08-06 00:00:00-04:00   36.819075   37.642336   36.819075   37.307678   
2003-08-07 00:00:00-04:00   37.802979   38.485682   37.669115   38.151024   
2003-08-08 00:00:00-04:00   38.365174   38.679753   38.258084   38.666367   
2003-08-11 00:00:00-04:00   38.740011   39.021122   38.338419   38.619534   
...                               ...         ...         ...         ...   
2023-07-25 00:00:00-04:00  160.460007  160.509995  158.279999  159.169998   
2023-07-26 00:00:00-04:00  159.240005  160.690002  158.940002  159.740005   
2023-07-27 00:00:00-04:00  160.080002  160.639999  159.130005  159.160004   
2023-07-28 00:00:00-04:00  159.289993  160.940002  159.289993  159.910004   
2023-07-31 00:00:00-04:00  159.960007  160.639999  158.910004  159.1

In [22]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        # ... add more attributes here if desired ...
        # Merge all dataframes on 'Date' column
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data], axis=1)
        data.fillna(0, inplace=True)
        all_data[ticker] = data
    return all_data

In [23]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        # ... add more attributes here if desired ...
        # Merge all dataframes on 'Date' column
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data], axis=1)
        data.fillna(0, inplace=True)
        all_data[ticker] = data
    return all_data
    print(all_data)

In [24]:
    print(all_data)


def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}

for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

IndentationError: expected an indented block after function definition on line 4 (<ipython-input-24-a8ecdd6c7400>, line 5)

In [25]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        # ... add more attributes here if desired ...
        # Merge all dataframes on 'Date' column
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

print(all_data)

{'WMT':                                  Open        High         Low       Close  \
Date                                                                        
2003-08-05 00:00:00-04:00   37.080105   37.448229   36.973015   37.013172   
2003-08-06 00:00:00-04:00   36.819075   37.642336   36.819075   37.307678   
2003-08-07 00:00:00-04:00   37.802979   38.485682   37.669115   38.151024   
2003-08-08 00:00:00-04:00   38.365174   38.679753   38.258084   38.666367   
2003-08-11 00:00:00-04:00   38.740011   39.021122   38.338419   38.619534   
...                               ...         ...         ...         ...   
2023-07-25 00:00:00-04:00  160.460007  160.509995  158.279999  159.169998   
2023-07-26 00:00:00-04:00  159.240005  160.690002  158.940002  159.740005   
2023-07-27 00:00:00-04:00  160.080002  160.639999  159.130005  159.160004   
2023-07-28 00:00:00-04:00  159.289993  160.940002  159.289993  159.910004   
2023-07-31 00:00:00-04:00  159.960007  160.639999  158.910004  159.1

In [26]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
         income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data
print(all_data)

IndentationError: unexpected indent (<ipython-input-26-caefc9adab6a>, line 10)

In [27]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['AAPL'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

AAPL data:
                                           Open      High       Low     Close  \
2003-08-05 00:00:00-04:00              0.324041  0.324800  0.305069  0.309319   
2003-08-06 00:00:00-04:00              0.304461  0.306132  0.295962  0.297936   
2003-08-07 00:00:00-04:00              0.299453  0.304917  0.294748  0.302489   
2003-08-08 00:00:00-04:00              0.305220  0.305524  0.297480  0.298087   
2003-08-11 00:00:00-04:00              0.300819  0.302489  0.296114  0.298390   
...                                         ...       ...       ...       ...   
Deferred Tax                           0.000000  0.000000  0.000000  0.000000   
Deferred Income Tax                    0.000000  0.000000  0.000000  0.000000   
Depreciation Amortization Depletion    0.000000  0.000000  0.000000  0.000000   
Depreciation And Amortization          0.000000  0.000000  0.000000  0.000000   
Net Income From Continuing Operations  0.000000  0.000000  0.000000  0.000000   

                

In [28]:
tch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data
tickers = ['SPY'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

SyntaxError: invalid syntax (<ipython-input-28-9c4a92718a5e>, line 1)

In [29]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['SPY'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

SPY data:
                                 Open        High         Low       Close  \
2003-08-05 00:00:00-04:00   67.056099   67.294586   65.645606   65.700119   
2003-08-06 00:00:00-04:00   65.884121   66.817629   65.700141   66.081726   
2003-08-07 00:00:00-04:00   66.211219   66.824477   65.931850   66.776779   
2003-08-08 00:00:00-04:00   66.994793   67.151517   66.613214   66.967537   
2003-08-11 00:00:00-04:00   66.953922   67.485409   66.667732   67.219666   
...                               ...         ...         ...         ...   
2023-07-25 00:00:00-04:00  453.920013  456.739990  453.869995  455.440002   
2023-07-26 00:00:00-04:00  454.470001  456.989990  453.380005  455.510010   
2023-07-27 00:00:00-04:00  459.019989  459.440002  451.549988  452.489990   
2023-07-28 00:00:00-04:00  455.880005  457.779999  452.489990  456.920013   
2023-07-31 00:00:00-04:00  457.410004  458.160004  456.239990  456.855011   

                             Volume  Dividends  Stock Splits  Cap

In [30]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['TSLA'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

TSLA data:
                                             Open      High       Low  \
2010-06-29 00:00:00-04:00                1.266667  1.666667  1.169333   
2010-06-30 00:00:00-04:00                1.719333  2.028000  1.553333   
2010-07-01 00:00:00-04:00                1.666667  1.728000  1.351333   
2010-07-02 00:00:00-04:00                1.533333  1.540000  1.247333   
2010-07-06 00:00:00-04:00                1.333333  1.333333  1.055333   
...                                           ...       ...       ...   
Depreciation                             0.000000  0.000000  0.000000   
Operating Gains Losses                   0.000000  0.000000  0.000000   
Net Foreign Currency Exchange Gain Loss  0.000000  0.000000  0.000000   
Gain Loss On Sale Of PPE                 0.000000  0.000000  0.000000   
Net Income From Continuing Operations    0.000000  0.000000  0.000000   

                                            Close       Volume  Dividends  \
2010-06-29 00:00:00-04:00          

In [31]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

WMT data:
                                              Open       High        Low  \
2003-08-05 00:00:00-04:00                37.080105  37.448229  36.973015   
2003-08-06 00:00:00-04:00                36.819075  37.642336  36.819075   
2003-08-07 00:00:00-04:00                37.802949  38.485651  37.669085   
2003-08-08 00:00:00-04:00                38.365174  38.679753  38.258084   
2003-08-11 00:00:00-04:00                38.739995  39.021107  38.338404   
...                                            ...        ...        ...   
Earnings Losses From Equity Investments   0.000000   0.000000   0.000000   
Gain Loss On Investment Securities        0.000000   0.000000   0.000000   
Gain Loss On Sale Of PPE                  0.000000   0.000000   0.000000   
Gain Loss On Sale Of Business             0.000000   0.000000   0.000000   
Net Income From Continuing Operations     0.000000   0.000000   0.000000   

                                             Close      Volume  Dividends  \


In [32]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT,AAPL'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

WMT,AAPL: No timezone found, symbol may be delisted
WMT,AAPL: No timezone found, symbol may be delisted
WMT,AAPL: No timezone found, symbol may be delisted
WMT,AAPL: No timezone found, symbol may be delisted


TypeError: cannot concatenate object of type '<class 'list'>'; only Series and DataFrame objs are valid

In [33]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT','AAPL'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

WMT data:
                                              Open       High        Low  \
2003-08-05 00:00:00-04:00                37.080101  37.448225  36.973011   
2003-08-06 00:00:00-04:00                36.819071  37.642332  36.819071   
2003-08-07 00:00:00-04:00                37.802968  38.485670  37.669104   
2003-08-08 00:00:00-04:00                38.365197  38.679776  38.258106   
2003-08-11 00:00:00-04:00                38.740014  39.021126  38.338423   
...                                            ...        ...        ...   
Earnings Losses From Equity Investments   0.000000   0.000000   0.000000   
Gain Loss On Investment Securities        0.000000   0.000000   0.000000   
Gain Loss On Sale Of PPE                  0.000000   0.000000   0.000000   
Gain Loss On Sale Of Business             0.000000   0.000000   0.000000   
Net Income From Continuing Operations     0.000000   0.000000   0.000000   

                                             Close      Volume  Dividends  \


In [34]:
cashflow_data = t.cashflow
print(cashflow_data)

NameError: name 't' is not defined

In [35]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        print(cashflow_data)

In [36]:
cashflow_data = t.cashflow
print('cashflow_data')

NameError: name 't' is not defined

In [37]:
cashflow_data = t.cashflow
print(cashflow_data)

NameError: name 't' is not defined

In [38]:
cashflow_data = t.cashflow
print(cashflow_data)

NameError: name 't' is not defined

In [39]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

                                                   2023-01-31     2022-01-31  \
Free Cash Flow                                  12244000000.0  11075000000.0   
Repurchase Of Capital Stock                     -9920000000.0  -9787000000.0   
Repayment Of Debt                               -2689000000.0 -13010000000.0   
Issuance Of Debt                                 5041000000.0   6945000000.0   
Capital Expenditure                            -16857000000.0 -13106000000.0   
Interest Paid Supplemental Data                  2051000000.0   2237000000.0   
Income Tax Paid Supplemental Data                3310000000.0   5918000000.0   
End Cash Position                                9101000000.0  14834000000.0   
Other Cash Adjustment Outside Changein Cash               0.0   1848000000.0   
Beginning Cash Position                         14834000000.0  17788000000.0   
Effect Of Exchange Rate Changes                   -73000000.0   -140000000.0   
Changes In Cash                         

In [40]:
tickers = ['WMT'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

                                                   2023-01-31     2022-01-31  \
Free Cash Flow                                  12244000000.0  11075000000.0   
Repurchase Of Capital Stock                     -9920000000.0  -9787000000.0   
Repayment Of Debt                               -2689000000.0 -13010000000.0   
Issuance Of Debt                                 5041000000.0   6945000000.0   
Capital Expenditure                            -16857000000.0 -13106000000.0   
Interest Paid Supplemental Data                  2051000000.0   2237000000.0   
Income Tax Paid Supplemental Data                3310000000.0   5918000000.0   
End Cash Position                                9101000000.0  14834000000.0   
Other Cash Adjustment Outside Changein Cash               0.0   1848000000.0   
Beginning Cash Position                         14834000000.0  17788000000.0   
Effect Of Exchange Rate Changes                   -73000000.0   -140000000.0   
Changes In Cash                         

In [41]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

                                                   2023-01-31     2022-01-31  \
Free Cash Flow                                  12244000000.0  11075000000.0   
Repurchase Of Capital Stock                     -9920000000.0  -9787000000.0   
Repayment Of Debt                               -2689000000.0 -13010000000.0   
Issuance Of Debt                                 5041000000.0   6945000000.0   
Capital Expenditure                            -16857000000.0 -13106000000.0   
Interest Paid Supplemental Data                  2051000000.0   2237000000.0   
Income Tax Paid Supplemental Data                3310000000.0   5918000000.0   
End Cash Position                                9101000000.0  14834000000.0   
Other Cash Adjustment Outside Changein Cash               0.0   1848000000.0   
Beginning Cash Position                         14834000000.0  17788000000.0   
Effect Of Exchange Rate Changes                   -73000000.0   -140000000.0   
Changes In Cash                         

In [42]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        info_data = t.info
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

                                                   2023-01-31     2022-01-31  \
Free Cash Flow                                  12244000000.0  11075000000.0   
Repurchase Of Capital Stock                     -9920000000.0  -9787000000.0   
Repayment Of Debt                               -2689000000.0 -13010000000.0   
Issuance Of Debt                                 5041000000.0   6945000000.0   
Capital Expenditure                            -16857000000.0 -13106000000.0   
Interest Paid Supplemental Data                  2051000000.0   2237000000.0   
Income Tax Paid Supplemental Data                3310000000.0   5918000000.0   
End Cash Position                                9101000000.0  14834000000.0   
Other Cash Adjustment Outside Changein Cash               0.0   1848000000.0   
Beginning Cash Position                         14834000000.0  17788000000.0   
Effect Of Exchange Rate Changes                   -73000000.0   -140000000.0   
Changes In Cash                         

In [43]:
info_data = t.info
print(info_data)

NameError: name 't' is not defined

In [44]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        info_data = t.info
        print(info_data)
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

{'address1': '702 South West 8th Street', 'city': 'Bentonville', 'state': 'AR', 'zip': '72716', 'country': 'United States', 'phone': '479 273 4000', 'website': 'https://www.stock.walmart.com', 'industry': 'Discount Stores', 'industryDisp': 'Discount Stores', 'sector': 'Consumer Defensive', 'longBusinessSummary': "Walmart Inc. engages in the operation of retail, wholesale, and other units worldwide. The company operates through three segments: Walmart U.S., Walmart International, and Sam's Club. It operates supercenters, supermarkets, hypermarkets, warehouse clubs, cash and carry stores, and discount stores under Walmart and Walmart Neighborhood Market brands; membership-only warehouse clubs; ecommerce websites, such as walmart.com, walmart.com.mx, walmart.ca, flipkart.com, and samsclub.com; and mobile commerce applications. The company offers grocery and consumables, including dry grocery, snacks, dairy, meat, produce, bakery and deli, alcoholic and nonalcoholic beverages, floral, cand

In [45]:
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT','AAPL'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

                                                   2023-01-31     2022-01-31  \
Free Cash Flow                                  12244000000.0  11075000000.0   
Repurchase Of Capital Stock                     -9920000000.0  -9787000000.0   
Repayment Of Debt                               -2689000000.0 -13010000000.0   
Issuance Of Debt                                 5041000000.0   6945000000.0   
Capital Expenditure                            -16857000000.0 -13106000000.0   
Interest Paid Supplemental Data                  2051000000.0   2237000000.0   
Income Tax Paid Supplemental Data                3310000000.0   5918000000.0   
End Cash Position                                9101000000.0  14834000000.0   
Other Cash Adjustment Outside Changein Cash               0.0   1848000000.0   
Beginning Cash Position                         14834000000.0  17788000000.0   
Effect Of Exchange Rate Changes                   -73000000.0   -140000000.0   
Changes In Cash                         

In [46]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT','AAPL'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"{ticker} data:")
    print(data)

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)  # All columns are considered for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values   # All columns are considered
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1]))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

                                                   2023-01-31     2022-01-31  \
Free Cash Flow                                  12244000000.0  11075000000.0   
Repurchase Of Capital Stock                     -9920000000.0  -9787000000.0   
Repayment Of Debt                               -2689000000.0 -13010000000.0   
Issuance Of Debt                                 5041000000.0   6945000000.0   
Capital Expenditure                            -16857000000.0 -13106000000.0   
Interest Paid Supplemental Data                  2051000000.0   2237000000.0   
Income Tax Paid Supplemental Data                3310000000.0   5918000000.0   
End Cash Position                                9101000000.0  14834000000.0   
Other Cash Adjustment Outside Changein Cash               0.0   1848000000.0   
Beginning Cash Position                         14834000000.0  17788000000.0   
Effect Of Exchange Rate Changes                   -73000000.0   -140000000.0   
Changes In Cash                         

TypeError: Feature names are only supported if all input features have string names, but your input has ['Timestamp', 'str'] as feature name / column name types. If you want feature names to be stored and validated, you must convert them all to strings, by using X.columns = X.columns.astype(str) for example. Otherwise you can remove feature / column names from your input data, or convert them all to a non-string data type.

In [47]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT','AAPL'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
#or ticker, data in all_data.items():
    #print(f"{ticker} data:")
   # print(data)

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)  # All columns are considered for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values   # All columns are considered
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1]))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT''AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

                                                   2023-01-31     2022-01-31  \
Free Cash Flow                                  12244000000.0  11075000000.0   
Repurchase Of Capital Stock                     -9920000000.0  -9787000000.0   
Repayment Of Debt                               -2689000000.0 -13010000000.0   
Issuance Of Debt                                 5041000000.0   6945000000.0   
Capital Expenditure                            -16857000000.0 -13106000000.0   
Interest Paid Supplemental Data                  2051000000.0   2237000000.0   
Income Tax Paid Supplemental Data                3310000000.0   5918000000.0   
End Cash Position                                9101000000.0  14834000000.0   
Other Cash Adjustment Outside Changein Cash               0.0   1848000000.0   
Beginning Cash Position                         14834000000.0  17788000000.0   
Effect Of Exchange Rate Changes                   -73000000.0   -140000000.0   
Changes In Cash                         

WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted


Empty DataFrame
Columns: []
Index: []


TypeError: cannot concatenate object of type '<class 'list'>'; only Series and DataFrame objs are valid

In [48]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = t.actions
        dividends_data = t.dividends
        splits_data = t.splits
        income_stmt_data = t.income_stmt
        balance_sheet_data = t.balance_sheet
        cashflow_data = t.cashflow
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

tickers = ['WMT','AAPL'] # replace with the tickers you're interested in
all_data = fetch_data(tickers)
#or ticker, data in all_data.items():
    #print(f"{ticker} data:")
   # print(data)

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)  # All columns are considered for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values   # All columns are considered
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1]))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT''AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

                                                   2023-01-31     2022-01-31  \
Free Cash Flow                                  12244000000.0  11075000000.0   
Repurchase Of Capital Stock                     -9920000000.0  -9787000000.0   
Repayment Of Debt                               -2689000000.0 -13010000000.0   
Issuance Of Debt                                 5041000000.0   6945000000.0   
Capital Expenditure                            -16857000000.0 -13106000000.0   
Interest Paid Supplemental Data                  2051000000.0   2237000000.0   
Income Tax Paid Supplemental Data                3310000000.0   5918000000.0   
End Cash Position                                9101000000.0  14834000000.0   
Other Cash Adjustment Outside Changein Cash               0.0   1848000000.0   
Beginning Cash Position                         14834000000.0  17788000000.0   
Effect Of Exchange Rate Changes                   -73000000.0   -140000000.0   
Changes In Cash                         

WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted


Empty DataFrame
Columns: []
Index: []


TypeError: cannot concatenate object of type '<class 'list'>'; only Series and DataFrame objs are valid

In [49]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        splits_data = getattr(t, 'splits', pd.DataFrame())
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)  # All columns are considered for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values   # All columns are considered
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1]))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT''AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted


Empty DataFrame
Columns: []
Index: []


TypeError: cannot concatenate object of type '<class 'list'>'; only Series and DataFrame objs are valid

In [50]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        actions_data = actions_data if isinstance(actions_data, pd.DataFrame) else actions_data.to_frame()
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        dividends_data = dividends_data if isinstance(dividends_data, pd.DataFrame) else dividends_data.to_frame()
        splits_data = getattr(t, 'splits', pd.DataFrame())
        splits_data = splits_data if isinstance(splits_data, pd.DataFrame) else splits_data.to_frame()
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        income_stmt_data = income_stmt_data if isinstance(income_stmt_data, pd.DataFrame) else income_stmt_data.to_frame()
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        balance_sheet_data = balance_sheet_data if isinstance(balance_sheet_data, pd.DataFrame) else balance_sheet_data.to_frame()
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        cashflow_data = cashflow_data if isinstance(cashflow_data, pd.DataFrame) else cashflow_data.to_frame()
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)  # All columns are considered for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values   # All columns are considered
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1]))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT''AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted


AttributeError: 'list' object has no attribute 'to_frame'

In [51]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)
        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)  # All columns are considered for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values   # All columns are considered
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1]))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT''AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted


Empty DataFrame
Columns: []
Index: []


ValueError: Found array with 0 sample(s) (shape=(0, 6)) while a minimum of 1 is required by MinMaxScaler.

In [52]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)
        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data['WMT'] = data
    return all_data

In [53]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)
        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data['WMT'] = data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)  # All columns are considered for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values   # All columns are considered
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1]))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT''AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted
WMTAAPL: No timezone found, symbol may be delisted


Empty DataFrame
Columns: []
Index: []


ValueError: Found array with 0 sample(s) (shape=(0, 6)) while a minimum of 1 is required by MinMaxScaler.

In [54]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)
        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
        tickers = ['WMT', 'AAPL']
    return all_data

In [55]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)
        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
        tickers = ['WMT', 'AAPL']
    return all_data

print(all_data)

{'WMT': Empty DataFrame
Columns: [Open, High, Low, Close, Adj Close, Volume]
Index: []}


In [56]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error
from math import sqrt
import yfinance as yf
from datetime import datetime
def fetch_data(tickers):
    all_data = {}
    for ticker in tickers:
        all_data[ticker] = yf.download(ticker)
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled, new_row.reshape(1, -1)])
        dummy = np.zeros(shape=(len(predicted_price), data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price[:, 0]
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL', 'MSFT']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    X_train, y_train, X_test, y_test = X[:int(len(X)*0.8)], y[:int(len(y)*0.8)], X[int(len(X)*0.8):], y[int(len(y)*0.8):]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=2, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test)))

print(f"Stocks to perform better in the future: {max(rmse_scores, key=rmse_scores.get)}")
print(predictions)
#%%

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)
        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
        tickers = ['WMT', 'AAPL']
    return all_data

tickers = ['WMT','AAPL']
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"Data for {ticker}: {data}")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Epoch 1/2


KeyboardInterrupt: 

In [57]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)
        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
        tickers = ['WMT', 'AAPL']
    return all_data

tickers = ['WMT','AAPL']
all_data = fetch_data(tickers)
for ticker, data in all_data.items():
    print(f"Data for {ticker}: {data}")

                                                   2023-01-31     2022-01-31  \
Free Cash Flow                                  12244000000.0  11075000000.0   
Repurchase Of Capital Stock                     -9920000000.0  -9787000000.0   
Repayment Of Debt                               -2689000000.0 -13010000000.0   
Issuance Of Debt                                 5041000000.0   6945000000.0   
Capital Expenditure                            -16857000000.0 -13106000000.0   
Interest Paid Supplemental Data                  2051000000.0   2237000000.0   
Income Tax Paid Supplemental Data                3310000000.0   5918000000.0   
End Cash Position                                9101000000.0  14834000000.0   
Other Cash Adjustment Outside Changein Cash               0.0   1848000000.0   
Beginning Cash Position                         14834000000.0  17788000000.0   
Effect Of Exchange Rate Changes                   -73000000.0   -140000000.0   
Changes In Cash                         

In [58]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)
        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)
        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
        tickers = ['WMT', 'AAPL']

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)  # All columns are considered for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values   # All columns are considered
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1]))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT','AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

                                                   2023-01-31     2022-01-31  \
Free Cash Flow                                  12244000000.0  11075000000.0   
Repurchase Of Capital Stock                     -9920000000.0  -9787000000.0   
Repayment Of Debt                               -2689000000.0 -13010000000.0   
Issuance Of Debt                                 5041000000.0   6945000000.0   
Capital Expenditure                            -16857000000.0 -13106000000.0   
Interest Paid Supplemental Data                  2051000000.0   2237000000.0   
Income Tax Paid Supplemental Data                3310000000.0   5918000000.0   
End Cash Position                                9101000000.0  14834000000.0   
Other Cash Adjustment Outside Changein Cash               0.0   1848000000.0   
Beginning Cash Position                         14834000000.0  17788000000.0   
Effect Of Exchange Rate Changes                   -73000000.0   -140000000.0   
Changes In Cash                         

AttributeError: 'NoneType' object has no attribute 'items'

In [60]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))

        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)

        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)

        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)

        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)

        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)

        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)

        print(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
     tickers = ['WMT', 'AAPL']
all_data = fetch_data(tickers)

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)  # All columns are considered for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values   # All columns are considered
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])  # Shift the window
        dummy = np.zeros(shape=(1, data.shape[1]))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT','AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}

for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))  # Use 80% of the data for training
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 57)

In [61]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error
from math import sqrt
import yfinance as yf
from datetime import datetime

def fetch_data(tickers):
    all_data = {}
    for ticker in tickers:
        all_data[ticker] = yf.download(ticker)
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data.iloc[:, 1:])  # Exclude 'Date' column for scaling
    X, y = [], []
    for i in range(60, len(data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, 1:].values   # Exclude 'Date' column
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled, new_row.reshape(1, -1)])
        dummy = np.zeros(shape=(len(predicted_price), data.shape[1] - 1))  # Create dummy array with shape (n, 17)
        dummy[:, 3] = predicted_price[:, 0]
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL', 'MSFT']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}

for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    X_train, y_train, X_test, y_test = X[:int(len(X)*0.8)], y[:int(len(y)*0.8)], X[int(len(X)*0.8):], y[int(len(y)*0.8):]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=2, validation_split=0.2)  # Increase epochs to 30 and add validation split
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test)))

print(f"Stocks to perform better in the future: {max(rmse_scores, key=rmse_scores.get)}")
print(predictions)
#%%

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt

def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))

        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)

        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)

        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)

        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)

        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)

        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)

        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data  # Ensure you return the fetched data

tickers = ['WMT', 'AAPL']
all_data = fetch_data(tickers)

def process_data(data):
    # rest of your code...

def get_model(input_shape):
    # rest of your code...

def predict(model, ticker, data, scaler):
    # rest of your code...

predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}

for ticker, data in all_data.items():
    # rest of your code...

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

IndentationError: expected an indented block after function definition on line 131 (<ipython-input-61-a6bbe3fd765b>, line 134)

In [62]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)
        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)
    X, y = [], []
    for i in range(60, len(scaled_data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])
        dummy = np.zeros(shape=(1, data.shape[1]))
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)

TypeError: Feature names are only supported if all input features have string names, but your input has ['Timestamp', 'str'] as feature name / column name types. If you want feature names to be stored and validated, you must convert them all to strings, by using X.columns = X.columns.astype(str) for example. Otherwise you can remove feature / column names from your input data, or convert them all to a non-string data type.

In [63]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd
import numpy as np
from math import sqrt
def fetch_data(tickers):
    start_date = datetime.now() - timedelta(days=20*365)  # Get date 20 years ago
    all_data = {}
    for ticker in tickers:
        t = yf.Ticker(ticker)
        historical_data = t.history(start=start_date.strftime('%Y-%m-%d'))
        actions_data = getattr(t, 'actions', pd.DataFrame())
        if isinstance(actions_data, pd.Series):
            actions_data = actions_data.to_frame()
        elif isinstance(actions_data, list):
            actions_data = pd.DataFrame(actions_data)
        dividends_data = getattr(t, 'dividends', pd.DataFrame())
        if isinstance(dividends_data, pd.Series):
            dividends_data = dividends_data.to_frame()
        elif isinstance(dividends_data, list):
            dividends_data = pd.DataFrame(dividends_data)
        splits_data = getattr(t, 'splits', pd.DataFrame())
        if isinstance(splits_data, pd.Series):
            splits_data = splits_data.to_frame()
        elif isinstance(splits_data, list):
            splits_data = pd.DataFrame(splits_data)
        income_stmt_data = getattr(t, 'income_stmt', pd.DataFrame())
        if isinstance(income_stmt_data, pd.Series):
            income_stmt_data = income_stmt_data.to_frame()
        elif isinstance(income_stmt_data, list):
            income_stmt_data = pd.DataFrame(income_stmt_data)
        balance_sheet_data = getattr(t, 'balance_sheet', pd.DataFrame())
        if isinstance(balance_sheet_data, pd.Series):
            balance_sheet_data = balance_sheet_data.to_frame()
        elif isinstance(balance_sheet_data, list):
            balance_sheet_data = pd.DataFrame(balance_sheet_data)
        cashflow_data = getattr(t, 'cashflow', pd.DataFrame())
        if isinstance(cashflow_data, pd.Series):
            cashflow_data = cashflow_data.to_frame()
        elif isinstance(cashflow_data, list):
            cashflow_data = pd.DataFrame(cashflow_data)
        data = pd.concat([historical_data, actions_data, dividends_data, splits_data, income_stmt_data, balance_sheet_data, cashflow_data], axis=1)
        data.columns = data.columns.astype(str)  # Convert column names to string
        data.fillna(0, inplace=True)  # Fill NaN values with 0
        all_data[ticker] = data
    return all_data

def process_data(data):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)
    X, y = [], []
    for i in range(60, len(scaled_data)):
        X.append(scaled_data[i-60:i, :])
        y.append(scaled_data[i, 3])  # 'Close' is the 4th column
    return np.array(X), np.array(y), scaler

def get_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=96, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def predict(model, ticker, data, scaler):
    last_60_days = data.iloc[-60:, :].values
    last_60_days_scaled = scaler.transform(last_60_days)
    predicted_prices = pd.DataFrame(columns=["Date", "Ticker", "Close"])
    for date in pd.date_range(datetime.now(), periods=60).tolist():
        X_test = np.reshape(last_60_days_scaled, (1, last_60_days_scaled.shape[0], last_60_days_scaled.shape[1]))
        predicted_price = model.predict(X_test)
        new_row = np.concatenate((last_60_days_scaled[-1, 1:], predicted_price.flatten()))
        last_60_days_scaled = np.vstack([last_60_days_scaled[1:], new_row.reshape(1, -1)])
        dummy = np.zeros(shape=(1, data.shape[1]))
        dummy[:, 3] = predicted_price
        transformed_pred = scaler.inverse_transform(dummy)[:, 3]
        prediction_row = pd.DataFrame({"Date": [date], "Ticker": [ticker], "Close": [transformed_pred[0]]})
        predicted_prices = pd.concat([predicted_prices, prediction_row], ignore_index=True)
    return predicted_prices

tickers = ['WMT', 'AAPL']
all_data = fetch_data(tickers)
predictions = pd.DataFrame(columns=["Date", "Ticker", "Close"])
rmse_scores = {}
for ticker, data in all_data.items():
    X, y, scaler = process_data(data)
    train_data_len = int(0.8 * len(X))
    X_train, y_train, X_test, y_test = X[:train_data_len], y[:train_data_len], X[train_data_len:], y[train_data_len:]
    model = get_model((X_train.shape[1], X_train.shape[2]))
    model.fit(X_train, y_train, batch_size=1, epochs=1, validation_split=0.2)
    new_predictions = predict(model, ticker, data, scaler)
    predictions = pd.concat([predictions, new_predictions], ignore_index=True)
    rmse_scores[ticker] = sqrt(mean_squared_error(y_test, model.predict(X_test).flatten()))

print(f"Stocks to perform better in the future: {min(rmse_scores, key=rmse_scores.get)}")
print(predictions)









Stocks to perform better in the future: WMT
                          Date Ticker      Close
0   2023-07-31 14:54:44.053838    WMT  -5.065563
1   2023-08-01 14:54:44.053838    WMT  -6.383937
2   2023-08-02 14:54:44.053838    WMT  -9.206388
3   2023-08-03 14:54:44.053838    WMT -12.777159
4   2023-08-04 14:54:44.053838    WMT -15.357391
..                         ...    ...        ...
115 2023-09-24 14:56:16.597874   AAPL   0.183426
116 2023-09-25 14:56:16.597874   AAPL  -3.596249
117 2023-09-26 14:56:16.597874   AAPL  -5.483090
118 2023-09-27 14:56:16.597874   AAPL  -5.320914
119 2023-09-28 14:56:16.597874   AAPL  -3.052348

[120 rows x 3 columns]
