AlphaVantage API Key: 4RAX5IX44VPJZ708


In [2]:
# Load Libraries

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt
import os
import urllib
import json
import datetime as dt






In [3]:
# API key from Alpha Vantage
api_key = '4RAX5IX44VPJZ708'

# Ticker symbol for Amazon
ticker = "AMZN"

# Construct URL to fetch stock data
url_string = f"https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={ticker}&outputsize=full&apikey={api_key}"

# CSV filename to save stock data
file_to_save = f'stock_market_data-{ticker}.csv'


try:
    # Fetch new data from API
    with urllib.request.urlopen(url_string) as url:
        data = json.loads(url.read().decode())
        data = data['Time Series (Daily)']
        new_df = pd.DataFrame(columns=['Date', 'Low', 'High', 'Close', 'Open'])
        for k, v in data.items():
            date = dt.datetime.strptime(k, '%Y-%m-%d')
            data_row = [date.date(), float(v['3. low']), float(v['2. high']),
                        float(v['4. close']), float(v['1. open'])]
            new_df.loc[-1, :] = data_row
            new_df.index = new_df.index + 1
        new_df = new_df.sort_index()

    # Load existing data if it exists
    if os.path.exists(file_to_save):
        existing_df = pd.read_csv(file_to_save)
        existing_df['Date'] = pd.to_datetime(existing_df['Date'])
        existing_df.set_index('Date', inplace=True)

        # Combine new data with existing data
        combined_df = pd.concat([existing_df, new_df]).drop_duplicates()
        combined_df.reset_index(drop=True, inplace=True)
    else:
        combined_df = new_df

    # Save updated data back to CSV
    combined_df.to_csv(file_to_save, index=False)
    print("Data updated and saved.")

except Exception as e:
    print(f"An error occurred while updating data: {e}")


Data updated and saved.


In [21]:
import pandas as pd

# Ensure the index is of datetime type and sort it
combined_df.index = pd.to_datetime(combined_df.index)
combined_df.sort_index(inplace=True)

# Now try filtering the data from 2020 onwards
df_2020 = combined_df['2020-01-01':]

# Display the filtered DataFrame
print(df_2020.head())




Empty DataFrame
Columns: [Low, High, Close, Open, Date]
Index: []


In [None]:
# Correctly checking if the index is monotonic increasing
if df.index.is_monotonic_increasing:
    df_2020 = df['2020-01-01':]
    print("Index is monotonic increasing. Data filtered successfully.")
    print(df_2020.head())
else:
    print("Index is not monotonic increasing. Sorting might be needed.")


In [None]:
# Check if the starting date exists in the index, if not, adjust
start_date = '2020-01-01'
if start_date in df.index:
    df_2020 = df[start_date:]
else:
    # Find the next date after '2020-01-01' in the index
    start_date = df[df.index >= start_date].index.min()
    df_2020 = df[start_date:]

print(df_2020.head())


In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Assume df is your DataFrame and it includes a 'Close' price column
close_prices = df['Close'].values.reshape(-1, 1)  # Reshape for scaling

# Initialize scaler
scaler = MinMaxScaler(feature_range=(0, 1))

# Scale the 'Close' prices
scaled_close = scaler.fit_transform(close_prices)

# Now you can safely call the function
sequence_length = 60
X, y = create_sequences(scaled_close, sequence_length)


In [None]:
# Function to create sequences of data
def create_sequences(data, sequence_length):
    X = []
    y = []
    for i in range(sequence_length, len(data)):
        X.append(data[i-sequence_length:i, 0])
        y.append(data[i, 0])
    return np.array(X), np.array(y)

# Create sequences with a window of the last 60 days
sequence_length = 60
X, y = create_sequences(scaled_close, sequence_length)



In [None]:
# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# Reshape input to be [samples, time steps, features] which is required for LSTM
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.losses import MeanSquaredError

# Assuming 'model' is already defined
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(50),
    Dense(1)  # Predicting a single continuous value
])

# Compile the model with a suitable loss function for regression
model.compile(optimizer='adam', loss=MeanSquaredError(), metrics=['mae'])


In [None]:
# Now, fit the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), verbose=1)



In [None]:
# Plotting training and validation losses
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Losses')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Making predictions
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)
y_test_scaled = scaler.inverse_transform(y_test.reshape(-1, 1))



In [None]:
# After making predictions
# ... [Code to make predictions] ...

# Get the last `len(predictions)` dates from the DataFrame for plotting
plot_dates = df.index[-len(predictions):]

# Plotting actual vs. predicted prices
plt.figure(figsize=(14, 5))
plt.plot(plot_dates, y_test_scaled, color='blue', label='Actual AMZN Close Price')
plt.plot(plot_dates, predictions, color='red', linestyle='dashed', label='Predicted AMZN Close Price')
plt.title('AMZN Stock Price Prediction')
plt.xlabel('Date')
plt.ylabel('AMZN Stock Price')
plt.legend()
plt.show()


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Assuming 'predictions' and 'y_test_scaled' are the predicted and actual prices from the test set

# Calculate MAE, MSE, RMSE, and R2
mae = mean_absolute_error(y_test_scaled, predictions)
mse = mean_squared_error(y_test_scaled, predictions)
rmse = np.sqrt(mse)  # Or directly use mean_squared_error(..., squared=False)
r2 = r2_score(y_test_scaled, predictions)

# Print out the metrics
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")
