Antor V1

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go

# Step 1: Data Collection
# Fetch Historical INTEL Stock Price Data from Yahoo Finance from starting to today
INTC_data = yf.download(tickers='INTC', start='2013-01-01', end='2023-12-29', interval='1d')
df = INTC_data.reset_index()

# Step 2: Data Preprocessing
df = df[['Date', 'Close']].dropna()
df.columns = ['Date', 'Price']

# Step 3: Feature Engineering
df['Previous_Price'] = df['Price'].shift(1)
df.dropna(inplace=True)

# Using only the 'Previous_Price' as a feature
X = df[['Previous_Price']]
y = df['Price']

# Splitting the data for training
train_data = df[df['Date'] < '2023-12-29']

X_train = train_data[['Previous_Price']]
y_train = train_data['Price']

# Step 4: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions for the next trading day
features_next_day = pd.DataFrame([[df['Price'].iloc[-1]]], columns=['Previous_Price'])
predicted_price_next_day = model.predict(features_next_day)[0]

# Print Predictions
next_trading_day = df['Date'].iloc[-1] + pd.Timedelta(days=1)
print(f"Predicted INTEL Price for {next_trading_day.date()}: ${predicted_price_next_day:.2f}")

# Visualization using Plotly
fig = go.Figure()

# Adding historical data to the plot
fig.add_trace(go.Scatter(x=df['Date'], y=df['Price'], mode='lines', name='Historical Price'))

# Highlighting the last known price
fig.add_trace(go.Scatter(x=[df['Date'].iloc[-1]], y=[df['Price'].iloc[-1]], mode='markers', name='Last Known Price', marker=dict(color='blue', size=10)))

# Highlighting predicted price for the next trading day
fig.add_trace(go.Scatter(x=[next_trading_day], y=[predicted_price_next_day], mode='markers', name='Predicted Price', marker=dict(color='red', size=10)))

# Update layout for better visualization
fig.update_layout(
    title=f'INTEL Price Prediction for {next_trading_day.date()}',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    annotations=[
        dict(
            x=next_trading_day,
            y=predicted_price_next_day,
            xref="x",
            yref="y",
            text=f"Predicted: ${predicted_price_next_day:.2f}",
            showarrow=True,
            arrowhead=5,
            ax=0,
            ay=-40
        )
    ]
)

# Show the interactive plot
fig.show()


**Antor v2** </br>
Time zoon Added.

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz

# Define the timezone you want to use
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
# Fetch Historical INTEL Stock Price Data from Yahoo Finance from starting to today
INTC_data = yf.download(tickers='INTC', start='2013-01-01', end='2024-2-29', interval='1d')
df = INTC_data.reset_index()

# Step 2: Data Preprocessing
df = df[['Date', 'Close']].dropna()
df.columns = ['Date', 'Price']

# Convert the 'Date' column to the user's timezone
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 3: Feature Engineering
df['Previous_Price'] = df['Price'].shift(1)
df.dropna(inplace=True)

# Using only the 'Previous_Price' as a feature
X = df[['Previous_Price']]
y = df['Price']

# Splitting the data for training
train_data = df[df['Date'] < df['Date'].iloc[-1]]

X_train = train_data[['Previous_Price']]
y_train = train_data['Price']

# Step 4: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions for the next trading day
features_next_day = pd.DataFrame([[df['Price'].iloc[-1]]], columns=['Previous_Price'])
predicted_price_next_day = model.predict(features_next_day)[0]

# Print Predictions
next_trading_day = df['Date'].iloc[-1] + pd.Timedelta(days=1)
print(f"Predicted INTEL Price for {next_trading_day.strftime('%Y-%m-%d %H:%M:%S %Z%z')}: ${predicted_price_next_day:.2f}")

# Visualization using Plotly
fig = go.Figure()

# Adding historical data to the plot
fig.add_trace(go.Scatter(x=df['Date'], y=df['Price'], mode='lines', name='Historical Price'))

# Highlighting the last known price
fig.add_trace(go.Scatter(x=[df['Date'].iloc[-1]], y=[df['Price'].iloc[-1]], mode='markers', name='Last Known Price', marker=dict(color='blue', size=10)))

# Highlighting predicted price for the next trading day
fig.add_trace(go.Scatter(x=[next_trading_day], y=[predicted_price_next_day], mode='markers', name='Predicted Price', marker=dict(color='red', size=10)))

# Update layout for better visualization
fig.update_layout(
    title=f"INTEL Price Prediction for {next_trading_day.strftime('%Y-%m-%d %H:%M:%S %Z%z')}",
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    annotations=[
        dict(
            x=next_trading_day,
            y=predicted_price_next_day,
            xref="x",
            yref="y",
            text=f"Predicted: ${predicted_price_next_day:.2f}",
            showarrow=True,
            arrowhead=5,
            ax=0,
            ay=-40
        )
    ]
)

# Show the interactive plot
fig.show()



In [None]:
# features: moving averages, exponential moving averages, and RSI

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz
import numpy as np

# Define the timezone you want to use
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
# Fetch Historical INTEL Stock Price Data from Yahoo Finance from starting to today
INTC_data = yf.download(tickers='INTC', start='2013-01-01', end='2024-03-22', interval='1d')
df = INTC_data.reset_index()

# Step 2: Data Preprocessing
df = df[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']].dropna()
df.columns = ['Date', 'Open', 'High', 'Low', 'Price', 'Volume']

# Convert the 'Date' column to the user's timezone
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 3: Feature Engineering
# Previous day's price
df['Previous_Price'] = df['Price'].shift(1)

# Moving averages
df['MA_5'] = df['Price'].rolling(window=5).mean()
df['MA_10'] = df['Price'].rolling(window=10).mean()

# Exponential moving averages
df['EMA_5'] = df['Price'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Price'].ewm(span=10, adjust=False).mean()

# Relative Strength Index (RSI)
delta = df['Price'].diff()
gain = np.where(delta > 0, delta, 0)
loss = np.where(delta < 0, -delta, 0)
avg_gain = pd.Series(gain).rolling(window=14).mean()
avg_loss = pd.Series(loss).rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Drop rows with NaN values resulting from moving averages and RSI calculation
df.dropna(inplace=True)

# Using the engineered features
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI']
X = df[features]
y = df['Price']

# Splitting the data for training
train_data = df[df['Date'] < df['Date'].iloc[-1]]

X_train = train_data[features]
y_train = train_data['Price']

# Step 4: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions for the next trading day
features_next_day = pd.DataFrame([df.iloc[-1][features].values], columns=features)
predicted_price_next_day = model.predict(features_next_day)[0]

# Print Predictions
next_trading_day = df['Date'].iloc[-1] + pd.Timedelta(days=1)
print(f"Predicted INTEL Price for {next_trading_day.strftime('%Y-%m-%d %H:%M:%S %Z%z')}: ${predicted_price_next_day:.2f}")

# Visualization using Plotly
fig = go.Figure()

# Adding historical data to the plot
fig.add_trace(go.Scatter(x=df['Date'], y=df['Price'], mode='lines', name='Historical Price'))

# Highlighting the last known price
fig.add_trace(go.Scatter(x=[df['Date'].iloc[-1]], y=[df['Price'].iloc[-1]], mode='markers', name='Last Known Price', marker=dict(color='blue', size=10)))

# Highlighting predicted price for the next trading day
fig.add_trace(go.Scatter(x=[next_trading_day], y=[predicted_price_next_day], mode='markers', name='Predicted Price', marker=dict(color='red', size=10)))

# Update layout for better visualization
fig.update_layout(
    title=f"INTEL Price Prediction for {next_trading_day.strftime('%Y-%m-%d %H:%M:%S %Z%z')}",
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    annotations=[
        dict(
            x=next_trading_day,
            y=predicted_price_next_day,
            xref="x",
            yref="y",
            text=f"Predicted: ${predicted_price_next_day:.2f}",
            showarrow=True,
            arrowhead=5,
            ax=0,
            ay=-40
        )
    ]
)

# Show the interactive plot
fig.show()


In [None]:
# Added time zoon, date in graph, features: moving averages, exponential moving averages, and RSI

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz
import numpy as np

# Define the timezone you want to use
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
# Fetch Historical INTEL Stock Price Data from Yahoo Finance from starting to today
INTC_data = yf.download(tickers='INTC', start='2013-01-01', end='2024-03-22', interval='1d')
df = INTC_data.reset_index()

# Step 2: Data Preprocessing
df = df[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']].dropna()
df.columns = ['Date', 'Open', 'High', 'Low', 'Price', 'Volume']

# Convert the 'Date' column to the user's timezone
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 3: Feature Engineering
# Previous day's price
df['Previous_Price'] = df['Price'].shift(1)

# Moving averages
df['MA_5'] = df['Price'].rolling(window=5).mean()
df['MA_10'] = df['Price'].rolling(window=10).mean()

# Exponential moving averages
df['EMA_5'] = df['Price'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Price'].ewm(span=10, adjust=False).mean()

# Relative Strength Index (RSI)
delta = df['Price'].diff()
gain = np.where(delta > 0, delta, 0)
loss = np.where(delta < 0, -delta, 0)
avg_gain = pd.Series(gain).rolling(window=14).mean()
avg_loss = pd.Series(loss).rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Drop rows with NaN values resulting from moving averages and RSI calculation
df.dropna(inplace=True)

# Using the engineered features
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI']
X = df[features]
y = df['Price']

# Splitting the data for training
train_data = df[df['Date'] < df['Date'].iloc[-1]]

X_train = train_data[features]
y_train = train_data['Price']

# Step 4: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions for the next trading day
features_next_day = pd.DataFrame([df.iloc[-1][features].values], columns=features)
predicted_price_next_day = model.predict(features_next_day)[0]

# Print Predictions
next_trading_day = df['Date'].iloc[-1] + pd.Timedelta(days=1)
print(f"Predicted INTEL Price for {next_trading_day.strftime('%Y-%m-%d %H:%M:%S %Z%z')}: ${predicted_price_next_day:.2f}")

# Visualization using Plotly
fig = go.Figure()

# Adding historical data to the plot
fig.add_trace(go.Scatter(x=df['Date'], y=df['Price'], mode='lines', name='Historical Price'))

# Highlighting the last known price with the full date
last_known_price_date = df['Date'].iloc[-1].strftime('%d.%m.%Y')
last_known_price = df['Price'].iloc[-1]
fig.add_trace(go.Scatter(x=[df['Date'].iloc[-1]], y=[last_known_price], mode='markers+text', name='Last Known Price', marker=dict(color='blue', size=10), text=[last_known_price_date], textposition="top center"))

# Highlighting predicted price for the next trading day with the full date
predicted_price_date = next_trading_day.strftime('%d.%m.%Y')
fig.add_trace(go.Scatter(x=[next_trading_day], y=[predicted_price_next_day], mode='markers+text', name='Predicted Price', marker=dict(color='red', size=10), text=[predicted_price_date], textposition="bottom center"))

# Update layout for better visualization
fig.update_layout(
    title=f"INTEL Price Prediction for {predicted_price_date}",
    xaxis_title='Date',
    yaxis_title='Price (USD)'
)

# Show the interactive plot
fig.show()

Shekhor v1

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2023-03-22', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering
df['Previous_Price'] = df['Close'].shift(1)
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI']
X = df[features]
y = df['Close']

train_data = df.iloc[:-7]
test_data = df.iloc[-7:]

# Step 3: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(train_data[features], train_data['Close'])

# Step 4: Predicting Prices for Next 7 Days
predictions = model.predict(test_data[features])

# Step 5: Visualization using Plotly
fig = go.Figure()

# Historical Data
fig.add_trace(go.Scatter(x=train_data['Date'], y=train_data['Close'], mode='lines', name='Historical Prices',
                         line=dict(color='royalblue')))

# Predicted Data
predicted_dates = [test_data['Date'].iloc[-1] + pd.Timedelta(days=i) for i in range(1, 8)]
fig.add_trace(go.Scatter(x=predicted_dates, y=predictions, mode='lines+markers', name='Predicted Prices',
                         line=dict(color='red')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction for Next 7 Days (Up to January 8th, 2024)',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    annotations=[
        dict(
            x=predicted_dates[0],
            y=predictions[0],
            xref="x",
            yref="y",
            text=f"Predicted Price: ${predictions[0]:.2f}",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-40
        )
    ]
)

# Show the interactive plot
fig.show()


shekhor v2

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2023-03-22', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering
df['Previous_Price'] = df['Close'].shift(1)
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI']
X = df[features]
y = df['Close']

train_data = df.iloc[:-7]
test_data = df.iloc[-7:]

# Step 3: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(train_data[features], train_data['Close'])

# Step 4: Predicting Prices for Next 7 Days
predictions = model.predict(test_data[features])

# Step 5: Visualization using Plotly
fig = go.Figure()

# Historical Data
fig.add_trace(go.Scatter(x=train_data['Date'], y=train_data['Close'], mode='lines', name='Historical Prices',
                         line=dict(color='royalblue')))

# Predicted Data
predicted_dates = [test_data['Date'].iloc[-1] + pd.Timedelta(days=i) for i in range(1, 8)]
fig.add_trace(go.Scatter(x=predicted_dates, y=predictions, mode='lines+markers', name='Predicted Prices',
                         line=dict(color='red')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction for Next 7 Days (Up to January 8th, 2024)',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5 * 60 * 1000, label="5m", step="minute", stepmode="backward"),
                dict(count=15 * 60 * 1000, label="15m", step="minute", stepmode="backward"),
                dict(count=1 * 60 * 60 * 1000, label="1h", step="hour", stepmode="backward"),
                dict(count=4 * 60 * 60 * 1000, label="4h", step="hour", stepmode="backward"),
                dict(count=12 * 60 * 60 * 1000, label="12h", step="hour", stepmode="backward"),
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=7, label="1w", step="day", stepmode="backward"),  # Corrected this line
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    annotations=[
        dict(
            x=predicted_dates[0],
            y=predictions[0],
            xref="x",
            yref="y",
            text=f"Predicted Price: ${predictions[0]:.2f}",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-40
        )
    ]
)

# Show the interactive plot
fig.show()


shekhor v3

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pytz

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2023-03-22', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering
df['Previous_Price'] = df['Close'].shift(1)
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI']
X = df[features]
y = df['Close']

train_data = df.iloc[:-7]
test_data = df.iloc[-7:]

# Step 3: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(train_data[features], train_data['Close'])

# Step 4: Predicting Prices for Next 7 Days
predictions = model.predict(test_data[features])

# Create Candlestick Chart
candlestick = go.Candlestick(x=train_data['Date'],
                             open=train_data['Open'],
                             high=train_data['High'],
                             low=train_data['Low'],
                             close=train_data['Close'],
                             name='Candlesticks')

# Create Volume Bars
volume_bars = go.Bar(x=train_data['Date'],
                     y=train_data['Volume'],
                     marker_color=train_data['Close'].diff().apply(lambda x: 'green' if x >= 0 else 'red'),
                     name='Volume')

# Create Subplots
fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                    vertical_spacing=0.1, subplot_titles=("Candlestick Chart", "Volume"))

# Add traces to subplots
fig.add_trace(candlestick, row=1, col=1)
fig.add_trace(volume_bars, row=2, col=1)

# Add Predicted Prices as Annotations
for i, date in enumerate(test_data['Date']):
    fig.add_annotation(x=date, y=predictions[i],
                       text=f"Predicted Price: ${predictions[i]:.2f}",
                       showarrow=True,
                       arrowhead=7,
                       ax=0,
                       ay=-40)

# Update layout for interactivity
fig.update_layout(title='INTEL Stock Price Prediction with Candlestick and Volume',
                  xaxis_title='Date',
                  hovermode='x unified',  # Unified hover mode
                  xaxis=dict(type='date',
                             rangeslider=dict(visible=True),
                             rangeselector=dict(
                                 buttons=list([
                                     dict(count=1, label="1d", step="day", stepmode="backward"),
                                     dict(count=7, label="1w", step="day", stepmode="backward"),
                                     dict(count=1, label="1m", step="month", stepmode="backward"),
                                     dict(count=6, label="6m", step="month", stepmode="backward"),
                                     dict(count=1, label="YTD", step="year", stepmode="todate"),
                                     dict(count=1, label="1y", step="year", stepmode="backward"),
                                     dict(step="all")
                                 ])
                             )),
                  margin=dict(l=50, r=50, t=50, b=50),
                  showlegend=True,
                  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
                  )

# Show the interactive plot
fig.show()


shekhor V4

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-03-25', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']
X = df[features]
y = df['Close']

train_data = df.iloc[:-7]
test_data = df.iloc[-7:]

# Step 3: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(train_data[features], train_data['Close'])

# Step 4: Predicting Prices for Next 7 Days
predictions = model.predict(test_data[features])

# Step 5: Visualization using Plotly
fig = go.Figure()

# Historical Data
fig.add_trace(go.Scatter(x=train_data['Date'], y=train_data['Close'], mode='lines', name='Historical Prices',
                         line=dict(color='royalblue')))

# Predicted Data
predicted_dates = [test_data['Date'].iloc[-1] + pd.Timedelta(days=i) for i in range(1, 8)]
fig.add_trace(go.Scatter(x=predicted_dates, y=predictions, mode='lines+markers', name='Predicted Prices',
                         line=dict(color='red')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction for Next 7 Days (Up to January 8th, 2024)',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5 * 60 * 1000, label="5m", step="minute", stepmode="backward"),
                dict(count=15 * 60 * 1000, label="15m", step="minute", stepmode="backward"),
                dict(count=1 * 60 * 60 * 1000, label="1h", step="hour", stepmode="backward"),
                dict(count=4 * 60 * 60 * 1000, label="4h", step="hour", stepmode="backward"),
                dict(count=12 * 60 * 60 * 1000, label="12h", step="hour", stepmode="backward"),
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=7, label="1w", step="day", stepmode="backward"),  # Corrected this line
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    annotations=[
        dict(
            x=predicted_dates[0],
            y=predictions[0],
            xref="x",
            yref="y",
            text=f"Predicted Price: ${predictions[0]:.2f}",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-40
        )
    ]
)

# Show the interactive plot
fig.show()

shekhor v5


In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-04-18', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']
X = df[features]
y = df['Close']

train_data = df.iloc[:-7]
test_data = df.iloc[-7:]

# Step 3: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(train_data[features], train_data['Close'])

# Step 4: Predicting Prices for Next 7 Days
predictions = model.predict(test_data[features])

# Step 5: Visualization using Plotly
fig = go.Figure()

# Historical Candlestick Data
fig.add_trace(go.Candlestick(x=df['Date'],
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             name='Candlestick'))

# Predicted Data
predicted_dates = [df['Date'].iloc[-1] + pd.Timedelta(days=i) for i in range(1, 8)]
fig.add_trace(go.Scatter(x=predicted_dates, y=predictions, mode='lines+markers', name='Predicted Prices',
                         line=dict(color='red')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction for Next 7 Days',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5 * 60 * 1000, label="5m", step="minute", stepmode="backward"),
                dict(count=15 * 60 * 1000, label="15m", step="minute", stepmode="backward"),
                dict(count=1 * 60 * 60 * 1000, label="1h", step="hour", stepmode="backward"),
                dict(count=4 * 60 * 60 * 1000, label="4h", step="hour", stepmode="backward"),
                dict(count=12 * 60 * 60 * 1000, label="12h", step="hour", stepmode="backward"),
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    annotations=[
        dict(
            x=predicted_dates[0],
            y=predictions[0],
            xref="x",
            yref="y",
            text=f"Predicted Price: ${predictions[0]:.2f}",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-40
        )
    ]
)

# Show the interactive plot
fig.show()

[*********************100%%**********************]  1 of 1 completed


Shehor V6

In [None]:
def vsa(df):
    # Calculate Volume Spread Analysis (VSA)
    df['Close_diff'] = df['Close'].diff()
    df['VSA'] = (df['Close_diff'] > 0) * df['Volume'] - (df['Close_diff'] < 0) * df['Volume']
    return df['VSA']


In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-04-18', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering
# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Volume Spread Analysis (VSA)
df['Close_diff'] = df['Close'].diff()
df['VSA'] = (df['Close_diff'] > 0) * df['Volume'] - (df['Close_diff'] < 0) * df['Volume']

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line', 'VSA']
X = df[features]
y = df['Close']

train_data = df.iloc[:-7]
test_data = df.iloc[-7:]

# Step 3: Model Training
# Define hyperparameters for tuning
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize RandomForestRegressor
rf_regressor = RandomForestRegressor(random_state=42)

# Perform Grid Search Cross Validation
grid_search = GridSearchCV(estimator=rf_regressor, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(train_data[features], train_data['Close'])

# Get the best parameters
best_params = grid_search.best_params_

# Train RandomForestRegressor with the best parameters
model = RandomForestRegressor(**best_params, random_state=42)
model.fit(train_data[features], train_data['Close'])

# Step 4: Predicting Prices for Next 7 Days
predictions = model.predict(test_data[features])

# Step 5: Model Evaluation
train_predictions = model.predict(train_data[features])
train_mae = mean_absolute_error(train_data['Close'], train_predictions)
train_rmse = mean_squared_error(train_data['Close'], train_predictions, squared=False)

test_mae = mean_absolute_error(test_data['Close'], predictions)
test_rmse = mean_squared_error(test_data['Close'], predictions, squared=False)

print("Train MAE:", train_mae)
print("Train RMSE:", train_rmse)
print("Test MAE:", test_mae)
print("Test RMSE:", test_rmse)

# Step 6: Visualization using Plotly
# Your existing visualization code...

# Enhancements for Interactivity and Visualization
fig = go.Figure()

# Historical Candlestick Data
fig.add_trace(go.Candlestick(x=df['Date'],
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             name='Candlestick'))

# Predicted Data
predicted_dates = [df['Date'].iloc[-1] + pd.Timedelta(days=i) for i in range(1, 8)]
fig.add_trace(go.Scatter(x=predicted_dates, y=predictions, mode='lines+markers', name='Predicted Prices',
                         line=dict(color='red')))

# Individual Graphs for Different Feature Engineering Scores
for feature in features:
    fig_feature = go.Figure()
    fig_feature.add_trace(go.Scatter(x=df['Date'], y=df[feature], mode='lines', name=feature))
    fig_feature.update_layout(
        title=f'{feature} over Time',
        xaxis_title='Date',
        yaxis_title=feature,
        hovermode='x'
    )
    fig_feature.show()


[*********************100%%**********************]  1 of 1 completed


Train MAE: 0.1253318063987025
Train RMSE: 0.2138627208044392
Test MAE: 0.263760696919789
Test RMSE: 0.28740620578647635


In [None]:
# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction for Next 7 Days',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5 * 60 * 1000, label="5m", step="minute", stepmode="backward"),
                dict(count=15 * 60 * 1000, label="15m", step="minute", stepmode="backward"),
                dict(count=1 * 60 * 60 * 1000, label="1h", step="hour", stepmode="backward"),
                dict(count=4 * 60 * 60 * 1000, label="4h", step="hour", stepmode="backward"),
                dict(count=12 * 60 * 60 * 1000, label="12h", step="hour", stepmode="backward"),
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    annotations=[
        dict(
            x=predicted_dates[0],
            y=predictions[0],
            xref="x",
            yref="y",
            text=f"Predicted Price: ${predictions[0]:.2f}",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-40
        )
    ]
)

# Show the interactive plot
fig.show()

Shekhor V7

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-04-01', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']
X = df[features]
y = df['Close']

train_data = df.iloc[:-7]
test_data = df.iloc[-7:]

# Step 3: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(train_data[features], train_data['Close'])

# Step 4: Predicting Prices for Next 7 Days
predictions = model.predict(test_data[features])

# Calculate absolute percentage difference for each day
absolute_percentage_diff = abs((predictions - test_data['Close']) / test_data['Close'])

# Calculate average accuracy ratio
accuracy_ratio = absolute_percentage_diff.mean()

# Calculate average accuracy in percentage
accuracy_percentage = (1 - accuracy_ratio) * 100
print("Accuracy:", accuracy_percentage, "%")

# Step 5: Visualization using Plotly
fig = go.Figure()

# Historical Candlestick Data
fig.add_trace(go.Candlestick(x=df['Date'],
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             name='Candlestick'))

# Predicted Data
predicted_dates = [df['Date'].iloc[-1] + pd.Timedelta(days=i) for i in range(1, 8)]
fig.add_trace(go.Scatter(x=predicted_dates, y=predictions, mode='lines+markers', name='Predicted Prices',
                         line=dict(color='red')))

# Actual Data for the next 7 days
actual_dates = [df['Date'].iloc[-1] + pd.Timedelta(days=i) for i in range(1, 8)]
fig.add_trace(go.Scatter(x=actual_dates, y=test_data['Close'], mode='lines+markers', name='Actual Prices',
                         line=dict(color='blue')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction for Next 7 Days',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5 * 60 * 1000, label="5m", step="minute", stepmode="backward"),
                dict(count=15 * 60 * 1000, label="15m", step="minute", stepmode="backward"),
                dict(count=1 * 60 * 60 * 1000, label="1h", step="hour", stepmode="backward"),
                dict(count=4 * 60 * 60 * 1000, label="4h", step="hour", stepmode="backward"),
                dict(count=12 * 60 * 60 * 1000, label="12h", step="hour", stepmode="backward"),
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    annotations=[
        dict(
            x=predicted_dates[0],
            y=predictions[0],
            xref="x",
            yref="y",
            text=f"Predicted Price: ${predictions[0]:.2f}",
            showarrow=True,
            arrowhead=7,
            ax=0,
            ay=-40
        )
    ]
)

# Show the interactive plot
fig.show()


[*********************100%%**********************]  1 of 1 completed


Accuracy: 99.32804822012162 %


In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-03-31', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

# Step 3: Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(df[features], df['Close'])

# Step 4: Predicting Prices for Next 7 Days
predictions = model.predict(df[features])

# Calculate absolute percentage difference for each day
absolute_percentage_diff = abs((predictions - df['Close']) / df['Close'])

# Calculate average accuracy ratio
accuracy_ratio = (1 - absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio:", accuracy_ratio, "%")

# Step 5: Visualization using Plotly
fig = go.Figure()

# Historical Candlestick Data
fig.add_trace(go.Candlestick(x=df['Date'],
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             name='Candlestick'))

# Predicted Data
fig.add_trace(go.Scatter(x=df['Date'], y=predictions, mode='lines', name='Predicted Prices', line=dict(color='red')))

# Actual Data
fig.add_trace(go.Scatter(x=df['Date'], y=df['Close'], mode='lines', name='Actual Prices', line=dict(color='blue')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction vs. Actual Prices',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5 * 60 * 1000, label="5m", step="minute", stepmode="backward"),
                dict(count=15 * 60 * 1000, label="15m", step="minute", stepmode="backward"),
                dict(count=1 * 60 * 60 * 1000, label="1h", step="hour", stepmode="backward"),
                dict(count=4 * 60 * 60 * 1000, label="4h", step="hour", stepmode="backward"),
                dict(count=12 * 60 * 60 * 1000, label="12h", step="hour", stepmode="backward"),
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    yaxis=dict(
        autorange=True,  # Enable vertical zooming
        type='linear'  # Linear scale for y-axis
    )
)

# Show the interactive plot
fig.show()


[*********************100%%**********************]  1 of 1 completed


Accuracy Ratio: 99.74740503091665 %


In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-03-31', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

# Calculate the indices for the splits
train_size = int(len(df) * 0.85)
validation_size = int(len(df) * 0.12)
test_size = len(df) - train_size - validation_size

# Split the data
train_df = df[:train_size]
validation_df = df[train_size:train_size + validation_size]
test_df = df[train_size + validation_size:]

# Step 3: Hyperparameter Tuning with Grid Search
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

model = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_absolute_percentage_error')
grid_search.fit(train_df[features], train_df['Close'])

best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Step 4: Validation
validation_predictions = best_model.predict(validation_df[features])
validation_absolute_percentage_diff = abs((validation_predictions - validation_df['Close']) / validation_df['Close'])
validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

# Step 5: Testing
test_predictions = best_model.predict(test_df[features])
test_absolute_percentage_diff = abs((test_predictions - test_df['Close']) / test_df['Close'])
test_accuracy_ratio = (1 - test_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Test Data:", test_accuracy_ratio, "%")

# Step 6: Visualization using Plotly
fig = go.Figure()

# Historical Candlestick Data
fig.add_trace(go.Candlestick(x=df['Date'],
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             name='Candlestick'))

# Predicted Data for Validation and Test
fig.add_trace(go.Scatter(x=validation_df['Date'], y=validation_predictions, mode='lines', name='Validation Predicted Prices', line=dict(color='orange')))
fig.add_trace(go.Scatter(x=test_df['Date'], y=test_predictions, mode='lines', name='Test Predicted Prices', line=dict(color='red')))

# Actual Data for Validation and Test
fig.add_trace(go.Scatter(x=validation_df['Date'], y=validation_df['Close'], mode='lines', name='Validation Actual Prices', line=dict(color='blue')))
fig.add_trace(go.Scatter(x=test_df['Date'], y=test_df['Close'], mode='lines', name='Test Actual Prices', line=dict(color='green')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction vs. Actual Prices',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5 * 60 * 1000, label="5m", step="minute", stepmode="backward"),
                dict(count=15 * 60 * 1000, label="15m", step="minute", stepmode="backward"),
                dict(count=1 * 60 * 60 * 1000, label="1h", step="hour", stepmode="backward"),
                dict(count=4 * 60 * 60 * 1000, label="4h", step="hour", stepmode="backward"),
                dict(count=12 * 60 * 60 * 1000, label="12h", step="hour", stepmode="backward"),
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    yaxis=dict(
        autorange=True,  # Enable vertical zooming
        type='linear'  # Linear scale for y-axis
    )
)

# Show the interactive plot
fig.show()


[*********************100%%**********************]  1 of 1 completed


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 50}
Accuracy Ratio on Validation Data: 98.58152896313544 %
Accuracy Ratio on Test Data: 99.00998018497067 %


In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-05-26', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

# Calculate the indices for the splits
train_size = int(len(df) * 0.90)
validation_size = int(len(df) * 0.10)
test_size = 7  # Next 7 days for testing

# Split the data
train_df = df[:train_size]
validation_df = df[train_size:train_size + validation_size]
test_df = df[-test_size:]  # Last 7 days for testing

# Step 3: Hyperparameter Tuning with Grid Search
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

model = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_absolute_percentage_error')
grid_search.fit(train_df[features], train_df['Close'])

best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Step 4: Validation
validation_predictions = best_model.predict(validation_df[features])
validation_absolute_percentage_diff = abs((validation_predictions - validation_df['Close']) / validation_df['Close'])
validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

# Step 5: Testing on unseen data (next 7 days)
test_predictions = best_model.predict(test_df[features])
test_absolute_percentage_diff = abs((test_predictions - test_df['Close']) / test_df['Close'])
test_accuracy_ratio = (1 - test_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Test Data:", test_accuracy_ratio, "%")

# Step 6: Visualization using Plotly
fig = go.Figure()

# Historical Candlestick Data
fig.add_trace(go.Candlestick(x=df['Date'],
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             name='Candlestick'))

# Predicted Data for Validation and Test
fig.add_trace(go.Scatter(x=validation_df['Date'], y=validation_predictions, mode='lines', name='Validation Predicted Prices', line=dict(color='orange')))
fig.add_trace(go.Scatter(x=test_df['Date'], y=test_predictions, mode='lines', name='Test Predicted Prices', line=dict(color='red')))

# Actual Data for Validation and Test
fig.add_trace(go.Scatter(x=validation_df['Date'], y=validation_df['Close'], mode='lines', name='Validation Actual Prices', line=dict(color='blue')))
fig.add_trace(go.Scatter(x=test_df['Date'], y=test_df['Close'], mode='lines', name='Test Actual Prices', line=dict(color='green')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction vs. Actual Prices',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5 * 60 * 1000, label="5m", step="minute", stepmode="backward"),
                dict(count=15 * 60 * 1000, label="15m", step="minute", stepmode="backward"),
                dict(count=1 * 60 * 60 * 1000, label="1h", step="hour", stepmode="backward"),
                dict(count=4 * 60 * 60 * 1000, label="4h", step="hour", stepmode="backward"),
                dict(count=12 * 60 * 60 * 1000, label="12h", step="hour", stepmode="backward"),
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    yaxis=dict(
        autorange=True,  # Enable vertical zooming
        type='linear'  # Linear scale for y-axis
    )
)

# Show the interactive plot
fig.show()


[*********************100%%**********************]  1 of 1 completed


Best Parameters: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}
Accuracy Ratio on Validation Data: 98.85407297359092 %
Accuracy Ratio on Test Data: 99.17160516668876 %


Data Splitting:

The data is split into 90% for training, 10% for validation, and the last 7 days as the unseen test data.
Hyperparameter Tuning with Grid Search:

Grid Search is used to find the best hyperparameters for the RandomForestRegressor.
Model Training with Best Parameters:

The model is trained using the best hyperparameters obtained from the grid search on the training data.
Model Validation and Testing:

The model is validated with the validation data, and the accuracy ratio is calculated.
The model is tested on the unseen next 7 days of data, and the accuracy ratio is calculated.
Visualization:

The predictions and actual prices for the validation and test periods are visualized.

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-05-26', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

# Calculate the indices for the splits
train_size = int(len(df) * 0.90)
validation_size = int(len(df) * 0.10)
test_size = 7  # Next 7 days for testing

# Split the data
train_df = df[:train_size]
validation_df = df[train_size:train_size + validation_size]
test_df = df[-test_size:]  # Last 7 days for testing

# Step 3: Hyperparameter Tuning with Time Series Cross-Validation
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

tscv = TimeSeriesSplit(n_splits=5)
model = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=tscv, n_jobs=-1, scoring='neg_mean_absolute_percentage_error')
grid_search.fit(train_df[features], train_df['Close'])

best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Step 4: Validation
validation_predictions = best_model.predict(validation_df[features])
validation_absolute_percentage_diff = abs((validation_predictions - validation_df['Close']) / validation_df['Close'])
validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

# Step 5: Testing on unseen data (next 7 days)
test_predictions = best_model.predict(test_df[features])
test_absolute_percentage_diff = abs((test_predictions - test_df['Close']) / test_df['Close'])
test_accuracy_ratio = (1 - test_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Test Data:", test_accuracy_ratio, "%")

# Step 6: Visualization using Plotly
fig = go.Figure()

# Historical Candlestick Data
fig.add_trace(go.Candlestick(x=df['Date'],
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             name='Candlestick'))

# Predicted Data for Validation and Test
fig.add_trace(go.Scatter(x=validation_df['Date'], y=validation_predictions, mode='lines', name='Validation Predicted Prices', line=dict(color='orange')))
fig.add_trace(go.Scatter(x=test_df['Date'], y=test_predictions, mode='lines', name='Test Predicted Prices', line=dict(color='red')))

# Actual Data for Validation and Test
fig.add_trace(go.Scatter(x=validation_df['Date'], y=validation_df['Close'], mode='lines', name='Validation Actual Prices', line=dict(color='blue')))
fig.add_trace(go.Scatter(x=test_df['Date'], y=test_df['Close'], mode='lines', name='Test Actual Prices', line=dict(color='green')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction vs. Actual Prices',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5 * 60 * 1000, label="5m", step="minute", stepmode="backward"),
                dict(count=15 * 60 * 1000, label="15m", step="minute", stepmode="backward"),
                dict(count=1 * 60 * 60 * 1000, label="1h", step="hour", stepmode="backward"),
                dict(count=4 * 60 * 60 * 1000, label="4h", step="hour", stepmode="backward"),
                dict(count=12 * 60 * 60 * 1000, label="12h", step="hour", stepmode="backward"),
                dict(count=1, label="1d", step="day", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=7, label="1w", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    yaxis=dict(
        autorange=True,  # Enable vertical zooming
        type='linear'  # Linear scale for y-axis
    )
)

# Show the interactive plot
fig.show()


[*********************100%%**********************]  1 of 1 completed


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy Ratio on Validation Data: 98.86099283719949 %
Accuracy Ratio on Test Data: 98.86705256345941 %


Time Series Cross-Validation (Walk-Forward Validation):

TimeSeriesSplit is used to ensure that the training set always precedes the validation set, respecting the temporal order of the data.
Hyperparameter Tuning with Grid Search:

GridSearchCV is used with TimeSeriesSplit to find the best hyperparameters for the RandomForestRegressor.
Validation and Testing:

The model is validated using the validation data and tested on the unseen next 7 days of data, with accuracy ratios calculated for both.

Time Series Cross-Validation (Walk-Forward Validation): This method ensures that the training set always precedes the validation set, mimicking how new data becomes available over time.
Advanced Models: Using models specifically designed for time series forecasting, such as ARIMA, GARCH, or machine learning models that include temporal features, like LSTM (Long Short-Term Memory) networks.

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import plotly.express as px
import pytz
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-05-23', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

# Calculate the indices for the splits
train_size = int(len(df) * 0.95)
validation_size = int(len(df) * 0.05)
test_size = 7  # Next 7 days for testing

# Split the data
train_df = df[:train_size]
validation_df = df[train_size:train_size + validation_size]
test_df = df[-test_size:]  # Last 7 days for testing

# Step 3: Hyperparameter Tuning with Time Series Cross-Validation
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

tscv = TimeSeriesSplit(n_splits=5)
model = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=tscv, n_jobs=-1, scoring='neg_mean_absolute_percentage_error')
grid_search.fit(train_df[features], train_df['Close'])

best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Step 4: Validation
validation_predictions = best_model.predict(validation_df[features])
validation_absolute_percentage_diff = abs((validation_predictions - validation_df['Close']) / validation_df['Close'])
validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

# Step 5: Testing on unseen data (next 7 days)
test_predictions = best_model.predict(test_df[features])
test_absolute_percentage_diff = abs((test_predictions - test_df['Close']) / test_df['Close'])
test_accuracy_ratio = (1 - test_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Test Data:", test_accuracy_ratio, "%")

# Step 6: Visualization using Plotly
fig = go.Figure()

# Historical Candlestick Data
fig.add_trace(go.Candlestick(x=df['Date'],
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             name='Candlestick'))

# Predicted Data for Test
test_dates = test_df['Date'].dt.strftime('%Y-%m-%d').tolist()  # Convert dates to string for plotly
fig.add_trace(go.Scatter(x=test_dates, y=test_predictions, mode='lines', name='Predicted Prices', line=dict(color='orange')))

# Actual Data for Test
fig.add_trace(go.Scatter(x=test_dates, y=test_df['Close'], mode='lines', name='Actual Prices', line=dict(color='blue')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction vs. Actual Prices',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeslider=dict(visible=True),
        type="date"
    ),
    yaxis=dict(
        autorange=True,  # Enable vertical zooming
        type='linear'  # Linear scale for y-axis
    )
)

# Show the interactive plot
fig.show()


[*********************100%%**********************]  1 of 1 completed


Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
Accuracy Ratio on Validation Data: 98.89190411302489 %
Accuracy Ratio on Test Data: 99.11209975821257 %


In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.preprocessing.sequence import TimeseriesGenerator
import plotly.graph_objects as go
import pytz
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-05-23', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data for Short-Term and Long-Term Predictions
short_term_features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
                       'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

long_term_features = short_term_features

# Short-Term Data Splitting
short_term_train_size = int(len(df) * 0.95)
short_term_validation_size = len(df) - short_term_train_size

short_term_train_df = df[:short_term_train_size]
short_term_validation_df = df[short_term_train_size:]

# Long-Term Data Splitting
long_term_train_size = int(len(df) * 0.95)
long_term_train_df = df[:long_term_train_size]
long_term_validation_df = df[long_term_train_size:]

# Function for Short-Term Prediction using RandomForestRegressor
def short_term_prediction(df, features, train_df, validation_df):
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    tscv = TimeSeriesSplit(n_splits=5)
    model = RandomForestRegressor(random_state=42)
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=tscv, n_jobs=-1, scoring='neg_mean_absolute_percentage_error')
    grid_search.fit(train_df[features], train_df['Close'])

    best_model = grid_search.best_estimator_
    print("Best Parameters for Short-Term Model:", grid_search.best_params_)

    validation_predictions = best_model.predict(validation_df[features])
    validation_absolute_percentage_diff = abs((validation_predictions - validation_df['Close']) / validation_df['Close'])
    validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

    print("Short-Term Model Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

    return best_model

# Function for Long-Term Prediction using LSTM
def long_term_prediction(df, features, train_df, validation_df):
    n_input = 30
    n_features = len(features)

    train_generator = TimeseriesGenerator(train_df[features].values, train_df['Close'].values, length=n_input, batch_size=32)
    validation_generator = TimeseriesGenerator(validation_df[features].values, validation_df['Close'].values, length=n_input, batch_size=32)

    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(n_input, n_features)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')

    model.fit(train_generator, validation_data=validation_generator, epochs=50)

    validation_predictions = model.predict(validation_generator)
    validation_actuals = validation_df['Close'].values[n_input:]
    validation_absolute_percentage_diff = abs((validation_predictions.flatten() - validation_actuals) / validation_actuals)
    validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

    print("Long-Term Model Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

    return model

# Train models
short_term_model = short_term_prediction(df, short_term_features, short_term_train_df, short_term_validation_df)
long_term_model = long_term_prediction(df, long_term_features, long_term_train_df, long_term_validation_df)




[*********************100%%**********************]  1 of 1 completed


Best Parameters for Short-Term Model: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
Short-Term Model Accuracy Ratio on Validation Data: 98.89582402657754 %
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Long-Term Model Accuracy Ratio on Validation Data: 67.64946995256437 %


ValueError: Input X contains NaN.
RandomForestRegressor does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [None]:
# Predict future prices
def predict_future(model, df, features, n_days, short_term=True):
    future_dates = pd.date_range(start=df['Date'].max(), periods=n_days+1, freq='D')[1:]
    future_df = pd.DataFrame(index=future_dates, columns=df.columns)
    future_df['Date'] = future_dates
    df = pd.concat([df, future_df])

    # Fill missing values
    if short_term:
        df[features] = df[features].ffill()
    else:
        df[features] = df[features].fillna(df[features].mean())

    if short_term:
        predictions = []
        for i in range(len(future_dates)):
            prediction = model.predict(df[features].iloc[-(n_days + i):-i or None])
            predictions.append(prediction[-1])
        return predictions
    else:
        future_generator = TimeseriesGenerator(df[features].values, np.zeros(len(df)), length=30, batch_size=1)
        predictions = model.predict(future_generator)
        return predictions[-n_days:]

short_term_predictions = predict_future(short_term_model, df, short_term_features, 7, short_term=True)
long_term_predictions = predict_future(long_term_model, df, long_term_features, 30, short_term=False)

# Visualization using Plotly
def plot_predictions(df, short_term_predictions, long_term_predictions):
    fig = go.Figure()

    # Historical Candlestick Data
    fig.add_trace(go.Candlestick(x=df['Date'],
                                 open=df['Open'],
                                 high=df['High'],
                                 low=df['Low'],
                                 close=df['Close'],
                                 name='Candlestick'))

    # Short-Term Predictions
    future_dates_short = pd.date_range(start=df['Date'].max(), periods=len(short_term_predictions)+1, freq='D')[1:]
    fig.add_trace(go.Scatter(x=future_dates_short, y=short_term_predictions, mode='lines', name='Short-Term Predictions', line=dict(color='orange')))

    # Long-Term Predictions
    future_dates_long = pd.date_range(start=df['Date'].max(), periods=len(long_term_predictions)+1, freq='D')[1:]
    fig.add_trace(go.Scatter(x=future_dates_long, y=long_term_predictions, mode='lines', name='Long-Term Predictions', line=dict(color='blue')))

    fig.update_layout(
        title='INTEL Stock Price Prediction vs. Actual Prices',
        xaxis_title='Date',
        yaxis_title='Price (USD)',
        hovermode='x',
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1, label="1d", step="day", stepmode="backward"),
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=3, label="3m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(visible=True),
            type="date"
        )
    )

    fig.show()

plot_predictions(df, short_term_predictions, long_term_predictions)



In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-05-23', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

# Calculate the indices for the splits
train_size = int(len(df) * 0.90)
validation_size = int(len(df) * 0.10)
test_size = 7  # Next 7 days for testing

# Split the data
train_df = df[:train_size]
validation_df = df[train_size:train_size + validation_size]
test_df = df[-test_size:]  # Last 7 days for testing

# Step 3: Hyperparameter Tuning with Time Series Cross-Validation
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

tscv = TimeSeriesSplit(n_splits=5)
model = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=tscv, n_jobs=-1, scoring='neg_mean_absolute_percentage_error')
grid_search.fit(train_df[features], train_df['Close'])

best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Step 4: Validation
validation_predictions = best_model.predict(validation_df[features])
validation_absolute_percentage_diff = abs((validation_predictions - validation_df['Close']) / validation_df['Close'])
validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

# Step 5: Testing on unseen data (next 7 days)
test_predictions = best_model.predict(test_df[features])
test_absolute_percentage_diff = abs((test_predictions - test_df['Close']) / test_df['Close'])
test_accuracy_ratio = (1 - test_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Test Data:", test_accuracy_ratio, "%")

# Step 6: Visualization using Plotly
fig = go.Figure()

# Historical Candlestick Data
fig.add_trace(go.Candlestick(x=df['Date'],
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             name='Candlestick'))

# Predicted Data for Validation and Test
fig.add_trace(go.Scatter(x=validation_df['Date'], y=validation_predictions, mode='lines', name='Validation Predicted Prices', line=dict(color='orange')))
fig.add_trace(go.Scatter(x=test_df['Date'], y=test_predictions, mode='lines', name='Test Predicted Prices', line=dict(color='red')))

# Actual Data for Validation and Test
fig.add_trace(go.Scatter(x=validation_df['Date'], y=validation_df['Close'], mode='lines', name='Validation Actual Prices', line=dict(color='blue')))
fig.add_trace(go.Scatter(x=test_df['Date'], y=test_df['Close'], mode='lines', name='Test Actual Prices', line=dict(color='green')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction vs. Actual Prices',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5, label="5d", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=3, label="3m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    yaxis=dict(
        autorange=True,  # Enable vertical zooming
        fixedrange=False,  # Allow vertical zooming
        type='linear'  # Linear scale for y-axis
    )
)

# Show the interactive plot
fig.show()


[*********************100%%**********************]  1 of 1 completed


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy Ratio on Validation Data: 98.86099283719949 %
Accuracy Ratio on Test Data: 98.86705256345941 %


In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import plotly.graph_objects as go
import pytz
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-05-26', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data
features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
            'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

# Calculate the indices for the splits
train_size = int(len(df) * 0.90)
validation_size = int(len(df) * 0.10)
test_size = 7  # Next 7 days for testing

# Split the data
train_df = df[:train_size]
validation_df = df[train_size:train_size + validation_size]
test_df = df[-test_size:]  # Last 7 days for testing

# Step 3: Hyperparameter Tuning with Time Series Cross-Validation
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

tscv = TimeSeriesSplit(n_splits=5)
model = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=tscv, n_jobs=-1, scoring='neg_mean_absolute_percentage_error')
grid_search.fit(train_df[features], train_df['Close'])

best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Step 4: Validation
validation_predictions = best_model.predict(validation_df[features])
validation_absolute_percentage_diff = abs((validation_predictions - validation_df['Close']) / validation_df['Close'])
validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

# Step 5: Testing on unseen data (next 7 days)
test_predictions = best_model.predict(test_df[features])
test_absolute_percentage_diff = abs((test_predictions - test_df['Close']) / test_df['Close'])
test_accuracy_ratio = (1 - test_absolute_percentage_diff.mean()) * 100

print("Accuracy Ratio on Test Data:", test_accuracy_ratio, "%")

# Step 6: Visualization using Plotly
fig = go.Figure()

# Historical Candlestick Data
fig.add_trace(go.Candlestick(x=df['Date'],
                             open=df['Open'],
                             high=df['High'],
                             low=df['Low'],
                             close=df['Close'],
                             name='Candlestick'))

# Predicted Data for Validation and Test
fig.add_trace(go.Scatter(x=validation_df['Date'], y=validation_predictions, mode='lines', name='Validation Predicted Prices', line=dict(color='orange')))
fig.add_trace(go.Scatter(x=test_df['Date'], y=test_predictions, mode='lines', name='Test Predicted Prices', line=dict(color='red')))

# Actual Data for Validation and Test
fig.add_trace(go.Scatter(x=validation_df['Date'], y=validation_df['Close'], mode='lines', name='Validation Actual Prices', line=dict(color='blue')))
fig.add_trace(go.Scatter(x=test_df['Date'], y=test_df['Close'], mode='lines', name='Test Actual Prices', line=dict(color='green')))

# Enhancements for Interactivity and Visualization
fig.update_layout(
    title='INTEL Stock Price Prediction vs. Actual Prices',
    xaxis_title='Date',
    yaxis_title='Price (USD)',
    hovermode='x',
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=5, label="5d", step="day", stepmode="backward"),
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=3, label="3m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    ),
    yaxis=dict(
        autorange=True,  # Enable vertical zooming
        fixedrange=False,  # Allow vertical zooming
        type='linear'  # Linear scale for y-axis
    )
)

# Show the interactive plot
fig.show()


[*********************100%%**********************]  1 of 1 completed


Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy Ratio on Validation Data: 98.86099283719949 %
Accuracy Ratio on Test Data: 98.86705256345941 %


Model Customization for Short-Term and Long-Term Prediction:
Feature Engineering:
Added various technical indicators such as Moving Averages (MA), Exponential Moving Averages (EMA), Relative Strength Index (RSI), Bollinger Bands, Stochastic Oscillator, Average True Range (ATR), On-Balance Volume (OBV), and Moving Average Convergence Divergence (MACD).
Hyperparameter Tuning:

Used GridSearchCV with TimeSeriesSplit for hyperparameter tuning.
Visualization with Plotly:

Replaced static plots with interactive candlestick charts using Plotly for better insights.

In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.preprocessing.sequence import TimeseriesGenerator
import plotly.graph_objects as go
import pytz
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-05-23', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering
df['Previous_Price'] = df['Close'].shift(1)
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data for Short-Term and Long-Term Predictions
short_term_features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
                       'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

long_term_features = short_term_features

# Short-Term Data Splitting
short_term_train_size = int(len(df) * 0.95)
short_term_validation_size = len(df) - short_term_train_size

short_term_train_df = df[:short_term_train_size]
short_term_validation_df = df[short_term_train_size:]

# Long-Term Data Splitting
long_term_train_size = int(len(df) * 0.95)
long_term_train_df = df[:long_term_train_size]
long_term_validation_df = df[long_term_train_size:]

# Function for Short-Term Prediction using RandomForestRegressor
def short_term_prediction(df, features, train_df, validation_df):
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    tscv = TimeSeriesSplit(n_splits=5)
    model = RandomForestRegressor(random_state=42)
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=tscv, n_jobs=-1, scoring='neg_mean_absolute_percentage_error')
    grid_search.fit(train_df[features], train_df['Close'])

    best_model = grid_search.best_estimator_
    print("Best Parameters for Short-Term Model:", grid_search.best_params_)

    validation_predictions = best_model.predict(validation_df[features])
    validation_absolute_percentage_diff = abs((validation_predictions - validation_df['Close']) / validation_df['Close'])
    validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

    print("Short-Term Model Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

    return best_model

# Function for Long-Term Prediction using LSTM
def long_term_prediction(df, features, train_df, validation_df):
    n_input = 30
    n_features = len(features)

    train_generator = TimeseriesGenerator(train_df[features].values, train_df['Close'].values, length=n_input, batch_size=32)
    validation_generator = TimeseriesGenerator(validation_df[features].values, validation_df['Close'].values, length=n_input, batch_size=32)

    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(n_input, n_features)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')

    model.fit(train_generator, validation_data=validation_generator, epochs=50)

    validation_predictions = model.predict(validation_generator)
    validation_actuals = validation_df['Close'].values[n_input:]
    validation_absolute_percentage_diff = abs((validation_predictions.flatten() - validation_actuals) / validation_actuals)
    validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

    print("Long-Term Model Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

    return model

# Train models
short_term_model = short_term_prediction(df, short_term_features, short_term_train_df, short_term_validation_df)
long_term_model = long_term_prediction(df, long_term_features, long_term_train_df, long_term_validation_df)

# Predict future prices
def predict_future(model, df, features, n_days, short_term=True):
    future_dates = pd.date_range(start=df['Date'].max(), periods=n_days+1, freq='D')[1:]
    future_df = pd.DataFrame(index=future_dates, columns=df.columns)
    future_df['Date'] = future_dates
    df = pd.concat([df, future_df])

    # Fill missing values
    if short_term:
        df[features] = df[features].ffill()
    else:
        df[features] = df[features].fillna(df[features].mean())

    predictions = []
    if short_term:
        for i in range(n_days):
            prediction = model.predict(df[features].iloc[-(n_days + i):-i or None])
            predictions.append(prediction[-1])
    else:
        future_generator = TimeseriesGenerator(df[features].values, np.zeros(len(df)), length=30, batch_size=1)
        predictions = model.predict(future_generator)[-n_days:]

    return predictions

short_term_predictions = predict_future(short_term_model, df, short_term_features, 7, short_term=True)
long_term_predictions = predict_future(long_term_model, df, long_term_features, 30, short_term=False)

# Visualization using Plotly
def plot_predictions(df, short_term_predictions, long_term_predictions, short_term_days, long_term_days):
    fig = go.Figure()

    # Historical Candlestick Data
    fig.add_trace(go.Candlestick(x=df['Date'],
                                 open=df['Open'],
                                 high=df['High'],
                                 low=df['Low'],
                                 close=df['Close'],
                                 name='Candlestick'))

    # Short-Term Predictions
    future_dates_short = pd.date_range(start=df['Date'].max(), periods=short_term_days+1, freq='D')[1:]
    future_short_df = pd.DataFrame(index=future_dates_short, columns=df.columns)
    future_short_df['Close'] = short_term_predictions

    fig.add_trace(go.Scatter(x=future_short_df.index,
                             y=future_short_df['Close'],
                             mode='lines+markers',
                             name='Short-Term Predictions'))

    # Long-Term Predictions
    future_dates_long = pd.date_range(start=df['Date'].max(), periods=long_term_days+1, freq='D')[1:]
    future_long_df = pd.DataFrame(index=future_dates_long, columns=df.columns)
    future_long_df['Close'] = long_term_predictions

    fig.add_trace(go.Scatter(x=future_long_df.index,
                             y=future_long_df['Close'],
                             mode='lines+markers',
                             name='Long-Term Predictions'))

    fig.update_layout(title='Stock Price Predictions',
                      xaxis_title='Date',
                      yaxis_title='Price',
                      xaxis_rangeslider_visible=False)
    fig.show()

plot_predictions(df, short_term_predictions, long_term_predictions, 7, 30)


[*********************100%%**********************]  1 of 1 completed

os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.



Best Parameters for Short-Term Model: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
Short-Term Model Accuracy Ratio on Validation Data: 98.89582402657754 %
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Long-Term Model Accuracy Ratio on Validation Data: 86.64636111949167 %


In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.preprocessing.sequence import TimeseriesGenerator
import plotly.graph_objects as go
import pytz
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_absolute_percentage_error

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-05-23', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering

# Previous Price (Shift the Close column)
df['Previous_Price'] = df['Close'].shift(1)

# Moving Averages
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()

# Exponential Moving Averages
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data for Short-Term and Long-Term Predictions
short_term_features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
                       'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

long_term_features = short_term_features

# Short-Term Data Splitting
short_term_train_size = int(len(df) * 0.95)
short_term_validation_size = len(df) - short_term_train_size

short_term_train_df = df[:short_term_train_size]
short_term_validation_df = df[short_term_train_size:]

# Long-Term Data Splitting
long_term_train_size = int(len(df) * 0.95)
long_term_train_df = df[:long_term_train_size]
long_term_validation_df = df[long_term_train_size:]

# Function for Short-Term Prediction using RandomForestRegressor
def short_term_prediction(df, features, train_df, validation_df):
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    tscv = TimeSeriesSplit(n_splits=5)
    model = RandomForestRegressor(random_state=42)
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=tscv, n_jobs=-1, scoring='neg_mean_absolute_percentage_error')
    grid_search.fit(train_df[features], train_df['Close'])

    best_model = grid_search.best_estimator_
    print("Best Parameters for Short-Term Model:", grid_search.best_params_)

    validation_predictions = best_model.predict(validation_df[features])
    validation_absolute_percentage_diff = abs((validation_predictions - validation_df['Close']) / validation_df['Close'])
    validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

    print("Short-Term Model Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

    return best_model

# Function for Long-Term Prediction using LSTM
def long_term_prediction(df, features, train_df, validation_df):
    n_input = 30
    n_features = len(features)

    train_generator = TimeseriesGenerator(train_df[features].values, train_df['Close'].values, length=n_input, batch_size=32)
    validation_generator = TimeseriesGenerator(validation_df[features].values, validation_df['Close'].values, length=n_input, batch_size=32)

    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(n_input, n_features)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')

    model.fit(train_generator, validation_data=validation_generator, epochs=50)

    validation_predictions = model.predict(validation_generator)
    validation_actuals = validation_df['Close'].values[n_input:]
    validation_absolute_percentage_diff = abs((validation_predictions.flatten() - validation_actuals) / validation_actuals)
    validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

    print("Long-Term Model Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

    return model

# Train models
short_term_model = short_term_prediction(df, short_term_features, short_term_train_df, short_term_validation_df)
long_term_model = long_term_prediction(df, long_term_features, long_term_train_df, long_term_validation_df)

# Predict future prices
def predict_future(model, df, features, n_days, short_term=True):
    future_dates = pd.date_range(start=df['Date'].max(), periods=n_days+1, freq='D')[1:]
    future_df = pd.DataFrame(index=future_dates, columns=df.columns)
    future_df['Date'] = future_dates
    df = pd.concat([df, future_df])

    # Fill missing values
    if short_term:
        df[features] = df[features].ffill()
    else:
        df[features] = df[features].fillna(df[features].mean())

    if short_term:
        predictions = []
        for i in range(len(future_dates)):
            prediction = model.predict(df[features].iloc[-(n_days + i):-i or None])
            predictions.append(prediction[-1])
        return predictions
    else:
        future_generator = TimeseriesGenerator(df[features].values, np.zeros(len(df)), length=30, batch_size=1)
        predictions = model.predict(future_generator)
        return predictions[-n_days:]

short_term_predictions = predict_future(short_term_model, df, short_term_features, 7, short_term=True)
long_term_predictions = predict_future(long_term_model, df, long_term_features, 30, short_term=False)

# Visualization using Plotly
def plot_predictions(df, short_term_predictions, long_term_predictions, short_term_days, long_term_days):
    fig = go.Figure()

    # Historical Candlestick Data
    fig.add_trace(go.Candlestick(x=df['Date'],
                                 open=df['Open'],
                                 high=df['High'],
                                 low=df['Low'],
                                 close=df['Close'],
                                 name='Historical Data'))

    # Short-Term Predictions as Candlestick
    future_dates_short = pd.date_range(start=df['Date'].max(), periods=short_term_days+1, freq='D')[1:]
    future_short_df = pd.DataFrame({'Date': future_dates_short})
    future_short_df['Open'] = df['Close'].iloc[-1]
    future_short_df['Close'] = short_term_predictions
    future_short_df['High'] = future_short_df[['Open', 'Close']].max(axis=1)
    future_short_df['Low'] = future_short_df[['Open', 'Close']].min(axis=1)
    fig.add_trace(go.Candlestick(x=future_short_df['Date'],
                                 open=future_short_df['Open'],
                                 high=future_short_df['High'],
                                 low=future_short_df['Low'],
                                 close=future_short_df['Close'],
                                 name='Short-Term Predictions'))

    # Long-Term Predictions as Candlestick
    future_dates_long = pd.date_range(start=df['Date'].max(), periods=long_term_days+1, freq='D')[1:]
    future_long_df = pd.DataFrame({'Date': future_dates_long})
    future_long_df['Open'] = df['Close'].iloc[-1]
    future_long_df['Close'] = long_term_predictions
    future_long_df['High'] = future_long_df[['Open', 'Close']].max(axis=1)
    future_long_df['Low'] = future_long_df[['Open', 'Close']].min(axis=1)
    fig.add_trace(go.Candlestick(x=future_long_df['Date'],
                                 open=future_long_df['Open'],
                                 high=future_long_df['High'],
                                 low=future_long_df['Low'],
                                 close=future_long_df['Close'],
                                 name='Long-Term Predictions'))

    fig.update_layout(title='Stock Price Predictions',
                      xaxis_title='Date',
                      yaxis_title='Price',
                      xaxis_rangeslider_visible=False)

    fig.show()

plot_predictions(df, short_term_predictions, long_term_predictions, 7, 30)

# Function to resample data for different timeframes
def resample_data(df, timeframe):
    df_resampled = df.resample(timeframe, on='Date').agg({
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last',
        'Volume': 'sum'
    }).dropna().reset_index()
    return df_resampled

# Timeframes to visualize
timeframes = ['5T', '15T', '1H', '2H', '4H', '12H', '1D', '3D', '7D', '15D', '1M', '3M', '6M', '1Y', 'AS']

# Visualize data on different timeframes
def plot_timeframes(df, timeframes):
    for timeframe in timeframes:
        df_resampled = resample_data(df, timeframe)
        fig = go.Figure()
        fig.add_trace(go.Candlestick(x=df_resampled['Date'],
                                     open=df_resampled['Open'],
                                     high=df_resampled['High'],
                                     low=df_resampled['Low'],
                                     close=df_resampled['Close'],
                                     name=f'{timeframe} Data'))
        fig.update_layout(title=f'Stock Price Data - {timeframe}',
                          xaxis_title='Date',
                          yaxis_title='Price',
                          xaxis_rangeslider_visible=False)
        fig.show()

plot_timeframes(df, timeframes)


[*********************100%%**********************]  1 of 1 completed

os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.



Best Parameters for Short-Term Model: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
Short-Term Model Accuracy Ratio on Validation Data: 98.89582402657754 %
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Long-Term Model Accuracy Ratio on Validation Data: 75.89333771980459 %


In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.preprocessing.sequence import TimeseriesGenerator
import plotly.graph_objects as go
import pytz
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV

# Define the timezone
user_timezone = pytz.timezone('Asia/Yekaterinburg')

# Step 1: Data Collection
df = yf.download('INTC', start='2013-01-01', end='2024-05-23', interval='1d').reset_index()
df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert(user_timezone)

# Step 2: Feature Engineering
df['Previous_Price'] = df['Close'].shift(1)
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_10'] = df['Close'].rolling(window=10).mean()
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()

# RSI Calculation
delta = df['Close'].diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# Bollinger Bands
df['Middle_Band'] = df['Close'].rolling(window=20).mean()
df['Upper_Band'] = df['Middle_Band'] + 2 * df['Close'].rolling(window=20).std()
df['Lower_Band'] = df['Middle_Band'] - 2 * df['Close'].rolling(window=20).std()

# Stochastic Oscillator
low_min = df['Low'].rolling(window=14).min()
high_max = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - low_min) / (high_max - low_min) * 100

# Average True Range (ATR)
df['ATR'] = df['High'] - df['Low'].shift(1)

# On-Balance Volume (OBV)
df['OBV'] = (df['Close'] - df['Close'].shift(1)).apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0).cumsum()

# MACD (Moving Average Convergence Divergence)
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Drop NA values
df.dropna(inplace=True)

# Splitting Data for Short-Term and Long-Term Predictions
short_term_features = ['Previous_Price', 'MA_5', 'MA_10', 'EMA_5', 'EMA_10', 'RSI', 'Middle_Band', 'Upper_Band', 'Lower_Band',
                       'Stochastic_Oscillator', 'ATR', 'OBV', 'MACD', 'Signal_Line']

long_term_features = short_term_features

# Short-Term Data Splitting
short_term_train_size = int(len(df) * 0.95)
short_term_validation_size = len(df) - short_term_train_size

short_term_train_df = df[:short_term_train_size]
short_term_validation_df = df[short_term_train_size:]

# Long-Term Data Splitting
long_term_train_size = int(len(df) * 0.95)
long_term_train_df = df[:long_term_train_size]
long_term_validation_df = df[long_term_train_size:]

# Function for Short-Term Prediction using RandomForestRegressor
def short_term_prediction(df, features, train_df, validation_df):
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    tscv = TimeSeriesSplit(n_splits=5)
    model = RandomForestRegressor(random_state=42)
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=tscv, n_jobs=-1, scoring='neg_mean_absolute_percentage_error')
    grid_search.fit(train_df[features], train_df['Close'])

    best_model = grid_search.best_estimator_
    print("Best Parameters for Short-Term Model:", grid_search.best_params_)

    validation_predictions = best_model.predict(validation_df[features])
    validation_absolute_percentage_diff = abs((validation_predictions - validation_df['Close']) / validation_df['Close'])
    validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

    print("Short-Term Model Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

    return best_model

# Function for Long-Term Prediction using LSTM
def long_term_prediction(df, features, train_df, validation_df):
    n_input = 30
    n_features = len(features)

    train_generator = TimeseriesGenerator(train_df[features].values, train_df['Close'].values, length=n_input, batch_size=32)
    validation_generator = TimeseriesGenerator(validation_df[features].values, validation_df['Close'].values, length=n_input, batch_size=32)

    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(n_input, n_features)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')

    model.fit(train_generator, validation_data=validation_generator, epochs=50, verbose=0)

    validation_predictions = model.predict(validation_generator)
    validation_actuals = validation_df['Close'].values[n_input:]
    validation_absolute_percentage_diff = abs((validation_predictions.flatten() - validation_actuals) / validation_actuals)
    validation_accuracy_ratio = (1 - validation_absolute_percentage_diff.mean()) * 100

    print("Long-Term Model Accuracy Ratio on Validation Data:", validation_accuracy_ratio, "%")

    return model

# Train models
short_term_model = short_term_prediction(df, short_term_features, short_term_train_df, short_term_validation_df)
long_term_model = long_term_prediction(df, long_term_features, long_term_train_df, long_term_validation_df)

# Predict future prices
def predict_future(model, df, features, n_days, short_term=True):
    future_dates = pd.date_range(start=df['Date'].max(), periods=n_days+1, freq='D')[1:]
    future_df = pd.DataFrame(index=future_dates, columns=df.columns)
    future_df['Date'] = future_dates
    df = pd.concat([df, future_df])

    # Fill missing values
    if short_term:
        df[features] = df[features].ffill()
    else:
        df[features] = df[features].fillna(df[features].mean())

    predictions = []
    if short_term:
        for i in range(n_days):
            prediction = model.predict(df[features].iloc[-(n_days + i):-i or None])
            predictions.append(prediction[-1])
    else:
        future_generator = TimeseriesGenerator(df[features].values, np.zeros(len(df)), length=30, batch_size=1)
        predictions = model.predict(future_generator)[-n_days:]

    return predictions

short_term_predictions = predict_future(short_term_model, df, short_term_features, 7, short_term=True)
long_term_predictions = predict_future(long_term_model, df, long_term_features, 30, short_term=False)

# Visualization using Plotly
def plot_predictions(df, short_term_predictions, long_term_predictions, short_term_days, long_term_days):
    fig = go.Figure()

    # Historical Candlestick Data
    fig.add_trace(go.Candlestick(x=df['Date'],
                                 open=df['Open'],
                                 high=df['High'],
                                 low=df['Low'],
                                 close=df['Close'],
                                 name='Historical Data'))

    # Short-Term Predictions
    future_dates_short = pd.date_range(start=df['Date'].max(), periods=short_term_days+1, freq='D')[1:]
    future_short_df = pd.DataFrame(index=future_dates_short, columns=df.columns)
    future_short_df['Close'] = short_term_predictions

    fig.add_trace(go.Candlestick(x=future_short_df.index,
                                 open=future_short_df['Close'],  # Placeholder for open prices
                                 high=future_short_df['Close'],  # Placeholder for high prices
                                 low=future_short_df['Close'],   # Placeholder for low prices
                                 close=future_short_df['Close'],
                                 name='Short-Term Predictions'))

    # Long-Term Predictions
    future_dates_long = pd.date_range(start=df['Date'].max(), periods=long_term_days+1, freq='D')[1:]
    future_long_df = pd.DataFrame(index=future_dates_long, columns=df.columns)
    future_long_df['Close'] = long_term_predictions

    fig.add_trace(go.Candlestick(x=future_long_df.index,
                                 open=future_long_df['Close'],  # Placeholder for open prices
                                 high=future_long_df['Close'],  # Placeholder for high prices
                                 low=future_long_df['Close'],   # Placeholder for low prices
                                 close=future_long_df['Close'],
                                 name='Long-Term Predictions'))

    fig.update_layout(title='Stock Price Predictions',
                      xaxis_title='Date',
                      yaxis_title='Price',
                      xaxis_rangeslider_visible=False)
    fig.show()

plot_predictions(df, short_term_predictions, long_term_predictions, 7, 30)

# New function for interactive graph with various intervals
def plot_intervals(ticker, intervals):
    fig = go.Figure()
    for interval in intervals:
        try:
            df_interval = yf.download(ticker, period='1y', interval=interval).reset_index()
            x_col = 'Datetime' if 'Datetime' in df_interval.columns else 'Date'
            fig.add_trace(go.Candlestick(x=df_interval[x_col],
                                         open=df_interval['Open'],
                                         high=df_interval['High'],
                                         low=df_interval['Low'],
                                         close=df_interval['Close'],
                                         name=interval))
        except Exception as e:
            print(f"Error downloading data for interval {interval}: {e}")

    fig.update_layout(title='Candlestick Chart with Various Intervals',
                      xaxis_title='Date',
                      yaxis_title='Price',
                      xaxis_rangeslider_visible=False)
    fig.show()

intervals = ['5m', '15m', '1h', '2h', '4h', '12h', '1d', '3d', '7d', '15d', '1mo', '3mo', '6mo', '1y', 'ytd']
plot_intervals('INTC', intervals)


[*********************100%%**********************]  1 of 1 completed

os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.



Best Parameters for Short-Term Model: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
Short-Term Model Accuracy Ratio on Validation Data: 98.89582402657754 %
Long-Term Model Accuracy Ratio on Validation Data: 68.5186323867067 %


[*********************100%%**********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['INTC']: YFChartError('%ticker%: 5m data not available for startTime=1685325673 and endTime=1716861673. The requested range must be within the last 60 days.')
[*********************100%%**********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['INTC']: YFChartError('%ticker%: 15m data not available for startTime=1685325673 and endTime=1716861673. The requested range must be within the last 60 days.')
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['INTC']: YFChartError('%ticker%: Invalid input - interval=2h is not supported. Valid intervals: [1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo]')
[*********************100%%**********************]  1 of 1 completed
ERROR:yfinance:
1 Failed