In [None]:
# Building a Model where it predict if the tomorrow stock will be higher or lower

In [111]:
# Data manipulation and analysis
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Data visualization
import plotly.graph_objects as go

# Machine learning and deep learning
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam

# Technical indicators (optional)
# import talib as ta  # For technical analysis indicators like SMA, RSI, etc.


In [112]:
# Fetching stock data of apple stock
import yfinance as yf

# Define the stock symbol for Apple
symbol = "NVDA"

# Fetch historical data (adjust the period and interval as needed)
df = yf.download(symbol, period="1y", interval="1d")

# Show the first few rows of the data
df.head()


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,NVDA,NVDA,NVDA,NVDA,NVDA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2024-04-10,87.010406,87.371288,83.681499,83.89843,431929000
2024-04-11,90.586235,90.709196,86.897452,87.391286,431637000
2024-04-12,88.157028,90.145381,87.501245,89.669532,426805000
2024-04-15,85.972748,90.583232,85.900773,89.068731,443077000
2024-04-16,87.386284,88.089048,86.03573,86.404604,370453000


In [113]:
print(df.columns)

MultiIndex([( 'Close', 'NVDA'),
            (  'High', 'NVDA'),
            (   'Low', 'NVDA'),
            (  'Open', 'NVDA'),
            ('Volume', 'NVDA')],
           names=['Price', 'Ticker'])


In [114]:
# Show the first few rows of the data
print("First few rows of the fetched data:")
print(df.head())
print(df.columns)

# Flatten multi-level column index (if it exists)
df.columns = [' '.join(col).strip() if isinstance(col, tuple) else col for col in df.columns.values]

# remoce MSFT from the column
df.columns = df.columns.str.replace(f'{symbol}', '', regex=False)
df.columns = df.columns.str.strip()


# Now you can check the data with flattened columns
print("\nFirst few rows after flattening columns:")
print(df.head())
print(df.columns)

# Check if the data contains NaNs for any required columns
# print("\nChecking for missing values in the data:")
# print(df.isna().sum())

# # Now try printing the first few rows of SMA columns
# print(df[['SMA 5', 'SMA 20', 'SMA 50']].head())

First few rows of the fetched data:
Price           Close       High        Low       Open     Volume
Ticker           NVDA       NVDA       NVDA       NVDA       NVDA
Date                                                             
2024-04-10  87.010406  87.371288  83.681499  83.898430  431929000
2024-04-11  90.586235  90.709196  86.897452  87.391286  431637000
2024-04-12  88.157028  90.145381  87.501245  89.669532  426805000
2024-04-15  85.972748  90.583232  85.900773  89.068731  443077000
2024-04-16  87.386284  88.089048  86.035730  86.404604  370453000
MultiIndex([( 'Close', 'NVDA'),
            (  'High', 'NVDA'),
            (   'Low', 'NVDA'),
            (  'Open', 'NVDA'),
            ('Volume', 'NVDA')],
           names=['Price', 'Ticker'])

First few rows after flattening columns:
                Close       High        Low       Open     Volume
Date                                                             
2024-04-10  87.010406  87.371288  83.681499  83.898430  4319290

In [115]:
# Function to add technical indicators (SMA, RSI, etc.)
def add_indicators(df):
    # Calculate SMAs
    df['SMA 5'] = df['Close'].rolling(window=5).mean()
    df['SMA 20'] = df['Close'].rolling(window=20).mean()
    df['SMA 50'] = df['Close'].rolling(window=50).mean()

    # Calculate price changes
    df['Price Change'] = df['Close'].pct_change()

    # Calculate target (1 if tomorrow's price is higher, 0 if lower)
    df['Target'] = (df['Close'].shift(-1) > df['Close']).astype(int)

    # Calculate RSI (Relative Strength Index)
    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0).rolling(window=14).mean())
    loss = (-delta.where(delta < 0, 0).rolling(window=14).mean())
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    return df

# Apply the indicator function to the data
df = add_indicators(df)

# Show the first few rows of the SMA columns to check if the values are calculated
print("\nFirst few rows of the SMA and other indicators:")
print(df[['SMA 5', 'SMA 20', 'SMA 50', 'Price Change', 'RSI']].head())




First few rows of the SMA and other indicators:
               SMA 5  SMA 20  SMA 50  Price Change  RSI
Date                                                   
2024-04-10       NaN     NaN     NaN           NaN  NaN
2024-04-11       NaN     NaN     NaN      0.041097  NaN
2024-04-12       NaN     NaN     NaN     -0.026817  NaN
2024-04-15       NaN     NaN     NaN     -0.024777  NaN
2024-04-16  87.82254     NaN     NaN      0.016442  NaN


In [116]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create the figure
fig = go.Figure()

# Add Candlestick chart
fig.add_trace(go.Candlestick(
    x=df.index,
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
    name='Candlestick'
))

# Add SMAs
fig.add_trace(go.Scatter(
    x=df.index,
    y=df['SMA 5'],
    line=dict(color='blue', width=1),
    name='SMA 5'
))

fig.add_trace(go.Scatter(
    x=df.index,
    y=df['SMA 20'],
    line=dict(color='orange', width=1),
    name='SMA 20'
))

fig.add_trace(go.Scatter(
    x=df.index,
    y=df['SMA 50'],
    line=dict(color='purple', width=1),
    name='SMA 50'
))

# Update layout with title and axis labels
fig.update_layout(
    title='Apple Stock Price and SMA Analysis',
    yaxis_title='Stock Price (USD)',
    xaxis_title='Date',
    xaxis_rangeslider_visible=False,  # Hide the range slider
    height=800
)

# Show the plot
fig.show()


In [117]:
# Drop rows where technical indicators (SMA or RSI) are NaN
df = df.dropna(subset=['SMA 5', 'SMA 20', 'SMA 50', 'RSI'])

# Prepare data for LSTM
def prepare_data(df, look_back=10):
    features = ['Close', 'SMA 5', 'SMA 20', 'SMA 50', 'Price Change', 'RSI']

    # Scale the features
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(df[features])

    # Prepare sequences for LSTM (look_back time steps)
    X, y = [], []
    for i in range(look_back, len(scaled_data) - 1):
        X.append(scaled_data[i - look_back:i])  # Get the previous 'look_back' rows
        y.append(df['Target'].iloc[i])  # The target (next day's movement)

    # Convert to numpy arrays
    return np.array(X), np.array(y)

# Prepare the data
look_back = 10  # Number of previous days to use for prediction
X, y = prepare_data(df, look_back)

# Print the shape of the data to verify it
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")


Shape of X: (192, 10, 6)
Shape of y: (192,)


In [118]:
def create_model(input_shape):
    model = Sequential()

    # LSTM Layer
    model.add(LSTM(64, input_shape=input_shape, return_sequences=True))
    model.add(Dropout(0.2))  # Dropout layer to reduce overfitting

    # Another LSTM Layer
    model.add(LSTM(32, return_sequences=False))
    model.add(Dropout(0.2))

    # Fully connected layer
    model.add(Dense(16, activation='relu'))

    # Output layer (binary classification: 0 or 1)
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model


In [119]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Create the model
model = create_model(X_train.shape[1:])

# Early stopping callback to prevent overfitting
# early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
train_score = model.evaluate(X_train, y_train, verbose=0)
test_score = model.evaluate(X_test, y_test, verbose=0)

print(f"Train accuracy: {train_score[1]:.4f}")
print(f"Test accuracy: {test_score[1]:.4f}")


Epoch 1/10



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 184ms/step - accuracy: 0.4689 - loss: 0.6991 - val_accuracy: 0.4103 - val_loss: 0.6984
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - accuracy: 0.5458 - loss: 0.6874 - val_accuracy: 0.4615 - val_loss: 0.6996
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.5545 - loss: 0.6857 - val_accuracy: 0.4872 - val_loss: 0.7015
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.5450 - loss: 0.6827 - val_accuracy: 0.4872 - val_loss: 0.7029
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5967 - loss: 0.6832 - val_accuracy: 0.4872 - val_loss: 0.7048
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5802 - loss: 0.6789 - val_accuracy: 0.5128 - val_loss: 0.7083
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [120]:
# Load the unseen data
unseen_data = pd.read_csv('unseen_stock_data_200.csv', index_col='Date', parse_dates=True)

# Calculate SMA (for example, 5, 20, and 50 days)
unseen_data['SMA_5'] = unseen_data['Close'].rolling(window=5).mean()
unseen_data['SMA_20'] = unseen_data['Close'].rolling(window=20).mean()
unseen_data['SMA_50'] = unseen_data['Close'].rolling(window=50).mean()

# Calculate price changes
unseen_data['Price Change'] = df['Close'].pct_change()

# Calculate target (1 if tomorrow's price is higher, 0 if lower)
unseen_data['Target'] = (df['Close'].shift(-1) > df['Close']).astype(int)

# Calculate RSI (Relative Strength Index)
delta = unseen_data['Close'].diff()
gain = (delta.where(delta > 0, 0).rolling(window=14).mean())
loss = (-delta.where(delta < 0, 0).rolling(window=14).mean())
rs = gain / loss
unseen_data['RSI'] = 100 - (100 / (1 + rs))

# print(unseen_data.head())
print(unseen_data.tail())



              Open    High     Low   Close    Volume    SMA_5    SMA_20  \
Date                                                                      
2025-01-08  421.93  422.73  420.00  420.65  16611754  420.004  420.5770   
2025-01-09  415.58  416.37  414.19  415.99  16114593  418.332  420.3750   
2025-01-10  420.77  422.89  417.80  420.38  16226952  418.326  419.9455   
2025-01-13  420.29  422.66  418.28  420.81  20931161  419.672  420.0610   
2025-01-14  414.29  416.25  413.20  414.82  21790593  418.530  419.0550   

              SMA_50  Price Change  Target        RSI  
Date                                                   
2025-01-08  420.6266     -0.000214     0.0  54.250672  
2025-01-09  420.6444           NaN     NaN  51.963309  
2025-01-10  420.7422     -0.029976     0.0  46.021700  
2025-01-13  420.7266     -0.019719     0.0  52.208997  
2025-01-14  420.5582     -0.011034     1.0  41.448017  


In [121]:
# Make Prediction for tomorrow
last_sequence = X[-1:]  # Get the last sequence from the data
tomorrow_pred = model.predict(last_sequence)[0][0]

# Output the prediction
print(f"\nPrediction for tomorrow:")
print(f"Probability of price increase: {tomorrow_pred:.2%}")
print(f"Predicted direction: {'UP' if tomorrow_pred > 0.5 else 'DOWN'}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 372ms/step

Prediction for tomorrow:
Probability of price increase: 62.11%
Predicted direction: UP


In [122]:
!git init


[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/.git/


In [None]:
|