In [1]:
import pandas as pd
import numpy as np
import yfinance as yf



In [2]:
# --- Run configuration -------------------------------------------------------
# Date window handed to ticker.history(start=..., end=...) in the fetch cell.
startDate = '2024-04-23'
endDate = '2024-04-30'
# Ticker symbol passed to yf.Ticker below.
symbol='TSLA'
# Long look-back length in rows; in this cell it only feeds n_back_full.
# NOTE(review): the moving-average window and the RSI period in the visible
# feature cells are hard-coded to 300 instead of reading this value -- keep
# them in sync by hand if it is changed.
RSI= 300
# Number of lagged Close columns built later (Close_prev_1 .. Close_prev_<n_back>).
n_back = 10
# Larger of the two look-backs.
# Presumably the warm-up row count trimmed by a later cell -- TODO confirm, it
# is not referenced in the visible cells.
n_back_full = max(n_back, RSI)
# Number of look-ahead Close columns built later
# (Close_future_1 .. Close_future_<x_forward>).
x_forward = 10

# Fetch the Ticker object for the symbol
ticker = yf.Ticker(symbol)

In [3]:
#info = ticker.info

In [4]:
# Download 1-minute bars for the configured date window.
tickerDf = ticker.history(interval='1m', start=startDate, end=endDate)

# Always move the index into the columns. reset_index() inserts the former
# index level(s) at the front of the frame, so the first column is the old
# index whether or not it carried a name. Skipping the reset for an unnamed
# index would make the rename below relabel the first price column instead.
tickerDf = tickerDf.reset_index()

# Rename the first column (the former index) to 'Open time'
first_column_name = tickerDf.columns[0]
tickerDf = tickerDf.rename(columns={first_column_name: 'Open time'})

# Columns that the feature cells do not use
columns_to_drop = ['Dividends', 'Stock Splits']

# errors='ignore' keeps the cell runnable when the download does not contain
# one of these columns instead of failing with a KeyError.
tickerDf = tickerDf.drop(columns=columns_to_drop, errors='ignore')

In [6]:
# Normalise dtypes first: timestamps to datetime, prices to numbers.
# unit='ms' tells pandas how to interpret numeric epoch values.
tickerDf['Open time'] = pd.to_datetime(tickerDf['Open time'], unit='ms')
for price_col in ('Open', 'Close'):
    # Anything that cannot be parsed as a number becomes NaN instead of raising
    tickerDf[price_col] = pd.to_numeric(tickerDf[price_col], errors='coerce')

# Short handles for the two price series used throughout this cell
open_px = tickerDf['Open']
close_px = tickerDf['Close']

# Per-bar move: absolute (Close - Open) and relative to the Open, in percent
tickerDf['Gain/Loss'] = close_px.sub(open_px)
tickerDf['Percentage Change'] = tickerDf['Gain/Loss'].div(open_px).mul(100)

# A new run starts wherever the sign of the percentage change differs from the
# previous bar. The first row always opens a run because its diff is NaN.
pct_sign = np.sign(tickerDf['Percentage Change'])
sign_changes = pct_sign.diff() != 0
run_id = sign_changes.cumsum()
# Running total of the percentage change, restarted at every sign flip
tickerDf['Conditional Cumulative Sum'] = tickerDf.groupby(run_id)['Percentage Change'].cumsum()

# Rolling means of the Close over three horizons. min_periods=1 means the early
# rows average over however many bars are available so far.
ma_windows = {'300 Period MA': 300, '60 Period MA': 60, '14 Period MA': 14}
for ma_col, window in ma_windows.items():
    tickerDf[ma_col] = close_px.rolling(window=window, min_periods=1).mean()

# Gap between the bar's Open and the 300-bar average of the Close
tickerDf['Distance_300_MA'] = open_px.sub(tickerDf['300 Period MA'])

# Lagged closes: Close_prev_k holds the Close from k rows earlier
for i in range(1, n_back + 1):
    tickerDf[f'Close_prev_{i}'] = close_px.shift(periods=i)

# Look-ahead closes: Close_future_k holds the Close from k rows later
for i in range(1, x_forward + 1):
    tickerDf[f'Close_future_{i}'] = close_px.shift(periods=-i)


                  Open time        Open        High         Low       Close  \
0 2024-04-23 09:30:00-04:00  143.039993  143.039993  143.039993  143.039993   
1 2024-04-23 09:31:00-04:00  143.009995  143.440002  142.750000  143.320007   
2 2024-04-23 09:32:00-04:00  143.294998  143.869995  142.619995  142.675095   
3 2024-04-23 09:33:00-04:00  142.690002  142.809998  142.070007  142.138504   
4 2024-04-23 09:34:00-04:00  142.115005  142.339996  141.800003  142.130005   

    Volume  Gain/Loss  Percentage Change  Conditional Cumulative Sum  \
0  3549380   0.000000           0.000000                    0.000000   
1   540162   0.310013           0.216777                    0.216777   
2   507777  -0.619904          -0.432607                   -0.432607   
3   540335  -0.551498          -0.386501                   -0.819108   
4   591431   0.014999           0.010554                    0.010554   

   300 Period MA  ...  Close_future_1  Close_future_2  Close_future_3  \
0     143.039993  .

In [7]:
# RSI helper: exponentially weighted average gain vs. average loss
def calculate_rsi(df: pd.DataFrame, periods: int = 14, column: str = 'Open') -> pd.Series:
    """Compute the Relative Strength Index (RSI) of one price column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the price column, ordered oldest row first.
    periods : int, default 14
        Smoothing length. It sets the exponential weighting
        (``com=periods - 1``) and the minimum number of rows required before
        a value is produced (``min_periods=periods``).
    column : str, default 'Open'
        Name of the price column to analyse. The default keeps the original
        behaviour of using the 'Open' prices.

    Returns
    -------
    pandas.Series
        RSI values aligned with ``df.index``. The first ``periods - 1`` rows
        are NaN.

    Raises
    ------
    KeyError
        If ``column`` is not present in ``df``.

    Notes
    -----
    The first row has no previous price, so its change is counted as zero gain
    and zero loss. A window with neither gains nor losses yields 0, because
    the zero average gain is divided by the small epsilon.
    """
    # Row-to-row price change; the first entry is NaN
    delta = df[column].diff()

    # Split the change into its positive part (gain) and the magnitude of its
    # negative part (loss); the undefined first change becomes 0 in both.
    gain = delta.clip(lower=0).fillna(0)
    loss = (-delta).clip(lower=0).fillna(0)

    # Exponentially weighted means of gains and losses
    avg_gain = gain.ewm(com=periods - 1, min_periods=periods).mean()
    avg_loss = loss.ewm(com=periods - 1, min_periods=periods).mean()

    # Relative Strength (RS); the small constant avoids division by zero when
    # the window contains no losses.
    rs = avg_gain / (avg_loss + 1e-10)

    # Map RS onto the 0-100 RSI scale
    rsi = 100 - (100 / (1 + rs))

    return rsi

# Calculate RSI for the 'Open' prices with a period of 300
# RSI is a momentum indicator that measures the magnitude of recent price
# changes to evaluate overbought or oversold conditions.
# The first 299 rows are NaN because calculate_rsi requires min_periods=periods.
tickerDf['RSI_300'] = calculate_rsi(tickerDf, periods=300)


                  Open time        Open        High         Low       Close  \
0 2024-04-23 09:30:00-04:00  143.039993  143.039993  143.039993  143.039993   
1 2024-04-23 09:31:00-04:00  143.009995  143.440002  142.750000  143.320007   
2 2024-04-23 09:32:00-04:00  143.294998  143.869995  142.619995  142.675095   
3 2024-04-23 09:33:00-04:00  142.690002  142.809998  142.070007  142.138504   
4 2024-04-23 09:34:00-04:00  142.115005  142.339996  141.800003  142.130005   

    Volume  Gain/Loss  Percentage Change  Conditional Cumulative Sum  \
0  3549380   0.000000           0.000000                    0.000000   
1   540162   0.310013           0.216777                    0.216777   
2   507777  -0.619904          -0.432607                   -0.432607   
3   540335  -0.551498          -0.386501                   -0.819108   
4   591431   0.014999           0.010554                    0.010554   

   300 Period MA  ...  Close_future_2  Close_future_3  Close_future_4  \
0     143.039993  .

In [11]:
# Write the feature table to a JSON file in the current working directory.
# NOTE(review): the file name is hard-coded and relative, so the output
# location depends on where the kernel was started -- confirm this is intended.
tickerDf.to_json('data_stocks_1.json')

In [13]:
# Print the frame with every column visible. The option is only overridden
# inside the with-block.
with pd.option_context('display.max_columns', None):  # None means unlimited
    # NOTE(review): print() emits the plain-text repr of the whole frame; a bare
    # tickerDf.head() as the cell's last expression would give the rich table.
    print(tickerDf)

                Open time        Open        High         Low       Close  \
300   1713897000000000000  145.425003  145.500000  145.272995  145.279999   
301   1713897060000000000  145.289993  145.445007  145.149994  145.389999   
302   1713897120000000000  145.380005  145.410004  145.300797  145.360992   
303   1713897180000000000  145.375000  145.380005  145.149994  145.149994   
304   1713897240000000000  145.160004  145.380005  145.160004  145.350006   
...                   ...         ...         ...         ...         ...   
1933  1714419900000000000  193.940002  194.149994  193.756104  194.119995   
1934  1714419960000000000  194.115005  194.139999  193.740005  193.779999   
1935  1714420020000000000  193.789307  194.169998  193.730194  194.130005   
1936  1714420080000000000  194.110107  194.369995  194.095001  194.257202   
1937  1714420140000000000  194.240005  194.259995  193.960007  194.067001   

      Volume  Gain/Loss  Percentage Change  Conditional Cumulative Sum  \
3