In [1]:
import pandas as pd
import numpy as np
import yfinance as yf



In [2]:
# ---- Run configuration -------------------------------------------------
symbol = 'TSLA'             # instrument handed to yf.Ticker below
startDate = '2024-05-23'    # forwarded as `start` to ticker.history
endDate = '2024-05-30'      # forwarded as `end` to ticker.history
# NOTE(review): intraday history from the data provider may only be available
# for a recent window — confirm these fixed dates are still retrievable.

RSI = 300                   # RSI look-back length, in bars
n_back = 10                 # number of lagged Close_prev_* columns to build
x_forward = 10              # number of forward Close_future_* columns to build

# Longest look-back among the settings above (not referenced by the cells shown here)
n_back_full = max(n_back, RSI)

# Handle used by the download cell to request price history
ticker = yf.Ticker(symbol)

In [3]:
#info = ticker.info

In [4]:
# Download 1-minute bars for the configured symbol and date range
tickerDf = ticker.history(interval='1m', start=startDate, end=endDate)

# Fail fast with a readable message rather than an obscure error (or an empty
# export) several cells later.
if tickerDf.empty:
    raise ValueError(
        f"No 1m price data returned for {symbol} between {startDate} and {endDate}"
    )

# Always move the index into regular column(s). Resetting only when the index
# is named would, for an unnamed index, leave 'Open' as the first column and
# the rename below would then silently turn 'Open' into 'Open time'.
tickerDf = tickerDf.reset_index()

# Rename the first column (the former index) to 'Open time'
first_column_name = tickerDf.columns[0]
tickerDf = tickerDf.rename(columns={first_column_name: 'Open time'})

# Columns that are not used by any later cell
columns_to_drop = ['Dividends', 'Stock Splits']

# errors='ignore' keeps the cell working when the download did not include
# these columns in the first place.
tickerDf = tickerDf.drop(columns=columns_to_drop, errors='ignore')

In [5]:
# Normalise the timestamp column to pandas datetimes.
# NOTE(review): `unit='ms'` is only meaningful for numeric epoch input — confirm it is still needed for this data source.
tickerDf['Open time'] = pd.to_datetime(tickerDf['Open time'], unit='ms')

# Coerce the two price columns used below to numeric; unparseable entries become NaN
for price_col in ('Open', 'Close'):
    tickerDf[price_col] = pd.to_numeric(tickerDf[price_col], errors='coerce')

# Per-bar move: absolute (Close - Open), then relative to the Open in percent
tickerDf['Gain/Loss'] = tickerDf['Close'] - tickerDf['Open']
tickerDf['Percentage Change'] = (tickerDf['Gain/Loss'] / tickerDf['Open']) * 100

# Running total of 'Percentage Change' that restarts whenever the sign of the
# value differs from the previous row's sign (each same-sign run is one group)
pct_sign = np.sign(tickerDf['Percentage Change'])
sign_changes = pct_sign.diff().ne(0)
run_id = sign_changes.cumsum()
tickerDf['Conditional Cumulative Sum'] = tickerDf.groupby(run_id)['Percentage Change'].cumsum()

# Rolling means of 'Close' over 300, 60 and 14 rows.
# min_periods=1 produces a value from the very first row, so early rows
# average over fewer rows than the nominal window.
for window in (300, 60, 14):
    tickerDf[f'{window} Period MA'] = tickerDf['Close'].rolling(window=window, min_periods=1).mean()

# Gap between each row's Open and the 300-row rolling mean of Close
tickerDf['Distance_300_MA'] = tickerDf['Open'] - tickerDf['300 Period MA']

# Lagged closes: Close_prev_k holds the Close from k rows earlier (NaN for the first k rows)
for lag in range(1, n_back + 1):
    tickerDf[f'Close_prev_{lag}'] = tickerDf['Close'].shift(lag)

# Forward closes: Close_future_k holds the Close from k rows later (NaN for the last k rows)
for step in range(1, x_forward + 1):
    tickerDf[f'Close_future_{step}'] = tickerDf['Close'].shift(-step)


In [6]:
# Relative Strength Index helper
def calculate_rsi(df, periods=14):
    """Return an RSI series computed from the 'Open' column of ``df``.

    Parameters
    ----------
    df : DataFrame with an 'Open' column.
    periods : look-back length; used both as the smoothing span
        (``com=periods-1``) and as the minimum number of rows required
        before a value is produced.

    Returns
    -------
    Series aligned with ``df``; rows before ``periods`` observations are NaN.
    """
    open_delta = df['Open'].diff()

    # Split the row-to-row change into its upward and downward parts.
    # The leading NaN from diff() is treated as "no move" (0) on both sides.
    up_moves = open_delta.clip(lower=0).fillna(0)
    down_moves = (-open_delta).clip(lower=0).fillna(0)

    # Same exponential smoothing settings for both sides
    smoothing = dict(com=periods - 1, min_periods=periods)
    mean_up = up_moves.ewm(**smoothing).mean()
    mean_down = down_moves.ewm(**smoothing).mean()

    # The tiny constant keeps the ratio finite when there were no down moves
    relative_strength = mean_up / (mean_down + 1e-10)

    return 100 - (100 / (1 + relative_strength))

# Compute the RSI of the 'Open' prices using the look-back length from the
# configuration cell (`RSI`), so changing the setting there actually changes
# both the calculation and the column name (e.g. 'RSI_300' for RSI = 300).
tickerDf[f'RSI_{RSI}'] = calculate_rsi(tickerDf, periods=RSI)


In [8]:
# Coerce every column to a numeric dtype; values that cannot be parsed become NaN.
# In the recorded output further down, 'Open time' appears as large integers after this step.
tickerDf = tickerDf.apply(lambda column: pd.to_numeric(column, errors='coerce'))

# Define the function to process the highest future close difference
def process_highest_difference(row):
    """Bucket the best upcoming close relative to the current close.

    Looks at every non-NaN ``Close_future_*`` entry of ``row``, takes the
    largest one, computes ``(max_future - Close) / Close`` rounded to three
    decimals, and returns the thousandths digit of that value, capped at 7.

    Parameters
    ----------
    row : pandas Series with a 'Close' entry and zero or more
        'Close_future_*' entries.

    Returns
    -------
    int in 0..7. 0 is returned when 'Close' is NaN or zero, when there are no
    valid future values, or when the rounded difference is negative.
    """
    close_value = row['Close']
    # A missing or zero close gives no meaningful relative difference
    if pd.isna(close_value) or close_value == 0:
        return 0

    # Read the column labels from the row itself, not from a global DataFrame,
    # so the function works on any row regardless of notebook state.
    future_values = [
        row[col]
        for col in row.index
        if str(col).startswith('Close_future_') and pd.notna(row[col])
    ]
    if not future_values:
        return 0

    # Find the maximum future value and express it relative to the current close
    max_future_value = max(future_values)
    percentage_difference = round((max_future_value - close_value) / close_value, 3)
    # Handle negative values
    if percentage_difference < 0:
        return 0

    # Extract the thousandths digit. Round to the nearest integer before
    # taking the digit: a float product can land just below the intended
    # integer, and plain truncation would then yield the wrong digit.
    thousandths = int(round(float(percentage_difference) * 1000))
    last_digit = thousandths % 10
    return min(last_digit, 7)

# Evaluate process_highest_difference once per row (axis=1) and store the
# resulting values as the 'Processed Difference' column
processed_difference = tickerDf.apply(process_highest_difference, axis=1)
tickerDf['Processed Difference'] = processed_difference

In [9]:
# Persist the full feature table as JSON in the current working directory
tickerDf.to_json(path_or_buf='data_stocks_1.json')

In [10]:
# Print the frame with the column limit lifted for this block only, so every
# column is shown (the setting reverts when the `with` block exits).
with pd.option_context('display.max_columns', None):  # None means unlimited
    print(tickerDf)

                Open time        Open        High         Low       Close  \
0     1716471000000000000  180.399994  180.399994  180.399994  180.399994   
1     1716471060000000000  180.410004  180.699997  180.037506  180.699997   
2     1716471120000000000  180.789993  180.789993  180.259995  180.449905   
3     1716471180000000000  180.440002  180.462097  179.634995  179.899994   
4     1716471240000000000  179.880005  179.949997  179.460007  179.579895   
...                   ...         ...         ...         ...         ...   
1553  1717012500000000000  176.740005  176.744995  176.449997  176.539993   
1554  1717012560000000000  176.529999  176.529999  176.309998  176.315002   
1555  1717012620000000000  176.315002  176.330002  176.160004  176.259995   
1556  1717012680000000000  176.250000  176.359695  176.250000  176.349899   
1557  1717012740000000000  176.350006  176.350006  176.050003  176.210007   

       Volume  Gain/Loss  Percentage Change  Conditional Cumulative Sum  \


In [11]:
# List the column labels of the final feature table
tickerDf.columns

Index(['Open time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Gain/Loss',
       'Percentage Change', 'Conditional Cumulative Sum', '300 Period MA',
       '60 Period MA', '14 Period MA', 'Distance_300_MA', 'Close_prev_1',
       'Close_prev_2', 'Close_prev_3', 'Close_prev_4', 'Close_prev_5',
       'Close_prev_6', 'Close_prev_7', 'Close_prev_8', 'Close_prev_9',
       'Close_prev_10', 'Close_future_1', 'Close_future_2', 'Close_future_3',
       'Close_future_4', 'Close_future_5', 'Close_future_6', 'Close_future_7',
       'Close_future_8', 'Close_future_9', 'Close_future_10', 'RSI_300',
       'Processed Difference'],
      dtype='object')

In [12]:
# Preview the first rows of the final feature table
tickerDf.head()

Unnamed: 0,Open time,Open,High,Low,Close,Volume,Gain/Loss,Percentage Change,Conditional Cumulative Sum,300 Period MA,...,Close_future_3,Close_future_4,Close_future_5,Close_future_6,Close_future_7,Close_future_8,Close_future_9,Close_future_10,RSI_300,Processed Difference
0,1716471000000000000,180.399994,180.399994,180.399994,180.399994,2807532,0.0,0.0,0.0,180.399994,...,179.899994,179.579895,179.289993,178.879898,178.967896,178.949997,178.520004,178.295593,,2
1,1716471060000000000,180.410004,180.699997,180.037506,180.699997,476665,0.289993,0.160741,0.160741,180.549995,...,179.579895,179.289993,178.879898,178.967896,178.949997,178.520004,178.295593,179.054993,,0
2,1716471120000000000,180.789993,180.789993,180.259995,180.449905,364609,-0.340088,-0.188112,-0.188112,180.516632,...,179.289993,178.879898,178.967896,178.949997,178.520004,178.295593,179.054993,179.330002,,0
3,1716471180000000000,180.440002,180.462097,179.634995,179.899994,603120,-0.540009,-0.299273,-0.487385,180.362473,...,178.879898,178.967896,178.949997,178.520004,178.295593,179.054993,179.330002,179.100006,,0
4,1716471240000000000,179.880005,179.949997,179.460007,179.579895,341432,-0.30011,-0.166839,-0.654224,180.205957,...,178.967896,178.949997,178.520004,178.295593,179.054993,179.330002,179.100006,178.835007,,0
