In [19]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd 
import pandas_ta as ta 

In [20]:
import pandas_datareader as pdr
from alpha_vantage.timeseries import TimeSeries

key = 'SJNDG3FXMS6F62L0'

ts = TimeSeries(key = key,output_format='pandas')

data, meta_data = ts.get_monthly_adjusted('TTM')

data


Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-05-12,25.14,25.14,25.14,25.14,25.1400,0.0,0.0
2023-04-28,25.14,25.14,25.14,25.14,25.1400,0.0,0.0
2023-03-31,25.14,25.14,25.14,25.14,25.1400,0.0,0.0
2023-02-28,25.14,25.14,25.14,25.14,25.1400,0.0,0.0
2023-01-31,23.40,25.93,22.61,25.14,25.1400,32189809.0,0.0
...,...,...,...,...,...,...,...
2005-02-28,11.35,11.64,10.66,10.88,9.2084,4014900.0,0.0
2005-01-31,12.22,12.25,10.45,11.63,9.8431,5082700.0,0.0
2004-12-31,11.00,12.18,10.71,11.92,10.0886,7659800.0,0.0
2004-11-30,9.20,11.19,9.10,11.13,9.4200,5777800.0,0.0


In [21]:
# Add technical indicators using the '4. close' column
data['RSI'] = ta.rsi(data['4. close'], length=15)
data['EMAF'] = ta.ema(data['4. close'], length=20)
data['EMAM'] = ta.ema(data['4. close'], length=100)
data['EMAS'] = ta.ema(data['4. close'], length=150)

# Create target columns
data['Target'] = data['5. adjusted close'] - data['1. open']
data['Target'] = data['Target'].shift(-1)

# Use np.where for performance and avoid warnings
data['TargetClass'] = np.where(data['Target'] > 0, 1, 0)

# Shift adjusted close to create the "next close"
data['TargetNextClose'] = data['5. adjusted close'].shift(-1)

# Drop rows with any NaNs (mainly caused by indicators and shifting)
data.dropna(inplace=True)

# Reset index if needed
data.reset_index(inplace=True)

# Drop unused columns
data.drop(['6. volume', '4. close', 'date', '7. dividend amount'], axis=1, inplace=True, errors='ignore')


In [22]:
data_set = data.iloc[:, 0:11]

# Ensure full display
pd.set_option('display.max_columns', None)

# Display the final DataFrame preview
print(data_set.head())

   1. open  2. high  3. low  5. adjusted close        RSI       EMAF  \
0    34.23    34.38   27.36            27.9892  55.933129  24.309562   
1    28.35    37.65   27.57            31.2041  59.588747  25.109603   
2    25.90    29.16   25.29            26.8254  53.154012  25.396308   
3    21.71    26.06   21.54            24.3356  49.872892  25.407136   
4    19.11    22.93   18.80            20.4243  45.179039  25.026456   

        EMAM       EMAS  Target  TargetClass  TargetNextClose  
0  25.915678  25.594400  2.8541            1          31.2041  
1  26.050219  25.688646  0.9254            1          26.8254  
2  26.091205  25.720850  2.6256            1          24.3356  
3  26.079696  25.718057  1.3143            1          20.4243  
4  25.987227  25.660997  0.7736            1          17.7736  


In [23]:
# See all columns clearly
print(data.columns.tolist())

# OR display last 5 rows to verify TargetNextClose
print(data[['5. adjusted close', 'TargetNextClose']].tail(5))


['1. open', '2. high', '3. low', '5. adjusted close', 'RSI', 'EMAF', 'EMAM', 'EMAS', 'Target', 'TargetClass', 'TargetNextClose']
    5. adjusted close  TargetNextClose
69             8.2097           9.2084
70             9.2084           9.8431
71             9.8431          10.0886
72            10.0886           9.4200
73             9.4200           7.8288


In [24]:
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0,1))
data_set_scaled = sc.fit_transform(data_set)
print(data_set_scaled)

[[1.00000000e+00 9.02154399e-01 9.91435563e-01 8.84920749e-01
  7.98759138e-01 9.28779778e-01 9.80605703e-01 9.82056560e-01
  1.00000000e+00 1.00000000e+00 1.00000000e+00]
 [8.08656036e-01 1.00000000e+00 1.00000000e+00 1.00000000e+00
  9.14393305e-01 9.80693499e-01 9.95471403e-01 9.95430286e-01
  7.78577579e-01 1.00000000e+00 8.43261838e-01]
 [7.28929385e-01 7.45960503e-01 9.07014682e-01 8.43261838e-01
  7.10850379e-01 9.99297396e-01 1.00000000e+00 1.00000000e+00
  9.73767292e-01 1.00000000e+00 7.54137971e-01]
 [5.92580540e-01 6.53201676e-01 7.54078303e-01 7.54137971e-01
  6.07062326e-01 1.00000000e+00 9.98728348e-01 9.99603709e-01
  8.23224844e-01 1.00000000e+00 6.14130668e-01]
 [5.07972665e-01 5.59545183e-01 6.42332790e-01 6.14130668e-01
  4.58586800e-01 9.75298171e-01 9.88511243e-01 9.91506743e-01
  7.61150336e-01 1.00000000e+00 5.19247290e-01]
 [4.39310120e-01 4.51526032e-01 5.57504078e-01 5.19247290e-01
  3.75805557e-01 9.37499191e-01 9.73026559e-01 9.78818286e-01
  6.53613455e-01

In [18]:
print(data_set.isnull().sum())  # Any NaNs?
print(np.isinf(data_set).sum())  # Any infinite values?


1. open               0
2. high               0
3. low                0
5. adjusted close     0
7. dividend amount    0
RSI                   0
EMAF                  0
EMAM                  0
EMAS                  0
Target                0
TargetClass           0
dtype: int64
1. open               0
2. high               0
3. low                0
5. adjusted close     0
7. dividend amount    0
RSI                   0
EMAF                  0
EMAM                  0
EMAS                  0
Target                0
TargetClass           0
dtype: int64


In [25]:
# Remove duplicate columns by keeping the first
data_set = data_set.loc[:, ~data_set.columns.duplicated()]


In [26]:
from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler(feature_range=(0, 1))
data_set_scaled = sc.fit_transform(data_set)

print(data_set_scaled.max())  # Should be 1.0
print(data_set_scaled.min())  # Should be 0.0


1.0000000000000002
0.0


In [27]:
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0,1))
data_set_scaled = sc.fit_transform(data_set)
print(data_set_scaled)

[[1.00000000e+00 9.02154399e-01 9.91435563e-01 8.84920749e-01
  7.98759138e-01 9.28779778e-01 9.80605703e-01 9.82056560e-01
  1.00000000e+00 1.00000000e+00 1.00000000e+00]
 [8.08656036e-01 1.00000000e+00 1.00000000e+00 1.00000000e+00
  9.14393305e-01 9.80693499e-01 9.95471403e-01 9.95430286e-01
  7.78577579e-01 1.00000000e+00 8.43261838e-01]
 [7.28929385e-01 7.45960503e-01 9.07014682e-01 8.43261838e-01
  7.10850379e-01 9.99297396e-01 1.00000000e+00 1.00000000e+00
  9.73767292e-01 1.00000000e+00 7.54137971e-01]
 [5.92580540e-01 6.53201676e-01 7.54078303e-01 7.54137971e-01
  6.07062326e-01 1.00000000e+00 9.98728348e-01 9.99603709e-01
  8.23224844e-01 1.00000000e+00 6.14130668e-01]
 [5.07972665e-01 5.59545183e-01 6.42332790e-01 6.14130668e-01
  4.58586800e-01 9.75298171e-01 9.88511243e-01 9.91506743e-01
  7.61150336e-01 1.00000000e+00 5.19247290e-01]
 [4.39310120e-01 4.51526032e-01 5.57504078e-01 5.19247290e-01
  3.75805557e-01 9.37499191e-01 9.73026559e-01 9.78818286e-01
  6.53613455e-01

In [28]:
duplicates = data_set.columns[data_set.columns.duplicated()].tolist()
print("Duplicate columns:", duplicates)


Duplicate columns: []


In [29]:
# Step 1: See max value column-wise
sc = MinMaxScaler(feature_range=(0, 1))
data_scaled = sc.fit_transform(data_set)
scaled_df = pd.DataFrame(data_scaled, columns=data_set.columns)

# Step 2: Find columns where max value > 1
print("Columns with scaled values > 1:")
for col in scaled_df.columns:
    if scaled_df[col].max() > 1:
        print(f"{col}: max = {scaled_df[col].max()}")

Columns with scaled values > 1:
3. low: max = 1.0000000000000002


In [30]:
print(data_set_scaled)

[[1.00000000e+00 9.02154399e-01 9.91435563e-01 8.84920749e-01
  7.98759138e-01 9.28779778e-01 9.80605703e-01 9.82056560e-01
  1.00000000e+00 1.00000000e+00 1.00000000e+00]
 [8.08656036e-01 1.00000000e+00 1.00000000e+00 1.00000000e+00
  9.14393305e-01 9.80693499e-01 9.95471403e-01 9.95430286e-01
  7.78577579e-01 1.00000000e+00 8.43261838e-01]
 [7.28929385e-01 7.45960503e-01 9.07014682e-01 8.43261838e-01
  7.10850379e-01 9.99297396e-01 1.00000000e+00 1.00000000e+00
  9.73767292e-01 1.00000000e+00 7.54137971e-01]
 [5.92580540e-01 6.53201676e-01 7.54078303e-01 7.54137971e-01
  6.07062326e-01 1.00000000e+00 9.98728348e-01 9.99603709e-01
  8.23224844e-01 1.00000000e+00 6.14130668e-01]
 [5.07972665e-01 5.59545183e-01 6.42332790e-01 6.14130668e-01
  4.58586800e-01 9.75298171e-01 9.88511243e-01 9.91506743e-01
  7.61150336e-01 1.00000000e+00 5.19247290e-01]
 [4.39310120e-01 4.51526032e-01 5.57504078e-01 5.19247290e-01
  3.75805557e-01 9.37499191e-01 9.73026559e-01 9.78818286e-01
  6.53613455e-01

In [None]:
#multiple feature form data provided to the model 
X = []
#print(data_set_scaled[0].size)
#data_set_scaled=data_set.values
backcandles = 30 
print