In [None]:
import numpy as np

# Build multi-step forecasting targets from the single-step dataset.
#
# X.npy / y.npy are read from Drive. The cell in this notebook that writes them
# builds 60-step windows of the scaled close price (one feature) with the next
# value as the target.
# NOTE(review): that cell appears further down, so on a fresh kernel it must be
# run before this one (or the files must already exist on Drive).
X = np.load("/content/drive/MyDrive/mini project/X.npy")  # Shape: (samples, timesteps, features)
y = np.load("/content/drive/MyDrive/mini project/y.npy")  # Shape: (samples,)

# X[i] is paired with y[i]; everything below relies on that alignment.
assert len(X) == len(y), f"X and y are misaligned: {len(X)} windows vs {len(y)} targets"

# Define time horizons for multi-step forecasting
time_horizons = [1, 3, 5, 10, 15]
y_multi_step = {}  # defined here but never filled in by this cell

# Create multi-step target arrays
for T in time_horizons:
    # Row i of y_T is y[i], ..., y[i+T-1]: the T values that follow window X[i].
    # There are len(y) - T + 1 complete windows; the previous
    # range(len(y) - T) / X[:-T] pairing dropped the last one.
    # sliding_window_view raises ValueError if T exceeds len(y).
    y_T = np.ascontiguousarray(np.lib.stride_tricks.sliding_window_view(y, T))
    X_T = X[:len(y_T)]  # Keep only the windows that have a full T-step target

    # Save separately for different prediction horizons
    # NOTE(review): the 50-step preprocessing cell of this notebook writes its
    # training split to these same file names, so whichever cell runs last
    # decides what is on disk.
    np.save(f"/content/drive/MyDrive/mini project/X_train_T{T}.npy", X_T)
    np.save(f"/content/drive/MyDrive/mini project/y_train_T{T}.npy", y_T)

print("Multi-step training datasets created successfully!")


Multi-step training datasets created successfully!


In [None]:
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load dataset, skipping first two rows
df = pd.read_csv('/content/drive/MyDrive/mini project/stock_data.csv', skiprows=2)

# Manually set column names (overrides whatever header read_csv picked up)
df.columns = ['ticker', 'date', 'open', 'high', 'low', 'close', 'volume',
              'open_jnj', 'high_jnj', 'low_jnj', 'close_jnj', 'volume_jnj',
              'open_nke', 'high_nke', 'low_nke', 'close_nke', 'volume_nke']

# Convert date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Select the un-suffixed price columns (the AAPL series; swap in the *_jnj or
# *_nke columns for the other tickers). The explicit .copy() gives an
# independent frame, so the column assignment below no longer triggers
# pandas' SettingWithCopyWarning.
df = df[['date', 'open', 'high', 'low', 'close', 'volume']].copy()

# Convert numeric columns; anything unparseable becomes NaN
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Drop rows with any missing value (including the NaNs produced by the coercion)
df = df.dropna()

# Normalize features to [0, 1], column by column.
# NOTE(review): the scaler is fitted on every row, including the rows that end
# up in the test split later in this cell, so the test set's min/max leak into
# the scaling. Fit on the training rows only if a strict hold-out is needed.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[numeric_columns])

# Save the scaler for inverse transformation later
with open('/content/drive/MyDrive/mini project/scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Define sequence length and time horizons
sequence_length = 50
time_horizons = [1, 3, 5, 10, 15]

# Generate train-test datasets for each T
for T in time_horizons:
    # A sample needs sequence_length input rows followed by T target rows, so
    # the last usable start index is len(scaled_data) - sequence_length - T.
    # The previous range() stopped one short of it and dropped the final window.
    n_windows = len(scaled_data) - sequence_length - T + 1

    # Inputs: past `sequence_length` timesteps, all scaled feature columns
    X = np.array([scaled_data[i:i + sequence_length] for i in range(n_windows)])
    # Targets: the next T values of column 3 ('close' in numeric_columns order)
    y = np.array([
        scaled_data[i + sequence_length:i + sequence_length + T, 3]
        for i in range(n_windows)
    ])

    # Chronological split: first 80% for training, last 20% for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Save files for each T-step model
    np.save(f"/content/drive/MyDrive/mini project/X_train_T{T}.npy", X_train)
    np.save(f"/content/drive/MyDrive/mini project/y_train_T{T}.npy", y_train)
    np.save(f"/content/drive/MyDrive/mini project/X_test_T{T}.npy", X_test)
    np.save(f"/content/drive/MyDrive/mini project/y_test_T{T}.npy", y_test)

    print(f"✅ Data saved for T={T} steps ahead!")

print("🚀 Preprocessing completed! All train & test datasets are ready.")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')


✅ Data saved for T=1 steps ahead!
✅ Data saved for T=3 steps ahead!
✅ Data saved for T=5 steps ahead!
✅ Data saved for T=10 steps ahead!
✅ Data saved for T=15 steps ahead!
🚀 Preprocessing completed! All train & test datasets are ready.


In [None]:
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import MinMaxScaler

# Load dataset, skipping first two rows
df = pd.read_csv('/content/drive/MyDrive/mini project/stock_data.csv', skiprows=2)

# Set column names (overrides whatever header read_csv picked up)
df.columns = ['ticker', 'date', 'open', 'high', 'low', 'close', 'volume',
              'open_jnj', 'high_jnj', 'low_jnj', 'close_jnj', 'volume_jnj',
              'open_nke', 'high_nke', 'low_nke', 'close_nke', 'volume_nke']

# Convert date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Select AAPL stock for now (modify for JNJ or NKE). The explicit .copy()
# gives an independent frame, so the column assignments below do not trigger
# pandas' SettingWithCopyWarning.
df = df[['date', 'open', 'high', 'low', 'close', 'volume']].copy()

# Convert numeric columns; anything unparseable becomes NaN
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Drop rows with any missing value
df = df.dropna()

# Apply rolling mean smoothing (window size 3). min_periods=1 keeps the first
# rows, averaging over however many values are available so far. The original
# 'close' column is overwritten, so everything derived from it below (scaler,
# inputs and targets) is based on the smoothed series, not the raw prices.
df['close'] = df['close'].rolling(window=3, min_periods=1).mean()

# Normalize only 'close' prices to [0, 1]
scaler_close = MinMaxScaler()
df['close_scaled'] = scaler_close.fit_transform(df[['close']])

# Sequence preparation
sequence_length = 60

# Pull the scaled series out of the frame once. The previous loop rebuilt a
# one-column DataFrame and its array on every iteration, which made the
# windowing quadratic in the number of rows.
close_column = df[['close_scaled']].to_numpy()  # 2-D: (rows, 1)
n_samples = len(close_column) - sequence_length

# Inputs: every run of `sequence_length` consecutive scaled close values
X = np.array([close_column[i:i + sequence_length] for i in range(n_samples)])
# Targets: the value right after each window, i.e. the series shifted by
# `sequence_length`
y = close_column[sequence_length:, 0].copy()

# Save processed data
np.save('/content/drive/MyDrive/mini project/X.npy', X)
np.save('/content/drive/MyDrive/mini project/y.npy', y)
with open('/content/drive/MyDrive/mini project/scaler_close.pkl', 'wb') as f:
    pickle.dump(scaler_close, f)

print("Preprocessed data saved: 'X.npy', 'y.npy', 'scaler_close.pkl'.")


Preprocessed data saved: 'X.npy', 'y.npy', 'scaler_close.pkl'.
