In [None]:
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import MinMaxScaler

# Preprocess the stock CSV into fixed-length windows for an LSTM.
#
# Steps: load the CSV, keep one ticker's OHLCV columns, coerce them to numeric,
# drop incomplete rows, min-max scale the features, then slice the scaled
# matrix into overlapping windows (X) with the next row's 'close' as target (y).
# Outputs written to the working directory: 'X.npy', 'y.npy', 'scaler.pkl'.

# Load dataset, skipping first two rows
df = pd.read_csv('/content/drive/MyDrive/mini project/stock_data.csv', skiprows=2)

# Manually set column names (the assignment requires exactly 17 columns)
df.columns = ['ticker', 'date', 'open', 'high', 'low', 'close', 'volume',
              'open_jnj', 'high_jnj', 'low_jnj', 'close_jnj', 'volume_jnj',
              'open_nke', 'high_nke', 'low_nke', 'close_nke', 'volume_nke']

# Convert date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Select AAPL stock for now (modify for JNJ or NKE).
# .copy() makes the subset an independent frame so the assignment below does
# not trigger pandas' SettingWithCopyWarning.
numeric_columns = ['open', 'high', 'low', 'close', 'volume']
df = df[['date'] + numeric_columns].copy()

# Convert numeric columns to float; unparseable values become NaN
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Drop rows with any missing value (including the NaNs produced above)
df = df.dropna()

# Normalize features to [0, 1].
# NOTE(review): the scaler is fitted on every row, so any later train/test
# split shares scaling statistics across both parts — confirm this is intended.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[numeric_columns])

# Create sequences for LSTM.
# NOTE(review): windows follow the row order of the file; this assumes the CSV
# is already in chronological order — verify against the data source.
sequence_length = 50
target_index = numeric_columns.index('close')  # column predicted by the model

num_sequences = len(scaled_data) - sequence_length
if num_sequences <= 0:
    # Without this guard the script would silently save empty, shapeless arrays.
    raise ValueError(
        f"Need more than {sequence_length} valid rows to build sequences, "
        f"got {len(scaled_data)}."
    )

# X[i] holds rows i .. i+sequence_length-1; y[i] is the 'close' value of the
# row immediately after that window.
X = np.stack([scaled_data[start:start + sequence_length]
              for start in range(num_sequences)])
y = scaled_data[sequence_length:, target_index].copy()

# Save processed data
np.save('X.npy', X)
np.save('y.npy', y)
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

print("Preprocessed data saved: 'X.npy', 'y.npy', 'scaler.pkl'.")


Preprocessed data saved: 'X.npy', 'y.npy', 'scaler.pkl'.
