In [19]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from sklearn.preprocessing import MinMaxScaler

# Load and preprocess data
# NOTE(review): rows are consumed in file order and the 'atl_date' index is
# never sorted; the windowing later in this cell presumably expects a single
# asset's prices in chronological order — confirm against the CSV producer.
df = pd.read_csv('kafka_crypto_data.csv', index_col='atl_date', parse_dates=True)

# Ensure 'current_price' column exists
data_column = 'current_price'
if data_column not in df.columns:
    raise ValueError(f"Column '{data_column}' not found in the DataFrame")

# Extract data, discarding rows with a missing price. MinMaxScaler ignores NaN
# while fitting but keeps it in the transformed output, so a single missing
# value would otherwise flow into the training windows and turn the loss NaN.
price_series = df[data_column].dropna()
missing_count = len(df) - len(price_series)
if missing_count:
    print(f"Dropped {missing_count} rows with missing '{data_column}' values")
data = price_series.values

# Print initial data info
print("Initial Data Type:", type(data))
print("Data Sample:", data[:5])  # Print first few values for inspection

# Cast the values to float32
data = np.array(data, dtype=np.float32)
print("Converted Data Type:", data.dtype)
print("Data Shape:", data.shape)

# Reshape data to 2D: the scaler expects (n_samples, n_features)
data = data.reshape(-1, 1)

# Normalize data to the [0, 1] range; `scaler` is kept so that values can be
# mapped back to the original price scale later.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Check normalized data
print("Scaled Data Shape:", scaled_data.shape)
print("Scaled Data Sample:", scaled_data[:5])  # Print first few values

# Prepare training data
# Each sample is WINDOW_SIZE consecutive scaled prices; its target is the
# scaled price that immediately follows that window.
WINDOW_SIZE = 60
if len(scaled_data) <= WINDOW_SIZE:
    # Without this guard the arrays below would be empty and the later
    # X_train.shape[1] lookup would fail with an unhelpful IndexError.
    raise ValueError(
        f"Need more than {WINDOW_SIZE} rows to build training windows, "
        f"got {len(scaled_data)}"
    )

scaled_series = scaled_data[:, 0]
# sliding_window_view yields every length-WINDOW_SIZE window, including the
# final one that has no following value to predict, so that last window is
# dropped to keep inputs and targets the same length.
windows = np.lib.stride_tricks.sliding_window_view(scaled_series, WINDOW_SIZE)[:-1]

# Convert to numpy arrays (np.array copies the read-only view into owned storage)
X_train = np.array(windows, dtype=np.float32)
y_train = np.array(scaled_series[WINDOW_SIZE:], dtype=np.float32)

# Print shapes and types
print("X_train Type:", X_train.dtype)
print("y_train Type:", y_train.dtype)
print("X_train Shape:", X_train.shape)
print("y_train Shape:", y_train.shape)

# Reshape X_train for LSTM [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# Build LSTM model
# Two stacked 50-unit LSTM layers followed by a single-output Dense layer.
# The first LSTM returns its full sequence so the second one receives one
# vector per time step.
# NOTE(review): the saved output of this cell ends with
# "ValueError: object __array__ method not producing an array" after the shape
# prints and before any prediction output, so the failure presumably occurs
# around model construction or fitting. That message commonly indicates a
# NumPy / TensorFlow binary version mismatch rather than a defect in this
# code — TODO confirm the environment and drop this note once resolved.
model = Sequential([
    Input(shape=(X_train.shape[1], 1)),  # (time steps, features)
    LSTM(units=50, return_sequences=True),
    LSTM(units=50),
    Dense(1),
])

# Single pass over the data, updating the weights after every sample.
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, batch_size=1, epochs=1)

# Make predictions
# These are in-sample predictions: the model is evaluated on the same windows
# it was fitted on.
scaled_predictions = model.predict(X_train)

# Check predictions type
for label, value in (
    ("Predictions Type:", type(scaled_predictions)),
    ("Predictions Shape:", scaled_predictions.shape),
):
    print(label, value)

# Inverse transform predictions back to the original price scale
predictions = scaler.inverse_transform(scaled_predictions)

# Save predictions to a CSV file (single column, no index)
predictions_df = pd.DataFrame(data=predictions, columns=['Predicted_Price'])
predictions_df.to_csv(path_or_buf='lstm_predictions.csv', index=False)


Initial Data Type: <class 'numpy.ndarray'>
Data Sample: [5.78660e+04 2.45939e+03 9.99286e-01 5.16080e+02 1.32130e+02]
Converted Data Type: float32
Data Shape: (4600,)
Scaled Data Shape: (4600, 1)
Scaled Data Sample: [[9.9383420e-01]
 [4.2239413e-02]
 [1.7162474e-05]
 [8.8635469e-03]
 [2.2693002e-03]]
X_train Type: float32
y_train Type: float32
X_train Shape: (4540, 60)
y_train Shape: (4540,)


ValueError: object __array__ method not producing an array