In [5]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Sample data loading function (replace with your actual data)
def load_data():
    # For illustration, replace with your actual data loading logic
    # Simulating data with 1000 samples, 22 features
    X = np.random.rand(1000, 22)
    y = np.random.rand(1000)
    return X, y

# Data preprocessing
def preprocess_data(X, y, timesteps=5):
    X_reshaped = []
    y_reshaped = []
    
    # Create rolling window for timesteps
    for i in range(timesteps, len(X)):
        X_reshaped.append(X[i-timesteps:i])  # Use previous 'timesteps' for prediction
        y_reshaped.append(y[i])  # Use next day as target
    
    X_reshaped = np.array(X_reshaped)
    y_reshaped = np.array(y_reshaped)
    
    # Normalize the data
    feature_scaler = StandardScaler()
    X_reshaped = feature_scaler.fit_transform(X_reshaped.reshape(-1, X_reshaped.shape[2]))
    X_reshaped = X_reshaped.reshape(X_reshaped.shape[0], timesteps, X_reshaped.shape[1] // timesteps)
    
    return X_reshaped, y_reshaped, feature_scaler

# CNN model definition
def create_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(64, 2, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(1))  # Regression output

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Save training and validation loss graph
def save_loss_graph(history, filename="loss_curve.png"):
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(filename)
    plt.close()

# Save prediction vs actual graph
def save_prediction_vs_actual_graph(y_true, y_pred, filename="prediction_vs_actual.png"):
    plt.figure(figsize=(10, 6))
    plt.scatter(y_true, y_pred, color='blue', alpha=0.5)
    plt.plot([min(y_true), max(y_true)], [min(y_true), max(y_true)], color='red', linestyle='--')
    plt.title('Prediction vs Actual')
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.savefig(filename)
    plt.close()

# Main function
def main():
    # Load and preprocess data
    X, y = load_data()
    timesteps = 5  # Number of timesteps (days)
    X_train, y_train, feature_scaler = preprocess_data(X, y, timesteps)
    
    # Split into training and test sets (80-20 split)
    split_index = int(0.8 * len(X_train))
    X_train_split, X_test_split = X_train[:split_index], X_train[split_index:]
    y_train_split, y_test_split = y_train[:split_index], y_train[split_index:]
    
    # Create and train the CNN model
    cnn_model = create_cnn_model(X_train_split.shape[1:])
    history = cnn_model.fit(X_train_split, y_train_split, epochs=10, batch_size=32, validation_data=(X_test_split, y_test_split), verbose=1)
    
    # Save loss graph
    save_loss_graph(history, "training_loss_curve.png")
    
    # Evaluate the model
    test_loss = cnn_model.evaluate(X_test_split, y_test_split)
    print(f"Test loss: {test_loss}")
    
    # Predict on test data
    y_pred = cnn_model.predict(X_test_split)
    print(f"Predictions: {y_pred[:5]}")  # Print first 5 predictions
    
    # Save prediction vs actual graph
    save_prediction_vs_actual_graph(y_test_split, y_pred, "prediction_vs_actual.png")

if __name__ == "__main__":
    main()


ValueError: cannot reshape array of size 109450 into shape (4975,5,4)