In [13]:
import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential, load_model

In [None]:
def load_training_data(folder_path, max_rows=200000):
    """
    Load the 'Pll_out' column from every CSV file in a folder.

    Files are read in sorted filename order so the order of the combined
    samples is the same on every run and every platform.

    Args:
        folder_path: Folder that is searched (non-recursively) for "*.csv".
        max_rows: Maximum number of leading data rows to read from each
            file (a non-negative int, or None to read every row).

    Returns:
        A NumPy array of shape (n_samples, 1) holding the 'Pll_out' values
        of all files, concatenated file after file.

    Raises:
        ValueError: If the folder contains no CSV files.
        KeyError: If a file has no 'Pll_out' column.
    """
    # glob() returns files in OS-dependent order; sort for reproducibility
    csv_files = sorted(glob(os.path.join(folder_path, "*.csv")))

    print(f"Found {len(csv_files)} CSV files in the training folder")

    # Fail with a clear message instead of an obscure np.concatenate error
    if not csv_files:
        raise ValueError(f"No CSV files found in training folder: {folder_path}")

    all_data = []
    for file_count, file in enumerate(csv_files, start=1):
        print(f"Processing file: {os.path.basename(file)}")

        # Only parse the rows that are kept, instead of reading the whole
        # file and truncating afterwards
        df = pd.read_csv(file, nrows=max_rows)

        # Store the Pll_out column
        all_data.append(df['Pll_out'].to_numpy())
        print(f"Processed {file_count}/{len(csv_files)} files")

    # Combine all data
    combined_data = np.concatenate(all_data)
    print(f"Total samples loaded: {len(combined_data)}")

    # Reshape for MLP input (samples, features)
    return combined_data.reshape(-1, 1)

In [15]:
def create_mlp_model():
    """
    Build and compile the MLP used for the Pll_out data.

    The network takes one feature per sample and has two hidden ReLU layers
    (64 and 32 units), each followed by 20% dropout, and a single linear
    output unit. It is compiled with the Adam optimizer, mean-squared-error
    loss and mean-absolute-error as an extra metric.

    Returns:
        The compiled Keras Sequential model (its summary is printed as a
        side effect).
    """
    model = Sequential([
        # Declare the input explicitly; passing input_shape= to the first
        # Dense layer makes recent Keras versions emit a UserWarning
        Input(shape=(1,)),

        # First Dense layer
        Dense(64, activation='relu'),
        Dropout(0.2),

        # Second Dense layer
        Dense(32, activation='relu'),
        Dropout(0.2),

        # Output layer
        Dense(1)
    ])

    # Compile model
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # Print model summary
    model.summary()

    return model

In [16]:
def train_model(train_folder, model_save_path):
    """
    Train the MLP on all CSV files in a folder and save it to disk.

    Args:
        train_folder: Folder with the training CSV files (passed to
            load_training_data).
        model_save_path: File path the trained model is saved to.

    Returns:
        A tuple (model, history): the trained Keras model and the History
        object returned by model.fit.
    """
    # Make sure the destination folder exists before the long training run,
    # so a missing folder cannot make the save fail after training
    save_dir = os.path.dirname(model_save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Load training data
    print("Loading training data...")
    X_train = load_training_data(train_folder)

    # NOTE(review): the target is the same array as the input, so the network
    # is trained to reproduce its input rather than to predict a later
    # sample - confirm this is the intended learning task.
    y_train = X_train.reshape(-1, 1)  # Reshape targets

    print(f"Training data shape: {X_train.shape}")

    # Create model
    print("\nCreating MLP model...")
    model = create_mlp_model()

    # Stop once val_loss has not improved for 5 epochs and roll back to the
    # weights of the best epoch
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
        verbose=1
    )

    # Train model.
    # Keras takes the validation samples from the END of the arrays, before
    # shuffling; because the data is concatenated file after file, the
    # validation set is made of the last files that were loaded.
    print("\nStarting model training...")
    history = model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=1
    )

    # Save model
    print(f"\nSaving model to {model_save_path}")
    model.save(model_save_path)

    return model, history

In [17]:
# Set paths
train_folder = "E:\\PLL\\train"  # Adjust this path
model_save_path = "E:\\PLL\\forcast\\forcast_with_mlp\\result\\model.keras"  # Adjust this path

# Create output directory if it doesn't exist
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

# Train the model
print("Starting training process...")
model, history = train_model(train_folder, model_save_path)
print("Training completed!")

# Print final metrics (values logged for the last epoch that was run)
final_loss = history.history['loss'][-1]
final_val_loss = history.history['val_loss'][-1]
final_mae = history.history['mae'][-1]
final_val_mae = history.history['val_mae'][-1]

print("\nFinal Training Metrics:")
print(f"Loss: {final_loss:.4f}")
print(f"Validation Loss: {final_val_loss:.4f}")
print(f"MAE: {final_mae:.4f}")
print(f"Validation MAE: {final_val_mae:.4f}")

# train_model uses EarlyStopping(restore_best_weights=True), so the saved
# model may hold the weights of an earlier, better epoch than the last one.
# Report that epoch as well so the numbers can be matched to the saved model.
# NOTE(review): whether weights are restored when training ends without an
# early stop depends on the Keras version - confirm for the installed one.
val_loss_history = history.history['val_loss']
best_epoch = int(np.argmin(val_loss_history))

print(f"\nBest epoch by validation loss: {best_epoch + 1}/{len(val_loss_history)}")
print(f"Best Validation Loss: {val_loss_history[best_epoch]:.4f}")
print(f"Validation MAE at best epoch: {history.history['val_mae'][best_epoch]:.4f}")

Starting training process...
Loading training data...
Found 90 CSV files in the training folder
Processing file: fastN_fastP_3.3_105.csv
Processed 1/90 files
Processing file: fastN_fastP_3.3_125.csv
Processed 2/90 files
Processing file: fastN_fastP_3.3_45.csv
Processed 3/90 files
Processing file: fastN_fastP_3.3_85.csv
Processed 4/90 files
Processing file: fastN_fastP_3.6_145.csv
Processed 5/90 files
Processing file: fastN_fastP_3.6_165.csv
Processed 6/90 files
Processing file: fastN_fastP_3.6_185.csv
Processed 7/90 files
Processing file: fastN_fastP_3_105.csv
Processed 8/90 files
Processing file: fastN_fastP_3_125.csv
Processed 9/90 files
Processing file: fastN_fastP_3_145.csv
Processed 10/90 files
Processing file: fastN_fastP_3_165.csv
Processed 11/90 files
Processing file: fastN_fastP_3_85.csv
Processed 12/90 files
Processing file: fastN_slowP_3.3_105.csv
Processed 13/90 files
Processing file: fastN_slowP_3.3_125.csv
Processed 14/90 files
Processing file: fastN_slowP_3.3_145.csv
Pro

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Starting model training...
Epoch 1/50
[1m225000/225000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 623us/step - loss: 0.0438 - mae: 0.1521 - val_loss: 0.0029 - val_mae: 0.0414
Epoch 2/50
[1m225000/225000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 629us/step - loss: 0.0366 - mae: 0.1472 - val_loss: 0.0030 - val_mae: 0.0400
Epoch 3/50
[1m225000/225000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 639us/step - loss: 0.0365 - mae: 0.1470 - val_loss: 0.0012 - val_mae: 0.0315
Epoch 4/50
[1m225000/225000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 620us/step - loss: 0.0365 - mae: 0.1468 - val_loss: 0.0012 - val_mae: 0.0295
Epoch 5/50
[1m225000/225000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 612us/step - loss: 0.0365 - mae: 0.1469 - val_loss: 0.0016 - val_mae: 0.0334
Epoch 6/50
[1m225000/225000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 613us/step - loss: 0.0365 - mae: 0.1468 - val_loss: 0.0011 - val_mae: 0.0280


TESTING

In [None]:
import pandas as pd
import numpy as np
from glob import glob
import os
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import load_model

def calculate_snr(signal, noise):
    """
    Return the signal-to-noise ratio of two sample sequences in decibels.

    Power is taken as the mean of the squared samples. When the noise power
    is exactly zero the ratio is undefined, so positive infinity is returned
    instead of dividing by zero.
    """
    # Mean-square power of the signal first, then of the noise
    p_signal, p_noise = (np.mean(np.square(samples)) for samples in (signal, noise))

    if p_noise != 0:
        return 10 * np.log10(p_signal / p_noise)

    # Noise-free case
    return float('inf')

def analyze_test_file(file_path, model_path, output_folder, split_index=200000):
    """
    Analyze a single test file, make predictions, and create visualizations.

    The 'Pll_out' column is split at split_index: the rows before it are only
    drawn on the plot, the rows from it onwards are fed to the model and used
    to compute the metrics.

    Args:
        file_path: CSV file with a 'Pll_out' column.
        model_path: Path of the saved Keras model to load.
        output_folder: Folder the plot image is written to (created if it
            does not exist).
        split_index: Number of leading rows that are plotted but not
            evaluated (non-negative int, default 200000).

    Returns:
        A dict with the keys 'file', 'rmse', 'mae', 'r2' and 'snr'.

    Raises:
        ValueError: If the file has no rows at or after split_index, i.e.
            there is nothing to evaluate.
    """
    file_name = os.path.basename(file_path)

    # Read the CSV file
    df = pd.read_csv(file_path)
    pll_out = df['Pll_out'].to_numpy()

    # Split into the plotted-only head and the evaluated tail
    actual_train = pll_out[:split_index]
    actual_test = pll_out[split_index:]

    # Check before the (slow) model load so short files fail fast and clearly
    if actual_test.size == 0:
        raise ValueError(
            f"{file_name} has {len(pll_out)} rows; more than {split_index} "
            "are needed to leave rows for evaluation"
        )

    # Prepare data for prediction
    X_test = actual_test.reshape(-1, 1)

    # Load model and make predictions.
    # NOTE(review): the model is given the actual test values as input and its
    # output is scored against those same values - confirm this is the
    # intended evaluation.
    model = load_model(model_path)
    predictions = model.predict(X_test).flatten()

    # Calculate metrics
    mse = mean_squared_error(actual_test, predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual_test, predictions)
    r2 = r2_score(actual_test, predictions)
    snr = calculate_snr(actual_test, actual_test - predictions)

    # Make sure the plot can be written even when the function is called directly
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # splitext swaps only the real extension (str.replace would touch every ".csv")
    plot_filename = os.path.join(output_folder,
                                 f'plot_{os.path.splitext(file_name)[0]}.png')

    # Create visualization
    fig, ax = plt.subplots(figsize=(15, 8))
    try:
        # Plot actual values (rows before the split)
        ax.plot(range(len(actual_train)),
                actual_train,
                label=f'Actual (First {split_index})',
                color='blue')

        # Plot predictions, continuing on the x axis where the actual values end
        ax.plot(range(len(actual_train), len(actual_train) + len(predictions)),
                predictions,
                label='Predictions',
                color='red')

        # Add metrics to plot; four decimals because the errors are small
        # fractions that two decimals would round away
        metrics_text = (f'Metrics (After {split_index}):\n'
                        f'RMSE: {rmse:.4f}\nMAE: {mae:.4f}\n'
                        f'R²: {r2:.4f}\nSNR: {snr:.2f} dB')
        ax.text(0.02, 0.98, metrics_text,
                transform=ax.transAxes,
                verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        # Customize plot
        ax.set_title(f'Actual vs Predicted Values\nFile: {file_name}')
        ax.set_xlabel('Row Number')
        ax.set_ylabel('Pll_out Value')
        ax.legend()
        ax.grid(True)

        # Save plot
        fig.savefig(plot_filename, dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving fails
        plt.close(fig)

    # Return metrics
    return {
        'file': file_name,
        'rmse': rmse,
        'mae': mae,
        'r2': r2,
        'snr': snr
    }

def process_all_test_files(test_folder, model_path, output_folder):
    """
    Process all test files and save results.

    Every "*.csv" file in test_folder is passed to analyze_test_file (in
    sorted filename order). The collected metrics are written to
    'metrics_summary.csv' and a bar-chart overview to 'metrics_summary.png',
    both inside output_folder.

    Args:
        test_folder: Folder that is searched (non-recursively) for CSV files.
        model_path: Path of the saved Keras model, forwarded to
            analyze_test_file.
        output_folder: Folder for the plots and summaries (created if needed).

    Returns:
        A pandas DataFrame with one row of metrics per test file.

    Raises:
        ValueError: If test_folder contains no CSV files.
    """
    # Get all CSV files; glob order is OS-dependent, so sort for stable output
    test_files = sorted(glob(os.path.join(test_folder, "*.csv")))

    # Without files the summary frame would have no columns and the plotting
    # code below would fail with an unrelated KeyError
    if not test_files:
        raise ValueError(f"No CSV files found in test folder: {test_folder}")

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Process each file and collect metrics
    all_metrics = []
    for file in test_files:
        print(f"Processing {os.path.basename(file)}...")
        all_metrics.append(analyze_test_file(file, model_path, output_folder))

    # Create metrics summary
    metrics_df = pd.DataFrame(all_metrics)
    metrics_filepath = os.path.join(output_folder, 'metrics_summary.csv')
    metrics_df.to_csv(metrics_filepath, index=False)

    # Create summary plot: one bar-chart panel per metric
    files = metrics_df['file']
    x = list(range(len(files)))
    panels = [
        ('rmse', 'RMSE by File'),
        ('r2', 'R² Score by File'),
        ('snr', 'SNR by File'),
    ]

    fig, axes = plt.subplots(1, 3, figsize=(12, 6))
    try:
        for ax, (column, title) in zip(axes, panels):
            ax.bar(x, metrics_df[column], alpha=0.6)
            ax.set_title(title)
            ax.set_xticks(x)
            ax.set_xticklabels(files, rotation=45)

        fig.tight_layout()
        fig.savefig(os.path.join(output_folder, 'metrics_summary.png'),
                    dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving fails
        plt.close(fig)

    return metrics_df

# Example run: evaluate every CSV file in the test folder with the saved model
if __name__ == "__main__":
    # Local Windows paths; change them to match your own setup
    output_folder = 'E:\\PLL\\forcast\\forcast_with_mlp\\result'
    model_path = "E:\\PLL\\forcast\\forcast_with_mlp\\result\\model.keras"
    test_folder = "E:\\PLL\\test"

    # Writes one plot per file plus the metrics summary into output_folder
    process_all_test_files(
        test_folder=test_folder,
        model_path=model_path,
        output_folder=output_folder,
    )

Processing fastN_fastP_3_185.csv...
[1m7292/7292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 281us/step
Processing fastN_slowP_3.6_185.csv...
[1m7292/7292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 277us/step
Processing fastN_slowP_3_25.csv...
[1m7292/7292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 275us/step
Processing slowN_slowP_3.6_185.csv...
[1m7292/7292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 271us/step
Processing typical_3.6_185.csv...
[1m7292/7292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 276us/step
Processing typical_3_5.csv...
[1m7292/7292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 271us/step
