In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error

# Destination for plots and CSV summaries; created up front so later
# writes never fail on a missing folder.
output_dir = 'results'
os.makedirs(output_dir, exist_ok=True)

# Input folders; every *.csv file found inside each one is processed.
data_dirs = ['dataset/UL-NCA', 'dataset/UL-NCM', 'dataset/UL-NCMNCA']

# Sliding-window forecasting settings.
degree = 3             # order of the fitted polynomial
window_size = 50       # number of cycles used to fit each polynomial
prediction_steps = 50  # number of cycles extrapolated past each window
step_size = 1          # stride between consecutive window starts

# One dict per successfully processed file, filled by the main loop.
all_results = []

def polynomial_fit_and_predict(X, y, degree=3):
    """Fit a least-squares polynomial of the given degree to (X, y).

    Despite its name, this function only fits; it performs no prediction.
    To predict, pass new inputs through the returned transformer and then
    call the returned model's ``predict``.

    Args:
        X: Input samples, passed directly to
            ``PolynomialFeatures.fit_transform``.
        y: Target values, passed to ``LinearRegression.fit``.
        degree: Polynomial degree used for the feature expansion.

    Returns:
        A ``(model, poly)`` tuple: the fitted ``LinearRegression`` and the
        fitted ``PolynomialFeatures`` transformer.
    """
    feature_map = PolynomialFeatures(degree=degree)
    # LinearRegression.fit returns the estimator itself, so it can be chained.
    regressor = LinearRegression().fit(feature_map.fit_transform(X), y)
    return regressor, feature_map

# Main loop: for every CSV in every data directory, slide a fixed-size window
# over the discharge-capacity series, fit a polynomial to each window,
# extrapolate `prediction_steps` cycles ahead, score the extrapolations
# against the recorded values, and save a diagnostic plot.
for data_dir in data_dirs:
    print(f"\nProcessing directory: {data_dir}")

    # The leaf folder name (e.g. 'UL-NCA') identifies the dataset in the
    # results, the plot title and the output sub-folder. Taking dirname()
    # first would yield the shared parent folder for every entry and merge
    # all datasets into a single group.
    dataset_name = os.path.basename(os.path.normpath(data_dir))

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for filename in sorted(os.listdir(data_dir)):
        if not filename.endswith('.csv'):
            continue

        filepath = os.path.join(data_dir, filename)
        print(f"\nProcessing {filename}...")

        try:
            data = pd.read_csv(filepath)
            # Raw column is scaled by 1/1000 (presumably mAh -> Ah; TODO confirm).
            discharge_capacity = data['Discharge_Capacity'].values / 1000
            total_cycles = len(discharge_capacity)

            # Each window needs `window_size` cycles to fit on plus
            # `prediction_steps` recorded cycles to score against.
            required_cycles = window_size + prediction_steps
            if total_cycles < required_cycles:
                print(f"Skipping {filename}: {total_cycles} cycles, "
                      f"need at least {required_cycles}.")
                continue

            all_predictions = []
            actual_values = []

            for start_idx in range(0, total_cycles - required_cycles + 1, step_size):
                window_end = start_idx + window_size
                horizon_end = window_end + prediction_steps

                window_data = discharge_capacity[start_idx:window_end]
                future_data = discharge_capacity[window_end:horizon_end]

                # The regressor input is the absolute cycle index, as a column vector.
                X = np.arange(start_idx, window_end).reshape(-1, 1)
                y = window_data

                model, poly = polynomial_fit_and_predict(X, y, degree=degree)
                future_X = np.arange(window_end, horizon_end).reshape(-1, 1)
                future_X_poly = poly.transform(future_X)
                predicted = model.predict(future_X_poly)

                all_predictions.append(predicted)
                actual_values.append(future_data)

            # One row per window, one column per forecast step.
            all_predictions = np.array(all_predictions)
            actual_values = np.array(actual_values)

            mape = mean_absolute_percentage_error(actual_values.flatten(), all_predictions.flatten()) * 100
            rmse = np.sqrt(np.mean((actual_values - all_predictions)**2))

            print(f"File: {filename} | MAPE: {mape:.2f}% | RMSE: {rmse:.6f}")

            all_results.append({
                'dataset': dataset_name,
                'filename': filename,
                'mape': mape,
                'rmse': rmse,
                'predictions': all_predictions,
                'actual': actual_values
            })

            plt.figure(figsize=(12, 6))

            plt.plot(np.arange(total_cycles), discharge_capacity, 'k-', linewidth=2, label='True Capacity')

            # Faint red trace for each window's forecast, drawn at the
            # cycles it was predicting.
            for i in range(all_predictions.shape[0]):
                start_cycle = i * step_size + window_size
                pred_cycles = np.arange(start_cycle, start_cycle + prediction_steps)
                plt.plot(pred_cycles, all_predictions[i], 'r-', alpha=0.1, linewidth=0.5)

            # Forecast averaged across all windows (per forecast step),
            # drawn over the cycle range of the last window's forecast.
            mean_pred = np.mean(all_predictions, axis=0)
            mean_start = (all_predictions.shape[0]-1)*step_size + window_size
            mean_cycles = np.arange(mean_start, mean_start + prediction_steps)
            plt.plot(mean_cycles, mean_pred, 'b--', linewidth=2,
                     label=f'Mean Prediction (MAPE: {mape:.2f}%)')

            plt.axvspan(total_cycles - window_size - prediction_steps,
                        total_cycles - prediction_steps,
                        color='yellow', alpha=0.1, label='Last Input Window')
            plt.title(f'Polynomial Fit (deg={degree}) - {dataset_name} - {filename}')
            plt.xlabel('Cycle Number')
            plt.ylabel('Discharge Capacity (Normalized)')
            plt.legend()
            plt.grid(True, alpha=0.3)

            os.makedirs(os.path.join(output_dir, dataset_name), exist_ok=True)
            output_path = os.path.join(output_dir, dataset_name, f'polyfit_{filename.replace(".csv", ".png")}')
            plt.savefig(output_path, dpi=300, bbox_inches='tight')
            plt.close()

        except Exception as e:
            # Best effort: report the failing file and carry on with the rest.
            print(f"Error processing {filename}: {str(e)}")

# Report per-dataset and overall averages of the per-file metrics, then
# persist them as CSV files in `output_dir`.
if all_results:
    # Sorted so the report order is stable between runs (set order is not).
    for dataset in sorted({result['dataset'] for result in all_results}):
        dataset_results = [result for result in all_results if result['dataset'] == dataset]
        avg_mape = np.mean([result['mape'] for result in dataset_results])
        avg_rmse = np.mean([result['rmse'] for result in dataset_results])
        print(f"\nDataset {dataset} (n={len(dataset_results)}):")
        print(f"Average MAPE: {avg_mape:.2f}%")
        print(f"Average RMSE: {avg_rmse:.6f}")

    total_mape = np.mean([result['mape'] for result in all_results])
    total_rmse = np.mean([result['rmse'] for result in all_results])

    print("\n" + "="*50)
    print(f"Overall Results (averaged over {len(all_results)} files from {len(data_dirs)} datasets):")
    print(f"Average MAPE: {total_mape:.2f}%")
    print(f"Average RMSE: {total_rmse:.6f}")

    # The 'predictions' and 'actual' entries are 2-D arrays; written to CSV
    # they become (usually truncated) array reprs rather than usable data,
    # so only the scalar per-file metrics are exported.
    summary_df = pd.DataFrame(all_results).drop(columns=['predictions', 'actual'])
    summary_df.to_csv(os.path.join(output_dir, 'summary_results.csv'), index=False)

    stats_df = summary_df.groupby('dataset').agg({'mape': ['mean', 'std'], 'rmse': ['mean', 'std']})
    stats_df.to_csv(os.path.join(output_dir, 'dataset_stats.csv'))
else:
    print("No valid files processed.")

print("\nProcessing completed.")


Processing directory: dataset/UL-NCA

Processing CY25-025_1-#1.csv...
File: CY25-025_1-#1.csv | MAPE: 4.33% | RMSE: 0.354796

Processing CY25-025_1-#2.csv...
File: CY25-025_1-#2.csv | MAPE: 0.18% | RMSE: 0.009881

Processing CY25-025_1-#3.csv...
File: CY25-025_1-#3.csv | MAPE: 0.50% | RMSE: 0.022681

Processing CY25-025_1-#4.csv...
File: CY25-025_1-#4.csv | MAPE: 0.47% | RMSE: 0.020662

Processing CY25-025_1-#5.csv...
File: CY25-025_1-#5.csv | MAPE: 0.47% | RMSE: 0.020112

Processing CY25-025_1-#6.csv...
File: CY25-025_1-#6.csv | MAPE: 0.67% | RMSE: 0.034586

Processing CY25-025_1-#7.csv...
File: CY25-025_1-#7.csv | MAPE: 0.73% | RMSE: 0.035656

Processing CY25-05_1-#1.csv...
File: CY25-05_1-#1.csv | MAPE: 2.04% | RMSE: 0.088761

Processing CY25-05_1-#10.csv...
File: CY25-05_1-#10.csv | MAPE: 0.58% | RMSE: 0.041239

Processing CY25-05_1-#11.csv...
File: CY25-05_1-#11.csv | MAPE: 1.19% | RMSE: 0.058229

Processing CY25-05_1-#12.csv...
File: CY25-05_1-#12.csv | MAPE: 1.31% | RMSE: 0.068