In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [27]:
import sqlite3
conn = sqlite3.connect('data.db')
df = pd.read_sql_query("SELECT * FROM final", conn)
conn.close()

In [6]:
temp = df.copy()

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1064000 entries, 0 to 1063999
Data columns (total 29 columns):
 #   Column                            Non-Null Count    Dtype  
---  ------                            --------------    -----  
 0   heating_power_can_kw              1064000 non-null  float64
 1   regenerative_braking_signal_      1064000 non-null  float64
 2   battery_current_a                 1064000 non-null  float64
 3   season                            1064000 non-null  object 
 4   heating_power_lin_w               1064000 non-null  float64
 5   requested_heating_power_w         1064000 non-null  float64
 6   heat_exchanger_temperature_c      1064000 non-null  float64
 7   ambient_temperature_c             1064000 non-null  float64
 8   coolant_temperature_inlet_c       1064000 non-null  float64
 9   throttle_                         1064000 non-null  float64
 10  time_s                            1064000 non-null  float64
 11  displayed_soc_                    106

In [11]:
cat = df.select_dtypes(include=['object']).columns.tolist()
cat += [col for col in df.columns if df[col].nunique() < 20 and col not in cat]

In [29]:
for i in cat :
    df[i] = df[i].astype("category")
    

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1064000 entries, 0 to 1063999
Data columns (total 29 columns):
 #   Column                            Non-Null Count    Dtype   
---  ------                            --------------    -----   
 0   heating_power_can_kw              1064000 non-null  float64 
 1   regenerative_braking_signal_      1064000 non-null  float64 
 2   battery_current_a                 1064000 non-null  float64 
 3   season                            1064000 non-null  category
 4   heating_power_lin_w               1064000 non-null  float64 
 5   requested_heating_power_w         1064000 non-null  float64 
 6   heat_exchanger_temperature_c      1064000 non-null  float64 
 7   ambient_temperature_c             1064000 non-null  float64 
 8   coolant_temperature_inlet_c       1064000 non-null  float64 
 9   throttle_                         1064000 non-null  float64 
 10  time_s                            1064000 non-null  float64 
 11  displayed_soc_          

In [33]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
heating_power_can_kw,1064000.0,1.243407,1.780409,0.0,0.0,0.96,1.48,40.04
regenerative_braking_signal_,1064000.0,0.053306,0.22435,0.0,0.0,0.0,0.0,1.0
battery_current_a,1064000.0,-19.437151,42.263547,-404.38,-32.06,-11.89,-2.1,144.49
heating_power_lin_w,1064000.0,1208.232658,1603.019565,0.0,13.236014,977.938033,1546.483905,38870.52
requested_heating_power_w,1064000.0,1188.895774,1806.053274,0.0,0.0,880.0,1440.0,38527.75
heat_exchanger_temperature_c,1064000.0,28.848711,16.039365,5.0,12.0,35.0,41.5,65.05
ambient_temperature_c,1064000.0,13.220534,9.70503,-3.5,5.0,9.5,22.0,33.5
coolant_temperature_inlet_c,1064000.0,45.725855,11.381492,-1.0,45.028231,46.3122,50.0,70.0
throttle_,1064000.0,28.593189,18.536805,0.0,12.5,33.55,43.18,135.25
time_s,1064000.0,959.420784,750.193432,0.0,381.6,793.1,1336.9,3821.9


In [36]:
df.columns

Index(['heating_power_can_kw', 'regenerative_braking_signal_',
       'battery_current_a', 'season', 'heating_power_lin_w',
       'requested_heating_power_w', 'heat_exchanger_temperature_c',
       'ambient_temperature_c', 'coolant_temperature_inlet_c', 'throttle_',
       'time_s', 'displayed_soc_', 'motor_torque_nm', 'heater_voltage_v',
       'coolant_temperature_heatercore_c', 'requested_coolant_temperature_c',
       'battery_temperature_c', 'elevation_m', 'battery_voltage_v',
       'heater_current_a', 'aircon_power_kw', 'soc_', 'velocity_kmh',
       'longitudinal_acceleration_ms2', 'heater_signal', 'session_id',
       'max_battery_temperature_c', 'cabin_temperature_sensor_c', 'weather'],
      dtype='object')

In [12]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)



In [15]:
import subprocess
import time
def monitor_gpu(interval=1):
    """Continuously monitor GPU usage using nvidia-smi"""
    try:
        while True:
            # Clear previous output
            display.clear_output(wait=True)
            
            # Run nvidia-smi and print output
            result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE)
            print(result.stdout.decode('utf-8'))
            
            # Wait for specified interval
            time.sleep(interval)
    except KeyboardInterrupt:
        print("GPU monitoring stopped.")

In [28]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.integration.pytorch_lightning import TuneReportCallback
import os

# Hardware configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.set_float32_matmul_precision('high')
plt.style.use('seaborn-v0_8')

In [29]:
class EVDataset(Dataset):
    def __init__(self, sequences, targets):
        self.X = torch.tensor(sequences, dtype=torch.float32).to(device)
        self.y = torch.tensor(targets, dtype=torch.float32).to(device)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [30]:
class SOCPredictor(pl.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.save_hyperparameters()
        
        self.lstm = nn.LSTM(
            input_size=config['input_size'],
            hidden_size=config['hidden_size'],
            num_layers=config['num_layers'],
            batch_first=True,
            dropout=0.3 if config['num_layers'] > 1 else 0
        )
        self.fc = nn.Sequential(
            nn.Linear(config['hidden_size'], 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        self.loss_fn = nn.MSELoss()
        self.optimizer_name = config['optimizer']
        self.lr = config['lr']
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        return self.fc(lstm_out[:, -1, :])
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        loss = self.loss_fn(y_pred, y.unsqueeze(1))
        self.log('train_loss', loss, prog_bar=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        loss = self.loss_fn(y_pred, y.unsqueeze(1))
        self.log('val_loss', loss, prog_bar=True)
        return loss
    
    def configure_optimizers(self):
        if self.optimizer_name == 'adam':
            return optim.Adam(self.parameters(), lr=self.lr)
        elif self.optimizer_name == 'sgd':
            return optim.SGD(self.parameters(), lr=self.lr, momentum=0.9)
        elif self.optimizer_name == 'rmsprop':
            return optim.RMSprop(self.parameters(), lr=self.lr)

In [31]:
def prepare_data(df, sequence_length=60, target_col='soc_'):
    df = pd.get_dummies(df, columns=['season', 'weather','heater_signal'], drop_first=True)
    df = df.sort_values(['session_id', 'time_s'])
    
    sequences = []
    targets = []
    numeric_cols = [col for col in df.columns 
                   if col not in [target_col, 'session_id', 'time_s'] 
                   and np.issubdtype(df[col].dtype, np.number)]
    
    for session in df['session_id'].unique():
        session_data = df[df['session_id'] == session]
        if len(session_data) <= sequence_length:
            continue
            
        scaler = StandardScaler()
        session_data[numeric_cols] = scaler.fit_transform(session_data[numeric_cols])
        
        for i in range(len(session_data) - sequence_length):
            seq = session_data.iloc[i:i+sequence_length][numeric_cols].values
            target = session_data.iloc[i+sequence_length][target_col]
            sequences.append(seq)
            targets.append(target)
    
    return np.array(sequences, dtype=np.float32), np.array(targets, dtype=np.float32)

In [32]:
def tune_model(config, sequences, targets):
    X_train, X_val, y_train, y_val = train_test_split(
        sequences, targets, test_size=0.2, random_state=42
    )
    
    train_loader = DataLoader(
        EVDataset(X_train, y_train),
        batch_size=config['batch_size'],
        shuffle=True,
        num_workers=4,
        pin_memory=True
    )
    val_loader = DataLoader(
        EVDataset(X_val, y_val),
        batch_size=config['batch_size'],
        num_workers=4,
        pin_memory=True
    )
    
    trainer = pl.Trainer(
        max_epochs=50,
        accelerator='gpu',
        devices=1,
        callbacks=[
            EarlyStopping(monitor='val_loss', patience=10),
            TuneReportCallback({'val_loss': 'val_loss'}, on='validation_end')
        ],
        enable_progress_bar=False
    )
    
    trainer.fit(SOCPredictor(config), train_loader, val_loader)

In [33]:
def plot_results(optimizer_results, dpi=600):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    
    for opt_name, results in optimizer_results.items():
        ax1.plot(results['train_loss'], label=f'{opt_name} Train')
        ax1.plot(results['val_loss'], '--', label=f'{opt_name} Val')
        ax2.bar(opt_name, results['final_mae'], label=opt_name)
    
    ax1.set_title('Training/Validation Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    ax1.grid(True)
    
    ax2.set_title('Final Test MAE Comparison')
    ax2.set_ylabel('MAE')
    ax2.grid(True)
    
    plt.tight_layout()
    plt.savefig('optimizer_comparison.png', dpi=dpi, bbox_inches='tight')
    plt.close()
    

In [34]:
if __name__ == '__main__':
    # 1. Data Preparation
    try:
        sequences, targets = prepare_data(df)
        input_size = sequences.shape[2]
        print(f"Data prepared successfully. Input size: {input_size}")
    except Exception as e:
        print(f"Data preparation failed: {str(e)}")
        raise

    # 2. Hyperparameter Search Setup
    config = {
        'input_size': input_size,
        'hidden_size': tune.choice([64, 128, 256]),
        'num_layers': tune.choice([1, 2, 3]),
        'batch_size': tune.choice([128, 256, 512]),
        'lr': tune.loguniform(1e-4, 1e-2),
        'optimizer': tune.grid_search(['adam', 'sgd', 'rmsprop'])
    }

    # 3. Initialize Ray Tune
    scheduler = ASHAScheduler(
        max_t=50,
        grace_period=10,
        reduction_factor=2
    )
    
    reporter = CLIReporter(
        parameter_columns=['optimizer', 'lr', 'batch_size', 'hidden_size', 'num_layers'],
        metric_columns=['val_loss', 'training_iteration']
    )

    # 4. Run Hyperparameter Optimization
    try:
        print("Starting hyperparameter optimization...")
        analysis = tune.run(
            tune.with_parameters(tune_model, sequences=sequences, targets=targets),
            resources_per_trial={'gpu': 1, 'cpu': 4},
            config=config,
            num_samples=15,  # Increased for better coverage
            scheduler=scheduler,
            progress_reporter=reporter,
            name='soc_prediction_tune',
            local_dir='./ray_results',
            raise_on_failed_trial=False
        )
    except Exception as e:
        print(f"Hyperparameter search failed: {str(e)}")
        raise

    # 5. Final Model Training
    optimizer_results = {}
    try:
        for opt_name in ['adam', 'sgd', 'rmsprop']:
            print(f"\nTraining final {opt_name} model...")
            
            # Get best config for current optimizer
            best_trial = analysis.get_best_trial(
                metric='val_loss',
                mode='min',
                scope='all',
                filter=lambda t: t.config['optimizer'] == opt_name
            )
            
            if not best_trial:
                print(f"No best trial found for {opt_name}, skipping...")
                continue
                
            best_config = best_trial.config
            print(f"Best config for {opt_name}: {best_config}")

            # Setup logger and callbacks
            logger = CSVLogger(
                save_dir='logs',
                name=opt_name,
                version=f'best_{opt_name}'
            )
            
            checkpoint_callback = ModelCheckpoint(
                monitor='val_loss',
                dirpath=f'checkpoints/{opt_name}',
                filename='best-{epoch}-{val_loss:.2f}',
                save_top_k=1
            )

            # Initialize trainer
            trainer = pl.Trainer(
                max_epochs=100,
                accelerator='gpu',
                devices=1,
                logger=logger,
                callbacks=[
                    EarlyStopping(monitor='val_loss', patience=15, mode='min'),
                    checkpoint_callback
                ],
                log_every_n_steps=10,
                deterministic=True
            )

            # Data loaders with best batch size
            train_loader = DataLoader(
                EVDataset(*train_test_split(sequences, targets, test_size=0.2)),
                batch_size=best_config['batch_size'],
                shuffle=True,
                num_workers=4,
                pin_memory=True,
                persistent_workers=True
            )
            
            val_loader = DataLoader(
                EVDataset(*train_test_split(sequences, targets, test_size=0.2)),
                batch_size=best_config['batch_size'],
                num_workers=4,
                pin_memory=True,
                persistent_workers=True
            )

            # Model initialization and training
            model = SOCPredictor(best_config)
            trainer.fit(model, train_loader, val_loader)

            # Load best checkpoint
            best_model_path = checkpoint_callback.best_model_path
            if best_model_path:
                model = SOCPredictor.load_from_checkpoint(best_model_path)
                print(f"Loaded best model from {best_model_path}")

            # Evaluation
            test_loader = DataLoader(
                EVDataset(*train_test_split(sequences, targets, test_size=0.2)),
                batch_size=best_config['batch_size'],
                num_workers=4
            )
            
            results = trainer.test(model, test_loader)
            test_loss = results[0]['test_loss'] if results else float('nan')

            # Store results
            history = pd.read_csv(f'{logger.log_dir}/metrics.csv')
            optimizer_results[opt_name] = {
                'train_loss': history['train_loss'].dropna().values,
                'val_loss': history['val_loss'].dropna().values,
                'test_loss': test_loss,
                'config': best_config,
                'model_path': best_model_path
            }

            # Save final model
            torch.save(model.state_dict(), f'models/soc_predictor_{opt_name}.pth')
            print(f"Saved {opt_name} model to models/soc_predictor_{opt_name}.pth")

    except Exception as e:
        print(f"Final training failed: {str(e)}")
        raise

    # 6. Results Analysis and Saving
    try:
        # Create directories if they don't exist
        os.makedirs('results', exist_ok=True)
        os.makedirs('plots', exist_ok=True)

        # Save results
        results_df = pd.DataFrame.from_dict(
            {k: {**v, 'config': str(v['config'])} for k, v in optimizer_results.items()},
            orient='index'
        )
        results_df.to_csv('results/optimizer_comparison.csv', index_label='optimizer')
        
        # Save best configs
        best_configs_df = pd.DataFrame(
            {k: v['config'] for k, v in optimizer_results.items()}
        ).T
        best_configs_df.to_csv('results/best_configs.csv')

        # Generate and save plots
        plot_results(optimizer_results, dpi=600)
        plt.savefig('plots/optimizer_comparison.png', bbox_inches='tight')
        plt.close()

        print("\nOptimization completed successfully!")
        print("Results saved in results/ directory")
        print("Plots saved in plots/ directory")

    except Exception as e:
        print(f"Results saving failed: {str(e)}")
        raise

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  session_data[numeric_cols] = scaler.fit_transform(session_data[numeric_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  session_data[numeric_cols] = scaler.fit_transform(session_data[numeric_cols])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  session_data[numeric_cols] = scaler.fit_transform

Data prepared successfully. Input size: 23
Starting hyperparameter optimization...
Hyperparameter search failed: The `local_dir` argument is deprecated. You should set the `storage_path` instead. See the docs: https://docs.ray.io/en/latest/train/user-guides/persistent-storage.html#setting-the-local-staging-directory


DeprecationWarning: The `local_dir` argument is deprecated. You should set the `storage_path` instead. See the docs: https://docs.ray.io/en/latest/train/user-guides/persistent-storage.html#setting-the-local-staging-directory