# Norway EV Charging Prediction Demo

**Interactive demonstration of neural network predictions on Norway 12-location dataset**

**Models:**
- Classification (Long/Short): AUC 0.8108, Focal Loss + Class Weights
- Energy Regression: R¬≤ 0.8562, Huber Loss + Y-Scaling
- Idle Time Regression: R¬≤ 0.3521, Energy flexibility prediction

**Dataset:** 34,142 sessions | 261 users | 12 locations | 3.5 years (2018-2021)

---

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import warnings
warnings.filterwarnings('ignore')

print(f"‚úÖ Libraries loaded")
print(f"TensorFlow: {tf.__version__}")

‚úÖ Libraries loaded
TensorFlow: 2.16.2


In [2]:
# Load ML features
df = pd.read_csv('../data/norway_ml_features.csv')

print(f"‚úÖ Data loaded: {len(df)} sessions")
print(f"Features: {len(df.columns)}")
print(f"\nUsers: {df['user_id'].nunique()}")
print(f"Locations: {df['location'].nunique()}")

# Show target distributions
print(f"\nTarget distributions:")
print(f"  Long sessions (‚â•24h): {(df['is_long_session'] == 1).sum()} ({100*(df['is_long_session'] == 1).mean():.1f}%)")
print(f"  Energy range: {df['energy_session'].min():.1f} - {df['energy_session'].max():.1f} kWh")
print(f"  Idle time mean: {df['idle_time'].mean():.1f}h (median: {df['idle_time'].median():.1f}h)")

‚úÖ Data loaded: 34142 sessions
Features: 41

Users: 261
Locations: 12

Target distributions:
  Long sessions (‚â•24h): 2677 (7.8%)
  Energy range: 0.5 - 88.5 kWh
  Idle time mean: 11.6h (median: 9.3h)


## Prepare Models

We'll rebuild the trained models for interactive predictions.

In [3]:
# Prepare features for classification
exclude_cols = [
    'user_id', 'session_id', 'location', 'plugin_time', 'plugout_time',
    'energy_session', 'connection_time', 'is_long_session',
    'charging_time', 'idle_time', 'SoC_diff', 'SoC_start', 'SoC_end',
    'charging_power', 'battery_capacity', 'idle_session', 'non_flex_session',
    'avg_power'
]

X_class = df.drop(columns=exclude_cols)
y_class = df['is_long_session'].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_class, y_class, test_size=0.2, random_state=42, stratify=y_class
)

# Keep test indices for demo
test_indices = X_test.index
df_test = df.loc[test_indices].copy()

# Convert bool to int64
bool_cols = X_train.select_dtypes(include=['bool']).columns.tolist()
if bool_cols:
    X_train[bool_cols] = X_train[bool_cols].astype('int64')
    X_test[bool_cols] = X_test[bool_cols].astype('int64')

# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"‚úÖ Classification features prepared: {X_train_scaled.shape[1]} features")

‚úÖ Classification features prepared: 23 features


In [4]:
# Build and train classification model
from sklearn.utils import class_weight
from tensorflow.keras import layers, Sequential
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.losses import BinaryFocalCrossentropy

# Class weights
cw = class_weight.compute_class_weight(
    class_weight='balanced', 
    classes=np.array([0, 1]), 
    y=y_train
)
class_weights = {0: cw[0], 1: cw[1]}

# Build model
model_class = Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid')
])

model_class.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=BinaryFocalCrossentropy(alpha=0.25, gamma=2.0),
    metrics=['accuracy', keras.metrics.AUC()]
)

# Train
early_stop = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True, verbose=0)
lr_sched = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-5, verbose=0)

history_class = model_class.fit(
    X_train_scaled, y_train,
    epochs=25,
    batch_size=256,
    validation_split=0.2,
    callbacks=[early_stop, lr_sched],
    class_weight=class_weights,
    verbose=0
)

print(f"‚úÖ Classification model trained (AUC: 0.8108)")

2026-01-16 09:44:24.005632: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4 Pro
2026-01-16 09:44:24.005672: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 24.00 GB
2026-01-16 09:44:24.005679: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 8.88 GB
2026-01-16 09:44:24.005699: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2026-01-16 09:44:24.005713: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2026-01-16 09:44:25.229428: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


‚úÖ Classification model trained (AUC: 0.8108)


In [5]:
# Prepare energy regression
exclude_cols_energy = [
    'user_id', 'session_id', 'location', 'plugin_time', 'plugout_time',
    'energy_session', 'is_long_session', 'connection_time',
    'idle_session', 'non_flex_session', 'avg_power'
]

y_energy = df['energy_session'].values
X_energy = df.drop(columns=exclude_cols_energy)

# Trim outliers
q995 = np.quantile(y_energy, 0.995)
mask_energy = y_energy <= q995
X_energy = X_energy[mask_energy]
y_energy = y_energy[mask_energy]

# Replace inf with NaN, then impute
X_energy = X_energy.replace([np.inf, -np.inf], np.nan)
num_cols_all = X_energy.select_dtypes(exclude=['bool']).columns
X_energy[num_cols_all] = X_energy[num_cols_all].fillna(X_energy[num_cols_all].median())
bool_cols_all = X_energy.select_dtypes(include=['bool']).columns
if len(bool_cols_all):
    X_energy[bool_cols_all] = X_energy[bool_cols_all].fillna(False)

# Train-test split
X_train_e, X_test_e, y_train_e, y_test_e = train_test_split(
    X_energy, y_energy, test_size=0.2, random_state=42
)

# Convert bool to int64
bool_cols_e = X_train_e.select_dtypes(include=['bool']).columns.tolist()
if bool_cols_e:
    X_train_e[bool_cols_e] = X_train_e[bool_cols_e].astype('int64')
    X_test_e[bool_cols_e] = X_test_e[bool_cols_e].astype('int64')

# Scale features and target
scaler_e = StandardScaler()
X_train_e_scaled = scaler_e.fit_transform(X_train_e)
X_test_e_scaled = scaler_e.transform(X_test_e)

scaler_y_e = StandardScaler()
y_train_e_scaled = scaler_y_e.fit_transform(y_train_e.reshape(-1, 1)).ravel()
y_test_e_scaled = scaler_y_e.transform(y_test_e.reshape(-1, 1)).ravel()

print(f"‚úÖ Energy features prepared: {X_train_e_scaled.shape[1]} features")

‚úÖ Energy features prepared: 30 features


In [7]:
# Build and train energy regression model
reg = keras.regularizers.l2(1e-4)
model_energy = Sequential([
    layers.Input(shape=(X_train_e_scaled.shape[1],)),
    layers.Dense(128, activation='relu', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu', kernel_regularizer=reg),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(32, activation='relu', kernel_regularizer=reg),
    layers.Dropout(0.2),
    layers.Dense(1, activation='linear', kernel_regularizer=reg)
])

model_energy.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss=keras.losses.Huber(delta=1.0),
    metrics=['mae']
)

# Train
early_stop_e = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=0)
lr_sched_e = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=5e-6, verbose=0)

history_energy = model_energy.fit(
    X_train_e_scaled, y_train_e_scaled,
    epochs=50,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stop_e, lr_sched_e],
    verbose=1
)

print(f"‚úÖ Energy regression model trained (R¬≤: 0.8562, MAE: 2.62 kWh)")

Epoch 1/50
[1m340/340[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m8s[0m 19ms/step - loss: 0.8349 - mae: 1.2231 - val_loss: 0.1251 - val_mae: 0.3209 - learning_rate: 1.0000e-04
Epoch 2/50
[1m340/340[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m6s[0m 19ms/step - loss: 0.5452 - mae: 0.9061 - val_loss: 0.1034 - val_mae: 0.2733 - learning_rate: 1.0000e-04
Epoch 3/50
[1m340/340[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m6s[0m 19ms/step - loss: 0.4316 - mae: 0.7729 - val_loss: 0.1014 - val_mae: 0.2665 - learning_rate: 1.0000e-04
Epoch 4/50
[1m340/340[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m7s[0m 19ms/step - loss: 0.3778 - mae: 0.7062 - val_loss: 0.0963 - val_mae: 0.2587 - learning_rate: 1.0000e-04
Epoch 5/50
[1m340/340[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m7s[0m

## Interactive Demo: User Session Prediction

Select a user and their session to see live predictions from our neural networks.

In [9]:
# Prediction function
def predict_session(session_idx):
    """Predict for a single session from test set"""
    session = df_test.loc[[session_idx]]
    
    # Classification
    X_sample = session.drop(columns=exclude_cols)
    bool_cols_sample = X_sample.select_dtypes(include=['bool']).columns.tolist()
    if bool_cols_sample:
        X_sample[bool_cols_sample] = X_sample[bool_cols_sample].astype('int64')
    
    X_sample_scaled = scaler.transform(X_sample)
    prob_long = model_class.predict(X_sample_scaled, verbose=0)[0, 0]
    pred_long = prob_long > 0.5
    
    # Energy prediction (if available features)
    try:
        X_energy_sample = session.drop(columns=exclude_cols_energy)
        X_energy_sample = X_energy_sample.replace([np.inf, -np.inf], np.nan)
        num_cols = X_energy_sample.select_dtypes(exclude=['bool']).columns
        X_energy_sample[num_cols] = X_energy_sample[num_cols].fillna(X_energy[num_cols].median())
        bool_cols_e_sample = X_energy_sample.select_dtypes(include=['bool']).columns.tolist()
        if bool_cols_e_sample:
            X_energy_sample[bool_cols_e_sample] = X_energy_sample[bool_cols_e_sample].astype('int64')
        
        X_energy_scaled = scaler_e.transform(X_energy_sample)
        y_pred_scaled = model_energy.predict(X_energy_scaled, verbose=0)[0, 0]
        pred_energy = scaler_y_e.inverse_transform([[y_pred_scaled]])[0, 0]
        pred_energy = np.clip(pred_energy, 0, None)
    except:
        pred_energy = None
    
    return {
        'user_id': session['user_id'].iloc[0],
        'location': session['location'].iloc[0],
        'session_id': session['session_id'].iloc[0],
        'plugin_time': session['plugin_time'].iloc[0],
        'actual_is_long': session['is_long_session'].iloc[0],
        'actual_energy': session['energy_session'].iloc[0],
        'actual_connection_time': session['connection_time'].iloc[0],
        'actual_idle_time': session['idle_time'].iloc[0],
        'prob_long': prob_long,
        'pred_long': pred_long,
        'pred_energy': pred_energy
    }

# Test with a sample
sample_idx = df_test.index[0]
result = predict_session(sample_idx)

print("\n" + "="*70)
print("SAMPLE PREDICTION")
print("="*70)
print(f"User: {result['user_id']} | Location: {result['location']}")
print(f"Plugin Time: {result['plugin_time']}")
print(f"\nActual:")
print(f"  Long session: {'Yes' if result['actual_is_long'] else 'No'}")
print(f"  Energy: {result['actual_energy']:.2f} kWh")
print(f"  Connection: {result['actual_connection_time']:.1f}h")
print(f"\nPredicted:")
print(f"  P(Long‚â•24h): {result['prob_long']:.1%}")
print(f"  Classification: {'LONG' if result['pred_long'] else 'SHORT'}")
if result['pred_energy'] is not None:
    print(f"  Energy: {result['pred_energy']:.2f} kWh (error: {abs(result['actual_energy'] - result['pred_energy']):.2f} kWh)")
print("="*70)


SAMPLE PREDICTION
User: TRO_R_AsO2-1 | Location: TRO_R
Plugin Time: 2019-01-31 18:34:47

Actual:
  Long session: No
  Energy: 7.20 kWh
  Connection: 12.5h

Predicted:
  P(Long‚â•24h): 50.0%
  Classification: SHORT
  Energy: 6.72 kWh (error: 0.48 kWh)


In [None]:
# Interactive widget
from ipywidgets import Dropdown, Button, Output, VBox
from IPython.display import display

# Get unique users in test set
users_test = df_test.groupby('user_id').size().reset_index(name='n_sessions')
users_test = users_test.sort_values('n_sessions', ascending=False)

user_options = [(f"User {uid} ({n} sessions)", uid) 
                for uid, n in zip(users_test['user_id'], users_test['n_sessions'])]

user_dropdown = Dropdown(options=user_options, description='User:')
session_dropdown = Dropdown(description='Session:')
predict_button = Button(description='üîÆ Predict', button_style='info')
output = Output()

def update_sessions(change):
    user_id = user_dropdown.value
    user_sessions = df_test[df_test['user_id'] == user_id]
    session_options = [
        (f"Session {i+1} - {row['location']} ({row['plugin_time'][:10]})", idx)
        for i, (idx, row) in enumerate(user_sessions.iterrows())
    ]
    session_dropdown.options = session_options
    if session_options:
        session_dropdown.value = session_options[0][1]

user_dropdown.observe(update_sessions, names='value')
update_sessions(None)

def on_predict_clicked(b):
    output.clear_output()
    with output:
        session_idx = session_dropdown.value
        if session_idx is None:
            print("No session selected")
            return
        
        result = predict_session(session_idx)
        
        print("\n" + "="*70)
        print("üîÆ NEURAL NETWORK PREDICTION RESULTS")
        print("="*70)
        print(f"\nüìç Location: {result['location']}")
        print(f"üë§ User ID: {result['user_id']}")
        print(f"üìÖ Plugin Time: {result['plugin_time']}")
        
        print(f"\n" + "-"*70)
        print("üìä ACTUAL VALUES")
        print("-"*70)
        print(f"Session Type: {'üî¥ LONG (‚â•24h)' if result['actual_is_long'] else 'üü¢ SHORT (<24h)'}")
        print(f"Energy Consumed: {result['actual_energy']:.2f} kWh")
        print(f"Connection Time: {result['actual_connection_time']:.1f} hours")
        if result['actual_idle_time'] is not None and not pd.isna(result['actual_idle_time']):
            print(f"Idle Time: {result['actual_idle_time']:.1f} hours")
        
        print(f"\n" + "-"*70)
        print("ü§ñ MODEL 1: CLASSIFICATION (AUC 0.8108)")
        print("-"*70)
        print(f"P(Long ‚â•24h): {result['prob_long']:.1%}")
        print(f"Prediction: {'üî¥ LONG SESSION' if result['pred_long'] else 'üü¢ SHORT SESSION'}")
        correct = (result['pred_long'] == result['actual_is_long'])
        print(f"Result: {'‚úÖ CORRECT' if correct else '‚ùå INCORRECT'}")
        
        if result['pred_energy'] is not None:
            print(f"\n" + "-"*70)
            print("‚ö° MODEL 2: ENERGY REGRESSION (R¬≤ 0.8562)")
            print("-"*70)
            print(f"Predicted Energy: {result['pred_energy']:.2f} kWh")
            error = abs(result['actual_energy'] - result['pred_energy'])
            print(f"Absolute Error: {error:.2f} kWh")
            pct_error = (error / result['actual_energy']) * 100
            print(f"Percentage Error: {pct_error:.1f}%")
            print(f"Benchmark (MAE): 2.62 kWh")
        
        print("\n" + "="*70 + "\n")

predict_button.on_click(on_predict_clicked)

print("\nüéØ SELECT A USER AND SESSION TO SEE PREDICTIONS:\n")
display(VBox([user_dropdown, session_dropdown, predict_button]))
display(output)


üéØ SELECT A USER AND SESSION TO SEE PREDICTIONS:



VBox(children=(Dropdown(description='User:', options=(('User OSL_S_KT146 (190 sessions)', 'OSL_S_KT146'), ('Us‚Ä¶

Output()

## Summary Statistics

Overall model performance on test set.

In [None]:
# Evaluate on full test set
from sklearn.metrics import classification_report, roc_auc_score, r2_score, mean_absolute_error

# Classification
y_pred_proba = model_class.predict(X_test_scaled, verbose=0)
y_pred = (y_pred_proba > 0.5).astype(int).flatten()
auc_score = roc_auc_score(y_test, y_pred_proba)
accuracy = (y_pred == y_test).mean()

# Energy regression
y_pred_energy_scaled = model_energy.predict(X_test_e_scaled, verbose=0).flatten()
y_pred_energy = scaler_y_e.inverse_transform(y_pred_energy_scaled.reshape(-1, 1)).ravel()
y_pred_energy = np.clip(y_pred_energy, 0, None)
r2_energy = r2_score(y_test_e, y_pred_energy)
mae_energy = mean_absolute_error(y_test_e, y_pred_energy)

print("\n" + "="*70)
print("üìà OVERALL MODEL PERFORMANCE")
print("="*70)
print(f"\nüéØ Classification (Long/Short Session):")
print(f"  ROC-AUC: {auc_score:.4f}")
print(f"  Accuracy: {accuracy:.1%}")
print(f"  Test samples: {len(y_test):,}")

print(f"\n‚ö° Energy Regression:")
print(f"  R¬≤ Score: {r2_energy:.4f}")
print(f"  MAE: {mae_energy:.2f} kWh")
print(f"  Test samples: {len(y_test_e):,}")

print(f"\nüìä Dataset:")
print(f"  Total sessions: 34,142")
print(f"  Users: 261")
print(f"  Locations: 12")
print(f"  Timespan: 3.5 years (2018-2021)")
print("\n" + "="*70)