In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import datetime

In [2]:
def generate_charging_data(n_samples=1000, seed=42):
    """
    Generate synthetic EV charging sessions with rule-based connection durations.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of charging sessions (rows) to generate.
    seed : int or None, default 42
        Value passed to ``np.random.seed`` before any sampling, so repeated
        calls with the same arguments return identical data. Pass ``None`` to
        leave NumPy's global random state untouched (the caller then controls
        reproducibility).

    Returns
    -------
    pandas.DataFrame
        One row per session with the columns ``timestamp``, ``day_of_week``,
        ``hour_of_day``, ``battery_level_start`` (percent), ``battery_capacity``
        (kWh), ``charging_power`` (kW), ``is_workday`` (1 for Mon-Fri, else 0),
        ``temperature`` (Celsius), ``parking_type`` and ``connection_duration``
        (hours, clipped to the range [0.5, 24]).

    Notes
    -----
    All sampling goes through NumPy's global random state, so seeding here
    also influences any ``np.random`` calls the caller makes afterwards.
    The order of the random draws below determines the generated values;
    reordering them changes the dataset even for the same seed.
    """
    if seed is not None:
        np.random.seed(seed)

    # Session start times: a random minute offset within 43200 minutes
    # (30 days) of 2024-01-01, i.e. 31 January is never sampled.
    # int() guarantees a built-in int for timedelta without touching the
    # random stream.
    start_date = datetime.datetime(2024, 1, 1)
    timestamps = [
        start_date + datetime.timedelta(minutes=int(np.random.randint(0, 43200)))
        for _ in range(n_samples)
    ]

    parking_types = ['city parking lot', 'office space', 'Airport', 'mini charging station']

    # Calendar features are derived once and reused below.
    weekdays = [t.weekday() for t in timestamps]
    hours = [t.hour for t in timestamps]
    workday_flags = [1 if day < 5 else 0 for day in weekdays]

    # Independent session features (draw order matters, see Notes).
    battery_level_start = np.random.uniform(10, 80, n_samples)              # percent
    battery_capacity = np.random.choice([40, 60, 75, 85, 100], n_samples)   # kWh
    charging_power = np.random.choice([7.4, 11, 22, 50, 150], n_samples)    # kW
    temperature = np.random.normal(20, 5, n_samples)                        # Celsius
    parking_type = np.random.choice(parking_types, n_samples)

    duration_hours = []
    for i in range(n_samples):
        capacity = battery_capacity[i]

        # Energy needed to go from the starting level up to 80% of capacity,
        # divided by charger power, gives the pure charging time in hours.
        needed_charge = (0.8 * capacity) - (battery_level_start[i] * capacity / 100)
        charging_time = needed_charge / charging_power[i]

        # Behavioral factors: the car stays plugged in longer than it charges.
        hour = hours[i]
        if workday_flags[i] and 8 <= hour <= 17:
            # Work hours on a workday: people tend to stay longer
            charging_time *= np.random.uniform(1.5, 2.5)
        elif 22 <= hour or hour <= 5:
            # Overnight charging (22:00 through 05:59)
            charging_time *= np.random.uniform(2.0, 3.0)

        # Multiplicative noise with mean 1 and standard deviation 0.1
        charging_time *= np.random.normal(1, 0.1)

        # Clip between 0.5 and 24 hours
        duration_hours.append(max(0.5, min(24, charging_time)))

    return pd.DataFrame({
        'timestamp': timestamps,
        'day_of_week': weekdays,
        'hour_of_day': hours,
        'battery_level_start': battery_level_start,
        'battery_capacity': battery_capacity,
        'charging_power': charging_power,
        'is_workday': workday_flags,
        'temperature': temperature,
        'parking_type': parking_type,
        'connection_duration': duration_hours,
    })

In [3]:
# Build the 1000-session synthetic dataset that the following cells encode and model.
data = generate_charging_data(1000)

In [4]:
# Preview the first rows to sanity-check the generated columns and value ranges.
data.head()

Unnamed: 0,timestamp,day_of_week,hour_of_day,battery_level_start,battery_capacity,charging_power,is_workday,temperature,parking_type,connection_duration
0,2024-01-11 23:15:00,3,23,33.443393,60,7.4,1,11.697587,city parking lot,10.211174
1,2024-01-01 14:20:00,0,14,19.749045,100,50.0,1,29.098197,Airport,3.285224
2,2024-01-27 11:58:00,5,11,65.581763,100,22.0,0,26.03276,city parking lot,0.634272
3,2024-01-08 20:04:00,0,20,53.405093,100,11.0,1,33.750536,office space,2.294966
4,2024-01-05 08:25:00,4,8,47.342276,60,7.4,1,26.691819,office space,6.265773


In [5]:
# One-hot encode the parking type.
# The guard keeps this cell safe to re-run: after the first run the
# 'parking_type' column has already been replaced by its dummy columns, and
# calling get_dummies on it a second time would raise a KeyError.
if 'parking_type' in data.columns:
    data = pd.get_dummies(data, columns=['parking_type'])

In [6]:
# List the column names after encoding; model features are selected by these exact names.
data.columns

Index(['timestamp', 'day_of_week', 'hour_of_day', 'battery_level_start',
       'battery_capacity', 'charging_power', 'is_workday', 'temperature',
       'connection_duration', 'parking_type_Airport',
       'parking_type_city parking lot', 'parking_type_mini charging station',
       'parking_type_office space'],
      dtype='object')

In [7]:

def train_prediction_model(df, features=None, target='connection_duration',
                           test_size=0.2, n_estimators=100, random_state=42):
    """
    Train a Random Forest model to predict connection duration.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing every column named in ``features`` plus ``target``.
    features : sequence of str, optional
        Column names used as model inputs. Defaults to the calendar, battery,
        charger and temperature columns plus the four one-hot
        ``parking_type_*`` columns.
    target : str, default 'connection_duration'
        Name of the column to predict.
    test_size : float, default 0.2
        Fraction of rows held out for evaluation (passed to ``train_test_split``).
    n_estimators : int, default 100
        Number of trees in the forest.
    random_state : int, default 42
        Seed used for both the train/test split and the forest.

    Returns
    -------
    tuple
        ``(model, mse, r2, importance, X_test, y_test, y_pred)`` where ``model``
        is the fitted ``RandomForestRegressor``, ``mse`` and ``r2`` are computed
        on the held-out rows, ``importance`` is a DataFrame of feature
        importances sorted in descending order, and ``X_test`` / ``y_test`` /
        ``y_pred`` are the held-out inputs, true targets and predictions.

    Raises
    ------
    KeyError
        If any requested feature or the target column is missing from ``df``.
    """
    if features is None:
        features = [
            'day_of_week', 'hour_of_day', 'battery_level_start',
            'battery_capacity', 'charging_power', 'is_workday', 'temperature',
            'parking_type_Airport',
            'parking_type_city parking lot',
            'parking_type_mini charging station',
            'parking_type_office space',
        ]
    else:
        # Materialize so the same ordered list is used for X and for the
        # importance table, even if a generator or Index was passed in.
        features = list(features)

    # Fail early with a message that names every missing column at once.
    missing = [col for col in [*features, target] if col not in df.columns]
    if missing:
        raise KeyError(f"Columns missing from df: {missing}")

    X = df[features]
    y = df[target]

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Train model
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)

    # Evaluate model on the held-out rows
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Feature importance, most influential first
    importance = pd.DataFrame({
        'feature': features,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)

    return model, mse, r2, importance, X_test, y_test, y_pred

In [8]:
# Work on a copy so the encoded `data` frame itself is not passed to the training step.
df = data.copy()

In [9]:
# Fit the model and unpack the fitted estimator, test-set metrics, feature
# importances, and the held-out inputs / targets / predictions.
model, mse, r2, importance, X_test, y_test, y_pred = train_prediction_model(df)

In [10]:
# Report the test-set mean squared error (the target is in hours, so this is in hours squared).
print(f'Mean squared error: {mse:.2f}')

Mean squared error: 2.53


In [12]:
# Make a prediction for a single hand-specified session (the values below are
# fixed, not randomly generated). The columns use the same names and order as
# the feature list the model was trained on; exactly one parking_type_* flag
# is set, here 'city parking lot'.
sample = pd.DataFrame({
    'day_of_week': [3],
    'hour_of_day': [10],
    'battery_level_start': [50],
    'battery_capacity': [60],
    'charging_power': [22],
    'is_workday': [1],
    'temperature': [25],
    'parking_type_Airport': [0],
    'parking_type_city parking lot': [1],
    'parking_type_mini charging station': [0],
    'parking_type_office space': [0]
})
pred = model.predict(sample)
print(f'Predicted connection duration: {pred[0]:.2f} hours')

Predicted connection duration: 1.23 hours


In [13]:
# Save the fitted model to 'model.pkl' in the current working directory.
import pickle

# Binary mode is required for pickle; the with-block closes the file after writing.
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [14]:
# Load the model back from disk and make a prediction with it.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickle files that you created yourself or otherwise trust.
with open('model.pkl', 'rb') as f:
    model = pickle.load(f)
    
# Re-run the same sample through the reloaded model.
pred = model.predict(sample)


In [15]:
# Display the reloaded model's prediction; it should agree with the value printed before saving.
pred

array([1.2288934])