# Data Preprocessing

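This notebook prepares the EV battery charging dataset for modelling: it loads the CSV, inspects its structure and missing values, drops incomplete and duplicate rows, separates the features from the target (`Optimal Charging Duration Class`), and creates an 80/20 train-test split.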


In [None]:
import pandas as pd

# Read the EV battery charging CSV into a DataFrame
csv_path = "data/ev_battery_charging_data.csv"
df = pd.read_csv(csv_path)

# Preview the first five rows (the default for head)
df.head(5)


In [None]:
# Print the DataFrame summary produced by DataFrame.info() so column dtypes
# and non-null counts can be checked before any cleaning is done.
df.info()


In [None]:
# Count missing entries per column (isna is the alias of isnull)
df.isna().sum()


In [None]:
# Basic statistics: display the summary table from DataFrame.describe(),
# called with its default arguments, as this cell's output.
df.describe()


## Data Cleaning


In [None]:
# Remove rows containing any missing value, then remove exact duplicate rows
df = df.dropna().drop_duplicates()

# Remaining (rows, columns) after cleaning
df.shape


## Define Features and Target


In [None]:
# Split the cleaned frame into model inputs (X) and the label column (y).
target_col = 'Optimal Charging Duration Class'
X = df.drop(columns=[target_col])  # every column except the target
y = df[target_col]                 # the target column on its own

# Report the resulting shapes and how many rows fall in each target class
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"\nTarget distribution:")
print(y.value_counts().sort_index())


## Train-Test Split


In [None]:
from sklearn.model_selection import train_test_split

# The target column holds class labels, so stratify the split to keep the
# class proportions the same in the training and test sets. Stratification
# needs at least two rows per class; if any class is rarer than that, fall
# back to the plain random split instead of raising.
stratify_labels = y if y.value_counts().min() >= 2 else None

# 80/20 split; the fixed random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

# Report split sizes and per-class counts so the class balance of both
# partitions can be compared at a glance.
print(f"Training set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")
print(f"\nTraining target distribution:")
print(y_train.value_counts().sort_index())
print(f"\nTest target distribution:")
print(y_test.value_counts().sort_index())
