# 🔌 Electricity Bill Prediction Starter Notebook

This notebook is a starter template for building a machine learning model that predicts monthly electricity bills from appliance usage, city, company, and other metadata.

Feel free to modify, extend, and improve it.

In [None]:
# 📦 Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# 📥 Step 2: Read the CSV file into a DataFrame and preview it
DATASET_PATH = "electricity_bill_dataset.csv"
df = pd.read_csv(DATASET_PATH)

# Preview the first rows (shown as the cell's output when run in a notebook).
df.head()

In [None]:
# 🔎 Step 3: Inspect the dataset's structure and summary statistics
# info() prints column names, dtypes and non-null counts.
df.info()

# describe() computes per-column summary statistics for the numeric columns.
summary_stats = df.describe()
summary_stats

In [None]:
# ⚙️ Step 4: Encode categorical variables (City, Company)
# Check for missing values BEFORE encoding: `.cat.codes` maps NaN to -1, so a
# check that runs after the encoding can never report missing City/Company
# entries.
print(df.isnull().sum())

# Replace each category label with its integer code.
# NOTE: a code of -1 in these columns marks a value that was missing in the
# raw data; handle such rows (drop or impute) before training if any exist.
df['City'] = df['City'].astype('category').cat.codes
df['Company'] = df['Company'].astype('category').cat.codes

In [None]:
# 🔀 Step 5: Separate features from the target, then hold out a test set
TARGET_COLUMN = 'ElectricityBill'
y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# 🤖 Step 6: Fit a Linear Regression model (optional baseline)
# fit() returns the fitted estimator, so construction and training chain.
model = LinearRegression().fit(X_train, y_train)

# Predictions on the held-out rows, used by the evaluation step.
y_pred = model.predict(X_test)

In [None]:
# 📊 Step 7: Score the predictions and plot them against the actual bills
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # root of the MSE, i.e. error in the target's own units
r2 = r2_score(y_test, y_pred)

for label, value in (("RMSE:", rmse), ("R2 Score:", r2)):
    print(label, value)

# Scatter plot of actual vs predicted values, built on an explicit Axes.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set_xlabel("Actual Bill")
ax.set_ylabel("Predicted Bill")
ax.set_title("Actual vs Predicted Electricity Bill")
ax.grid(True)
plt.show()