In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer


In [19]:

# --- Carbon-emission regressor ---------------------------------------------
# Read the raw table; 'CarbonEmission' is the target, everything else a feature.
df = pd.read_csv(r"E:\Carbon_footprint\training_models\datasets\Carbon_Emission.csv")

y = df['CarbonEmission']
X = df.drop('CarbonEmission', axis=1)

# Route columns by dtype: object columns are one-hot encoded, all other
# columns get mean imputation (only this non-object branch is imputed).
numerical_cols = X.select_dtypes(exclude='object').columns
categorical_cols = X.select_dtypes(include='object').columns

preprocessor = ColumnTransformer(transformers=[
    ('num', SimpleImputer(strategy='mean'), numerical_cols),
    # handle_unknown='ignore': categories unseen during fit do not raise at predict time.
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
])

# Hold out 20% of the rows for scoring; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing lives inside the pipeline, so it is fitted on the training
# rows only when model.fit() is called.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=50, random_state=42)),
])
model.fit(X_train, y_train)

print(f"Carbon Model R2 Score: {model.score(X_test, y_test):.4f}")

Carbon Model R2 Score: 0.9204


In [23]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# 1. Load Data (ignoring quotes to force comma split)
# quoting=3 is csv.QUOTE_NONE: quote characters are kept as literal text and
# stripped manually in step 2. Only the first 50,000 rows are read.
df = pd.read_csv(r"E:\Carbon_footprint\training_models\datasets\household_power_consumption.csv", 
                 sep=',', quoting=3, low_memory=False, nrows=50000)

# 2. Clean Quotes (Fixing the "Date,Time..." issue)
df.columns = df.columns.str.replace('"', '').str.strip()
# Remove quotes from data values (only object columns hold strings)
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = df[col].str.replace('"', '')

# 3. Feature Engineering
# Combine Date + Time into one timestamp (day-first format), keep only the
# hour and month as numeric features, then drop the raw text columns.
df['datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], dayfirst=True)
df['hour'] = df['datetime'].dt.hour
df['month'] = df['datetime'].dt.month
df = df.drop(columns=['Date', 'Time', 'datetime'])

# 4. Handle Missing & Train
# Coerce the measurement columns to numbers BEFORE dropping missing rows.
# After quote stripping they can still be strings; any non-numeric placeholder
# becomes NaN here, so dropna() actually removes it instead of passing text
# on to the model. Same column list as the LSTM cell in this notebook.
cols_numeric = ['Global_active_power', 'Global_reactive_power', 'Voltage', 
                'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']

for col in cols_numeric:
    df[col] = pd.to_numeric(df[col], errors='coerce')

df = df.dropna()
X = df.drop(columns=['Global_active_power'])
y = df['Global_active_power']

# NOTE(review): rows are time-ordered readings but the split is random, and
# the remaining features may be close proxies of the target - confirm that
# the reported R2 reflects the intended use before relying on it.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=50, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)

print(f"Energy Model R2 Score: {model.score(X_test, y_test):.4f}")

Energy Model R2 Score: 0.9992


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# ==========================================
# 1. LOAD & CLEAN DATA (Your working logic)
# ==========================================
# Loading only the first 100k rows to prevent MemoryError.
# quoting=3 is csv.QUOTE_NONE: quotes stay in the text and are stripped below.
df = pd.read_csv(r"E:\Carbon_footprint\training_models\datasets\household_power_consumption.csv", 
                 sep=',', quoting=3, low_memory=False, nrows=100000)

# Clean Quotes from Headers
df.columns = df.columns.str.replace('"', '').str.strip()

# Clean Quotes from Data (only object columns hold strings)
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = df[col].str.replace('"', '')

# Feature Engineering: build a timestamp from Date + Time (day-first),
# keep hour and month as features, drop the raw text columns.
df['datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], dayfirst=True)
df['hour'] = df['datetime'].dt.hour
df['month'] = df['datetime'].dt.month
df = df.drop(columns=['Date', 'Time', 'datetime'])

# Handle Missing & Numeric Conversion
cols_numeric = ['Global_active_power', 'Global_reactive_power', 'Voltage', 
                'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']

# errors='coerce' turns anything non-numeric into NaN so dropna() removes it.
for col in cols_numeric:
    df[col] = pd.to_numeric(df[col], errors='coerce')

df = df.dropna()

# ==========================================
# 2. PREPARE DATA FOR LSTM
# ==========================================
X = df.drop(columns=['Global_active_power'])
y = df['Global_active_power']

# Pick the train/test rows BEFORE scaling. Fitting the scalers on the full
# dataset would leak the test rows' min/max into training.
train_idx, test_idx = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)

# Scale to the 0-1 range of the TRAINING rows only; test rows are transformed
# with the same parameters and may fall slightly outside [0, 1].
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

scaler_X.fit(X.iloc[train_idx])
scaler_y.fit(y.iloc[train_idx].values.reshape(-1, 1))

X_scaled = scaler_X.transform(X)
y_scaled = scaler_y.transform(y.values.reshape(-1, 1))

# Reshape input to be 3D [samples, time steps, features]
# Here we treat each row as 1 time step with multiple features
X_lstm = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# Split Data using the indices chosen above
X_train, X_test = X_lstm[train_idx], X_lstm[test_idx]
y_train, y_test = y_scaled[train_idx], y_scaled[test_idx]

# ==========================================
# 3. BUILD AND TRAIN LSTM MODEL
# ==========================================
print("Building LSTM Model...")
model = Sequential()
# 50 neurons in the LSTM layer
model.add(LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1)) # Output layer (predicting 1 value)

model.compile(optimizer='adam', loss='mse')

print("Training LSTM (this may take a moment)...")
# validation_split=0.1 holds back part of the training data for val_loss.
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1, validation_split=0.1)

# ==========================================
# 4. EVALUATE
# ==========================================
y_pred = model.predict(X_test)

# Inverse transform to get actual Power values back (instead of scaled values)
y_test_actual = scaler_y.inverse_transform(y_test)
y_pred_actual = scaler_y.inverse_transform(y_pred)

print(f"Energy Model R2 Score: {r2_score(y_test_actual, y_pred_actual):.4f}")

Building LSTM Model...
Training LSTM (this may take a moment)...
Epoch 1/10


  super().__init__(**kwargs)


[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 7.8984e-04 - val_loss: 3.0091e-05
Epoch 2/10
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 3.5567e-05 - val_loss: 3.4796e-05
Epoch 3/10
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 3.5669e-05 - val_loss: 2.9965e-05
Epoch 4/10
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 3.5388e-05 - val_loss: 3.0776e-05
Epoch 5/10
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 3.4991e-05 - val_loss: 2.9136e-05
Epoch 6/10
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 3.4926e-05 - val_loss: 3.1109e-05
Epoch 7/10
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 3.4773e-05 - val_loss: 3.0710e-05
Epoch 8/10
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 3.446