<a href="https://colab.research.google.com/github/sahilfatima/Power-Consumption-in-Tetouan-City/blob/main/Power_Consumption.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response on each read in the download loop.
CHUNK_SIZE = 40960
# NOTE: the mapping below has the form "<directory>:<percent-encoded signed URL>".
# The URL's query carries X-Goog-Date=20240423T120615Z and X-Goog-Expires=259200
# (seconds), so the link is time-limited; once it has lapsed the download cell
# reports a failure and the URL has to be regenerated.
DATA_SOURCE_MAPPING = 'power-consumption:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4858116%2F8200719%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240423%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240423T120615Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D5834f033f3120d3476c3975f8e3a9dc19c9fceffad9f8480f4e7d7eef7587aa62b2a6f2325e68008a99a0695a63c48213136c1b5013818a0ae0d574d75f32a103ef1283218724672846ef46bf09a33c290da31d1f4cf0061041a1cb9aa4d0a01ff8207a806ee73b077d78eba72c0eb7e8e955831eca459a63e2f3ea5b6480983ac0260365113cfcfd2764472751bc61ce55d13f1dedd9e6dc3f2f124d04249f3c11f22c6f409ebe069b89e66b78439277a0c72df6678802aa1d0cf0db816847f1a05a4352a4e61b16de8595352bc589ed691fb737fa796670384583e0d593fc8c1789b5dbf9e430f9bf7a9a63c9f5d9ba019b912410cc18fcea860924f4cd84f'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every data source listed in DATA_SOURCE_MAPPING into a temporary
# file and unpack it (zip or tar) under KAGGLE_INPUT_PATH/<directory>.
# HTTP and OS-level failures are reported and the loop moves on to the next
# source.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry has the form "<directory>:<percent-encoded URL>". Split on the
    # first colon only so a literal ':' left in the URL cannot break unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header may be absent; without a known size the progress bar
            # is skipped instead of crashing on int(None).
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                if total_bytes > 0:
                    done = min(50, int(50 * dl / total_bytes))
                    bar = f"[{'=' * done}{' ' * (50 - done)}] "
                else:
                    bar = ''
                sys.stdout.write(f"\r{bar}{dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch re-opens the file by
            # name and would otherwise see a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` here
                # would replace the module and break any later tar source.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # Must precede OSError: HTTPError is one of its subclasses.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


# <div style="text-align: center; background-color:skyblue; font-family:Georgia, serif; color: black; padding: 20px;line-height: 1;border-radius:5px; border: 2px solid black;">Power Consumption of Tetouan City</div>


In [None]:
#Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from statsmodels.tsa.seasonal import STL

# Loading Data

In [None]:
# Load the power-consumption CSV from the 'power-consumption' directory that
# the data-source import cell unpacks under /kaggle/input.
df = pd.read_csv('/kaggle/input/power-consumption/Tetuan City power consumption.csv')

In [None]:
# Display the freshly loaded DataFrame
df

# Data Preprocessing

In [None]:
# Print the column names, non-null counts and dtypes
df.info()

In [None]:
# Show descriptive statistics for the columns
df.describe()

In [None]:
# List the column labels (used verbatim, including spacing, in later cells)
df.columns

In [None]:
# Report, column by column, how many distinct values are present
for column_name in df.columns:
    distinct_count = df[column_name].nunique()
    print(column_name, ':', distinct_count)

In [None]:
# Count the missing (null) entries in each column
df.isnull().sum()

Cleaned data

# Convert the DateTime column to datetime

In [None]:
# Parse the 'DateTime' column into pandas datetime values, replacing the
# original column; the format is left for pandas to infer.
df['DateTime'] = pd.to_datetime(df['DateTime'])

In [None]:
# Display the DataFrame again now that 'DateTime' has been converted
df

In [None]:
# Re-check the column dtypes after the datetime conversion
df.info()

In [None]:
# Temperature: line chart of the readings against their timestamps
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['DateTime'], df['Temperature'], color='blue')
ax.set_title('Temperature over Time')
ax.set_xlabel('DateTime')
ax.set_ylabel('Temperature')


In [None]:
# Humidity: line chart of the readings against their timestamps
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['DateTime'], df['Humidity'], color='green')
ax.set_title('Humidity over Time')
ax.set_xlabel('DateTime')
ax.set_ylabel('Humidity')

In [None]:
# Power Consumption (Zone 2): scatter of each reading against its timestamp
zone_2_column = 'Zone 2  Power Consumption'
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['DateTime'], df[zone_2_column], label=zone_2_column, color='red')

# Axis labels, title and legend
ax.set_xlabel('DateTime')
ax.set_ylabel('Value')
ax.set_title('Time Series Data')
ax.legend()

plt.show()

In [None]:
# Power Consumption (Zone 1): line chart of the readings against their timestamps
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['DateTime'], df['Zone 1 Power Consumption'], color='red')
ax.set_title('Power Consumption (Zone 1) over Time')
ax.set_xlabel('DateTime')
ax.set_ylabel('Power Consumption')


In [None]:
from statsmodels.tsa.seasonal import STL

# Requires df['DateTime'] to already hold datetime values.
# Collapse the readings to one mean value per calendar day.
variable_of_interest = 'Temperature'
indexed_by_time = df.set_index('DateTime')
df_resampled = indexed_by_time.resample('D').mean()

# Fit an STL decomposition to the daily series (seasonal smoother length 13).
# The fitted result is kept in stl_result; this cell does not plot it.
daily_series = df_resampled[variable_of_interest]
stl_result = STL(daily_series, seasonal=13).fit()

# Draw the daily series itself
plt.plot(df_resampled.index, daily_series, label='Original', color='blue')
plt.title('Original Time Series (Daily)')
plt.xlabel('DateTime')
plt.ylabel(variable_of_interest)
plt.legend()

In [None]:
# Display the daily-mean DataFrame produced by the resampling step
df_resampled

In [None]:
# Decompose the series with statsmodels' seasonal_decompose (this is not STL)
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition of the original (un-resampled) df series for
# variable_of_interest, which must already be defined by an earlier cell.
# NOTE(review): period=13 counts rows of the un-resampled data, not days —
# confirm it matches the seasonality that is actually intended.
result = seasonal_decompose(df.set_index('DateTime')[variable_of_interest], model='additive', period=13)

# Plot the trend component against the original timestamps
plt.plot(df['DateTime'], result.trend, label='Trend', color='red')
plt.title('Trend Component')
plt.xlabel('DateTime')
plt.ylabel('Trend')
plt.legend()


In [None]:
# Display the object returned by seasonal_decompose
result

In [None]:
# Seasonal component of the decomposition, drawn on the current axes
ax = plt.gca()
ax.plot(df['DateTime'], result.seasonal, label='Seasonal', color='green')
ax.set_title('Seasonal Component')
ax.set_xlabel('DateTime')
ax.set_ylabel('Seasonal')
ax.legend()

In [None]:
# Residual component of the decomposition, drawn on the current axes
ax = plt.gca()
ax.plot(df['DateTime'], result.resid, label='Residual', color='purple')
ax.set_title('Residual Component')
ax.set_xlabel('DateTime')
ax.set_ylabel('Residual')
ax.legend()

# Model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# List the column labels available for feature/target selection
df.columns

In [None]:
# Display the DataFrame that the modelling cells draw from
df

In [None]:
# Predictors: weather/flow readings plus the Zone 1 and Zone 2 consumption
# columns. Target: the Zone 3 consumption column.
feature_columns = [
    'Temperature',
    'Humidity',
    'Wind Speed',
    'general diffuse flows',
    'Zone 1 Power Consumption',
    'Zone 2  Power Consumption',
    'diffuse flows',
]
target_column = 'Zone 3  Power Consumption'
X = df[feature_columns]
y = df[target_column]

In [None]:
# Split the data into training and testing sets: 20% of the rows are held out
# for testing and random_state=42 makes the split reproducible.
# NOTE(review): no shuffle argument is passed, so the library default applies;
# for time-ordered data confirm that a random (non-chronological) split is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both the training and the testing features.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# Initialize the Linear Regression model (this binds the shared name `model`,
# which the Random Forest cell later rebinds)
model = LinearRegression()
# Train the model on the scaled training data
model.fit(X_train_scaled, y_train)

In [None]:
# Make predictions on the scaled testing data; the input must be scaled with
# the same fitted scaler used for training
y_pred = model.predict(X_test_scaled)


In [None]:
# Evaluate the linear model on the held-out split.
# mean_squared_error returns the MSE; its square root (RMSE) is expressed in
# the target's own units. Both are reported under their correct names — the
# square-rooted value was previously printed with the "Mean Squared Error" label.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("Mean Squared Error (Linear Regression):", mse)
print("Root Mean Squared Error (Linear Regression):", rmse)
r2_R = r2_score(y_test, y_pred)
print(f'R-squared: {r2_R:.4f}')

# Random Forest Regressor

In [None]:
# Initialize the Random Forest Regressor with 100 trees and a fixed seed
# (this rebinds `model`, replacing the linear model)
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model on the unscaled training features
model.fit(X_train, y_train)

In [None]:
# Make predictions on the (unscaled) testing set, matching how the forest was trained
y_pred = model.predict(X_test)

In [None]:
# Evaluate the random forest on the held-out split.
# mean_squared_error returns the MSE; its square root (RMSE) is expressed in
# the target's own units. Both are reported under their correct names — the
# square-rooted value was previously printed with the "Mean Squared Error" label.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print("Mean Squared Error (Random Forest Regressor):", mse)
print("Root Mean Squared Error (Random Forest Regressor):", rmse)
r2_R = r2_score(y_test, y_pred)
print(f'R-squared: {r2_R:.4f}')