In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

In [None]:
# Load the electricity-bill data from '11kV bill_units.csv' into a DataFrame.
# Later cells read its Month, Year and Units_kWh columns, among others.
data_ebill = pd.read_csv('11kV bill_units.csv')
# Bare last expression: renders the frame as the cell output.
data_ebill

In [None]:
# Manually overwrite Units_kWh for the rows labelled 27 and 25 with hard-coded values.
# NOTE(review): the source of these two figures is not recorded in the notebook — confirm and cite it.
# NOTE(review): `.loc` addresses index labels, not positions; assigning to a label that does
# not exist would append a new row instead of raising — verify both labels are present.
data_ebill.loc[27,'Units_kWh']=358912
data_ebill.loc[25,'Units_kWh']=378448

In [None]:
# Keep only the rows that have a Month value.
# `.copy()` makes the filtered result an independent frame, so the column
# assignments in later cells (Year, Standardized_Date) write to it directly
# instead of triggering pandas' SettingWithCopyWarning on a filtered slice.
data_ebill = data_ebill[data_ebill.Month.notna()].copy()

In [None]:
# (rows, columns) remaining after rows without a Month were dropped.
data_ebill.shape

In [None]:
# List the column names of the bill data.
data_ebill.columns

In [None]:
# Check the dtype pandas assigned to each bill column.
data_ebill.dtypes

In [None]:
# Summary of the bill frame: non-null counts and dtypes per column.
data_ebill.info()

In [None]:
# Convert Year to text via int first, so that a float such as 2022.0 would
# become '2022' rather than '2022.0'. The string form is needed for the
# concatenation with Month in the next step.
data_ebill['Year'] = data_ebill['Year'].astype(int).astype(str)
data_ebill.head()

In [None]:
# Join the Month and Year text columns with '-' to get one date string per row.
data_ebill['Standardized_Date'] = data_ebill['Month']+'-'+data_ebill['Year']
data_ebill.head()

In [None]:
# Parse the combined string into timestamps, then roll each one forward to the last
# day of its month. MonthEnd(0) leaves a date that is already a month end unchanged.
# NOTE(review): no explicit `format=` is passed, so parsing relies on pandas inferring
# the layout of the Month/Year text — confirm it matches the month spelling in the CSV.
data_ebill['Standardized_Date'] = pd.to_datetime(data_ebill['Standardized_Date'])+ pd.offsets.MonthEnd(0)
data_ebill.head()

In [None]:
# Load 'final_data_in_ML.csv', using its 'Standardized_Date' column as the index
# and asking pandas to parse that index as dates.
data = pd.read_csv('final_data_in_ML.csv',index_col='Standardized_Date',parse_dates=True)
# Bare last expression: renders the frame as the cell output.
data

## Energy consumption across zones 

The 24-hour period is divided into three distinct zones, each with different energy rates. Zone 1 covers the time from 6 AM to 6 PM, Zone 2 spans from 6 PM to 10 PM, and Zone 3 from 10 PM to 6 AM. Zone 2 is identified as the peak period, while Zone 3 is considered the off-peak period.  
In terms of energy costs, the energy rate (tariff) in Zone 2 is 1.5 times that of Zone 1, while the rate in Zone 3 is 0.75 times that of Zone 1. This tiered pricing structure encourages energy conservation during peak hours and shifts usage towards off-peak times.  
The platform compares energy consumption across the zones and identifies opportunities for energy savings.

In [None]:
def assign_zone(hour):
    """Return the tariff-zone label for an hour of the day.

    Parameters
    ----------
    hour : number
        Hour of the day; it is compared against the boundaries 6, 18 and 22.

    Returns
    -------
    str
        '1' when 6 <= hour < 18, '2' when 18 <= hour < 22, and '3' for
        every other value (including hours before 6 and from 22 onwards).
    """
    # Guard clauses: each zone returns as soon as its window matches.
    if hour >= 6 and hour < 18:
        return '1'
    if hour >= 18 and hour < 22:
        return '2'
    # Anything outside both windows falls into zone 3.
    return '3'

In [None]:
# Extract the hour component from the STANDARDIZED_TIME column.
# NOTE(review): assumes STANDARDIZED_TIME holds values pd.to_datetime can parse — its format is not shown here.
data['Hour'] = pd.to_datetime(data['STANDARDIZED_TIME']).dt.hour

In [None]:
# Label every row with its tariff zone ('1', '2' or '3') based on its hour.
data['Zone'] = data['Hour'].apply(assign_zone)

In [None]:
# How each measurement is collapsed within one (zone, month) bucket:
# flows and production are totalled, the sump level is averaged, and the
# distinct remark categories seen in the bucket are kept as an array.
monthly_zone_aggregations = {
    'RAW WATER FLOW IN ML': 'sum',
    'CLEAR WATER SUMP LEVEL IN Meter': 'mean',
    'CLEAR WATER PUMPING FLOW ML': 'sum',
    'TREATED WATER PRODUCTION IN ML': 'sum',
    'remarks category': lambda x: x.unique(),
}

# Split by zone, then bucket the datetime index by calendar month.
# 'M' labels each bucket with the month-END date (despite the variable name).
# NOTE(review): pandas 2.2+ spells this alias 'ME' and warns on 'M'.
zone_groups = data.groupby(['Zone'])
monthly_by_zone = zone_groups.resample('M').agg(monthly_zone_aggregations)

# Turn the (Zone, Standardized_Date) index back into ordinary columns.
data_month_start = monthly_by_zone.reset_index()
data_month_start.head()

In [None]:
# Measurements that get one column per zone after the pivot.
pivot_value_columns = [
    'RAW WATER FLOW IN ML',
    'CLEAR WATER SUMP LEVEL IN Meter',
    'CLEAR WATER PUMPING FLOW ML',
    'TREATED WATER PRODUCTION IN ML',
    'remarks category',
]

# One row per month; columns become a (measurement, zone) MultiIndex.
data_month_start_pivot = data_month_start.pivot(
    index='Standardized_Date',
    columns=['Zone'],
    values=pivot_value_columns,
)
data_month_start_pivot

In [None]:
# Collapse the (measurement, zone) MultiIndex into plain tuples so that each
# column can be renamed individually.
data_month_start_pivot.columns = data_month_start_pivot.columns.to_flat_index()

# Build the tuple -> '<measurement>_Z<zone>' mapping for every measurement/zone pair,
# e.g. ('RAW WATER FLOW IN ML', '1') -> 'RAW WATER FLOW IN ML_Z1'.
measure_names = [
    'RAW WATER FLOW IN ML',
    'CLEAR WATER SUMP LEVEL IN Meter',
    'CLEAR WATER PUMPING FLOW ML',
    'TREATED WATER PRODUCTION IN ML',
    'remarks category',
]
zone_labels = ['1', '2', '3']
flat_column_names = {
    (measure, zone): f'{measure}_Z{zone}'
    for measure in measure_names
    for zone in zone_labels
}

data_month_start_pivot = data_month_start_pivot.rename(columns=flat_column_names)

## Merging dataframes

In [None]:
# Join the per-zone monthly water figures with the electricity bill on the month-end date.
# 'Standardized_Date' is the index of the pivoted frame and a column of data_ebill.
# `merge` defaults to an inner join, so months present on only one side are dropped.
data_month_ebill = data_month_start_pivot.merge(data_ebill,on='Standardized_Date')
# (rows, columns) of the merged result — a quick check on how many months matched.
data_month_ebill.shape

In [None]:
# Columns to keep from the merged frame, in display order: date keys first,
# then the per-zone water measurements, then the electricity-bill figures.
merged_columns = [
    'Standardized_Date', 'Month', 'Year',
    'RAW WATER FLOW IN ML_Z1',
    'RAW WATER FLOW IN ML_Z2',
    'RAW WATER FLOW IN ML_Z3',
    'CLEAR WATER SUMP LEVEL IN Meter_Z1',
    'CLEAR WATER SUMP LEVEL IN Meter_Z2',
    'CLEAR WATER SUMP LEVEL IN Meter_Z3',
    'CLEAR WATER PUMPING FLOW ML_Z1',
    'CLEAR WATER PUMPING FLOW ML_Z2',
    'CLEAR WATER PUMPING FLOW ML_Z3',
    'TREATED WATER PRODUCTION IN ML_Z1',
    'TREATED WATER PRODUCTION IN ML_Z2',
    'TREATED WATER PRODUCTION IN ML_Z3',
    'remarks category_Z1',
    'remarks category_Z2',
    'remarks category_Z3',
    'Avg_Consumption (kWh)', 'Units_kWh',
    'Z1 (kWh)', 'Z2 (kWh)', 'Z3 (kWh)', 'Energy Charge',
]

# `.copy()` detaches the selection from the merged frame, so the column
# assignments in later cells ('Energy Charge', 'Year', 'Month', ...) write to
# this frame directly instead of raising SettingWithCopyWarning.
data_month_ebill = data_month_ebill[merged_columns].copy()
data_month_ebill.shape

In [None]:
# Check the dtypes of the merged frame before casting 'Energy Charge'.
data_month_ebill.dtypes

In [None]:
# Cast the energy charge to float so it can be used in the arithmetic further down.
# NOTE(review): astype(float) raises if any value is not plain numeric text
# (e.g. thousands separators or currency symbols) — confirm the CSV is clean.
data_month_ebill['Energy Charge'] = data_month_ebill['Energy Charge'].astype(float)

In [None]:
# data_month_ebill['charge_per_unit'] = data_month_ebill['Energy Charge']/ data_month_ebill['Units_kWh']

In [None]:
## Specific energy consumption: amount of units taken to produce 1ML of treated water
# data_month_ebill['specific_energy_consumption'] = (data_month_ebill['Units_kWh']/data_month_ebill['TREATED WATER PRODUCTION IN ML']).round(2)
## unit_cost: the cost for producing 1ML of treated water
# data_month_ebill['unit_cost'] = (data_month_ebill['Energy Charge']/data_month_ebill['TREATED WATER PRODUCTION IN ML']).round(2)
# data_month_ebill

In [None]:
# Re-derive the calendar fields from the parsed date, replacing the Month and
# Year columns carried over from the bill CSV: Year and Month become the
# datetime accessor's year and month numbers, Month-Year an 'MM-YYYY' label.
bill_dates = data_month_ebill['Standardized_Date'].dt
data_month_ebill['Year'] = bill_dates.year
data_month_ebill['Month'] = bill_dates.month
data_month_ebill['Month-Year'] = bill_dates.strftime('%m-%Y')

In [None]:
# Preview the frame after Year, Month and Month-Year were recomputed from the date.
data_month_ebill.head()

In [None]:
# Re-check dtypes now that Year and Month are derived from the datetime column.
data_month_ebill.dtypes

In [None]:
# data_month_ebill1 = data_month_ebill.fillna(data_month_ebill.rolling(3, min_periods=1, center=True).mean())

In [None]:
# data_month_ebill == data_month_ebill1

In [None]:
# 2x2 grid: one row per year, Zone 1 energy on the left and Zone 1 clear-water
# pumping flow on the right, each plotted against the month number.
plt.figure(figsize=(15,12))

# (year, column to plot, legend label, line colour) for panels 1..4, row-major.
panels = [
    (2022, 'Z1 (kWh)', 'Consumption', 'g'),
    (2022, 'CLEAR WATER PUMPING FLOW ML_Z1', 'Clear water pumping flow', 'r'),
    (2023, 'Z1 (kWh)', 'Consumption', 'b'),
    (2023, 'CLEAR WATER PUMPING FLOW ML_Z1', 'Clear water pumping flow', 'y'),
]

for position, (year, column, label, color) in enumerate(panels, start=1):
    # Rows belonging to the panel's year.
    year_rows = data_month_ebill[data_month_ebill['Year']==year]

    plt.subplot(2,2,position)
    plt.plot(year_rows['Month'], year_rows[column], label=label, marker='o',color=color)
    plt.xticks(rotation=45)
    plt.title(str(year))
    plt.legend()

plt.show()

In [None]:
# x = data_month_ebill[data_month_ebill['Year']==2022]['Month']
# y1 = (data_month_ebill[data_month_ebill['Year']==2022]['Units_kWh'])/100
# y2 = data_month_ebill[data_month_ebill['Year']==2022]['CLEAR WATER PUMPING FLOW ML']

# fig = px.bar(data_month_ebill, x=x, y=[y1, y2], barmode='group') #title='Raw Water Pumped during various zones in ML')
# fig.show()

In [None]:
# from sklearn import preprocessing
# scaler = preprocessing.MinMaxScaler()

In [None]:
# data_month_ebill[['Clear Water Pumping scale', 'Consumption scale']] = scaler.fit_transform(data_month_ebill[['CLEAR WATER PUMPING FLOW ML', 'Units_kWh']])

In [None]:
# data_month_ebill.to_csv('data_month_ebill_cleaned.csv',index=False)

In [None]:
# data_ebill_2022 = data_month_ebill[data_month_ebill['Year']==2022]
# # y1 = data_month_ebill[data_month_ebill['Year']==2022]['Consumption scale']
# # y2 = data_month_ebill[data_month_ebill['Year']==2022]['Clear Water Pumping scale']
# fig = px.bar(data_ebill_2022, x='Month', y=['Consumption scale', 'Clear Water Pumping scale'], barmode='group') #title='Raw Water Pumped during various zones in ML')

# fig.show()

In [None]:
# data_ebill_2022 = data_month_ebill[data_month_ebill['Year']==2023]
# # y1 = data_month_ebill[data_month_ebill['Year']==2022]['Consumption scale']
# # y2 = data_month_ebill[data_month_ebill['Year']==2022]['Clear Water Pumping scale']
# fig = px.bar(data_ebill_2022, x='Month', y=['Consumption scale', 'Clear Water Pumping scale'], barmode='group') #title='Raw Water Pumped during various zones in ML')

# fig.show()

In [None]:
# data_ebill_2022_2023 = data_month_ebill[(data_month_ebill['Year']==2022)|(data_month_ebill['Year']==2023)]
# fig = px.bar(data_ebill_2022_2023, x='Month-Year', y=['Consumption scale', 'Clear Water Pumping scale'], barmode='group') #title='Raw Water Pumped during various zones in ML')

# fig.show()

In [None]:
# data_ebill_2022_2023 = data_month_ebill[(data_month_ebill['Year']==2022)|(data_month_ebill['Year']==2023)]
# fig = px.bar(data_ebill_2022_2023, x='Month-Year', y=['Consumption scale', 'Clear Water Pumping scale'], barmode='group') #title='Raw Water Pumped during various zones in ML')

# fig.show()

In [None]:
# sorted_data = data_month_ebill.sort_values(by='CLEAR WATER PUMPING FLOW ML')

# fig = px.line(sorted_data, x="CLEAR WATER PUMPING FLOW ML", y="Units_kWh", hover_data=['Standardized_Date','RAW WATER FLOW IN ML'])
# fig.show()

In [None]:
# Column dtypes of the merged monthly frame.
data_month_ebill.dtypes

In [None]:
# March 2022: (rows, columns) of the records whose remark is anything other than 'No remarks'.
data[(data.index>='2022-03-01')&(data.index<'2022-04-01')&(data['remarks category']!='No remarks')].shape

In [None]:
# April 2022: show the records whose remark is anything other than 'No remarks'
# (the rows themselves are displayed here; the trailing .shape is disabled).
data[(data.index>='2022-04-01')&(data.index<'2022-05-01')&(data['remarks category']!='No remarks')]#.shape

In [None]:
# October 2023: (rows, columns) of the records whose remark is anything other than 'No remarks'.
data[(data.index>='2023-10-01')&(data.index<'2023-11-01')&(data['remarks category']!='No remarks')].shape

In [None]:
# December 2023: (rows, columns) of the records whose remark is anything other than 'No remarks'.
data[(data.index>='2023-12-01')&(data.index<'2024-01-01')&(data['remarks category']!='No remarks')].shape

In [None]:
# Overlay billed energy and Zone 1 clear-water pumping flow for every month before 2023.
before_2023 = data_month_ebill[data_month_ebill['Standardized_Date']<'2023-01-01']
month_ends = before_2023['Standardized_Date']

plt.figure(figsize=(10,8))
# Units_kWh is divided by 100 so both series fit on one shared y-axis.
plt.plot(month_ends, (before_2023['Units_kWh'])/100, label='consumption', marker='o',color='g')
plt.plot(month_ends, before_2023['CLEAR WATER PUMPING FLOW ML_Z1'], label='clear water', marker='o',color='r')
plt.legend()
plt.xticks(rotation=45)

In [None]:
# plt.figure(figsize=(10,8))
# plt.plot(data_month_ebill[(data_month_ebill['Standardized_Date']>='2023-01-01')&(data_month_ebill['Standardized_Date']<'2024-01-01')]['Standardized_Date'], (data_month_ebill[(data_month_ebill['Standardized_Date']>='2023-01-01')&(data_month_ebill['Standardized_Date']<'2024-01-01')]['Units_kWh'])/100, label='consumption', marker='o',color='b')
# plt.plot(data_month_ebill[(data_month_ebill['Standardized_Date']>='2023-01-01')&(data_month_ebill['Standardized_Date']<'2024-01-01')]['Standardized_Date'], data_month_ebill[(data_month_ebill['Standardized_Date']>='2023-01-01')&(data_month_ebill['Standardized_Date']<'2024-01-01')]['CLEAR WATER PUMPING FLOW ML'], label='clear water', marker='o',color='y')
# plt.legend()
# plt.xticks(rotation=45)

In [None]:
# plt.plot(data_month_ebill[data_month_ebill['Standardized_Date']<'2023-01-01']['Standardized_Date'], data_month_ebill[data_month_ebill['Standardized_Date']<'2023-01-01']['CLEAR WATER PUMPING FLOW ML'], label='CLEAR WATER PUMPING FLOW ML', marker='o',color='g')
# plt.ylabel('clear water pumping flow in ML')
# plt.xticks(rotation=45)
# plt.show()

## Calculate columns

In [None]:
# Energy charge divided by the units billed, i.e. the charge per kWh for each month.
data_month_ebill['charge_per_unit'] = data_month_ebill['Energy Charge']/ data_month_ebill['Units_kWh']

In [None]:
# Total treated water produced in the month, summed over the three tariff zones.
# The sum is computed once, up front: dividing first and then adding the Z2/Z3
# volumes (as `a / z1 + z2 + z3` does under Python operator precedence) would
# mix kWh-per-ML with ML and give a meaningless figure.
treated_water_total_ml = (
    data_month_ebill['TREATED WATER PRODUCTION IN ML_Z1']
    + data_month_ebill['TREATED WATER PRODUCTION IN ML_Z2']
    + data_month_ebill['TREATED WATER PRODUCTION IN ML_Z3']
)

# specific_energy_consumption: units (kWh) taken to produce 1 ML of treated water.
data_month_ebill['specific_energy_consumption'] = data_month_ebill['Units_kWh'] / treated_water_total_ml
# unit_cost: the energy charge for producing 1 ML of treated water.
data_month_ebill['unit_cost'] = data_month_ebill['Energy Charge'] / treated_water_total_ml
data_month_ebill

## Completed

In [None]:
# Plain-text notes kept as comments so this code cell does not raise a SyntaxError on Run All.
# Specific energy consumption = kWh / treated water
# Unit cost = cost / treated water