In [None]:
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#Loading dataset

In [None]:
df=pd.read_csv("Cleaned JICA Pattuvam Data.csv")

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
df.shape

In [None]:
df.isnull().sum()

In [None]:
# Parse the 'MONTH-YEAR' text (full month name, a hyphen, four-digit year, per the
# '%B-%Y' format) into pandas timestamps so later cells can group and compare by month.
df['MONTH-YEAR'] = pd.to_datetime(df['MONTH-YEAR'], format='%B-%Y')
# List the distinct months present after parsing.
df['MONTH-YEAR'].unique()

In [None]:
min_date = df['DATE'].min()
max_date = df['DATE'].max()

print("Minimum Date:", min_date)
print("Maximum Date:", max_date)


In [None]:
df.YEAR.unique()

In [None]:
df['RAW WATER FLOW IN m3/h'].value_counts()


In [None]:
values = sorted(df['RAW WATER FLOW IN m3/h'].unique(),reverse=True)
values

In [None]:
df[df['RAW WATER FLOW IN m3/h']==62411.58]

In [None]:
df[df['DATE']=='2023-11-11']

In [None]:
df['CLEAR WATER SUMP LEVEL IN M'].value_counts()

In [None]:
# Coerce the pumping-flow column to numbers; entries that cannot be parsed become NaN.
df['CLEAR WATER PUMPING FLOW m3/h'] = pd.to_numeric(df['CLEAR WATER PUMPING FLOW m3/h'], errors='coerce')
# Drop NaN before sorting: NaN compares False against every value, so sorted()
# on a sequence containing it can return the remaining values out of order.
sorted(df['CLEAR WATER PUMPING FLOW m3/h'].dropna().unique())

In [None]:
df['TREATED WATER PRODUCTION IN m3/h'].value_counts()

In [None]:
sorted(df['TREATED WATER PRODUCTION IN m3/h'].unique(),reverse=True)

In [None]:
df[df['TREATED WATER PRODUCTION IN m3/h']==190912.1]

In [None]:
df[df["DATE"]=='2023-03-20']

In [None]:
df['REMARKS'].value_counts()

# RAW WATER FLOW IN m3/h

# VOLUME OF RAW WATER FLOW IN EACH YEAR

In [None]:
# Make the raw-water readings numeric (unparseable entries turn into NaN), then
# total them per YEAR and rank the years from the largest total to the smallest.
df['RAW WATER FLOW IN m3/h'] = pd.to_numeric(df['RAW WATER FLOW IN m3/h'], errors='coerce')
yearly_raw_water_flow = (
    df.groupby('YEAR')['RAW WATER FLOW IN m3/h']
    .sum()
    .reset_index(name='Volume_of_raw_water_flow')
    .sort_values(by='Volume_of_raw_water_flow', ascending=False)
)
yearly_raw_water_flow

In [None]:
# Bar chart of the yearly raw-water totals: one bar and one tick per year.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(
    yearly_raw_water_flow['YEAR'],
    yearly_raw_water_flow['Volume_of_raw_water_flow'],
    color='skyblue',
)
ax.set_xlabel('Year')
ax.set_ylabel('Volume of Raw Water Flow')
ax.set_title('Volume of Raw Water Flow for Each Year')
ax.set_xticks(yearly_raw_water_flow['YEAR'])
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()


# TOTAL VOLUME OF RAW WATER FLOW IN EACH MONTH

In [None]:
# df['RAW WATER FLOW IN m3/h'] = pd.to_numeric(df['RAW WATER FLOW IN m3/h'], errors='coerce')
month_yearly_raw_water_flow = df.groupby('MONTH-YEAR')['RAW WATER FLOW IN m3/h'].agg('sum').reset_index(name='Volume_of_raw_water_flow')
month_yearly_raw_water_flow

In [None]:
# Line chart of the monthly raw-water totals, with every point labelled by its value.
plt.figure(figsize=(14, 6))
plt.plot(month_yearly_raw_water_flow['MONTH-YEAR'],month_yearly_raw_water_flow['Volume_of_raw_water_flow'].astype(float), marker='o', color='blue')
plt.xlabel('Month-Year')
plt.ylabel('Volume of Raw Water Flow')
plt.title('Volume of Raw Water Flow for Each Month-Year')
plt.xticks(rotation=45)
plt.grid(True, linestyle='--', alpha=0.7)
# Walk the two columns together by position rather than looking rows up by the
# index label `i`; this keeps labels on the right points even if the frame's
# index is not 0..n-1 (for example after a sort).
for month, volume in zip(month_yearly_raw_water_flow['MONTH-YEAR'], month_yearly_raw_water_flow['Volume_of_raw_water_flow']):
    plt.text(month, volume, str(volume), ha='center', va='bottom')

plt.tight_layout()
plt.show()



In [None]:
sorted(df[df['MONTH-YEAR'] == '2024-03-01']['RAW WATER FLOW IN m3/h'].unique())


In [None]:
sorted(df[df['MONTH-YEAR'] == '2022-07-01']['RAW WATER FLOW IN m3/h'].unique())

# TOTAL VOLUME OF RAW WATER FLOW ON DAILY BASIS

In [None]:
df['RAW WATER FLOW IN m3/h'] = pd.to_numeric(df['RAW WATER FLOW IN m3/h'], errors='coerce')
daily_raw_water_flow = df.groupby('DATE')['RAW WATER FLOW IN m3/h'].agg('sum').reset_index(name='Volume_of_raw_water_flow').sort_values(by="Volume_of_raw_water_flow",ascending=False)
daily_raw_water_flow

# MAXIMUM RAW WATER FLOW RECORDED IN EACH HOUR

In [None]:
def convert_time(time_str):
    """Normalise a raw TIME cell to a zero-padded ``HH:MM:SS`` string.

    Parameters
    ----------
    time_str : str or any
        Raw cell value. Two shapes are rewritten:

        * a single token of one or two characters ("1", "13"), treated as a
          bare hour -> ``"01:00:00"``, ``"13:00:00"``;
        * two whitespace-separated tokens ("1 30"), treated as hour and
          minute -> ``"01:30:00"``.

    Returns
    -------
    str or any
        The normalised string, or ``time_str`` unchanged when it matches
        neither shape (including values that are already ``HH:MM:SS``) or is
        not a string at all.
    """
    # Missing cells reach .apply() as NaN (a float) or None, which have no
    # .split(); hand them back untouched instead of raising.
    if not isinstance(time_str, str):
        return time_str

    parts = time_str.split()
    if len(parts) == 1 and len(parts[0]) in (1, 2):
        return f"{parts[0].rjust(2, '0')}:00:00"
    if len(parts) == 2:
        # Pad both fields so these values sort and group consistently with the
        # bare-hour form above ("1 30" -> "01:30:00", not "1:30:00").
        return f"{parts[0].rjust(2, '0')}:{parts[1].rjust(2, '0')}:00"
    return time_str
df['TIME']= df['TIME'].apply(convert_time)

In [None]:
df['RAW WATER FLOW IN m3/h'] = pd.to_numeric(df['RAW WATER FLOW IN m3/h'], errors='coerce')

hourly_raw_water_flow = df.groupby('TIME')['RAW WATER FLOW IN m3/h'].agg('max').reset_index(name='Volume_of_raw_water_flow').sort_values(by="TIME",ascending=False)
hourly_raw_water_flow

In [None]:
# Bar chart of the highest raw-water flow reading seen at each TIME value.
# hourly_raw_water_flow is sorted by TIME descending; re-sort ascending so the
# categorical x-axis reads in clock order from left to right.
hourly_in_clock_order = hourly_raw_water_flow.sort_values(by='TIME')
plt.figure(figsize=(10, 6))
plt.bar(hourly_in_clock_order['TIME'], hourly_in_clock_order['Volume_of_raw_water_flow'])
# The plotted values are per-TIME maxima of a flow rate in m3/h, not volumes,
# so the title and y-label describe them as such.
plt.title('Maximum Raw Water Flow by Hour')
plt.xlabel('Time')
plt.ylabel('Maximum Raw Water Flow (m3/h)')
plt.xticks(rotation=45)
plt.grid(axis='y')
plt.tight_layout()
plt.show()


In [None]:
df['RAW WATER FLOW IN m3/h'] = pd.to_numeric(df['RAW WATER FLOW IN m3/h'], errors='coerce')
hourly_raw_water_flow = df.groupby(['DATE','TIME'])['RAW WATER FLOW IN m3/h'].agg('max').reset_index(name='Volume_of_raw_water_flow').sort_values(by="Volume_of_raw_water_flow",ascending=False)
hourly_raw_water_flow

# COMPARING RAW WATER FLOW IN YEAR 2022 VS 2023

In [None]:
# Split the data by calendar year for the 2022-vs-2023 comparison.
df2022 = df[df['YEAR'] == 2022]
df2023 = df[df['YEAR'] == 2023]
# 2022 rows whose REMARKS value is anything other than the 'No Remarks' placeholder.
df2022_filtered = df2022[df2022['REMARKS'] != 'No Remarks']

In [None]:
monthly_raw_water_flow_2022 = df2022.groupby('MONTH-YEAR')['RAW WATER FLOW IN m3/h'].agg('sum').reset_index(name='Volume_of_raw_water_flow')
monthly_raw_water_flow_2022

In [None]:
monthly_raw_water_flow_2023 = df2023.groupby('MONTH-YEAR')['RAW WATER FLOW IN m3/h'].agg('sum').reset_index(name='Volume_of_raw_water_flow')
monthly_raw_water_flow_2023

In [None]:
# Draw the monthly raw-water totals of 2022 (red) and 2023 (green) on one set of axes.
fig, ax = plt.subplots(figsize=(14, 6))
for monthly_totals, colour, year_label in (
    (monthly_raw_water_flow_2022, 'red', '2022'),
    (monthly_raw_water_flow_2023, 'green', '2023'),
):
    ax.plot(
        monthly_totals['MONTH-YEAR'],
        monthly_totals['Volume_of_raw_water_flow'].astype(float),
        marker='o',
        color=colour,
        label=year_label,
    )

ax.set_xlabel('Month-Year')
ax.set_ylabel('Volume of Raw Water Flow')
ax.set_title('Volume of Raw Water Flow for Each Month-Year')
plt.xticks(rotation=45)
ax.grid(True, linestyle='--', alpha=0.7)
ax.legend()
plt.tight_layout()
plt.show()


In [None]:
# Raw-water flow for July 2022, one row per (DATE, TIME) with the largest
# REMARKS / flow value in each group, ordered by DATE.
df_raw_water_min = df[df['MONTH-YEAR'] == '2022-07-01']
df_raw_water_min = df_raw_water_min.groupby(['DATE', 'TIME'])[['REMARKS', 'RAW WATER FLOW IN m3/h']].max().reset_index().sort_values(by='DATE')
plt.figure(figsize=(12, 6))

# Plot raw water flow against DATE
plt.plot(df_raw_water_min['DATE'], df_raw_water_min['RAW WATER FLOW IN m3/h'], marker='o', color='blue', label='Raw Water Flow')

# Add REMARKS as text. Each remark is anchored at the same (DATE, flow) point that
# was plotted; anchoring on TIME would place the text at x-values that are not on
# this axis. The three columns are walked together by position, so a row's remark
# always lands on that row's point regardless of the frame's index order.
for date, flow, remark in zip(df_raw_water_min['DATE'], df_raw_water_min['RAW WATER FLOW IN m3/h'], df_raw_water_min['REMARKS']):
    plt.annotate(remark, (date, flow))

# Customize plot (the x-axis carries DATE values, so label it accordingly)
plt.xlabel('Date')
plt.ylabel('Raw Water Flow (m3/h)')
plt.title('Raw Water Flow for each date with Time')
plt.legend()
plt.grid(True)
plt.xticks(rotation=100)
plt.tight_layout()
plt.show()


In [None]:
df_remarks = df_raw_water_min[df_raw_water_min['REMARKS'] != 'No Remarks']
df_hours = df_remarks.groupby(['DATE', 'REMARKS']).size().reset_index(name='hours')
df_hours


In [None]:
# Raw-water flow for May 2023, one row per (DATE, TIME) with the largest
# REMARKS / flow value in each group, ordered by DATE.
df_raw_water_max = df[df['MONTH-YEAR'] == '2023-05-01']
df_raw_water_max = df_raw_water_max.groupby(['DATE', 'TIME'])[['REMARKS', 'RAW WATER FLOW IN m3/h']].max().reset_index().sort_values(by='DATE')
plt.figure(figsize=(12, 6))

# Plot raw water flow against DATE
plt.plot(df_raw_water_max['DATE'], df_raw_water_max['RAW WATER FLOW IN m3/h'], marker='o', color='blue', label='Raw Water Flow')

# Add REMARKS as text. Each remark is anchored at the same (DATE, flow) point that
# was plotted; anchoring on TIME would place the text at x-values that are not on
# this axis. The three columns are walked together by position, so a row's remark
# always lands on that row's point regardless of the frame's index order.
for date, flow, remark in zip(df_raw_water_max['DATE'], df_raw_water_max['RAW WATER FLOW IN m3/h'], df_raw_water_max['REMARKS']):
    plt.annotate(remark, (date, flow))

# Customize plot (the x-axis carries DATE values, so label it accordingly)
plt.xlabel('Date')
plt.ylabel('Raw Water Flow (m3/h)')
plt.title('Raw Water Flow for each date with Time')
plt.legend()
plt.grid(True)
plt.xticks(rotation=100)
plt.tight_layout()
plt.show()


In [None]:
df_remarks = df_raw_water_max[df_raw_water_max['REMARKS'] != 'No Remarks']
df_hours = df_remarks.groupby(['DATE', 'REMARKS']).size().reset_index(name='hours')
df_hours


In [None]:
# Hourly raw-water profile for each day from 8 to 13 May 2023, one line per day.
# Each entry: (DATE value to select, line colour, legend label).
may_days = [
    ('2023-05-08', 'c', 'May 08 2023'),
    ('2023-05-09', 'g', 'May 09 2023'),
    ('2023-05-10', 'y', 'May 10 2023'),
    ('2023-05-11', 'm', 'May 11 2023'),
    ('2023-05-12', 'k', 'May 12 2023'),
    ('2023-05-13', 'b', 'May 13 2023'),
]

# For every day: keep that day's rows and take the largest flow per TIME.
may_profiles = []
for day, _, _ in may_days:
    day_rows = df_raw_water_max[df_raw_water_max['DATE'] == day]
    may_profiles.append(day_rows.groupby('TIME')['RAW WATER FLOW IN m3/h'].max().reset_index())

# Expose each day's frame under its own name as well.
df_max_day1, df_max_day2, df_max_day3, df_max_day4, df_max_day5, df_max_day6 = may_profiles

plt.figure(figsize=(12, 6))

# Plot raw water flow, one line per day
for profile, (_, colour, legend_label) in zip(may_profiles, may_days):
    plt.plot(profile['TIME'], profile['RAW WATER FLOW IN m3/h'], marker='o', color=colour, label=legend_label)

# Add labels and title
plt.xlabel('Time')
plt.ylabel('Raw Water Flow (m3/h)')
plt.title('Raw Water Flow on May, 2023')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# 2022 

In [None]:
# Considering each month of 2022

df2022_1 = df[df['MONTH-YEAR'] == '2022-01-01']
df2022_1_NR=df2022_1[df2022_1['REMARKS']!= 'No Remarks']
df2022_1_NR=df2022_1_NR.groupby(['DATE','TIME'])['REMARKS'].max().reset_index()
df2022_1_NR.DATE.unique()

In [None]:
df2022_1=df2022_1.groupby(['DATE','TIME'])['RAW WATER FLOW IN m3/h'].max().reset_index().sort_values(by='RAW WATER FLOW IN m3/h',ascending=False)
df2022_1

In [None]:
df2022_1=df2022_1.groupby(['DATE','TIME'])['RAW WATER FLOW IN m3/h'].max().reset_index().sort_values(by='TIME',ascending=True)
df2022_1_max=df2022_1[df2022_1['DATE']=='2022-01-03']
plt.figure(figsize=(12, 6))

# Plotting the maximum raw water flow for each hour
plt.plot(df2022_1_max['TIME'], df2022_1_max['RAW WATER FLOW IN m3/h'], marker='o')
plt.xlabel('Time')
plt.ylabel('Maximum Raw Water Flow (m3/h)')
plt.title('Maximum Raw Water Flow in January 2022(JAN 3)')
plt.grid(True)
plt.xticks(rotation=45)
plt.show()

# COMPARING MAXIMUM RAW WATER FLOW IN 2 MONTHS

In [None]:
df2022_2 = df[df['MONTH-YEAR'] == '2022-02-01']
df2022_2_NR=df2022_2[df2022_2['REMARKS']!= 'No Remarks']
df2022_2_NR=df2022_2_NR.groupby(['DATE','TIME'])['REMARKS'].max().reset_index()
df2022_2_NR.DATE.unique()

In [None]:

df2022_2=df2022_2.groupby(['DATE','TIME'])['RAW WATER FLOW IN m3/h'].max().reset_index().sort_values(by='TIME',ascending=True)
# df2022_2
df2022_2_max=df2022_2[df2022_2['DATE']=='2022-02-28']
df2022_2_max
# df2022_1_NR=df2022_1[df2022_1['REMARKS']!= 'No Remarks']
# df2022_1_NR.DATE.unique()

In [None]:
plt.figure(figsize=(12, 6))

# Plotting the maximum raw water flow for each hour in January 2022
# plt.plot(df2022_1_max['TIME'], df2022_1_max['RAW WATER FLOW IN m3/h'], label='January 2022', marker='o')
plt.plot(df2022_1_max['TIME'], df2022_1_max['RAW WATER FLOW IN m3/h'], label='January 2022',marker='o')
# Plotting the maximum raw water flow for each hour in February 2022label='January 2022',
plt.plot(df2022_2_max['TIME'], df2022_2_max['RAW WATER FLOW IN m3/h'], label='February 2022', marker='o')

plt.xlabel('Time')
plt.ylabel('Maximum Raw Water Flow (m3/h)')
plt.title('Maximum Raw Water Flow in January and February 2022')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.show()

# COMPARING RAW WATER FLOW IN A WEEK

In [None]:
df2022_day1 = df2022[df2022['DATE'] == '2022-01-02']

df2022_day1[df2022_day1['REMARKS']!= 'No Remarks']
df2022_day2 = df2022[df2022['DATE'] == '2022-01-03']
df2022_day2[df2022_day2['REMARKS']!= 'No Remarks']
# No Remarks is present in these days

In [None]:
# Hourly raw-water profile for each day of 2-8 January 2022.
week_2022_days = [
    '2022-01-02', '2022-01-03', '2022-01-04', '2022-01-05',
    '2022-01-06', '2022-01-07', '2022-01-08',
]

# For every day: keep that day's rows and take the largest flow per (DATE, TIME).
week_2022_profiles = []
for day in week_2022_days:
    day_rows = df2022[df2022['DATE'] == day]
    week_2022_profiles.append(
        day_rows.groupby(['DATE','TIME'])['RAW WATER FLOW IN m3/h'].max().reset_index()
    )

# Expose each day's frame under its own name; later cells read some of these.
(df2022_day1, df2022_day2, df2022_day3, df2022_day4,
 df2022_day5, df2022_day6, df2022_day7) = week_2022_profiles

# Open exactly one figure so that no blank figure is rendered next to the chart.
plt.figure(figsize=(12, 6))

# 2 January (df2022_day1) is computed above but intentionally left off the chart.
week_2022_lines = [
    (df2022_day2, 'January 3, 2022', 'y'),
    (df2022_day3, 'January 4, 2022', 'm'),
    (df2022_day4, 'January 5, 2022', 'g'),
    (df2022_day5, 'January 6, 2022', 'k'),
    (df2022_day6, 'January 7, 2022', 'r'),
    (df2022_day7, 'January 8, 2022', 'b'),
]
for profile, legend_label, colour in week_2022_lines:
    plt.plot(profile['TIME'], profile['RAW WATER FLOW IN m3/h'], label=legend_label, marker='o', color=colour)

plt.xlabel('Time')
plt.ylabel('Raw Water Flow (m3/h)')
plt.title('Raw Water Flow on January 3-8, 2022')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.show()


In [None]:
# Hourly raw-water profile for each day of 2-8 January 2023.
week_2023_days = [
    '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05',
    '2023-01-06', '2023-01-07', '2023-01-08',
]

# For every day: keep that day's rows and take the largest flow per (DATE, TIME).
week_2023_profiles = []
for day in week_2023_days:
    day_rows = df2023[df2023['DATE'] == day]
    week_2023_profiles.append(
        day_rows.groupby(['DATE','TIME'])['RAW WATER FLOW IN m3/h'].max().reset_index()
    )

# Expose each day's frame under its own name as well.
(df2023_day1, df2023_day2, df2023_day3, df2023_day4,
 df2023_day5, df2023_day6, df2023_day7) = week_2023_profiles

# Open exactly one figure so that no blank figure is rendered next to the chart.
plt.figure(figsize=(12, 6))

# 4 January (df2023_day3) is computed above but intentionally left off the chart.
week_2023_lines = [
    (df2023_day1, 'January 2, 2023', 'c'),
    (df2023_day2, 'January 3, 2023', 'y'),
    (df2023_day4, 'January 5, 2023', 'g'),
    (df2023_day5, 'January 6, 2023', 'k'),
    (df2023_day6, 'January 7, 2023', 'r'),
    (df2023_day7, 'January 8, 2023', 'b'),
]
for profile, legend_label, colour in week_2023_lines:
    plt.plot(profile['TIME'], profile['RAW WATER FLOW IN m3/h'], label=legend_label, marker='o', color=colour)

plt.xlabel('Time')
plt.ylabel('Raw Water Flow (m3/h)')
plt.title('Raw Water Flow on January 2-8, 2023')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.show()

# RAW WATER FLOW @ 1AM

In [None]:
# Raw-water flow at TIME '01:00:00' on each DATE (the largest value if a date has several rows).
df_time_1 = df[df['TIME']=='01:00:00']
df_time_1 =df_time_1.groupby('DATE')['RAW WATER FLOW IN m3/h'].max().reset_index()

plt.figure(figsize=(14, 6))
plt.plot(df_time_1['DATE'], df_time_1['RAW WATER FLOW IN m3/h'], label='1 AM', marker='o', color='c')
# Give the chart axis labels, a title and the legend that `label=` was set up for,
# and end with plt.show() so the cell displays the figure rather than a Line2D repr.
plt.xlabel('Date')
plt.ylabel('Raw Water Flow (m3/h)')
plt.title('Raw Water Flow at 1 AM')
plt.legend()
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# CLEAR WATER SUMP LEVEL IN M

In [None]:
df['CLEAR WATER SUMP LEVEL IN M'].value_counts()

In [None]:
# The sump-level column contains 'NO DATA' markers, so pandas has presumably loaded
# it as text (TODO confirm dtype). A max over text is lexicographic ('9.5' > '10.2'),
# so convert a private copy to numbers before aggregating; anything unparseable
# becomes NaN, which max() skips. `df` itself is left untouched.
df1 = df[df['CLEAR WATER SUMP LEVEL IN M'] != 'NO DATA'].copy()
df1['CLEAR WATER SUMP LEVEL IN M'] = pd.to_numeric(df1['CLEAR WATER SUMP LEVEL IN M'], errors='coerce')
# Highest sump level per YEAR, ranked from highest to lowest.
Yearwise_max_CLEAR_WATER_SUMP_LEVEL= df1.groupby('YEAR')['CLEAR WATER SUMP LEVEL IN M'].agg('max').reset_index(name='CLEAR_WATER_SUMP_LEVEL_IN_M').sort_values(by="CLEAR_WATER_SUMP_LEVEL_IN_M",ascending=False)
Yearwise_max_CLEAR_WATER_SUMP_LEVEL

In [None]:
# The sump-level column contains 'NO DATA' markers, so pandas has presumably loaded
# it as text (TODO confirm dtype). A max over text is lexicographic ('9.5' > '10.2'),
# so convert a private copy to numbers before aggregating; anything unparseable
# becomes NaN, which max() skips. `df` itself is left untouched.
df1 = df[df['CLEAR WATER SUMP LEVEL IN M'] != 'NO DATA'].copy()
df1['CLEAR WATER SUMP LEVEL IN M'] = pd.to_numeric(df1['CLEAR WATER SUMP LEVEL IN M'], errors='coerce')
# Highest sump level per TIME value, listed in ascending TIME order.
hourly_CLEAR_WATER_SUMP_LEVEL= df1.groupby('TIME')['CLEAR WATER SUMP LEVEL IN M'].agg('max').reset_index(name='CLEAR_WATER_SUMP_LEVEL_IN_M').sort_values(by="TIME",ascending=True)
hourly_CLEAR_WATER_SUMP_LEVEL

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(hourly_CLEAR_WATER_SUMP_LEVEL['TIME'], hourly_CLEAR_WATER_SUMP_LEVEL['CLEAR_WATER_SUMP_LEVEL_IN_M'], marker='o')
plt.title('Hourly Clear Water Sump Level')
plt.xlabel('Time')
plt.ylabel('Clear Water Sump Level (m)')
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# The sump-level column contains 'NO DATA' markers, so pandas has presumably loaded
# it as text (TODO confirm dtype). A max over text is lexicographic ('9.5' > '10.2'),
# so convert a private copy to numbers before aggregating; anything unparseable
# becomes NaN, which max() skips. `df` itself is left untouched.
df1 = df[df['CLEAR WATER SUMP LEVEL IN M'] != 'NO DATA'].copy()
df1['CLEAR WATER SUMP LEVEL IN M'] = pd.to_numeric(df1['CLEAR WATER SUMP LEVEL IN M'], errors='coerce')
# Highest sump level per (TIME, DATE) pair, listed in descending TIME order.
hourly_CLEAR_WATER_SUMP_LEVEL= df1.groupby(['TIME','DATE'])['CLEAR WATER SUMP LEVEL IN M'].agg('max').reset_index(name='CLEAR_WATER_SUMP_LEVEL_IN_M').sort_values(by="TIME",ascending=False)
hourly_CLEAR_WATER_SUMP_LEVEL

In [None]:
# Clear-water sump level per TIME on four consecutive days, 4-7 January 2022.
# Each entry: (DATE value to select, legend label).
sump_days = [
    ('2022-01-04', 'January 4, 2022'),
    ('2022-01-05', 'January 5, 2022'),
    ('2022-01-06', 'January 6, 2022'),
    ('2022-01-07', 'January 7, 2022'),
]

sump_profiles = []
for day, _ in sump_days:
    day_rows = df2022[df2022['DATE'] == day].copy()
    # Convert before aggregating: the column contains 'NO DATA' markers, so it is
    # presumably text (TODO confirm dtype). A max over text is lexicographic, and a
    # later float cast would fail on 'NO DATA'. Unparseable entries become NaN.
    day_rows['CLEAR WATER SUMP LEVEL IN M'] = pd.to_numeric(day_rows['CLEAR WATER SUMP LEVEL IN M'], errors='coerce')
    sump_profiles.append(
        day_rows.groupby(['DATE','TIME'])['CLEAR WATER SUMP LEVEL IN M'].max().reset_index()
    )

# Expose each day's frame under its own name as well.
df2022_day1, df2022_day2, df2022_day3, df2022_day4 = sump_profiles

for profile, (_, legend_label) in zip(sump_profiles, sump_days):
    plt.plot(profile['TIME'], profile['CLEAR WATER SUMP LEVEL IN M'], label=legend_label, marker='o')

plt.xlabel('Time')
plt.ylabel('CLEAR WATER SUMP LEVEL IN M')
plt.title('Clear water sump level in Consecutive Days')
plt.grid(True)
plt.legend()
plt.xticks(rotation=45)
plt.show()

In [None]:
# The sump-level column contains 'NO DATA' markers, so pandas has presumably loaded
# it as text (TODO confirm dtype). Both the max and the ranking below would then be
# lexicographic ('9.5' > '10.2'), so convert a private copy to numbers first;
# anything unparseable becomes NaN, which max() skips. `df` itself is left untouched.
df1 = df[df['CLEAR WATER SUMP LEVEL IN M'] != 'NO DATA'].copy()
df1['CLEAR WATER SUMP LEVEL IN M'] = pd.to_numeric(df1['CLEAR WATER SUMP LEVEL IN M'], errors='coerce')
# Highest sump level per DATE, ranked from highest to lowest.
Daily_CLEAR_WATER_SUMP_LEVEL= df1.groupby('DATE')['CLEAR WATER SUMP LEVEL IN M'].agg('max').reset_index(name='CLEAR_WATER_SUMP_LEVEL_IN_M').sort_values(by="CLEAR_WATER_SUMP_LEVEL_IN_M",ascending=False)
Daily_CLEAR_WATER_SUMP_LEVEL

In [None]:
df[df['DATE']=='2023-11-25']

# CLEAR WATER PUMPING FLOW m3/h 

In [None]:
df['CLEAR WATER PUMPING FLOW m3/h'] = pd.to_numeric(df['CLEAR WATER PUMPING FLOW m3/h'], errors='coerce')
yearly_clear_water_flow = df.groupby('YEAR')['CLEAR WATER PUMPING FLOW m3/h'].agg('sum').reset_index(name='Volume_of_clear_water_flow').sort_values(by="Volume_of_clear_water_flow",ascending=False)
yearly_clear_water_flow

In [None]:
# Bar chart of the yearly clear-water pumping totals: one bar and one tick per year.
plt.figure(figsize=(10, 6))
plt.bar(yearly_clear_water_flow['YEAR'], yearly_clear_water_flow['Volume_of_clear_water_flow'], color='skyblue')
plt.xlabel('Year')
plt.ylabel('Volume of Clear Water Flow')
plt.title('Volume of Clear Water Flow for Each Year')
# Take the tick positions from the frame being plotted, not from the raw-water
# summary, so this cell does not depend on an unrelated table.
plt.xticks(yearly_clear_water_flow['YEAR'])
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
df['CLEAR WATER PUMPING FLOW m3/h'] = pd.to_numeric(df['CLEAR WATER PUMPING FLOW m3/h'], errors='coerce')
month_yearly_clear_water_flow = df.groupby('MONTH-YEAR')['CLEAR WATER PUMPING FLOW m3/h'].agg('sum').reset_index(name='Volume_of_clear_water_flow').sort_values(by="Volume_of_clear_water_flow",ascending=False)
month_yearly_clear_water_flow

In [None]:
# Bar chart of the monthly clear-water pumping totals, each bar labelled with its value.
# The summary frame is sorted by volume, so put the months back in calendar order
# and use text labels on the x-axis: on a datetime axis the default bar width of
# 0.8 is measured in days, which draws month-spaced bars as thin slivers.
monthly_clear_in_order = month_yearly_clear_water_flow.sort_values(by='MONTH-YEAR')
month_labels = pd.to_datetime(monthly_clear_in_order['MONTH-YEAR']).dt.strftime('%b-%Y')

plt.figure(figsize=(14, 6))
bars = plt.bar(month_labels, monthly_clear_in_order['Volume_of_clear_water_flow'], color='blue')

# Write each bar's height just above its top edge, centred on the bar.
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, height, f'{height}', ha='center', va='bottom')
plt.xlabel('Month-Year')
plt.ylabel('Volume of Clear Water Flow')
plt.title('Volume of Clear Water Flow for Each Month-Year')
plt.xticks(rotation=45)
plt.grid(True, linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

In [None]:
df2022_day3 = df2022[df2022['DATE'] == '2022-01-06']
df2022_day3 =df2022_day3.groupby(['DATE','TIME','CLEAR WATER SUMP LEVEL IN M'])['CLEAR WATER PUMPING FLOW m3/h'].max().reset_index()

df2022_day3['CLEAR WATER PUMPING FLOW m3/h']

In [None]:
# Raw-water flow against clear-water pumping flow per TIME on 6 January 2022,
# with each pumping point tagged by the sump level recorded for it.
comparison_day = df2022[df2022['DATE'] == '2022-01-06']

# Build the raw-water series here from the same day's rows instead of relying on
# a frame left behind by an earlier cell (which other cells may have reassigned).
raw_on_day = comparison_day.groupby(['DATE','TIME'])['RAW WATER FLOW IN m3/h'].max().reset_index()
df2022_day3 = comparison_day.groupby(['DATE','TIME','CLEAR WATER SUMP LEVEL IN M'])['CLEAR WATER PUMPING FLOW m3/h'].max().reset_index()

plt.figure(figsize=(14, 10))
plt.plot(raw_on_day['TIME'], raw_on_day['RAW WATER FLOW IN m3/h'], label='raw water flow', marker='o',color='m')

pumping_flow = df2022_day3['CLEAR WATER PUMPING FLOW m3/h'].astype(float)
plt.plot(df2022_day3['TIME'], pumping_flow,label='clear water flow',marker='o')
# Walk the columns together by position so each sump-level tag lands on its own point.
for time_value, flow_value, sump_level in zip(df2022_day3['TIME'], pumping_flow, df2022_day3['CLEAR WATER SUMP LEVEL IN M']):
    plt.text(time_value, flow_value, str(sump_level), fontsize=13, color='red')
plt.xlabel('Time')
plt.ylabel('Flow (m3/h)')
plt.title('Raw Water Flow vs Clear Water Pumping Flow on January 6, 2022')
plt.grid(True)
plt.legend()
plt.xticks(rotation=45)
plt.show()

# TREATED WATER PRODUCTION IN m3/h

In [None]:
df['TREATED WATER PRODUCTION IN m3/h'].unique()

In [None]:
# Coerce the column to numbers before summing, as the raw-water and clear-water
# pumping sections do for their columns; unparseable entries become NaN, which
# sum() skips. Without this the total depends on how the CSV column was loaded.
df['TREATED WATER PRODUCTION IN m3/h'] = pd.to_numeric(df['TREATED WATER PRODUCTION IN m3/h'], errors='coerce')
# Total treated-water production per YEAR, ranked from largest to smallest.
yearly_clear_water_production = df.groupby('YEAR')['TREATED WATER PRODUCTION IN m3/h'].agg('sum').reset_index(name='Volume_of_clear_water_production').sort_values(by="Volume_of_clear_water_production",ascending=False)
yearly_clear_water_production

In [None]:
# Bar chart of the yearly production totals: one bar and one tick per year.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(
    yearly_clear_water_production['YEAR'],
    yearly_clear_water_production['Volume_of_clear_water_production'],
    color='skyblue',
)
ax.set_xlabel('Year')
ax.set_ylabel('Volume of Clear Water Production')
ax.set_title('Volume of Clear Water Production for Each Year')
ax.set_xticks(yearly_clear_water_production['YEAR'])
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
month_yearly_treated_water_production = df.groupby('MONTH-YEAR')['TREATED WATER PRODUCTION IN m3/h'].agg('sum').reset_index(name='VOLUME OF TREATED WATER PRODUCTION')
month_yearly_treated_water_production

In [None]:
# Line chart of the monthly treated-water totals, with every point labelled by its value.
plt.figure(figsize=(14, 6))
plt.plot(month_yearly_treated_water_production['MONTH-YEAR'],month_yearly_treated_water_production['VOLUME OF TREATED WATER PRODUCTION'].astype(float), marker='o', color='blue')
plt.xlabel('Month-Year')
plt.ylabel('Volume of treated water production')
plt.title('Volume of treated water production for Each Month-Year')
plt.xticks(rotation=45)
plt.grid(True, linestyle='--', alpha=0.7)
# Walk the two columns together by position rather than looking rows up by the
# index label `i`; this keeps labels on the right points even if the frame's
# index is not 0..n-1 (for example after a sort).
for month, volume in zip(month_yearly_treated_water_production['MONTH-YEAR'], month_yearly_treated_water_production['VOLUME OF TREATED WATER PRODUCTION']):
    plt.text(month, volume, str(volume), ha='center', va='bottom')

plt.tight_layout()
plt.show()


In [None]:
# Convert 'TREATED WATER PRODUCTION IN m3/h' to numeric
df['TREATED WATER PRODUCTION IN m3/h'] = pd.to_numeric(df['TREATED WATER PRODUCTION IN m3/h'], errors='coerce')

# Group by date and calculate the sum of treated water production for each day
daily_treated_water_production = df.groupby('DATE')['TREATED WATER PRODUCTION IN m3/h'].agg('sum').reset_index(name='Volume_of_treated_water_production').sort_values(by="Volume_of_treated_water_production", ascending=False)
daily_treated_water_production


In [None]:
# Monthly treated-water production (blue) drawn against monthly raw-water flow (red).
fig, ax = plt.subplots(figsize=(10, 6))

# Treated water production
ax.plot(
    month_yearly_treated_water_production['MONTH-YEAR'],
    month_yearly_treated_water_production['VOLUME OF TREATED WATER PRODUCTION'].astype(float),
    marker='o',
    color='blue',
    label='Treated Water Production',
)

# Raw water flow
ax.plot(
    month_yearly_raw_water_flow['MONTH-YEAR'],
    month_yearly_raw_water_flow['Volume_of_raw_water_flow'].astype(float),
    marker='o',
    color='red',
    label='Raw Water Flow',
)

# Axis labels, title and decorations
ax.set_xlabel('Month-Year')
ax.set_ylabel('Volume (m3)')
ax.set_title('Treated Water Production vs Raw Water Flow')
plt.xticks(rotation=45)
ax.legend()
ax.grid(True)
plt.tight_layout()
plt.show()


In [None]:
sorted(df['REMARKS'].unique())