In [2]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
from datetime import timedelta
import time
import sqlite3
import matplotlib.pyplot as plt

In [105]:
def epoch_timestring(epoch_time):
    """Format an epoch timestamp as a local-time 'HH:MM:SS' string.

    Parameters
    ----------
    epoch_time : int, float or numeric str
        Seconds since the epoch; it is passed through ``int()`` first, so any
        fractional part is truncated.

    Returns
    -------
    str
        Time of day in the machine's local timezone (``time.localtime``).
    """
    whole_seconds = int(epoch_time)
    local_struct = time.localtime(whole_seconds)
    # A fuller pattern, '%d%B%YT%H:%M:%S', was noted as an alternative format.
    return time.strftime('%H:%M:%S', local_struct)

def date_epoch(date_string):
    """Turn a 'YYYY-MM-DD' date into an (start, end) epoch-seconds window.

    The date is parsed into a naive datetime, which ``timestamp()`` interprets
    in the machine's local timezone. The window starts 900 s (15 min) after
    that midnight and ends 86400 s (24 h) after it.

    Parameters
    ----------
    date_string : str
        Date formatted as ``%Y-%m-%d``; anything else raises ``ValueError``
        from ``strptime``.

    Returns
    -------
    tuple of float
        ``(midnight + 900, midnight + 86400)``.
    """
    midnight = dt.strptime(date_string, "%Y-%m-%d").timestamp()
    window_start = midnight + 900
    window_end = midnight + 86400
    return window_start, window_end

In [4]:
def downsample(data, factor):
    """Keep every ``factor``-th element of ``data``, starting with the first.

    Parameters
    ----------
    data : array-like
        Converted with ``np.asarray`` before slicing.
    factor : int
        Slice step applied along the first axis.

    Returns
    -------
    numpy.ndarray
        ``np.asarray(data)[::factor]``.
    """
    stride = slice(None, None, factor)
    return np.asarray(data)[stride]

In [78]:
def pumpstartstop(np_data, downsample_f = 1):
    """Locate pump start/stop events from jumps in a flow series.

    The series is first thinned with ``downsample``; then each consecutive
    difference is inspected. Indices refer to positions in ``np.diff`` of the
    downsampled series, i.e. index ``i`` is the step from sample ``i`` to
    sample ``i + 1``.

    Rules:
      * a rise greater than 10 is a start, unless the previously recorded
        event was already a start;
      * a drop below -10 is a stop; when the previously recorded event was
        also a stop, that earlier stop is discarded and replaced by this one.

    Parameters
    ----------
    np_data : array-like
        Flow samples.
    downsample_f : int, optional
        Stride handed to ``downsample`` (default 1, i.e. no thinning).

    Returns
    -------
    tuple
        ``(start_indices, stop_indices, events)`` where ``events`` maps each
        kept index to ``1`` (start) or ``-1`` (stop).
    """
    samples = downsample(np_data, downsample_f)
    starts, stops, events = [], [], {}
    # prev_state: 0 = nothing recorded yet, 1 = last event was a start,
    # -1 = last event was a stop. prev_idx is the index of that last event.
    prev_idx, prev_state = 0, 0
    for idx, delta in enumerate(np.diff(samples)):
        if delta > 10:
            if prev_state == 1:
                # Still ramping up after a start that was already recorded.
                continue
            starts.append(idx)
            events[idx] = 1
            prev_state, prev_idx = 1, idx
        elif delta < -10:
            if prev_state == -1:
                # Flow is still falling: keep only the latest stop.
                stops.remove(prev_idx)
                events.pop(prev_idx)
            stops.append(idx)
            events[idx] = -1
            prev_state, prev_idx = -1, idx
    return starts, stops, events

In [106]:
# date format '2019-03-06', all for all available dates

def pump_stat(date_string, source, downsample_factor = 150,
              db_path = '/home/sohamp/project_course_sem6/git/Dash Server/water_data.db'):
    """Detect pump start/stop events for one source and return them as a table.

    Reads the flow rows for the requested window from the SQLite database,
    thins the series, runs ``pumpstartstop`` on it and labels every detected
    index with the matching 'HH:MM:SS' string.

    Parameters
    ----------
    date_string : str
        Day to analyse as 'YYYY-MM-DD' (window computed by ``date_epoch``), or
        'all' to use the open interval (0, 2000000000) of epoch seconds.
    source : str
        One of 'CWPS', 'WSC_FW', 'WSC_RW'.
    downsample_factor : int, optional
        Stride used for both the flow series and the time labels.
    db_path : str, optional
        Location of the SQLite database; defaults to the path this notebook
        was written against.

    Returns
    -------
    pandas.DataFrame or int
        Frame with columns ``source``, ``date``, ``time``, ``action`` (one row
        per event, ``action`` being 'start' or 'stop'). The frame is empty
        when the window holds no rows. ``-1`` is returned, after printing a
        message, when ``source`` is not recognised.
    """
    # source -> (table, flow column, daily-flow column, gain).
    # Gain of 4 for WSC_RW: recycled-water flows are smaller than the fresh and
    # CWPS flows, so they are amplified to make slope changes easier to detect
    # against the fixed thresholds in pumpstartstop.
    source_specs = {
        'CWPS': ('cwps', 'Current_Flow', 'Daily_Flow', 1),
        'WSC_FW': ('wsc1_fwp', 'current_flow', 'daily_flow', 1),
        'WSC_RW': ('wsc1_rwp', 'current_flow', 'daily_flow', 4),
    }
    columns = ['source', 'date', 'time', 'action']

    # Validate before touching the database so no connection is opened (and
    # leaked) for a bad request.
    if source not in source_specs:
        print("Incoorrect source, enter CWPS, WSC_FW, WSC_RW")
        return -1

    if date_string == 'all':
        start_epoch, end_epoch = 0, 2000000000
    else:
        start_epoch, end_epoch = date_epoch(date_string)

    table, flow_col, daily_col, gain = source_specs[source]
    # Identifiers come from the fixed table above (they cannot be bound as
    # parameters); the window bounds are bound with '?' placeholders.
    query = 'SELECT timeEpoch, {}, {} FROM {} WHERE timeEpoch>? AND timeEpoch<?'.format(
        flow_col, daily_col, table)

    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(query, (start_epoch, end_epoch)).fetchall()
    finally:
        conn.close()

    if not rows:
        # np.asarray([]) is 1-D, so the column slicing below would raise.
        return pd.DataFrame(columns=columns)

    flow_table = np.asarray(rows)
    current_flow = flow_table[:, 1]
    if gain != 1:
        current_flow = current_flow * gain
    time_list = list(map(epoch_timestring, flow_table[:, 0]))

    _, _, pump_dict = pumpstartstop(current_flow, downsample_factor)
    # Thin the labels with the same stride so event indices line up with them.
    downsampled_time = downsample(time_list, downsample_factor)

    action_ = {1: 'start', -1: 'stop'}
    # Build all rows first and construct the frame once: DataFrame.append was
    # removed in pandas 2.0 and was quadratic when used in a loop.
    records = [
        {'source': source, 'date': date_string,
         'time': downsampled_time[k], 'action': action_[state]}
        for k, state in pump_dict.items()
    ]
    return pd.DataFrame(records, columns=columns)

In [112]:
# Run the pump-event detection for one day across the three sources.
d_string = '2019-03-05'
cwps = pump_stat(date_string=d_string, source='CWPS', downsample_factor=150)
wsc_fw = pump_stat(date_string=d_string, source='WSC_FW', downsample_factor=150)
wsc_rw = pump_stat(date_string=d_string, source='WSC_RW', downsample_factor=150)

In [113]:
# Display the CWPS pump-event table returned by pump_stat.
cwps

Unnamed: 0,source,date,time,action
0,CWPS,2019-03-05,13:37:20,start
1,CWPS,2019-03-05,14:19:55,stop
2,CWPS,2019-03-05,17:10:07,start
3,CWPS,2019-03-05,19:05:36,stop
4,CWPS,2019-03-05,19:23:51,start
5,CWPS,2019-03-05,19:48:13,stop


In [111]:
# NOTE(review): this cell's execution count (111) predates the cell that sets
# d_string = '2019-03-05' (112), and the table below shows 2019-03-06, so the
# output is from an earlier run. Re-run or remove this cell.
cwps

Unnamed: 0,source,date,time,action
0,CWPS,2019-03-06,13:37:32,start
1,CWPS,2019-03-06,14:26:12,stop
2,CWPS,2019-03-06,16:58:14,start
3,CWPS,2019-03-06,18:59:47,stop
4,CWPS,2019-03-06,19:11:56,start
5,CWPS,2019-03-06,19:36:16,stop


#### Trial run code for debugging

In [75]:
# Module-level replay of the WSC_RW steps from pump_stat, so that every
# intermediate (raw rows, flow series, detected indices) can be inspected.
downsample_factor = 150
# NOTE(review): conn is not closed in this cell; call conn.close() once done.
conn = sqlite3.connect('/home/sohamp/project_course_sem6/git/Dash Server/water_data.db')
cursor = conn.cursor()
# d_string is expected to have been set by an earlier cell.
start_epoch, end_epoch = date_epoch(d_string)
# Bind the window bounds as parameters instead of formatting them into the SQL.
cursor.execute(
    'SELECT timeEpoch, current_flow, daily_flow FROM wsc1_rwp WHERE timeEpoch>? AND timeEpoch<?',
    (start_epoch, end_epoch))
np_flow_wsc1_rw = np.asarray(cursor.fetchall())
# NOTE(review): the gain here is 5 while pump_stat uses 4 for WSC_RW; confirm
# which value is intended.
current_flow = np_flow_wsc1_rw[:,1]*5
time_list = list(map(epoch_timestring, list(np_flow_wsc1_rw[:, 0])))

pump_start, pump_stop, pump_dict = pumpstartstop(current_flow, downsample_factor)
# Same stride as the flow series, so event indices line up with the labels.
downsampled_time = downsample(time_list, downsample_factor)

action_ = {1:'start', -1: 'stop'}
# Collect the rows and build the frame once: DataFrame.append was removed in
# pandas 2.0 and was quadratic when called inside a loop.
records = []
for k in pump_dict:
    records.append({'time':downsampled_time[k], 'action': action_[pump_dict[k]]})
pump_action = pd.DataFrame(records, columns=['time', 'action'])
