In [7]:
import datetime
import os
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from influxdb import *
from influxdb import DataFrameClient
#import Config as cg


In [8]:
# Connection settings for the InfluxDB instance that is queried below.
# Each value can be overridden with an environment variable of the same name,
# so the notebook can be pointed at another server without editing this cell;
# the literals are the defaults used when the variable is not set.
INFLUX_DB_IP = os.environ.get('INFLUX_DB_IP', '10.12.97.178')
INFLUX_DB_PORT = int(os.environ.get('INFLUX_DB_PORT', 8086))
INFLUX_DB = os.environ.get('INFLUX_DB', 'demoDb')
TARGET_MEASUREMENT = os.environ.get('TARGET_MEASUREMENT', 'EM_live')

In [9]:
# Client bound to the configured InfluxDB host, port and database.
con_obj = InfluxDBClient(
    host=INFLUX_DB_IP,
    port=INFLUX_DB_PORT,
    database=INFLUX_DB,
)

# Select every field of the target measurement whose timestamp falls within
# the last day (relative to the server's now()).
query = 'select * from {} where time > now() - 1d '.format(TARGET_MEASUREMENT)

# The result is requested in chunks of 10000 points and flattened into a frame.
result_set = con_obj.query(query, chunked=True, chunk_size=10000)
df = pd.DataFrame(result_set.get_points())

# Convert the 'time' column to datetime64 so it supports datetime arithmetic.
df['time'] = df['time'].astype('datetime64[ns]')

In [10]:
# Preview the first five rows of the queried data.
df.head(n=5)

Unnamed: 0,time,DeviceID,EM_Active Power (kW),EM_Current Avg (A),EM_Current Ph1 (A),EM_Current Ph2 (A),EM_Current Ph3 (A),EM_Energy (kWh),EM_Frequency (Hz),EM_Power Factor,...,EM_Voltage Ph2-Ph3 (V),EM_Voltage Ph3-N (V),Mean_THD,SCS,imbalance_current,imbalance_volt,mean_current,mean_volt,mean_voltage,temp_rise
0,2020-06-03 08:44:24.262408960,EM2,0.0,0.0,0.0,0.0,0.0,3000.0,49.9,1.0,...,416.5,241.5,0.0,0,0.0,0.303733,0.0,417.033333,240.8,0.184507
1,2020-06-03 08:44:24.376338944,EM4,253.5,399.3,399.6,396.4,401.9,140761.0,50.0,0.9,...,395.2,227.8,6.9,1,0.651139,0.253678,399.3,394.2,227.566667,0.128705
2,2020-06-03 08:44:24.830064128,EM10,42713.2,96.7,104.8,95.8,89.5,126181.7,50.0,0.9,...,413.0,237.7,0.0,1,8.376422,0.2826,96.7,412.833333,238.366667,0.159725
3,2020-06-03 08:44:25.094914048,EM5,342.0,547.6,574.8,517.7,550.3,178596.0,50.0,0.9,...,392.6,226.9,4.766667,1,4.967129,0.033947,547.6,392.766667,226.733333,0.002305
4,2020-06-03 08:44:25.481678080,EM6,186.0,1.0,0.9,1.0,1.0,269003.5,50.0,0.2,...,395.5,228.5,0.0,0,3.448276,0.185576,0.966667,395.166667,228.166667,0.068877


In [11]:

class Line_Voltage_Mean():
    """Per-device summary of the ``mean_volt`` readings of a meter DataFrame.

    The methods expect a DataFrame holding (a subset of) the columns
    ``DeviceID``, ``time``, ``mean_volt`` and ``mean_current``.

    Readings are classified into three voltage bands:

    * ``low``    -- ``0 <= mean_volt < lower_threshold``
    * ``normal`` -- ``lower_threshold <= mean_volt < Upper_threshold``
    * ``high``   -- ``mean_volt >= Upper_threshold``

    Parameters
    ----------
    Upper_threshold : number
        Lower edge of the ``high`` band.
    lower_threshold : number
        Lower edge of the ``normal`` band.
    """

    def __init__(self, Upper_threshold, lower_threshold):
        self.Upper_threshold = Upper_threshold
        self.lower_threshold = lower_threshold

    def avg_stability(self, df, spike_factor=1.1):
        """Summarise reading-to-reading changes of ``mean_current`` per device.

        A change counts as a "big spike" when it exceeds ``spike_factor`` times
        the mean absolute change over the whole frame.

        Parameters
        ----------
        df : DataFrame with ``DeviceID`` and ``mean_current`` columns.
        spike_factor : float, default 1.1
            Multiplier applied to the overall mean change to get the threshold.

        Returns
        -------
        DataFrame with one row per device and the columns ``DeviceID``,
        ``average_spike``, ``minimum_spike``, ``maximum_spike`` and
        ``big_spike_count``. Devices without any big spike are kept with a
        count of 0. ``df`` itself is not modified.
        """
        # The first reading of every device has no predecessor; its change is 0.
        step = df.groupby('DeviceID')['mean_current'].diff().abs().fillna(0)

        work = df[['DeviceID']].copy()
        work['diff'] = step.to_numpy()
        threshold = work['diff'].mean() * spike_factor
        work['big_spike'] = (work['diff'] > threshold).astype(np.float64)

        per_device = work.groupby('DeviceID')
        stats = per_device['diff'].agg(['mean', 'min', 'max'])
        # Summing the 0/1 flags counts spikes and keeps spike-free devices.
        stats['big_spike'] = per_device['big_spike'].sum()

        return stats.reset_index().rename(columns={
            "mean": "average_spike",
            "min": "minimum_spike",
            "max": "maximum_spike",
            "big_spike": "big_spike_count",
        })

    def time_as_index(self, df):
        """Return the latest ``time`` per device as columns ``DeviceID``, ``time``."""
        return df.groupby('DeviceID')['time'].max().reset_index()

    def time(self, df):
        """Time of day at which each device reached its extreme ``mean_volt``.

        Returns
        -------
        DataFrame indexed by ``DeviceID`` with the columns ``Time_max`` (time of
        the highest reading) and ``Time_min`` (time of the lowest reading), both
        as ``datetime.time`` objects. ``df`` itself is not modified.
        """
        # idxmax/idxmin return index *labels*. Working on a freshly numbered
        # copy makes them valid whatever index the caller's frame carries
        # (e.g. after rows have been filtered out).
        work = df[['DeviceID', 'time', 'mean_volt']].reset_index(drop=True)
        clock = pd.to_datetime(work['time']).dt.time

        volts = work.groupby('DeviceID')['mean_volt']
        at_max = volts.idxmax()
        at_min = volts.idxmin()

        return pd.DataFrame(
            {
                'Time_max': clock.loc[at_max.to_numpy()].to_numpy(),
                'Time_min': clock.loc[at_min.to_numpy()].to_numpy(),
            },
            index=at_max.index,
        )

    def categorization_time(self, df):
        """Add a ``Status`` column classifying ``mean_volt`` and return ``df``.

        The column is written into ``df`` in place. Values are ``'low'``,
        ``'normal'`` or ``'high'`` (see the class docstring for the band edges);
        negative, infinite or missing voltages get ``None``. An empty frame is
        accepted and simply gets an empty column.
        """
        edges = [0, self.lower_threshold, self.Upper_threshold, np.inf]
        # np.digitize yields 0 below the first edge and len(edges) at/after the
        # last one, hence the None entries on both ends of the lookup table.
        labels = np.array([None, 'low', 'normal', 'high', None], dtype=object)
        df['Status'] = labels[np.digitize(df['mean_volt'].to_numpy(), edges)]
        return df

    def calculate_count(self, df):
        """Count the readings per device and voltage band.

        Returns
        -------
        DataFrame with ``DeviceID`` plus one ``<band>_count`` column for every
        band that occurs in the data; a band a device never entered is NaN.
        ``df`` itself is not modified.
        """
        labelled = self.categorization_time(df[['DeviceID', 'mean_volt']].copy())
        counts = labelled.groupby(['DeviceID', 'Status']).size().unstack('Status')
        counts = counts.rename(columns={
            "low": "low_count",
            "normal": "normal_count",
            "high": "high_count",
        })
        return counts.reset_index()

    def duration(self, df):
        """Length in minutes of every run of consecutive equal ``Status`` values.

        ``df`` must contain ``Status`` and a datetime ``time`` column and is
        taken in its given row order. A run's length is the time of its last
        row minus the time of its first row.

        Returns
        -------
        DataFrame with one row per run and the columns ``time`` (minutes) and
        ``Status``.
        """
        status = df['Status']
        # A new run starts wherever the status differs from the previous row.
        run_id = (status != status.shift()).cumsum()
        minutes = df.groupby(run_id)['time'].apply(
            lambda run: (run.iloc[-1] - run.iloc[0]).total_seconds() / 60)
        # The last row of each run carries that run's status.
        run_status = status[status.shift(-1) != status]
        return pd.DataFrame({
            'time': minutes.to_numpy(),
            'Status': run_status.to_numpy(),
        })

    def calculate_time(self, df):
        """Total minutes each device spent in each voltage band.

        Rows are ordered by ``time`` before runs are measured, so the input does
        not have to be sorted.

        Returns
        -------
        DataFrame with ``DeviceID`` plus one ``<band>_duration`` column for
        every band that occurs in the data; a frame with only an empty
        ``DeviceID`` column when ``df`` has no rows. ``df`` itself is not
        modified.
        """
        labelled = df[['DeviceID', 'time', 'mean_volt']].copy()
        labelled['time'] = pd.to_datetime(labelled['time'])
        # Stable sort: rows sharing a timestamp keep their original order.
        labelled = labelled.sort_values('time', kind='mergesort')
        labelled = self.categorization_time(labelled)

        pieces = []
        for device, rows in labelled.groupby('DeviceID'):
            runs = self.duration(rows)
            runs.insert(0, 'DeviceID', device)
            pieces.append(runs)
        if not pieces:
            return pd.DataFrame({'DeviceID': pd.Series([], dtype=object)})

        totals = pd.pivot_table(
            pd.concat(pieces, ignore_index=True),
            index='DeviceID', columns='Status', values='time', aggfunc='sum')
        return totals.reset_index().rename(columns={
            "low": "low_duration",
            "normal": "normal_duration",
            "high": "high_duration",
        })

    def summary_data(self, df):
        """Descriptive statistics of ``mean_volt`` per device.

        Returns
        -------
        DataFrame indexed by ``DeviceID`` with the ``describe()`` statistics
        under readable names plus ``IQR`` (75th minus 25th percentile).
        """
        stats = df.groupby('DeviceID')['mean_volt'].describe().rename(columns={
            'count': 'Total_Count',
            'mean': 'Average',
            'std': 'SD',
            'min': 'Minimum',
            '25%': '25th_percentile',
            '50%': 'Median',
            '75%': '75th_percentile',
            'max': 'Maximum',
        })
        stats['IQR'] = stats['75th_percentile'] - stats['25th_percentile']
        return stats

    def zero_duration(self, df):
        """Minutes between the first and the last reading in ``df``.

        Used by ``output`` on the zero-voltage rows of a single device.

        NOTE(review): ``output`` referenced this method but it was never
        defined. Measuring the span from first to last zero reading is an
        assumption about the intended meaning -- confirm.
        """
        stamps = pd.to_datetime(df['time'])
        return (stamps.max() - stamps.min()).total_seconds() / 60

    def output(self, df):
        """Build the combined per-device report.

        Rows with ``mean_volt == 0`` are excluded from the statistics and, when
        any exist, reported separately through the ``count_0`` and
        ``Duration_0`` columns. Rows with negative or missing voltage are
        ignored.

        Returns
        -------
        DataFrame indexed by each device's latest ``time``, holding
        ``DeviceID``, the summary statistics, band durations and counts, the
        current-stability figures and the times of the voltage extremes.
        Missing values are replaced by 0. ``df`` itself is not modified.
        """
        report = self.time_as_index(df)
        zero_rows = df[df['mean_volt'] == 0]
        live = df[df['mean_volt'] > 0]

        parts = [
            self.summary_data(live).reset_index(),
            self.calculate_time(live),
            self.calculate_count(live),
            self.avg_stability(live),
            self.time(live).reset_index(),
        ]

        if len(zero_rows) != 0:
            zero_by_device = zero_rows.groupby('DeviceID')
            parts.append(zero_by_device.size().rename('count_0').reset_index())
            spans = {device: self.zero_duration(rows)
                     for device, rows in zero_by_device}
            parts.append(pd.DataFrame({
                'DeviceID': list(spans.keys()),
                'Duration_0': list(spans.values()),
            }))

        # Outer joins keep devices that are missing from one of the parts.
        for part in parts:
            report = report.merge(part, on='DeviceID', how='outer')

        report = report.fillna(0)
        return report.set_index('time')
        




In [12]:

# Band limits used to classify mean_volt readings: values at or above the
# upper limit are 'high', values below the lower limit are 'low'.
UPPER_VOLT_THRESHOLD = 456
LOWER_VOLT_THRESHOLD = 375

# Keyword arguments guard against swapping the two limits.
cat = Line_Voltage_Mean(
    Upper_threshold=UPPER_VOLT_THRESHOLD,
    lower_threshold=LOWER_VOLT_THRESHOLD,
)
t = cat.output(df)

# Bare last expression: the notebook renders the frame as a table instead of
# the wrapped plain-text form that print() produces.
t





                              DeviceID  Total_Count     Average        SD  \
time                                                                        
2020-06-04 08:44:16.103353856      EM1      24165.0  428.319319  7.502930   
2020-06-04 08:44:18.409726976     EM10      16954.0  413.956669  1.940166   
2020-06-04 08:44:22.583246080      EM2      26433.0  427.968089  7.332985   
2020-06-04 08:44:20.072410112     EM22      21117.0  394.517660  3.169496   
2020-06-04 08:44:14.806929152     EM23      19960.0  414.598083  1.284486   
2020-06-04 08:44:20.458280192      EM4      27470.0  396.415695  1.826353   
2020-06-04 08:44:19.998454016      EM5      26627.0  395.799156  2.916057   
2020-06-04 08:44:20.195437824      EM6      21485.0  396.395898  1.795153   
2020-06-04 08:44:22.818311936      EM7      22631.0  395.108026  2.891747   
2020-06-04 08:44:21.715572224      EM9      17649.0  395.776418  2.540794   

                                  Minimum  25th_percentile      Median  \
t

In [13]:
# Readings whose mean voltage is exactly zero.
zero_mask = df['mean_volt'].eq(0)
df0 = df[zero_mask]
df0

Unnamed: 0,time,DeviceID,EM_Active Power (kW),EM_Current Avg (A),EM_Current Ph1 (A),EM_Current Ph2 (A),EM_Current Ph3 (A),EM_Energy (kWh),EM_Frequency (Hz),EM_Power Factor,...,EM_Voltage Ph2-Ph3 (V),EM_Voltage Ph3-N (V),Mean_THD,SCS,imbalance_current,imbalance_volt,mean_current,mean_volt,mean_voltage,temp_rise
