Main Code

In [None]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt

# Load the meter readings from CSV and report how many rows were read.
df = pd.read_csv(r"electrical_meter_data.csv")
print(f"INFO:    Total Data: {df.shape[0]}")

# Columns treated as measurements by the checks below: every numeric column
# except 'location_id'. Filtering (instead of list.remove) avoids a ValueError
# when 'location_id' is missing or was not parsed as a numeric dtype.
numeric_columns = [
    column
    for column in df.select_dtypes(include=np.number).columns
    if column != 'location_id'
]

# CHECKING FOR POWER OUTAGE
def power_outage():
    """Flag rows in which every measurement column reads exactly zero.

    Adds a boolean ``Power_Outage`` column to the module-level ``df``; a row
    is True when all columns listed in ``numeric_columns`` equal 0. Prints a
    WARNING line with the number of flagged rows, or an INFO line when there
    are none.
    """
    outage = (df[numeric_columns] == 0).all(axis=1)
    df['Power_Outage'] = outage
    if outage.any():
        # Count from the local mask rather than indexing df inside the
        # f-string: reusing the outer quote character inside the braces is a
        # SyntaxError on Python versions before 3.12.
        print(f'WARNING: {outage.sum()} Power Outage cases')
    else:
        print('INFO:    There is NO Power Outage.')
power_outage()

# CHECKING FOR ABNORMAL VALUES
def abnormal_values():
    """Add one boolean flag column per abnormal-reading rule and report hits.

    The rules are evaluated in the order listed against the module-level
    ``df``. Each result is stored under the rule's column name and, when at
    least one row matches, a WARNING line with the number of matching rows is
    printed. Nothing is printed for a rule without matches.
    """
    voltage_phases = ['Voltage_PhaseR', 'Voltage_PhaseY', 'Voltage_PhaseB']
    current_phases = ['Current_PhaseR', 'Current_PhaseY', 'Current_PhaseB']

    def lowest_phase_under_voltage():
        # Only the lowest of the three phase voltages is tested; values
        # below 0.1 fall outside the band and are therefore not flagged.
        lowest = df[voltage_phases].min(axis=1)
        return (lowest >= 0.1) & (lowest <= 209)

    def frequency_out_of_band():
        frequency = df['Frequency']
        too_low = (frequency >= 0.01) & (frequency <= 45)
        too_high = frequency >= 60
        return too_low | too_high

    # (flag column, wording used in the warning, callable building the mask).
    # Masks are built lazily so columns are read and written strictly in order.
    rules = (
        ('Over_Voltage', 'Over Voltage',
         lambda: (df[voltage_phases] >= 250).any(axis=1)),
        ('Under_Voltage', 'Under Voltage',
         lowest_phase_under_voltage),
        ('High_Surge_Current', 'High Surge Current',
         lambda: (df[current_phases] >= 16).any(axis=1)),
        ('High_Real_Power_Consumption_kW', 'High Real Power Consumption',
         lambda: df['Real_Power_kW'] >= 6570),
        ('High_App_Power_Consumption_kVA', 'High Apparent Power Consumption',
         lambda: df['Apparent_Power_kVA'] >= 6580),
        ('High_Inst_Real_Power_Consumption', 'High Inst. Real Power Consumption',
         lambda: df['Inst_Real_Power'] >= 4.72),
        ('High_Inst_App_Power_Consumption', 'High Inst. Apparent Power Consumption',
         lambda: df['Inst_Apparent_Power'] >= 4.73),
        ('Abnormal_Frequency', 'Abnormal Frequency',
         frequency_out_of_band),
    )

    for flag_column, wording, build_mask in rules:
        df[flag_column] = build_mask()
        hits = df[flag_column].sum()
        if hits:
            print(f'WARNING: {hits} {wording} cases')
abnormal_values()

# CHECKING POWER FACTOR EFFICIENCY
def power_factor():
    """Classify the reported power factor and cross-check it against power.

    Works on the module-level ``df`` and adds these columns:

    * ``Bad_PF``  - ``Cumm_Power_Factor`` within [0.01, 0.85].
    * ``Poor_PF`` - ``Cumm_Power_Factor`` within (0.85, 0.95].
    * ``Power_Factor_Calculated`` - ``Real_Power_kW / Apparent_Power_kVA``.
    * ``Valid_Power_Factor`` - True when the reported value differs from the
      calculated one by at most 5 % of the calculated value.

    Prints one summary line for the Poor/Bad classification and one for the
    consistency check.
    """
    reported = df['Cumm_Power_Factor']
    # Strict lower bound at 0.85 so the two bands meet exactly; starting the
    # Poor band at 0.8501 would leave readings such as 0.85005 unclassified.
    df['Poor_PF'] = reported.gt(0.85) & reported.le(0.95)
    df['Bad_PF'] = reported.between(0.01, 0.85)

    flagged = df['Bad_PF'].sum() + df['Poor_PF'].sum()
    if flagged:
        # The count is formatted from a local: reusing the outer quote
        # character inside an f-string is a SyntaxError before Python 3.12.
        print(f'WARNING: {flagged} Poor / Bad PF detected')
    else:
        print('INFO:    No POOR / BAD PF data found.')

    df["Power_Factor_Calculated"] = df["Real_Power_kW"] / df["Apparent_Power_kVA"]
    deviation = np.abs(df["Power_Factor_Calculated"] - df["Cumm_Power_Factor"])
    # Rows where the division yields NaN/inf (zero apparent power) compare as
    # False here and are therefore counted as inconsistent.
    df['Valid_Power_Factor'] = deviation <= (df["Power_Factor_Calculated"] * 0.05)

    if not df['Valid_Power_Factor'].all():
        mismatches = (~df['Valid_Power_Factor']).sum()
        print(f"WARNING: {mismatches} INCONSISTENCIES in the POWER FACTOR calculations")
        # NOTE(review): the calculated column is removed only on this branch,
        # so it stays in df when everything is consistent. Confirm whether
        # the drop was meant to be unconditional.
        df.drop('Power_Factor_Calculated', axis=1, inplace=True)
    else:
        print("INFO:    All POWER FACTOR calculations are CONSISTENT.")
power_factor()

# CHECKING FOR NEGATIVE AND NULL VALUES
def negative_null_values():
    """Flag rows with negative or missing measurements, then repair them.

    Operates on the module-level ``df`` over the columns in
    ``numeric_columns``:

    1. Coerces those columns with ``pd.to_numeric(errors='coerce')``.
    2. Adds boolean ``Negative_Values`` / ``Null_Values`` columns marking rows
       that contain at least one negative / missing cell.
    3. If any row is flagged, blanks the negative cells, interpolates, and
       fills what is still missing backwards and then forwards.

    Prints a WARNING plus a confirmation when repairs were made, otherwise a
    single INFO line.
    """
    df[numeric_columns] = df[numeric_columns].apply(
        lambda column: pd.to_numeric(column, errors='coerce')
    )

    readings = df[numeric_columns]
    below_zero = readings < 0
    df['Negative_Values'] = below_zero.any(axis=1)
    df['Null_Values'] = readings.isna().any(axis=1)

    # Rows that are both negative and null contribute to both sums.
    error_rows = df['Negative_Values'].sum() + df['Null_Values'].sum()
    if not error_rows:
        print('INFO:    NO Negative & Null numerical values.')
        return

    print(f"WARNING: {error_rows} ERROR VALUES(negative or null values) cases.")

    # ESTIMATION & EDITING
    # Negative cells are turned into gaps so they are re-estimated together
    # with the values that were already missing.
    df[below_zero] = np.nan
    interpolated = df[numeric_columns].interpolate()
    df[numeric_columns] = interpolated
    # Whatever interpolation left empty is filled from the next valid
    # reading, then from the previous one.
    edge_filled = df[numeric_columns].bfill().ffill()
    df[numeric_columns] = edge_filled
    print("INFO:    Cleaned data saved.")
negative_null_values()

# Persist the processed frame (without the index column) and report completion.
df.to_csv(path_or_buf="Meter_Data.csv", index=False)
print('INFO:    Process SUCCESSFUL')

Graph Plots

In [None]:
# NOTE(review): this reloads the raw input file rather than the processed
# Meter_Data.csv written by the main cell - confirm that plotting the
# unrepaired readings is intended.
df = pd.read_csv(r"electrical_meter_data.csv")
df.set_index('data.creation_time', inplace=True)


def _plot_over_time(frame, columns, title, ylabel, legend_labels):
    """Show one 15x4 line chart of ``columns`` from ``frame`` against its index.

    Args:
        frame: DataFrame whose index supplies the x-axis.
        columns: List of column names to draw, one line per column.
        title: Chart title.
        ylabel: Y-axis label.
        legend_labels: Legend entries, in the same order as ``columns``.
    """
    # Create the Axes explicitly and hand it to pandas: DataFrame.plot()
    # called without ``ax`` opens a figure of its own, which leaves a figure
    # pre-sized via plt.figure(figsize=...) empty and the size unused.
    _, ax = plt.subplots(figsize=(15, 4))
    frame[columns].plot(ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Time')
    ax.set_ylabel(ylabel)
    ax.legend(legend_labels)
    ax.grid(True)
    # Always *call* show(); a bare ``plt.show`` is a no-op expression.
    plt.show()


# Plot Voltage
_plot_over_time(
    df,
    ['Voltage_PhaseR', 'Voltage_PhaseY', 'Voltage_PhaseB'],
    'Voltage over Time',
    'Voltage (V)',
    ['PhaseR', 'PhaseY', 'PhaseB'],
)

# Plot Current
_plot_over_time(
    df,
    ['Current_PhaseR', 'Current_PhaseY', 'Current_PhaseB'],
    'Current over Time',
    'Current (A)',
    ['PhaseR', 'PhaseY', 'PhaseB'],
)

# Plot Instantaneous Power
_plot_over_time(
    df,
    ['Inst_Apparent_Power', 'Inst_Real_Power'],
    'Instantaneous Power over Time',
    'Power',
    ['Instantaneous Apparent Power', 'Instantaneous Real Power'],
)

# Plot Power
_plot_over_time(
    df,
    ['Real_Power_kW', 'Apparent_Power_kVA'],
    'Power over Time',
    'Power',
    ['Real Power', 'Apparent Power'],
)

# Plot Power Factor
_plot_over_time(
    df,
    ['Cumm_Power_Factor'],
    'Power Factor over Time',
    'Power Factor',
    ['Power Factor'],
)