In [1]:
import pandas as pd
import numpy as np
from IPython.display import display

In [19]:
headers: list = ["Date", "Time", "Global_active_power", "Global_reactive_power", "Voltage", 
                 "Global_intensity", "Sub_metering_1", "Sub_metering_2", "Sub_metering_3"]

pd_types: dict = {"Date": str, "Time": str, "Global_active_power": float, "Global_reactive_power": float,
              "Voltage": float, "Global_intensity": float, "Sub_metering_1": float, "Sub_metering_2": float,
              "Sub_metering_1": float}

np_types: list[tuple] = [("Date", "U10"), ("Time", "U8"), ("Global_active_power",
              "float64"), ("Global_reactive_power", "float64"), ("Voltage",
              "float64"), ("Global_intensity", "float64"), ("Sub_metering_1",
              "float64"), ("Sub_metering_2", "float64"), ("Sub_metering_3",
              "float64")]

In [26]:
pd_df = pd.read_csv("household_power_consumption.txt", delimiter=';',
                 dtype=pd_types, na_values="?",  header=1, names=headers)

#Dropping NaN values
# pd_df.dropna(inplace=True)

#Changing NaN values
pd_df.fillna(pd_df.mean(numeric_only=True), inplace=True)

print("All entries with Global_active_power higher than 5 kW")
display(pd_df[pd_df["Global_active_power"] > 5])

print("All entries with Voltage higher than 235 V")
display(pd_df[pd_df["Voltage"] > 235])

print("All entries with Global_intensity between 19-20 A")
display(pd_df[(pd_df["Global_intensity"].between(19, 20)) & (pd_df["Sub_metering_2"]> pd_df["Sub_metering_3"])])

np_df = np.genfromtxt("household_power_consumption.txt", 
       missing_values=["?", np.nan], delimiter=';', 
       dtype=np_types, encoding="UTF=8", names=True)

display(np_df)

All entries with Global_active_power higher than 5 kW


Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
0,16/12/2006,17:25:00,5.360,0.436,233.63,23.0,0.0,1.0,16.0
1,16/12/2006,17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
2,16/12/2006,17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
10,16/12/2006,17:35:00,5.412,0.470,232.78,23.2,0.0,1.0,17.0
11,16/12/2006,17:36:00,5.224,0.478,232.99,22.4,0.0,1.0,16.0
...,...,...,...,...,...,...,...,...,...
2069355,22/11/2010,18:40:00,5.408,0.150,231.50,23.6,48.0,0.0,0.0
2069356,22/11/2010,18:41:00,5.528,0.144,232.48,24.6,53.0,0.0,0.0
2071585,24/11/2010,07:50:00,5.172,0.050,235.18,22.0,0.0,38.0,17.0
2071586,24/11/2010,07:51:00,5.750,0.000,234.40,24.6,0.0,39.0,17.0


All entries with Voltage higher than 235 V


Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
3,16/12/2006,17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0
4,16/12/2006,17:29:00,3.520,0.522,235.02,15.0,0.0,2.0,17.0
5,16/12/2006,17:30:00,3.702,0.520,235.09,15.8,0.0,1.0,17.0
6,16/12/2006,17:31:00,3.700,0.520,235.22,15.8,0.0,1.0,17.0
13,16/12/2006,17:38:00,4.054,0.422,235.24,17.6,0.0,1.0,17.0
...,...,...,...,...,...,...,...,...,...
2075253,26/11/2010,20:58:00,0.946,0.000,240.43,4.0,0.0,0.0,0.0
2075254,26/11/2010,20:59:00,0.944,0.000,240.00,4.0,0.0,0.0,0.0
2075255,26/11/2010,21:00:00,0.938,0.000,239.82,3.8,0.0,0.0,0.0
2075256,26/11/2010,21:01:00,0.934,0.000,239.70,3.8,0.0,0.0,0.0


All entries with Global_intensity between 19-20 A


Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
44,16/12/2006,18:09:00,4.464,0.136,234.66,19.0,0.0,37.0,16.0
459,17/12/2006,01:04:00,4.582,0.258,238.08,19.6,0.0,13.0,0.0
463,17/12/2006,01:08:00,4.618,0.104,239.61,19.6,0.0,27.0,0.0
474,17/12/2006,01:19:00,4.636,0.140,237.37,19.4,0.0,36.0,0.0
475,17/12/2006,01:20:00,4.634,0.152,237.17,19.4,0.0,35.0,0.0
...,...,...,...,...,...,...,...,...,...
2071588,24/11/2010,07:53:00,4.666,0.000,235.72,19.8,0.0,39.0,17.0
2071589,24/11/2010,07:54:00,4.694,0.000,236.78,19.8,0.0,39.0,18.0
2071590,24/11/2010,07:55:00,4.602,0.000,237.08,19.4,0.0,40.0,17.0
2071591,24/11/2010,07:56:00,4.536,0.000,237.03,19.0,0.0,39.0,17.0


array([('16/12/2006', '17:24:00', 4.216, 0.418, 234.84, 18.4, 0., 1., 17.),
       ('16/12/2006', '17:25:00', 5.36 , 0.436, 233.63, 23. , 0., 1., 16.),
       ('16/12/2006', '17:26:00', 5.374, 0.498, 233.29, 23. , 0., 2., 17.),
       ...,
       ('26/11/2010', '21:00:00', 0.938, 0.   , 239.82,  3.8, 0., 0.,  0.),
       ('26/11/2010', '21:01:00', 0.934, 0.   , 239.7 ,  3.8, 0., 0.,  0.),
       ('26/11/2010', '21:02:00', 0.932, 0.   , 239.55,  3.8, 0., 0.,  0.)],
      dtype=[('Date', '<U10'), ('Time', '<U8'), ('Global_active_power', '<f8'), ('Global_reactive_power', '<f8'), ('Voltage', '<f8'), ('Global_intensity', '<f8'), ('Sub_metering_1', '<f8'), ('Sub_metering_2', '<f8'), ('Sub_metering_3', '<f8')])