In [1]:
import numpy as np
import pandas as pd
# Synthetic observations: each list holds one value per weekday, Mon..Sun.
data = {
    # weekday labels
    "day": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"],
    # numeric measurements
    "avg_temp_F": [65, 68, 72, 75, 80, 85, 78],
    "peak_load_MW": [120, 125, 132, 138, 145, 150, 142],
    "cars_per_capita": [0.6, 0.65, 0.7, 0.72, 0.75, 0.8, 0.78],
    # boolean flag, False only for Sat and Sun
    "driving_right": [True, True, True, True, True, False, False],
}

# One column per dict key, one row per weekday; the bare name renders the frame.
df = pd.DataFrame(data=data)
df


Unnamed: 0,day,avg_temp_F,peak_load_MW,cars_per_capita,driving_right
0,Mon,65,120,0.6,True
1,Tue,68,125,0.65,True
2,Wed,72,132,0.7,True
3,Thu,75,138,0.72,True
4,Fri,80,145,0.75,True
5,Sat,85,150,0.8,False
6,Sun,78,142,0.78,False


In [3]:
# Three element-wise comparisons, each producing a boolean Series.
# Only the final expression is rendered as the cell's output.

# Equality against a scalar
df["driving_right"].eq(True)

# Strictly greater than a scalar threshold
df["avg_temp_F"].gt(75)

# Each value compared against the column's own mean
df["peak_load_MW"].gt(df["peak_load_MW"].mean())


0    False
1    False
2    False
3     True
4     True
5     True
6     True
Name: peak_load_MW, dtype: bool

In [5]:
# Mask for days that are both warm (avg temp > 75) and heavily loaded (peak > 140).
high_temp_high_load = df["avg_temp_F"].gt(75) & df["peak_load_MW"].gt(140)

df.loc[high_temp_high_load]

Unnamed: 0,day,avg_temp_F,peak_load_MW,cars_per_capita,driving_right
4,Fri,80,145,0.75,True
5,Sat,85,150,0.8,False
6,Sun,78,142,0.78,False


In [7]:
# Keep days where either the average temperature tops 80 or the peak load tops 145.
df.loc[df["avg_temp_F"].gt(80) | df["peak_load_MW"].gt(145)]


Unnamed: 0,day,avg_temp_F,peak_load_MW,cars_per_capita,driving_right
5,Sat,85,150,0.8,False


In [9]:
# Negation: `~` flips the boolean column, selecting rows where driving_right is False.
df.loc[~df["driving_right"]]

Unnamed: 0,day,avg_temp_F,peak_load_MW,cars_per_capita,driving_right
5,Sat,85,150,0.8,False
6,Sun,78,142,0.78,False


In [11]:
def load_category(load, medium_from=130, high_from=145):
    """Classify a peak-load reading into a named tier.

    Args:
        load: Load value to classify (the notebook applies this to the
            ``peak_load_MW`` column).
        medium_from: Smallest load labelled "Medium Load"; anything below
            it is "Low Load". Defaults to 130.
        high_from: Smallest load labelled "High Load". Defaults to 145.

    Returns:
        "Low Load", "Medium Load" or "High Load", or ``None`` when ``load``
        is missing (``None``, NaN or ``pd.NA``).
    """
    # Without this guard a NaN fails both `<` checks below and would be
    # silently labelled "High Load"; None would raise a TypeError.
    if pd.isna(load):
        return None
    if load < medium_from:
        return "Low Load"
    if load < high_from:
        return "Medium Load"
    return "High Load"

# Label every row by running load_category over its peak load; this adds
# (or overwrites) the "load_category" column on df itself.
df.loc[:, "load_category"] = df["peak_load_MW"].apply(load_category)
df

Unnamed: 0,day,avg_temp_F,peak_load_MW,cars_per_capita,driving_right,load_category
0,Mon,65,120,0.6,True,Low Load
1,Tue,68,125,0.65,True,Low Load
2,Wed,72,132,0.7,True,Medium Load
3,Thu,75,138,0.72,True,Medium Load
4,Fri,80,145,0.75,True,High Load
5,Sat,85,150,0.8,False,High Load
6,Sun,78,142,0.78,False,Medium Load


In [13]:
# Rows where driving_right is True. The column already holds booleans, so it
# serves directly as the row mask; comparing it to True is redundant.
df[df["driving_right"]]

Unnamed: 0,day,avg_temp_F,peak_load_MW,cars_per_capita,driving_right,load_category
0,Mon,65,120,0.6,True,Low Load
1,Tue,68,125,0.65,True,Low Load
2,Wed,72,132,0.7,True,Medium Load
3,Thu,75,138,0.72,True,Medium Load
4,Fri,80,145,0.75,True,High Load


In [15]:
# Days whose cars-per-capita value is strictly above 0.7.
df.loc[df["cars_per_capita"].gt(0.7)]

Unnamed: 0,day,avg_temp_F,peak_load_MW,cars_per_capita,driving_right,load_category
3,Thu,75,138,0.72,True,Medium Load
4,Fri,80,145,0.75,True,High Load
5,Sat,85,150,0.8,False,High Load
6,Sun,78,142,0.78,False,Medium Load


In [17]:
# Days with cars-per-capita above 0.7 that also have an average temperature above 75.
df.loc[df["cars_per_capita"].gt(0.7) & df["avg_temp_F"].gt(75)]

Unnamed: 0,day,avg_temp_F,peak_load_MW,cars_per_capita,driving_right,load_category
4,Fri,80,145,0.75,True,High Load
5,Sat,85,150,0.8,False,High Load
6,Sun,78,142,0.78,False,Medium Load


In [19]:
# Average peak load within each load tier.
df["peak_load_MW"].groupby(df["load_category"]).mean()

load_category
High Load      147.500000
Low Load       122.500000
Medium Load    137.333333
Name: peak_load_MW, dtype: float64