# Groupby for bar charts

## Load data

In [1]:
# Location of the California fuel-type parquet file, relative to this notebook.
path = "../../data/EIA/fuel_type_data_california.parquet"

In [2]:
import pandas as pd

# Read the raw parquet file into a DataFrame; the bare name on the last line
# renders it (pandas truncates the display of long frames).
df = pd.read_parquet(path=path)
df

Unnamed: 0,period,respondent,respondent-name,fueltype,type-name,value,value-units
0,2025-04-22 20:00:00-07:00,CAL,California,BAT,Battery storage,179,megawatthours
1,2025-04-22 20:00:00-07:00,CAL,California,COL,Coal,216,megawatthours
...,...,...,...,...,...,...,...
452546,2018-12-31 17:00:00-07:00,CAL,California,WAT,Hydro,2880,megawatthours
452547,2018-12-31 17:00:00-07:00,CAL,California,WND,Wind,2628,megawatthours


In [10]:
# Index by timestamp, keep only the 2024 rows and the two columns of interest,
# then give those columns friendlier names.
df = (
    df.set_index('period')
    .sort_index()  # sorted index before the partial-string slice on '2024'
    .loc['2024', ['type-name', 'value']]
    .rename(columns={'type-name': 'technology', 'value': 'energy'})
)

df

Unnamed: 0_level_0,technology,energy
period,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-01 00:00:00-07:00,Wind,243
2024-01-01 00:00:00-07:00,Hydro,3541
...,...,...
2024-12-31 23:00:00-07:00,Hydro,4125
2024-12-31 23:00:00-07:00,Wind,476


## Calculate temporal properties

In [2]:
# `modules` is a project-local package: start the notebook from a location
# where `import modules` resolves instead of hard-coding a machine-specific
# sys.path entry here.
# NOTE(review): the recorded run of this cell failed with
# "ModuleNotFoundError: No module named 'prophet'" -- presumably `modules.utils`
# imports prophet; install it in the kernel environment before running.
from modules import utils

# Add the time-derived columns that the groupby cells rely on
# (presumably 'hour', 'month' and 'weekday' -- TODO confirm in utils.add_time_features).
df = utils.add_time_features(df)

df

ModuleNotFoundError: No module named 'prophet'

## Steps

### Aggregate data with groupby

In [5]:
# Grouping keys (categorical) and the numeric column to aggregate.
cols_cat = ['hour', 'technology']
cols_num = 'energy'

# Total energy for every (hour, technology) pair; the result is a Series
# indexed by the grouping keys.
r = df.groupby(cols_cat)[cols_num].sum()
r

hour  technology
0     Coal           158031
      Hydro         1694126
                     ...   
23    Solar            5230
      Wind          1058245
Name: energy, Length: 192, dtype: int64

### Reset index to transform result into a data frame

In [10]:
# Turn the grouped Series (indexed by the grouping keys) into a flat DataFrame
# with one column per key plus the aggregated value.
# The isinstance guard keeps this cell idempotent: calling reset_index() again
# on the resulting DataFrame would insert a spurious 'index' column.
if isinstance(r, pd.Series):
    r = r.reset_index()
r

Unnamed: 0,hour,technology,energy
0,0,Coal,158031
1,0,Hydro,1694126
...,...,...,...
190,23,Solar,5230
191,23,Wind,1058245


### Bar chart

In [12]:
import plotly.express as px

# Bars of summed energy per hour, coloured by technology.
fig = px.bar(r, x='hour', y=cols_num, color='technology')

fig

## Different parameters to visualize the bar chart

### Aggregate by 3 categorical columns

In [None]:
# Add 'month' as a third grouping key.
cols_cat = ['hour', 'technology', 'month']
cols_num = 'energy'

# Aggregate and flatten in one step: one row per (hour, technology, month).
r = df.groupby(cols_cat)[cols_num].sum().reset_index()
r

Unnamed: 0,hour,technology,month,energy
0,0,Coal,1,7974
1,0,Coal,2,6698
...,...,...,...,...
2302,23,Wind,11,56956
2303,23,Wind,12,53996


### Plot with facets

#### Columns

In [13]:
# One panel per month, laid out as columns that wrap after 4 panels.
fig = px.bar(
    r,
    x="hour", y=cols_num, color="technology",
    facet_col="month", facet_col_wrap=4,
)

fig

#### Rows

In [14]:
# One panel per month stacked vertically; the figure is made tall (3000 px)
# so that each row stays readable.
fig = px.bar(
    r,
    x="hour", y=cols_num, color="technology",
    facet_row="month",
    height=3000,
)

fig

### Aggregate by 4 categorical columns

In [15]:
# Add 'weekday' as a fourth grouping key.
cols_cat = ['hour', 'technology', 'month', 'weekday']
cols_num = 'energy'

# One row per (hour, technology, month, weekday) with the summed energy.
r = (
    df.groupby(cols_cat)[cols_num]
    .sum()
    .reset_index()
)
r

Unnamed: 0,hour,technology,month,weekday,energy
0,0,Coal,1,0,1380
1,0,Coal,1,1,1397
...,...,...,...,...,...
16126,23,Wind,12,5,8895
16127,23,Wind,12,6,12396


In [16]:
# Grid of panels: months down the rows, weekdays across the columns.
fig = px.bar(
    r,
    x="hour", y=cols_num, color="technology",
    facet_row="month", facet_col="weekday",
    height=3000,
)

fig