# Groupby for bar charts

## Load data

In [46]:
# Relative location of the parquet file that the loading cells below read with pandas.
path = '../../data/EIA/fuel_type_data_california.parquet'

In [47]:
import pandas as pd
# Read the raw parquet file referenced by `path` into a DataFrame.
df = pd.read_parquet(path)

# Last expression of the cell: renders the frame as the cell output.
df

Unnamed: 0,period,respondent,respondent-name,fueltype,type-name,value,value-units
0,2025-04-22 20:00:00-07:00,CAL,California,BAT,Battery storage,179,megawatthours
1,2025-04-22 20:00:00-07:00,CAL,California,COL,Coal,216,megawatthours
...,...,...,...,...,...,...,...
452546,2018-12-31 17:00:00-07:00,CAL,California,WAT,Hydro,2880,megawatthours
452547,2018-12-31 17:00:00-07:00,CAL,California,WND,Wind,2628,megawatthours


In [48]:
import pandas as pd

# Reload from disk (so this cell does not depend on the earlier `df`), index the
# rows chronologically by `period`, keep only the 2024 rows, and give the two
# remaining columns the shorter names used in the rest of the notebook.
# NOTE(review): the '2024' label slice assumes `period` is a datetime-like
# index — confirm against the parquet schema.
df = (
    pd.read_parquet(path)
    .set_index('period')
    .sort_index()
    .loc['2024', ['type-name', 'value']]
    .rename(columns={'type-name': 'technology', 'value': 'energy'})
)

## Calculate temporal properties

In [49]:
# Project-local helper module; its source is not part of this notebook.
from modules import utils
# Judging by the displayed output, the returned frame carries additional
# year, month, day, hour, weekday and weekend columns — presumably derived
# from the `period` index; confirm in modules/utils.
df = utils.add_time_features(df)
df

Unnamed: 0_level_0,technology,energy,year,month,day,hour,weekday,weekend
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-01-01 00:00:00-07:00,Wind,243,2024,1,1,0,0,False
2024-01-01 00:00:00-07:00,Hydro,3541,2024,1,1,0,0,False
...,...,...,...,...,...,...,...,...
2024-12-31 23:00:00-07:00,Hydro,4125,2024,12,31,23,1,False
2024-12-31 23:00:00-07:00,Wind,476,2024,12,31,23,1,False


## Steps

### Aggregate data with groupby

In [50]:
# Grouping keys (categorical) and the numeric column to total.
c_cat = ['technology', 'hour']
c_num = 'energy'

# Sum of energy for every (technology, hour) pair; the result is a Series
# whose index is built from the grouping keys.
r = df.groupby(c_cat)[c_num].sum()


### Reset index to transform the result into a DataFrame

In [51]:
# Move the group keys out of the index into regular columns so they can be
# referenced by name when plotting.
# NOTE: rebinds `r`, so run this once after the aggregation cell; a second
# run would reset the index again and add an extra 'index' column.
r = r.reset_index()

### Bar chart

In [56]:
import plotly.express as px

# Bar chart of total energy per hour, with one colour per technology.
px.bar(r, x='hour', y='energy', color='technology')

## Different parameters to visualize the bar chart

### Aggregate by 3 categorical columns

In [None]:
import plotly.express as px

# Add 'month' as a third grouping key so the totals can later be split into
# one panel per month.
c_cat = ['technology', 'hour', 'month']
c_num = 'energy'

# Total energy per (technology, hour, month), flattened back into columns.
r = (
    df.groupby(c_cat)[c_num]
    .sum()
    .reset_index()
)
r



Unnamed: 0,technology,hour,month,energy
0,Coal,0,1,7974
1,Coal,0,2,6698
...,...,...,...,...
2302,Wind,23,11,56956
2303,Wind,23,12,53996


### Plot with facets

In [64]:
# One bar-chart panel per month, wrapped into rows of four panels.
fig = px.bar(
    r,
    x='hour',
    y='energy',
    color='technology',
    facet_col='month',
    facet_col_wrap=4,
)

# Fix the canvas size in pixels; update_layout returns the figure, so the
# chart is rendered as the cell output.
fig.update_layout(width=1000, height=800)

#### Columns

#### Rows

### Aggregate by 4 categorical columns