In [1]:
import pandas as pd
import altair as alt

# Lift pandas' display limits so every row, every column, and the full text of
# long cells are rendered instead of being truncated.
for display_option in (
    "display.max_rows",
    "display.max_columns",
    "display.max_colwidth",
):
    pd.set_option(display_option, None)

# [Online Data Catalog State Hybrid Workforce](https://catalog.data.gov/dataset/statewide-hybrid-workforce)

Statewide telework data. Last updated November 24, 2024.

In [2]:
# Local CSV exports, listed in the same order as the names they are unpacked
# into further down.
csv_paths = [
    "../data/vw_export_telework_days_trend.csv",
    "../data/vw_export_telework_days_detail.csv",
    "../data/vw_export_state_hybrid_workforce_trend.csv",
    "../data/vw_export_in_office_days_trend.csv",
    "../data/vw_export_in_office_days_detail.csv",
    "../data/vw_export_employees_by_classification.csv",
    "../data/vw_export_detail_by_agency_department.csv",
    "../data/vw_export_data_submission_status.csv",
    "../data/cbid-listing.csv",
]

# Read each export once; all_data keeps the frames together for bulk inspection.
all_data = [pd.read_csv(csv_path) for csv_path in csv_paths]

# Bind each frame to its own name (order must match csv_paths).
(
    telework_day_tr,
    telework_day_det,
    hybrid_tr,
    in_office_tr,
    in_office_det,
    employee_class,
    agency_dept_det,
    sub_status,
    cbid,
) = all_data

## [DGS Telework Data](https://data.ca.gov/dataset/dgs-telework-data)
- Last updated October 25, 2021.


In [14]:
# Read both DGS telework tables directly from the data.ca.gov datastore dumps.
dgs_agg_week, dgs_telework_status = (
    pd.read_csv(dump_url)
    for dump_url in (
        "https://data.ca.gov/datastore/dump/eea6715b-0a13-4bfc-8e42-1cd4f8481ac8?bom=True",
        "https://data.ca.gov/datastore/dump/ca341c2a-5ffa-44ac-972b-9dcff31a1a33?bom=True",
    )
)

In [15]:
# DataFrame.info() prints its summary and returns None, so call it directly;
# wrapping the calls in display() only appends stray "None" outputs.
dgs_agg_week.info()
dgs_telework_status.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84 entries, 0 to 83
Data columns (total 11 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   _id                               84 non-null     int64  
 1   Week                              84 non-null     object 
 2   Department                        84 non-null     object 
 3   All Staff                         84 non-null     object 
 4   Telework Eligible Staff           84 non-null     object 
 5   Full-time Teleworkers             84 non-null     object 
 6   Part-time Teleworkers             84 non-null     int64  
 7   Working Days                      84 non-null     object 
 8   Telework Days                     84 non-null     object 
 9   Teleworker Average 1 Way Miles    84 non-null     float64
 10  Teleworker Average 1 Way Minutes  84 non-null     float64
dtypes: float64(2), int64(2), object(7)
memory usage: 7.3+ KB
<class 'pandas.c

None

None

In [3]:
# List the column names of every loaded frame, in load order.
for frame in all_data:
    print(frame.columns)

Index(['Report Month', 'Department', 'Avg Telework Days/Week'], dtype='object')
Index(['Report Month', 'Department', '0 Days', '1 Day', '2 Days', '3 Days',
       '4 Days', '5 Days', 'Total'],
      dtype='object')
Index(['Report Month', 'All Staff', 'Not Eligible for Telework',
       'Telework Eligible', 'Eligible - Not Teleworking', 'Office Centered',
       'Remote Centered'],
      dtype='object')
Index(['Report Month', 'Department', 'Avg Office Days/Week'], dtype='object')
Index(['Report Month', 'Department', '0 Days', '1 Day', '2 Days', '3 Days',
       '4 Days', '5 Days', 'Total'],
      dtype='object')
Index(['Reporting Year', 'Reporting Month', 'Org Code', 'Department Name',
       'Class Code', 'Class Title', 'Bargaining Unit', 'Employee Count',
       'Not Telework Eligible', 'Remote Centered', 'Office Centered',
       'Eligible Not Teleworking'],
      dtype='object')
Index(['Report Month', 'Org Code', 'Department', 'All reported employees',
       'Telework eligible empl

In [4]:
# Reshape the wide hybrid-workforce trend into long form: one row per
# (Report Month, employee_category) pair, so each category can be drawn as its
# own series.
melt_hybrid_tr = hybrid_tr.melt(
    id_vars="Report Month",
    var_name="employee_category",
    value_name="employee_count"
)

# info() prints its summary and returns None, so it is called on its own
# rather than passed to display() (which would render a stray "None").
melt_hybrid_tr.info()

# Number of rows contributed by each former column.
display(melt_hybrid_tr["employee_category"].value_counts())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162 entries, 0 to 161
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Report Month       162 non-null    object
 1   employee_category  162 non-null    object
 2   employee_count     162 non-null    int64 
dtypes: int64(1), object(2)
memory usage: 3.9+ KB


None

employee_category
All Staff                     27
Not Eligible for Telework     27
Telework Eligible             27
Eligible - Not Teleworking    27
Office Centered               27
Remote Centered               27
Name: count, dtype: int64

In [5]:
def make_line(df: pd.DataFrame, x_col: str, y_col: str, color: str = None):
    """Build an Altair line chart with point markers.

    Args:
        df: Data to plot.
        x_col: Name of the column encoded on the x axis.
        y_col: Name of the column encoded on the y axis.
        color: Optional name of the column used to split and color the lines.
            When omitted (or empty), a single black line is drawn.

    Returns:
        The configured ``alt.Chart``; tooltips show ``x_col`` and ``y_col``.
    """
    # Channels are passed by keyword: Altair infers positional channels from
    # the argument's class, and the plain value dict returned by alt.value()
    # has no channel class, so the no-color default used to raise
    # NotImplementedError.
    color_encoding = alt.Color(color) if color else alt.value("black")

    return (
        alt.Chart(df)
        .mark_line(point=True)
        .encode(
            x=alt.X(x_col),
            y=alt.Y(y_col),
            color=color_encoding,
            tooltip=[x_col, y_col],
        )
    )

In [6]:
# Employee counts by report month, one line per employee category
# (uses the long-format frame produced by the melt above).
make_line(melt_hybrid_tr, x_col="Report Month", y_col="employee_count", color="employee_category")

In [7]:
# Average telework days per week by report month, one line per department.
make_line(telework_day_tr, x_col="Report Month", y_col="Avg Telework Days/Week", color="Department")

In [8]:
# Check the in-office trend column names before charting them.
in_office_tr.columns

Index(['Report Month', 'Department', 'Avg Office Days/Week'], dtype='object')

In [9]:
# Average in-office days per week by report month, one line per department.
make_line(in_office_tr, x_col="Report Month", y_col="Avg Office Days/Week", color="Department")