In [None]:
# if you dont already have the required libraries for this notebook, you can install them by running this codeblock
!pip install polars openpyxl pyxlsb xlsx2csv

In [1]:
#Importing os & Polars
import os
import polars as pl

In [2]:
# Locate the workbook relative to the notebook's working directory (../in/)
excel_path = os.path.join("..", "in", "avgprice_annual.xlsx")

# Load the workbook into a Polars DataFrame
power_prices = pl.read_excel(excel_path)

In [3]:
# Drop the first row and keep everything after it
# (presumably the spreadsheet's real header labels -- confirm against the workbook).
# slice(1) runs to the end of the frame, so no row count has to be computed by hand.
# NOTE: this cell reassigns power_prices from itself, so re-running it without
# re-running the load cell drops one more row each time.
power_prices = power_prices.slice(1)

In [4]:
# Mapping from the headers currently on the frame (the sheet title, an empty
# string, and the "_duplicated_N" placeholders) to descriptive column names.
# Listed in column order as (current name, new name) pairs.
new_column_names = dict(
    [
        ("Average Price (Cents/kilowatthour) by State by Provider, 1990-2020", "Year"),
        ("", "State"),
        ("_duplicated_0", "Industry Sector Category"),
        ("_duplicated_1", "Residential"),
        ("_duplicated_2", "Commercial"),
        ("_duplicated_3", "Industrial"),
        ("_duplicated_4", "Transportation"),
        ("_duplicated_5", "Other"),
        ("_duplicated_6", "Total"),
    ]
)

# Apply the mapping to the DataFrame
power_prices = power_prices.rename(new_column_names)

In [6]:
# Cast Year to Float64 so it can be compared against a number
year_as_float = pl.col("Year").cast(pl.Float64)

# Row conditions: data from 2020 only, excluding the entry for the
# entire US as well as DC
is_2020 = pl.col("Year") == 2020
not_us = pl.col("State") != "US"
not_dc = pl.col("State") != "DC"

# Convert first, then keep only the rows that satisfy all three conditions
power_prices = (
    power_prices
    .with_columns(year_as_float)
    .filter(is_2020 & not_us & not_dc)
)


In [7]:
# Condition selecting the "Total Electric Industry" rows of the sector column
sector_is_total = pl.col("Industry Sector Category") == "Total Electric Industry"

# Keep those rows, then cast the Industrial column to Float64
power_prices = (
    power_prices
    .filter(sector_is_total)
    .with_columns(pl.col("Industrial").cast(pl.Float64))
)

In [8]:
# Mean of the "Industrial" column, kept as a one-cell DataFrame with column "mean"
mean_value = power_prices.select(pl.col("Industrial").mean().alias("mean"))

# Pull the scalar out once so both comparisons use the same threshold
industrial_mean = mean_value["mean"][0]
industrial = pl.col("Industrial")

# Rows whose Industrial value is strictly above the mean
over_mean = power_prices.filter(industrial > industrial_mean)

# Rows whose Industrial value is strictly below the mean
# (a row exactly equal to the mean ends up in neither frame)
under_mean = power_prices.filter(industrial < industrial_mean)

In [9]:
# Show the mean, followed by up to 11 rows of the above-mean frame, one after the other
print(mean_value, over_mean.head(11), sep="\n")

shape: (1, 1)
┌───────┐
│ mean  │
│ ---   │
│ f64   │
╞═══════╡
│ 7.845 │
└───────┘
shape: (10, 9)
┌────────┬───────┬─────────────────┬─────────────┬───┬────────────┬────────────────┬───────┬───────┐
│ Year   ┆ State ┆ Industry Sector ┆ Residential ┆ … ┆ Industrial ┆ Transportation ┆ Other ┆ Total │
│ ---    ┆ ---   ┆ Category        ┆ ---         ┆   ┆ ---        ┆ ---            ┆ ---   ┆ ---   │
│ f64    ┆ str   ┆ ---             ┆ str         ┆   ┆ f64        ┆ str            ┆ str   ┆ str   │
│        ┆       ┆ str             ┆             ┆   ┆            ┆                ┆       ┆       │
╞════════╪═══════╪═════════════════╪═════════════╪═══╪════════════╪════════════════╪═══════╪═══════╡
│ 2020.0 ┆ AK    ┆ Total Electric  ┆ 22.57       ┆ … ┆ 15.88      ┆ 0              ┆ NA    ┆ 19.82 │
│        ┆       ┆ Industry        ┆             ┆   ┆            ┆                ┆       ┆       │
│ 2020.0 ┆ CA    ┆ Total Electric  ┆ 20.45       ┆ … ┆ 14.27      ┆ 10.07          ┆ NA    ┆ 