In [1]:
import polars as pl
import requests
import os
import json
import string
import polars.selectors as cs
from datetime import datetime
from polars.selectors import expand_selector

[2021, 2022, 2023]

23


In [30]:
def get_ca_la_df(year: int):
    years = [2021, 2022, 2023, 2024, 2025]
    
    def remove_numbers(input_string):
        # rename columns
        # Create a translation table that maps each digit to None
        translation_table = str.maketrans("", "", "0123456789")
        # Use the translation table to remove all numbers from the input string
        result_string = input_string.translate(translation_table)
        return result_string

    try:
        assert year in years
        year_suffix = str(year)[2:4]
        url = f'https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/LAD{year_suffix}_CAUTH{year_suffix}_EN_LU/FeatureServer/0/query'
        params = {
        "where":"1=1", # maps to True
        "outFields":"*", # all
        "SR":"4326", # WGS84
        "f":"json"
        }

        r = requests.get(url = url, params = params)
        if r.status_code != 200:
            raise Exception(f'API call failed {r.status_code}')
    except AssertionError:
        print(f'API call failed {r.status_code}')
    except:
        print(r.status_code)

    else:
        response = r.json()
        attrs = response.get('features')
        rows = [attr.get('attributes') for attr in attrs]
        ca_la_df = pl.DataFrame(rows).select(pl.exclude('ObjectId'))

        old_names = ca_la_df.columns
        new_names = [remove_numbers(colstring) for colstring in old_names]
        rename_dict = dict(zip(old_names, new_names))

        clean_ca_la_df = (ca_la_df
                        .rename(rename_dict))


        return clean_ca_la_df
        
        


In [31]:
ca_la_df = get_ca_la_df(2023)
# ca_la_df.columns

In [33]:
# ca_la_df

In [21]:
chargers_df = pl.read_ods('data/electric-vehicle-charging-device-statistics-july-2023.ods', sheet_name = '1a')

  CELL_ADDRESS = re.compile('^([A-Z]+)(\d+)$')


In [52]:
def rename_string(input_string):
    # Remove newline characters
    cleaned_string = input_string.replace("\n", "")
    
    # Define a translation table for punctuation marks and spaces
    trans_table = str.maketrans(string.punctuation + " ", "_" * (len(string.punctuation) + 1))
    
    # Convert to lowercase and apply the translation table
    output_string = cleaned_string.lower().translate(trans_table)
    
    # Replace consecutive underscores with a single underscore
    output_string = '_'.join(filter(None, output_string.split('_')))
    
    # Remove trailing underscores
    output_string = output_string.rstrip('_')
    
    return output_string


In [48]:
new_names = (chargers_df
 .slice(1, 1)
 .transpose()
 .to_series())

# f = open("data/names.txt", "w")
# f.write(str(list(new_names)))
# f.close()

In [38]:
old_names = chargers_df.columns

In [40]:
rename_dict = dict(zip(old_names, new_names))

In [62]:
mapper = { # dictionary for find and replace to apply in polars expression
    'x' : None
}

In [75]:
chargers_clean_1 = (chargers_df
.rename(mapping=rename_dict)
.slice(2, None) # remove first 2 rows
.select(pl.all().map_alias(lambda col_name: rename_string(col_name))) # use a renaming column to clean_names
.filter(pl.col('local_authority_region_code').is_not_null()) #strip nulls rows
.select(pl.all().map_dict(mapper, default = pl.first())) # turn all x into Null with mapper
 )

In [99]:
lad_code = f'LAD{str(year)[2:4]}CD' # column name depending on year
# have to do a stack here as can't cast strings to floats in place with_columns
chargers_clean = (chargers_clean_1
                    .select(cs.contains('local_authority'))
                    .hstack(chargers_clean_1.select(cs.matches('population|devices')).cast(pl.Float64))
                    .join(ca_la_df, left_on = 'local_authority_region_code', right_on = lad_code)
                    
                    )


In [136]:
chargers_clean.head()

local_authority_region_code,local_authority_region_name,jul_23_total_charging_devices_note_2,jul_23_per_100_000_population_note_3,apr_23_total_charging_devices,apr_23_per_100_000_population,jan_23_total_charging_devices,jan_23_per_100_000_population,oct_22_total_charging_devices,oct_22_per_100_000_population,july_22_total_charging_devices,july_22_per_100_000_population,apr_22_total_charging_devices,apr_22_per_100_000_population,jan_22_total_charging_devices_2,jan_22_per_100_000_population_3,oct_21_total_charging_devices,oct_21_per_100_000_population,july_21_total_charging_devices,july_21_per_100_000_population,apr_21_total_charging_devices,apr_21_per_100_000_population,jan_21_total_charging_devices,jan_21_per_100_000_population,oct_20_total_charging_devices,oct_20_per_100_000_population,july_20_total_charging_devices,july_20_per_100_000_population,apr_20_total_charging_devices,apr_20_per_100_000_population,jan_20_total_charging_devices,jan_20_per_100_000_population,oct_19_total_charging_devices,oct_19_per_100_000_population,LAD23NM,CAUTH23CD,CAUTH23NM
str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str
"""E06000047""","""County Durham""",259.0,49.6791,240.0,46.034687,229.0,43.924764,206.0,38.638354,174.0,32.63628,149.0,27.947159,128.0,24.008298,124.0,23.258039,116.0,21.75752,121.0,22.82614,110.0,20.751037,106.0,19.996453,105.0,19.807808,102.0,19.355573,96.0,18.21701,92.0,17.457968,"""County Durham""","""E47000010""","""North East"""
"""E06000005""","""Darlington""",80.0,73.922123,73.0,67.453937,68.0,62.833805,54.0,50.278393,49.0,45.622987,31.0,28.863522,30.0,27.932441,31.0,28.863522,31.0,28.863522,29.0,27.152795,29.0,27.152795,29.0,27.152795,28.0,26.216492,28.0,26.274797,27.0,25.336411,27.0,25.336411,"""Darlington""","""E47000006""","""Tees Valley"""
"""E06000001""","""Hartlepool""",27.0,29.166802,27.0,29.166802,24.0,25.926046,8.0,8.525513,11.0,11.72258,12.0,12.788269,13.0,13.853958,11.0,11.72258,10.0,10.656891,9.0,9.608917,8.0,8.54126,6.0,6.405945,6.0,6.405945,6.0,6.434868,5.0,5.36239,5.0,5.36239,"""Hartlepool""","""E47000006""","""Tees Valley"""
"""E06000002""","""Middlesbrough""",63.0,43.830966,58.0,40.352317,51.0,35.48221,33.0,23.357044,38.0,26.89599,34.0,24.064833,32.0,22.649255,30.0,21.233677,30.0,21.233677,30.0,21.279614,30.0,21.279614,29.0,20.570294,29.0,20.570294,29.0,20.633961,27.0,19.210929,25.0,17.787897,"""Middlesbrough""","""E47000006""","""Tees Valley"""
"""E06000057""","""Northumberland…",280.0,87.076048,264.0,82.100274,251.0,78.057458,220.0,67.938978,226.0,69.79186,198.0,61.145081,197.0,60.836267,186.0,57.439318,172.0,53.115929,153.0,47.451571,149.0,46.211008,162.0,50.24284,152.0,47.14143,149.0,46.522665,138.0,43.088106,139.0,43.400338,"""Northumberland…","""E47000011""","""North of Tyne"""


In [150]:
(chargers_clean
#  .pivot(values = ~cs.matches('local_authority|LAD|CAUTH'), columns=['LAD23NM', 'CAUTH23NM'], index='LAD23CD')
#  .select(~cs.matches('local_authority|LAD|CAUTH'))
#  .columns
.melt(id_vars = ['CAUTH23NM', 'CAUTH23CD', 'LAD23NM', 'local_authority_region_code', 'local_authority_region_name'], value_vars=cs.numeric())
# .melt(id_vars=cs.string())
# .melt(value_vars=cs.numeric())
)

TypeError: argument 'value_vars': '_selector_proxy_' object cannot be converted to 'Sequence'

In [147]:
type(cs.expand_selector(chargers_clean, cs.numeric()))

tuple

In [151]:
df = pl.DataFrame(
    {
        "a": ["x", "y", "z"],
        "b": [1, 3, 5],
        "c": [2, 4, 6],
    }
)
df.melt(id_vars="a", value_vars=cs.numeric())

TypeError: argument 'value_vars': '_selector_proxy_' object cannot be converted to 'Sequence'