In [None]:
# Import key libraries
import numpy as np
import pandas as pd
import plotly.express as px

In [None]:
# Financial years for which a per-year workbook ('./foi/ECS <years>.xlsx') is read
# NOTE(review): '18 19' is absent from this list - confirm that is intentional
years = ['16 17', '17 18', '19 20', '20 21', '21 22', '22 23']

# Collects one long-format dataframe per source file (concatenated in a later cell)
frames = []

# Level one headers (fuel types) that receive a £/kWh column
fuels = ['Biomass', 'Electricity', 'Gas', 'Oil', 'Propane']

# Multiplier applied to the Oil consumption column to turn litres into kWh
# (assumes kerosene with a ratio of 10.35)
oil_litres_to_kwh = 10.35

# Column label renames; a dict passed to rename() is applied to both header levels
column_names = {'Present Consumption': 'kWh', 
                'Sum Present Net Cost': '£',
                'Account Commodity': 'Location',
                'Site Name': ''}

for year in years:

    # Read in xlsx; the first two rows form a two-level column header
    df = pd.read_excel(f'./foi/ECS {year}.xlsx', header = [0,1])

    ### Tidy up data
    df = (
        df
        # Water is not an energy type, so its columns are dropped
        .drop('Water', axis = 1, level = 0)
        # Site Group Name is not present in the 22/23 file, hence errors='ignore'
        .drop('Site Group Name', axis = 1, level = 1, errors='ignore')
        .rename(columns = column_names)
    )

    # Convert Oil litres into Oil kWh
    df.loc[:, ('Oil','kWh')] *= oil_litres_to_kwh

    ### Create new columns with £/kWh

    # Zeros become NaN so that the division below yields NaN rather than inf
    df = df.replace(0, np.nan)

    for fuel in fuels:
        # Cost divided by consumption for this fuel
        df.loc[:, (fuel,'£/kWh')] = df[fuel]['£'] / df[fuel]['kWh']

    ### Convert into long format and append to list

    # One row per Location / Type / Unit combination
    df = pd.melt(df, id_vars = 'Location', var_name = ['Type', 'Unit'], value_name = 'Amount')

    # Label the rows with the financial year, e.g. '16 17' -> '2016/17'
    df['Year'] = '20' + year.replace(' ', '/')

    frames.append(df)

In [None]:
# Read in oil data (two header rows are read as a two-level column index)
# NOTE(review): the Oil, Electricity, Biomass and Gas cells repeat the same steps;
# a shared helper taking the file path and fuel name would remove the duplication.
df = pd.read_excel('./foi/ECS Oil Annualised Data to Mar 2016.xls', header = [0,1])

### Tidy up data

# Drop empty columns
df = df.dropna(how = "all", axis=1)

# Drop columns with substring 'Units'
df = df[df.columns.drop(list(df.filter(regex='Units')))]

# Drop duplicate columns, keeping the first of each set of identical columns.
# The duplicate mask is computed on the transpose but the columns are selected
# from the original frame, so numeric columns keep their dtype; transposing a
# mixed-dtype frame there and back (df.T.drop_duplicates().T) leaves every
# column as object.
# NOTE(review): columns are compared by value only, so two data columns that
# happen to hold identical values would also be treated as duplicates.
df = df.loc[:, ~df.T.duplicated().to_numpy()]

# Add '20' to beginning of column years (second level of multi-index)
df = df.rename(columns=lambda x: '20' + x, level=1)

# Change backslash to forward slash (second level of multi-index)
df.columns = df.columns.set_levels(df.columns.levels[1].str.replace('\\\\', '/', regex=True), level=1)

# Rename columns
df = df.rename(columns =
               {'Consumption (kWh)': 'kWh', 
                'Cost (kWh)': '£',
                'Unnamed: 0_level_0': 'Location',
                '20Site': ''})

# Convert Oil litres into Oil kWh (assume kerosene with ratio of 10.35)
oil_litres_to_kwh = 10.35
df['kWh'] *= oil_litres_to_kwh

### Create new columns with £/kWh

# Replace zeros with NaN so the division yields NaN rather than inf
df = df.replace(0, np.nan)

# Create new df with values of Cost / kWh columns (one column per year)
dfn = df['£'] / df['kWh']

# Add column level to index = £/kWh so that it matches format of original df
dfn.columns = pd.MultiIndex.from_product([['£/kWh'], dfn.columns])

# Join dfn to original df
df = df.join(dfn)

### Convert into long format and append to list

# Turn into long format
df = pd.melt(df, id_vars = 'Location', var_name = ['Unit', 'Year'], value_name = 'Amount')

# Add type
df['Type'] = 'Oil'

# Append to df list
frames.append(df)

In [None]:
# Read in Electricity data (two header rows are read as a two-level column index)
# NOTE(review): the Oil, Electricity, Biomass and Gas cells repeat the same steps;
# a shared helper taking the file path and fuel name would remove the duplication.
df = pd.read_excel('./foi/ECS Elec Annualised Data to Mar 2016.xls', header = [0,1])

### Tidy up data

# Drop empty columns
df = df.dropna(how = "all", axis=1)

# Drop columns with substring 'Units'
df = df[df.columns.drop(list(df.filter(regex='Units')))]

# Drop duplicate columns, keeping the first of each set of identical columns.
# The duplicate mask is computed on the transpose but the columns are selected
# from the original frame, so numeric columns keep their dtype; transposing a
# mixed-dtype frame there and back (df.T.drop_duplicates().T) leaves every
# column as object.
# NOTE(review): columns are compared by value only, so two data columns that
# happen to hold identical values would also be treated as duplicates.
df = df.loc[:, ~df.T.duplicated().to_numpy()]

# Add '20' to beginning of column years (second level of multi-index)
df = df.rename(columns=lambda x: '20' + x, level=1)

# Change backslash to forward slash (second level of multi-index)
df.columns = df.columns.set_levels(df.columns.levels[1].str.replace('\\\\', '/', regex=True), level=1)

# Rename columns
df = df.rename(columns =
               {'Consumption (kWh)': 'kWh', 
                'Cost (kWh)': '£',
                'Unnamed: 0_level_0': 'Location',
                '20Site': ''})

### Create new columns with £/kWh

# Replace zeros with NaN so the division yields NaN rather than inf
df = df.replace(0, np.nan)

# Create new df with values of Cost / kWh columns (one column per year)
dfn = df['£'] / df['kWh']

# Add column level to index = £/kWh so that it matches format of original df
dfn.columns = pd.MultiIndex.from_product([['£/kWh'], dfn.columns])

# Join dfn to original df
df = df.join(dfn)

### Convert into long format and append to list

# Turn into long format
df = pd.melt(df, id_vars = 'Location', var_name = ['Unit', 'Year'], value_name = 'Amount')

# Add type
df['Type'] = 'Electricity'

# Append to df list
frames.append(df)

In [None]:
# Read in Biomass data (two header rows are read as a two-level column index)
# NOTE(review): the Oil, Electricity, Biomass and Gas cells repeat the same steps;
# a shared helper taking the file path and fuel name would remove the duplication.
df = pd.read_excel('./foi/ECS Biomass Annualised Data to Mar 2016.xls', header = [0,1])

### Tidy up data

# Drop empty columns
df = df.dropna(how = "all", axis=1)

# Drop columns with substring 'Units'
df = df[df.columns.drop(list(df.filter(regex='Units')))]

# Drop duplicate columns, keeping the first of each set of identical columns.
# The duplicate mask is computed on the transpose but the columns are selected
# from the original frame, so numeric columns keep their dtype; transposing a
# mixed-dtype frame there and back (df.T.drop_duplicates().T) leaves every
# column as object.
# NOTE(review): columns are compared by value only, so two data columns that
# happen to hold identical values would also be treated as duplicates.
df = df.loc[:, ~df.T.duplicated().to_numpy()]

# Add '20' to beginning of column years (second level of multi-index)
df = df.rename(columns=lambda x: '20' + x, level=1)

# Change backslash to forward slash (second level of multi-index)
df.columns = df.columns.set_levels(df.columns.levels[1].str.replace('\\\\', '/', regex=True), level=1)

# Rename columns
df = df.rename(columns =
               {'Consumption (kWh)': 'kWh', 
                'Cost (kWh)': '£',
                'Unnamed: 0_level_0': 'Location',
                '20Site': ''})

### Create new columns with £/kWh

# Replace zeros with NaN so the division yields NaN rather than inf
df = df.replace(0, np.nan)

# Create new df with values of Cost / kWh columns (one column per year)
dfn = df['£'] / df['kWh']

# Add column level to index = £/kWh so that it matches format of original df
dfn.columns = pd.MultiIndex.from_product([['£/kWh'], dfn.columns])

# Join dfn to original df
df = df.join(dfn)

### Convert into long format and append to list

# Turn into long format
df = pd.melt(df, id_vars = 'Location', var_name = ['Unit', 'Year'], value_name = 'Amount')

# Add type
df['Type'] = 'Biomass'

# Append to df list
frames.append(df)

In [None]:
# Read in Gas data (two header rows are read as a two-level column index)
# NOTE(review): the Oil, Electricity, Biomass and Gas cells repeat the same steps;
# a shared helper taking the file path and fuel name would remove the duplication.
df = pd.read_excel('./foi/ECS Gas Annualised Data to Mar 2016.xls', header = [0,1])

### Tidy up data

# Drop empty columns
df = df.dropna(how = "all", axis=1)

# Drop columns with substring 'Units'
df = df[df.columns.drop(list(df.filter(regex='Units')))]

# Drop duplicate columns, keeping the first of each set of identical columns.
# The duplicate mask is computed on the transpose but the columns are selected
# from the original frame, so numeric columns keep their dtype; transposing a
# mixed-dtype frame there and back (df.T.drop_duplicates().T) leaves every
# column as object.
# NOTE(review): columns are compared by value only, so two data columns that
# happen to hold identical values would also be treated as duplicates.
df = df.loc[:, ~df.T.duplicated().to_numpy()]

# Add '20' to beginning of column years (second level of multi-index)
df = df.rename(columns=lambda x: '20' + x, level=1)

# Change backslash to forward slash (second level of multi-index)
df.columns = df.columns.set_levels(df.columns.levels[1].str.replace('\\\\', '/', regex=True), level=1)

# Rename columns
df = df.rename(columns =
               {'Consumption (kWh)': 'kWh', 
                'Cost (kWh)': '£',
                'Unnamed: 0_level_0': 'Location',
                '20Site': ''})

### Create new columns with £/kWh

# Replace zeros with NaN so the division yields NaN rather than inf
df = df.replace(0, np.nan)

# Create new df with values of Cost / kWh columns (one column per year)
dfn = df['£'] / df['kWh']

# Add column level to index = £/kWh so that it matches format of original df
dfn.columns = pd.MultiIndex.from_product([['£/kWh'], dfn.columns])

# Join dfn to original df
df = df.join(dfn)

### Convert into long format and append to list

# Turn into long format
df = pd.melt(df, id_vars = 'Location', var_name = ['Unit', 'Year'], value_name = 'Amount')

# Add type
df['Type'] = 'Gas'

# Append to df list
frames.append(df)

In [None]:
# Concat frames together
dft = pd.concat(frames)

# Ordered regex clean-ups for the Location column. Each (pattern, replacement)
# pair is applied to the result of the previous one, so the order matters.
# Patterns are raw strings so that '\(' and '\)' reach the regex engine as
# written instead of relying on Python passing an invalid escape sequence
# through (which raises a warning).
location_fixes = [
    # Replace 'PS' with 'Primary School' and 'HS' with 'High School'
    # NOTE(review): these match the letters anywhere in the name, not only as a
    # separate word - confirm no location contains 'PS'/'HS' inside another word.
    (r'PS', 'Primary School'),
    (r'HS', 'High School'),
    # Replace ZHC- and (Mothballed) with blanks
    (r'ZHC-', ''),
    (r'ZHC -', ''),
    (r'\(Mothballed\)', ''),
    # Set New and Old to same format
    (r'- new', ' (New)'),
    (r'-  new', ' (New)'),
    (r'- \(New\)', ' (New)'),
    (r'-  \(New\)', ' (New)'),
    (r'\(new\)', '(New)'),
    (r'- old', ' (Old)'),
    (r' \(old\)', ' (Old)'),
    (r'\( Old \)', '(Old)'),
    # Replace XX- with blank
    (r'XX-', ''),
    # Replace (Not PPP) with blank
    (r'\(Not PPP\)', ''),
]

# Remove space before / after brackets
# Commented out as seems to miss from brackets sometimes for some reason :/
#dft['Location']= dft["Location"].replace('\( ', '(', regex = True)
#dft['Location']= dft["Location"].replace(' \)', ')', regex = True)

# Exact-match renames so that the same site carries one name across all years
location_renames = {
    'Caol Campus': 'Caol Joint Campus',
    'Wick Community Campus (New)': 'Wick Joint Campus',
    'Elgin Residence ( formerly Elgin Hostel )': 'Elgin Residence',
    'Elgin Hostel': 'Elgin Residence',
    'Fort Augustus Memorial Hall (Service Point forms part)': 'Fort Augustus Memorial Hall Service Point',
    'Ft Augustus Memorial Hall Service Point': 'Fort Augustus Memorial Hall Service Point',
    'Merkinch Nursery School': 'Merkinch Family Centre',
    'Brora Interpretative Centre': 'Brora Heritage Centre',
    'Morven Youth Club': 'Castletown ( Morven ) Youth Club',
    'Rosemarkie Pavilion': 'Rosemarkie Beach Amenities',
    'Ferry Brae Pavilion North Kessock': 'Ferry Brae Sports Pavilion',
    'Spean Bridge Primary School Demountable Unit': 'Spean Bridge Primary Demountable Unit',
}

# Build the cleaned Location column before any row filtering, so the result is
# attached to the full frame rather than assigned into a filtered slice
location = dft['Location']
for pattern, replacement in location_fixes:
    location = location.replace(pattern, replacement, regex = True)

# Remove leading / trailing whitespace first, so that the exact-match renames
# and the 'Total' filter below also catch names that were padded with spaces
location = location.str.strip()

# Rename locations
location = location.replace(location_renames)

dft = dft.assign(Location = location)

# Drop rows with Location = Total
dft = dft[dft['Location'] != 'Total']

# Remove any rows with null values
dft = dft.dropna(how = 'any', axis = 0)

# Remove rows with zeros
dft = dft[dft['Amount'] != 0]

# Sort values and reset index
dft = dft.sort_values(['Year', 'Location', 'Type'], ascending = [True, True, True]).reset_index(drop=True)

# Export to .csv
dft.to_csv('highland_energy_education_summary_long.csv')

## Draw graph

In [None]:
# Create a df for graph: electricity consumption rows only.
# Fuel and measure are held in separate columns ('Type' and 'Unit'), so both are
# matched here; no row has Type == 'Electric kWh', so filtering on that value
# always produced an empty frame.
dfgraph = dft[(dft['Type'] == 'Electricity') & (dft['Unit'] == 'kWh')]

In [None]:
# Plot graph
# fig = px.line(dfgraph, x = "Year", y = "Amount", 
#                 color = "Location",
#                 height = 900,
#                 title = 'Highland education Electricity use 2001 - 2022'
#                 )

# # Update graph layout
# fig.update_yaxes(matches = None, rangemode = "tozero")
# fig.update_traces(mode = "markers+lines")

# # Export interactive graph
# fig.write_html('./graph_highland_education_electricity_use_2001_2022.html')

# # Show graph
# fig.show()