# Code

In [2]:
# Import key libraries
import pandas as pd
import plotly.express as px

In [3]:
# Financial years to load, written 'YY YY' as they appear in the workbook file names
years = ['16 17', '17 18', '19 20', '20 21', '21 22']

# Collects one long-format dataframe per source workbook
frames = []

# Flattened '<level 0>_<level 1>' headers mapped to short column names
column_names = {
    'Account Commodity_Site Name': 'Location',
    'Electricity_Present Consumption': 'Electric kWh',
    'Electricity_Sum Present Net Cost': 'Electric £',
    'Biomass_Present Consumption': 'Biomass kWh',
    'Biomass_Sum Present Net Cost': 'Biomass £',
    'Gas_Present Consumption': 'Gas kWh',
    'Gas_Sum Present Net Cost': 'Gas £',
    'Oil_Present Consumption': 'Oil kWh',
    'Oil_Sum Present Net Cost': 'Oil £',
    'Water_Present Consumption': 'Water m3',
    'Water_Sum Present Net Cost': 'Water £',
    'Propane_Present Consumption': 'Propane kWh',
    'Propane_Sum Present Net Cost': 'Propane £',
}

# Consumption columns that become rows of the long table (cost columns are left out)
consumption_columns = ['Electric kWh', 'Gas kWh', 'Oil kWh', 'Propane kWh', 'Biomass kWh', 'Water m3']

for year in years:

    # Load the workbook; its first two rows are read as a two-level header
    df = pd.read_excel(f'./foi/ECS {year}.xlsx', header=[0, 1])

    # Collapse each two-level header into a single 'level0_level1' string
    df.columns = ['_'.join(levels) for levels in df.columns.values]

    # Switch to the short column names
    df = df.rename(columns=column_names)

    # Oil is supplied in litres: multiply by 10.35 to get kWh (assumes kerosine)
    df['Oil kWh'] = df['Oil kWh'] * 10.35

    # Remove the final row, which holds the totals
    df = df.drop(df.tail(1).index)

    # Reshape to long format: one row per location and consumption type
    df = df.melt(
        id_vars='Location',
        value_vars=consumption_columns,
        var_name='Type',
        value_name='Amount',
    )

    # Label every row with the financial year, e.g. '16 17' -> '2016/17'
    year = year.replace(' ', '/')
    df['Year'] = '20' + year

    # Keep the frame for the final concat
    frames.append(df)

In [4]:
# Read in oil data (the first two rows are read as a two-level header)
df = pd.read_excel('./foi/ECS Oil Annualised Data to Mar 2016.xls', header = [0,1])

# Drop columns with substring 'Cost'
df = df[df.columns.drop(list(df.filter(regex='Cost')))]

# Select second row of multi-index
df.columns = df.columns.get_level_values(1)

# Drop last two columns
df = df.iloc[: , :-2]

# Add '20' to beginning of column names
# (reassign instead of inplace=True so the iloc slice above is never mutated in place)
df = df.rename(columns=lambda x: '20' + x)

# Change backslash to forward slash
df.columns = df.columns.str.replace('\\\\', '/', regex=True)

# Rename the first two columns by position.
# A fresh label list is assigned instead of writing into df.columns.values:
# an Index is meant to be immutable, and editing its backing array in place
# can leave cached label lookups out of sync with the visible names.
df.columns = ['Location', 'Units'] + list(df.columns[2:])

# Convert Oil litres into Oil kWh (assume kerosine with ratio of 10.35)
df[df.select_dtypes(include=['number']).columns] *= 10.35

# Replace 'PS' with 'Primary School' and 'HS' with 'High School' in location column
df['Location'] = df['Location'].replace('PS', 'Primary School', regex=True)
df['Location'] = df['Location'].replace('HS', 'High School', regex=True)

# Drop last row with totals
df = df.drop(df.tail(1).index)

# Turn into long format: one row per location and financial year
df = pd.melt(df, id_vars = 'Location',
             value_vars = ['2000/01', '2001/02', '2002/03', '2003/04', '2004/05',
                           '2005/06', '2006/07', '2007/08', '2008/09', '2009/10', '2010/11',
                           '2011/12', '2012/13', '2013/14', '2014/15', '2015/16'],
             var_name = 'Year', value_name = 'Amount')

# Add type
df['Type'] = 'Oil kWh'

# Append to df list
frames.append(df)

In [5]:
# Read in Electricity data (the first two rows are read as a two-level header)
df = pd.read_excel('./foi/ECS Elec Annualised Data to Mar 2016.xls', header = [0,1])

# Drop columns with substring 'Cost'
df = df[df.columns.drop(list(df.filter(regex='Cost')))]

# Select second row of multi-index
df.columns = df.columns.get_level_values(1)

# Drop the last column
# NOTE(review): the oil cell drops the last two columns - confirm that one is
# correct for this workbook.
df = df.iloc[: , :-1]

# Add '20' to beginning of column names
# (reassign instead of inplace=True so the iloc slice above is never mutated in place)
df = df.rename(columns=lambda x: '20' + x)

# Change backslash to forward slash
df.columns = df.columns.str.replace('\\\\', '/', regex=True)

# Rename the first two columns by position.
# A fresh label list is assigned instead of writing into df.columns.values:
# an Index is meant to be immutable, and editing its backing array in place
# can leave cached label lookups out of sync with the visible names.
df.columns = ['Location', 'Units'] + list(df.columns[2:])

# Replace 'PS' with 'Primary School' and 'HS' with 'High School' in location column
df['Location'] = df['Location'].replace('PS', 'Primary School', regex=True)
df['Location'] = df['Location'].replace('HS', 'High School', regex=True)

# Drop last row with totals
df = df.drop(df.tail(1).index)

# Turn into long format: one row per location and financial year
df = pd.melt(df, id_vars = 'Location',
             value_vars = ['2001/02', '2002/03', '2003/04', '2004/05',
                           '2005/06', '2006/07', '2007/08', '2008/09', '2009/10', '2010/11',
                           '2011/12', '2012/13', '2013/14', '2014/15', '2015/16'],
             var_name = 'Year', value_name = 'Amount')

# Add type
df['Type'] = 'Electric kWh'

# Append to df list
frames.append(df)

In [6]:
# Read in Biomass data (the first two rows are read as a two-level header)
df = pd.read_excel('./foi/ECS Biomass Annualised Data to Mar 2016.xls', header = [0,1])

# Drop columns with substring 'Cost'
df = df[df.columns.drop(list(df.filter(regex='Cost')))]

# Select second row of multi-index
df.columns = df.columns.get_level_values(1)

# Drop the last column
# NOTE(review): the oil cell drops the last two columns - confirm that one is
# correct for this workbook.
df = df.iloc[: , :-1]

# Add '20' to beginning of column names
# (reassign instead of inplace=True so the iloc slice above is never mutated in place)
df = df.rename(columns=lambda x: '20' + x)

# Change backslash to forward slash
df.columns = df.columns.str.replace('\\\\', '/', regex=True)

# Rename the first two columns by position.
# A fresh label list is assigned instead of writing into df.columns.values:
# an Index is meant to be immutable, and editing its backing array in place
# can leave cached label lookups out of sync with the visible names.
df.columns = ['Location', 'Units'] + list(df.columns[2:])

# Replace 'PS' with 'Primary School' and 'HS' with 'High School' in location column
df['Location'] = df['Location'].replace('PS', 'Primary School', regex=True)
df['Location'] = df['Location'].replace('HS', 'High School', regex=True)

# Drop last row with totals
df = df.drop(df.tail(1).index)

# Turn into long format: one row per location and financial year
df = pd.melt(df, id_vars = 'Location',
             value_vars = ['2010/11', '2011/12', '2012/13', '2013/14','2014/15', '2015/16'],
             var_name = 'Year', value_name = 'Amount')

# Add type
df['Type'] = 'Biomass kWh'

# Append to df list
frames.append(df)

In [7]:
# Read in Gas data (the first two rows are read as a two-level header)
df = pd.read_excel('./foi/ECS Gas Annualised Data to Mar 2016.xls', header = [0,1])

# Drop columns with substring 'Cost'
df = df[df.columns.drop(list(df.filter(regex='Cost')))]

# Select second row of multi-index
df.columns = df.columns.get_level_values(1)

# Drop the last column
# NOTE(review): the oil cell drops the last two columns - confirm that one is
# correct for this workbook.
df = df.iloc[: , :-1]

# Add '20' to beginning of column names
# (reassign instead of inplace=True so the iloc slice above is never mutated in place)
df = df.rename(columns=lambda x: '20' + x)

# Change backslash to forward slash
df.columns = df.columns.str.replace('\\\\', '/', regex=True)

# Rename the first two columns by position.
# A fresh label list is assigned instead of writing into df.columns.values:
# an Index is meant to be immutable, and editing its backing array in place
# can leave cached label lookups out of sync with the visible names.
df.columns = ['Location', 'Units'] + list(df.columns[2:])

# Replace 'PS' with 'Primary School' and 'HS' with 'High School' in location column
df['Location'] = df['Location'].replace('PS', 'Primary School', regex=True)
df['Location'] = df['Location'].replace('HS', 'High School', regex=True)

# Drop last row with totals
df = df.drop(df.tail(1).index)

# Turn into long format: one row per location and financial year
df = pd.melt(df, id_vars = 'Location',
             value_vars = ['2010/11', '2011/12', '2012/13', '2013/14','2014/15', '2015/16'],
             var_name = 'Year', value_name = 'Amount')

# Add type
df['Type'] = 'Gas kWh'

# Append to df list
frames.append(df)

In [8]:
# Stack every collected frame into a single long table
dft = pd.concat(frames)

# The dropna step below is currently disabled, so rows with null values are kept
#dft = dft.dropna(how = 'any', axis = 0)

# Normalise location names by removing their 'XX-' / 'ZHC-' prefixes
location_fixes = {
    'XX-Caol Community Centre': 'Caol Community Centre',
    'ZHC-Altnaharra Primary School (Mothballed)': 'Altnaharra Primary School (Mothballed)',
    'ZHC-Badcaul Primary School': 'Badcaul Primary School',
    'ZHC-Struy Primary School': 'Struy Primary School',
    'ZHC-Wick High School': 'Wick High School',
}
dft['Location'] = dft['Location'].replace(location_fixes)

# Order rows by year, then location, then type (all ascending) and renumber the index
dft = dft.sort_values(by=['Year', 'Location', 'Type'], ascending=True)
dft = dft.reset_index(drop=True)

# Preview the first 5 rows
dft.head(5)

Unnamed: 0,Location,Type,Amount,Year
0,Abernethy Primary School,Oil kWh,16025.493246,2000/01
1,Alness Academy ( Old ),Oil kWh,111999.396893,2000/01
2,Altnaharra Primary School (Mothballed),Oil kWh,497.575988,2000/01
3,Arisaig RC Primary School,Oil kWh,273.949341,2000/01
4,Avoch Primary School,Oil kWh,14492.703263,2000/01


In [9]:
# Write the combined long-format table to a .csv file in the working directory
dft.to_csv(path_or_buf='highland_energy_education_summary_long.csv')

# Graphs

In [10]:
# Keep only the electricity rows for plotting
is_electric = dft['Type'] == 'Electric kWh'
dfgraph = dft[is_electric]

In [11]:
# Line chart of Amount per Year with one coloured trace per Location
line_options = dict(
    x = "Year",
    y = "Amount",
    color = "Location",
    height = 900,
    title = 'Highland education Electricity use 2001 - 2022',
)
fig = px.line(dfgraph, **line_options)

# Layout tweaks: y-axes are not matched to each other and their range includes
# zero; every trace is drawn with both markers and lines
fig = (
    fig
    .update_yaxes(matches = None, rangemode = "tozero")
    .update_traces(mode = "markers+lines")
)

# Save the interactive graph as a standalone HTML file
fig.write_html('./graph_highland_education_electricity_use_2001_2022.html')

# Inline display is disabled; uncomment to show the graph in the notebook
#fig.show()