# Data processing for FlexSUS

institution: DTU

author: [tilseb](mailto:tilseb@dtu.dk)

date created: 2020-01-30

licensed under: [GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007](https://www.gnu.org/licenses/gpl-3.0.html)

## Installation

1. Installation via miniconda. Get miniconda with Python 3.7 [here](https://docs.conda.io/en/latest/miniconda.html).
2. Open the Anaconda Prompt: press the `start button`, type `anaconda prompt` and hit enter.
3. Navigate to the folder containing this script: `cd <path_to_folder>`
4. Use the requirements.yml file in the root directory to set up the environment: `conda env create -f requirements.yml`
5. Activate the flexsus environment: `conda activate flexsus`
6. Open the notebook: `jupyter notebook`

## Description

## Content

## Script set-up

In [1]:
# import packages
import os
import pandas as pd

In [2]:
# make directories
# exist_ok=True makes the call a no-op when the directory is already there,
# so no separate (race-prone) isdir check is needed; nested paths work as well
dirs = {'output'}
for i in dirs:
    os.makedirs(i, exist_ok=True)

## Define output resolution

In [3]:
# set of considered countries (if empty, select all)
# passed to filterYearAndCountry, where it is matched against column Dim4
ccc = {'DENMARK'}

In [4]:
# set of years (if empty, select all), e.g. {2025, 2035, 2045}
# passed to filterYearAndCountry, where it is matched against column Dim3
# note: a bare `{}` creates an empty dict, so the empty set is spelled set()
yyy = set()

## Load data

In [19]:
# get list of data sets: names of the CSV files in 'data', without extension
# - sorted, because os.listdir returns entries in arbitrary order while the
#   cells below address data sets by position (lf[0], lf[1], ...)
# - only '.csv' files, because readData appends '.csv' to every name
# - splitext instead of split('.')[0], so names containing dots stay intact
lf = sorted(
    os.path.splitext(i)[0]
    for i in os.listdir('data')
    if i.endswith('.csv')
)

## Load and process data 

In [20]:
def readData(f):
    """Load one data set from the 'data' folder into a DataFrame.

    f: name of the CSV file without the '.csv' extension.
    Returns the parsed file as a pandas DataFrame.
    """
    csv_path = ''.join(('data/', f, '.csv'))
    return pd.read_csv(csv_path, encoding='utf8', engine='c', low_memory=False)

In [41]:
def epsToZeros(df):
    """Return a copy of df in which every cell equal to 'Eps' is replaced by 0.

    The input frame itself is left unchanged.
    """
    return df.replace(to_replace='Eps', value=0)

In [10]:
def filterYearAndCountry(df,y,c):
    """Keep only the rows of df that match the requested years and countries.

    df: data frame with a Dim3 column (compared against y) and a Dim4
        column (compared against c).
    y:  collection of years to keep; if empty or None, no year filter is applied.
    c:  collection of countries to keep; if empty or None, no country filter
        is applied.
    Returns the filtered rows, or df itself when neither filter is active.
    """
    # isin(wanted) selects the same rows as the former double negation
    # ~isin(present - wanted), without first collecting all present values
    if y: df = df.loc[df.Dim3.isin(list(y)), :]
    if c: df = df.loc[df.Dim4.isin(list(c)), :]
    return df

In [11]:
def makeValFloat(df):
    """Return a copy of df whose Val column is converted to float.

    The input frame is not modified: assigning into it in place would change
    the caller's data and raises SettingWithCopyWarning when df is a slice
    (as returned by a .loc row filter).
    Raises ValueError if a value in Val cannot be converted to float.
    """
    return df.assign(Val=df.Val.astype(float))

In [38]:
# show the data set names; their positions are used as lf[<index>] below
lf

['ECONOMY_ELEC_TRANSMISSION',
 'ECONOMY_GENERATION',
 'ECONOMY_HEAT_TRANSMISSION',
 'ELEC_DEMAND',
 'ELEC_PRICE',
 'ELEC_PRICE_HOURLY',
 'ELEC_TRANSMISSION_CAPACITY',
 'ELEC_TRANSMISSION_FLOW',
 'EMISSIONS_CO2',
 'ENERGY_PRODUCTION',
 'FUEL_CONSUMPTION',
 'GENERATION_CAPACITY',
 'HEAT_DEMAND',
 'HEAT_PRICE',
 'HEAT_PRICE_HOURLY',
 'HEAT_TRANSMISSION_CAPACITY',
 'HEAT_TRANSMISSION_FLOW',
 'STORAGE_CAPACITY',
 'SYSTEM_COSTS']

### ECONOMY_ELEC_TRANSMISSION

In [48]:
# check which data set sits at index 0 of lf
lf[0]

'ECONOMY_ELEC_TRANSMISSION'

In [45]:
# ECONOMY_ELEC_TRANSMISSION
# read -> replace 'Eps' by 0 -> keep selected years/countries -> Val as float
df0 = (
    readData(lf[0])
    .pipe(epsToZeros)
    .pipe(filterYearAndCountry, yyy, ccc)
    .pipe(makeValFloat)
)
df0.head(2)

Unnamed: 0,Dim1,Dim2,Dim3,Dim4,Dim5,Dim6,Dim7,Dim8,Val
14,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,COSTS,TRANSMISSION_OPERATIONAL_COSTS,Mmoney,0.001645
15,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,COSTS,TRANSMISSION_TRADE_COSTS,Mmoney,34.881906


In [46]:
# group data frame: sum of Val per Dim2 (rows) and Dim7 (columns)
# note: df0 is overwritten, so re-running this cell requires re-running the
# load cell for df0 first
df0 = (
    df0.groupby(['Dim7', 'Dim2'])['Val']
    .sum()
    .unstack()
    .T
    .fillna(0)
)
# prefix the columns with the data set name as top level
df0.columns = pd.MultiIndex.from_product([[lf[0]], df0.columns])
df0.head(2)

Unnamed: 0_level_0,ECONOMY_ELEC_TRANSMISSION,ECONOMY_ELEC_TRANSMISSION,ECONOMY_ELEC_TRANSMISSION,ECONOMY_ELEC_TRANSMISSION
Dim7,TRANSMISSION_CAPITAL_COSTS,TRANSMISSION_OPERATIONAL_COSTS,TRANSMISSION_TRADE_COSTS,TRANSMISSION_TRADE_INCOME
Dim2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Scenario1,417.131961,0.020614,223.822992,10198.187747
Scenario10,295.729468,0.00937,392.203384,5148.391004


### ECONOMY_GENERATION

In [49]:
# check which data set sits at index 1 of lf
lf[1]

'ECONOMY_GENERATION'

In [47]:
# ECONOMY_GENERATION
# read -> replace 'Eps' by 0 -> keep selected years/countries -> Val as float
df1 = (
    readData(lf[1])
    .pipe(epsToZeros)
    .pipe(filterYearAndCountry, yyy, ccc)
    .pipe(makeValFloat)
)
df1.head(2)

Unnamed: 0,Dim1,Dim2,Dim3,Dim4,Dim5,Dim6,Dim7,Dim8,Dim9,Dim10,Dim11,Dim12,Val
1412,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,DK1_Large,GNR_BO_BIOIL_E-85,BIOOIL,BOILERS,COSTS,GENERATION_FIXED_COSTS,Mmoney,0.220143
1413,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,DK1_Large,GNR_BO_BIOIL_E-85,BIOOIL,BOILERS,COSTS,GENERATION_OPERATIONAL_COSTS,Mmoney,0.403046


In [50]:
# group data frame: sum of Val per Dim2 (rows) and Dim11 (columns)
# note: df1 is overwritten, so re-running this cell requires re-running the
# load cell for df1 first
df1 = (
    df1.groupby(['Dim11', 'Dim2'])['Val']
    .sum()
    .unstack()
    .T
    .fillna(0)
)
# prefix the columns with the data set name as top level
df1.columns = pd.MultiIndex.from_product([[lf[1]], df1.columns])
df1.head(2)

Unnamed: 0_level_0,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION,ECONOMY_GENERATION
Dim11,ELECTRICITY_SALE,ENERGY_SPECIFIC_REVENUE,GENERATION_CAPITAL_COSTS,GENERATION_CO2_TAX,GENERATION_FIXED_COSTS,GENERATION_FUEL_COSTS,GENERATION_GRID_TARIFFS,GENERATION_OPERATIONAL_COSTS,GENERATION_OTHER_EMI_TAX,GENERATION_TAXES,GENERATION_UC_COSTS,HEAT_SALE,TOTAL_REVENUE
Dim2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
Scenario1,12210.05927,31621.444182,6940.383171,86.642451,2664.404139,2620.16481,56.057248,749.95219,0.020144,87.905015,86.366213,5405.295318,4323.459207
Scenario10,4098.555644,29221.484573,3209.519551,219.15299,1049.003067,936.82982,245.66096,538.911484,0.012272,1971.637897,103.645321,7820.944328,3645.126609


### ECONOMY_HEAT_TRANSMISSION

In [51]:
# check which data set sits at index 2 of lf
lf[2]

'ECONOMY_HEAT_TRANSMISSION'

In [52]:
# ECONOMY_HEAT_TRANSMISSION
# read -> replace 'Eps' by 0 -> keep selected years/countries -> Val as float
df2 = readData(lf[2])
# this step previously called `eps2zeros`, a name that no cell shown above
# defines (NameError on a fresh kernel); the helper is called epsToZeros
df2 = epsToZeros(df2)
df2 = filterYearAndCountry(df2,yyy,ccc)
df2 = makeValFloat(df2)
df2.head(2)

Unnamed: 0,Dim1,Dim2,Dim3,Dim4,Dim5,Dim6,Dim7,Dim8,Dim9,Val
33,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,DK1_Large,COSTS,TRANSMISSION_TRADE_COSTS,Mmoney,24.516919
34,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,DK1_Large,COSTS,HEAT_TRANSMISSION_OPERATIONAL_COSTS,Mmoney,1e-06


In [53]:
# group data frame: sum of Val per Dim2 (rows) and Dim8 (columns)
# note: df2 is overwritten, so re-running this cell requires re-running the
# load cell for df2 first
df2 = (
    df2.groupby(['Dim8', 'Dim2'])['Val']
    .sum()
    .unstack()
    .T
    .fillna(0)
)
# prefix the columns with the data set name as top level
df2.columns = pd.MultiIndex.from_product([[lf[2]], df2.columns])
df2.head(2)

Unnamed: 0_level_0,ECONOMY_HEAT_TRANSMISSION,ECONOMY_HEAT_TRANSMISSION,ECONOMY_HEAT_TRANSMISSION,ECONOMY_HEAT_TRANSMISSION
Dim8,HEAT_TRANSMISSION_CAPITAL_COSTS,HEAT_TRANSMISSION_OPERATIONAL_COSTS,TRANSMISSION_TRADE_COSTS,TRANSMISSION_TRADE_INCOME
Dim2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Scenario1,0.0,0.000225,630.736822,630.736822
Scenario10,0.0,0.000518,461.310127,461.310127


### ELEC_DEMAND

In [39]:
# check which data set sits at index 3 of lf
lf[3]

'ELEC_DEMAND'

In [62]:
# ELEC_DEMAND
# read -> replace 'Eps' by 0 -> keep selected years/countries -> Val as float
df3 = (
    readData(lf[3])
    .pipe(epsToZeros)
    .pipe(filterYearAndCountry, yyy, ccc)
    .pipe(makeValFloat)
)
df3.head(2)

Unnamed: 0,Dim1,Dim2,Dim3,Dim4,Dim5,Dim6,Dim7,Val
15,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,EXOGENOUS,TWh,19.791304
16,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,ENDO_EV,TWh,0.589469


In [63]:
# group data frame: sum of Val per Dim2 (rows) and Dim6 (columns)
# note: df3 is overwritten, so re-running this cell requires re-running the
# load cell for df3 first
df3 = (
    df3.groupby(['Dim6', 'Dim2'])['Val']
    .sum()
    .unstack()
    .T
    .fillna(0)
)
# prefix the columns with the data set name as top level
df3.columns = pd.MultiIndex.from_product([[lf[3]], df3.columns])
df3.head(2)

Unnamed: 0_level_0,ELEC_DEMAND,ELEC_DEMAND,ELEC_DEMAND,ELEC_DEMAND,ELEC_DEMAND
Dim6,ENDO_ELBOILER,ENDO_EV,ENDO_HEATPUMP,ENDO_INTRASTO,EXOGENOUS
Dim2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Scenario1,0.017731,7.389184,2.440897,0.549655,100.122479
Scenario10,0.241952,7.389184,19.760165,0.0,100.122479


### ELEC_PRICE

In [64]:
# check which data set sits at index 4 of lf
lf[4]

'ELEC_PRICE'

In [68]:
# ELEC_PRICE
# read -> replace 'Eps' by 0 -> keep selected years/countries -> Val as float
df4 = (
    readData(lf[4])
    .pipe(epsToZeros)
    .pipe(filterYearAndCountry, yyy, ccc)
    .pipe(makeValFloat)
)
df4.head(2)

Unnamed: 0,Dim1,Dim2,Dim3,Dim4,Dim5,Dim6,Dim7,Val
5,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,AVERAGE,Money_per_MWh,54.85458
6,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK2,AVERAGE,Money_per_MWh,54.078051


In [69]:
# group data frame: mean of Val per Dim2 (rows) and Dim7 (columns)
# note: df4 is overwritten, so re-running this cell requires re-running the
# load cell for df4 first
df4 = (
    df4.groupby(['Dim7', 'Dim2'])['Val']
    .mean()
    .unstack()
    .T
    .fillna(0)
)
# prefix the columns with the data set name as top level
df4.columns = pd.MultiIndex.from_product([[lf[4]], df4.columns])
df4.head(2)

Unnamed: 0_level_0,ELEC_PRICE
Dim7,Money_per_MWh
Dim2,Unnamed: 1_level_2
Scenario1,54.064789
Scenario10,58.679853


### ELEC_PRICE_HOURLY

In [70]:
# check which data set sits at index 5 of lf
lf[5]

'ELEC_PRICE_HOURLY'

In [76]:
# ELEC_PRICE_HOURLY
# read -> replace 'Eps' by 0 -> keep selected years/countries -> Val as float
df5 = (
    readData(lf[5])
    .pipe(epsToZeros)
    .pipe(filterYearAndCountry, yyy, ccc)
    .pipe(makeValFloat)
)
df5.head(2)

Unnamed: 0,Dim1,Dim2,Dim3,Dim4,Dim5,Dim6,Dim7,Dim8,Val
625,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,S01,T073,Money_per_MWh,36.322361
626,ScenarioResults_181_270,Scenario181,2025,DENMARK,DK1,S01,T076,Money_per_MWh,29.86931


In [77]:
# group data frame: mean of Val per Dim2 (rows) and Dim6 (columns)
# note: df5 is overwritten, so re-running this cell requires re-running the
# load cell for df5 first
df5 = (
    df5.groupby(['Dim6', 'Dim2'])['Val']
    .mean()
    .unstack()
    .T
    .fillna(0)
)
# prefix the columns with the data set name as top level
df5.columns = pd.MultiIndex.from_product([[lf[5]], df5.columns])
df5.head(2)

Unnamed: 0_level_0,ELEC_PRICE_HOURLY,ELEC_PRICE_HOURLY,ELEC_PRICE_HOURLY,ELEC_PRICE_HOURLY,ELEC_PRICE_HOURLY
Dim6,S01,S11,S22,S32,S43
Dim2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Scenario1,31.226674,74.843637,46.551809,46.288509,71.413314
Scenario10,34.390692,78.171579,51.718559,54.24849,74.869942
