In [None]:
import os 
from pathlib import Path
from textwrap import wrap
import pandas as pd
import altair as alt


project_dir = Path(os.path.abspath('')).resolve().parent
data_dir = project_dir.joinpath("data")
plots_dir = data_dir.joinpath("output/deliverable_plots")

cleaned_csv = data_dir.joinpath('output/clean_anon_survey.csv')
survey_df = pd.read_csv(cleaned_csv, sep=';', na_values=['N/A', ''])
print(set(survey_df['WLplants']))

questions_csv = data_dir.joinpath('EN_questionlist.csv')
questions_df = pd.read_csv(questions_csv, sep=';')

if not os.path.exists(plots_dir):
    os.mkdir(plots_dir)

pd.set_option('display.max_rows', None)

def get_percent_df(source_df: pd.DataFrame, code):

    percent_df = source_df.groupby([code]).size() / source_df.shape[0]
    percent_df = percent_df.reset_index(name='percent')
    percent_df['labels'] = percent_df['percent'].apply(lambda x : str(round(x * 100,1)) + '%')

    return percent_df

{'Of most', 'From some', nan, 'Of none', 'Of all'}


# 0 Operation

First, The consistency of different questions is investigated

There are 11 inconsistent responses regarding the operation of CSOs
There are 10 inconsistent responses regarding the measurement in CSOs

In [21]:
# code = 'OperatesCSO'
columns = ['OperatesCSO1', 'OperatesCSO2', 'NCSOs','MeasuresCSO', 'MTavail', 'NinstrumCSO']
title = 'Number of CSOs >= 1'
title = wrap(title, width=70)
answers_sort = ['Yes', 'No']


# Do they operate CSOs 1 (from number of csos)
survey_df['NCSOs'] = pd.to_numeric(survey_df['NCSOs'].replace({'No': '0'}), errors='coerce') 
survey_df.loc[survey_df['NCSOs'] > 0, 'OperatesCSO1'] = 'Yes'
survey_df.loc[survey_df['NCSOs'] == 0, 'OperatesCSO1'] = 'No'

# Do they operate CSOs 2 (direct question)
survey_df.loc[:,'OperatesCSO2'] = 'No'
survey_df.loc[survey_df['facilities[SQ002]'] == 'Yes', 'OperatesCSO2'] = 'Yes'
survey_df.loc[survey_df['facilities[SQ003]'] == 'Yes', 'OperatesCSO2'] = 'Yes'

# Are there measurements in the CSOs
survey_df['NinstrumCSO'] = pd.to_numeric(survey_df['NinstrumCSO'].replace({'No': '0'}), errors='coerce') 
survey_df.loc[survey_df['NinstrumCSO'] > 0, 'MeasuresCSO'] = 'Yes'
survey_df.loc[survey_df['NinstrumCSO'] == 0, 'MeasuresCSO'] = 'No'


source_df = survey_df
# source_df = source_df[~source_df[columns].isna()]
nlabel = f"N = {source_df.shape[0]}"

csos = source_df[columns]
csos[(csos['MeasuresCSO'] != csos['MTavail']) & csos['MeasuresCSO'].notna() & csos['MTavail'].notna() ] # 10 inconsistencies
csos[(csos['OperatesCSO1'] != csos['OperatesCSO2']) & csos['OperatesCSO1'].notna() & csos['OperatesCSO2'].notna() ] # 11 Inconsistencies
csos[((csos['OperatesCSO1'] != csos['OperatesCSO2']) & csos['OperatesCSO1'].notna() & csos['OperatesCSO2'].notna()) & ((csos['MeasuresCSO'] != csos['MTavail']) & csos['MeasuresCSO'].notna() & csos['MTavail'].notna() )] #0 common Inconsistencies
csos[(csos['OperatesCSO1'] == 'No') & (csos['MeasuresCSO'] == 'Yes')] # 0 Inconsistency
csos[(csos['OperatesCSO1'].isna()) & (csos['MeasuresCSO'] == 'Yes')] # 0 Inconsistency

# Take best knowledge
# NinstrumCSO --> MeasuresCSO and NCSOs --> OperatesCSO1 preferred 
csos['OperatesCSO'] = csos['OperatesCSO1']

#Fill other values with Best knowledge OperatesCSO2
csos.loc[csos['OperatesCSO'].isna(), 'OperatesCSO'] = csos.loc[csos['OperatesCSO'].isna(), 'OperatesCSO2']

csos['MeasuresCSO_best'] = csos['MeasuresCSO']

# No measurement is possible if no CSO operated
csos.loc[(csos['OperatesCSO'] == 'No') & (csos['MeasuresCSO_best'].isna()), 'MeasuresCSO_best'] = 'No'

# The left nans are filled with MTavail
csos.loc[csos['MeasuresCSO_best'].isna(), 'MeasuresCSO_best'] = csos.loc[csos['MeasuresCSO_best'].isna(), 'MTavail']




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  csos['OperatesCSO'] = csos['OperatesCSO1']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  csos['MeasuresCSO_best'] = csos['MeasuresCSO']


In [22]:

percent_df_operates1 = get_percent_df(csos[~csos['OperatesCSO1'].isna()], 'OperatesCSO1')
percent_df_operates2 = get_percent_df(csos[~csos['OperatesCSO2'].isna()], 'OperatesCSO2')
percent_df_operates3 = get_percent_df(csos[~csos['OperatesCSO'].isna()], 'OperatesCSO')
percent_df_measures1 = get_percent_df(csos[(~csos['MeasuresCSO'].isna()) & (csos['OperatesCSO'] == 'Yes')], 'MeasuresCSO')
percent_df_measures2 = get_percent_df(csos[(~csos['MTavail'].isna()) & (csos['OperatesCSO'] == 'Yes')], 'MTavail')
percent_df_measures3 = get_percent_df(csos[(~csos['MeasuresCSO_best'].isna()) & (csos['OperatesCSO'] == 'Yes')], 'MeasuresCSO_best')

print(percent_df_operates1)
print(percent_df_operates2)
print(percent_df_operates3)
print(percent_df_measures1)
print(percent_df_measures2)
print(percent_df_measures3)



  OperatesCSO1   percent labels
0           No  0.072993   7.3%
1          Yes  0.927007  92.7%
  OperatesCSO2   percent labels
0           No  0.156463  15.6%
1          Yes  0.843537  84.4%
  OperatesCSO   percent labels
0          No  0.095238   9.5%
1         Yes  0.904762  90.5%
  MeasuresCSO  percent labels
0          No  0.18254  18.3%
1         Yes  0.81746  81.7%
  MTavail  percent labels
0      No     0.26  26.0%
1     Yes     0.74  74.0%
  MeasuresCSO_best  percent labels
0               No  0.19084  19.1%
1              Yes  0.80916  80.9%


In [23]:
code = 'OperatesCSO'
title = 'Survey respondents who operate CSOs'
title = wrap(title, width=70)
answers_sort = ['Yes', 'No']

source_df = csos[~csos['OperatesCSO'].isna()]
source_df = source_df[~source_df[code].isna()]
nlabel = f"N = {source_df.shape[0]}"

percent_df = get_percent_df(source_df, code)

title = title +  [nlabel]

coden = code+':N'

bars1 = alt.Chart(percent_df, title=title).mark_bar(color='#1f78b4').encode(
    x=alt.X(coden, scale=alt.Scale()).axis(labelLimit=500, title='', labelAngle=0).sort(answers_sort),
    y=alt.Y("percent:Q",scale=alt.Scale(domain=[0, 1])).axis(format='%', title=''),
    color=alt.Color(legend=None)
).properties(
    width=100,
    height=150 
)

text1 = bars1.mark_text(align='left', dx=-15, dy=-10).encode(
    x=alt.X(coden).sort(answers_sort),
    color=alt.Color(legend=None),
    text=alt.Text('labels'))

chart1 = (bars1 + text1).resolve_scale(color='independent')

In [24]:
def get_facilities(row):
    if row['facilities[SQ002]'] == 'Yes' or row['facilities[SQ003]'] == 'Yes':
        if row['facilities[SQ004]'] == 'Yes' or row['facilities[SQ005]'] == 'Yes':
            return 'WWTP, sewerage system, CSOs'
        else:
            return 'WWTP and CSOs'
    else:
        if row['facilities[SQ004]'] == 'Yes' or row['facilities[SQ005]'] == 'Yes':
            return 'WWTP, sewerage system'
        else:
            return 'WWTP only'

survey_df['facilities'] = survey_df.apply(get_facilities, axis=1)

code = 'facilities'
title = 'What facilities does your utility or wastewater association operate?'
title = wrap(title, width=100)
answers_sort = ['WWTP only', 'WWTP & CSOs', 'WWTP & sewerage system', 'WWTP, sewerage system & CSOs']

source_df = survey_df
source_df = source_df[~source_df[code].isna()]
nlabel = f"N = {source_df.shape[0]}"

percent_df = get_percent_df(source_df, code)

title = title +  [nlabel]

coden = code+':N'

bars2 = alt.Chart(percent_df, title=title).mark_bar(color='#1f78b4').encode(
    x=alt.X("percent:Q").axis(format='%', title=''),
    y=alt.Y(coden, scale=alt.Scale()).axis(labelLimit=500, title='', labelAngle=0).sort(answers_sort),
    color=alt.Color(legend=None)
).properties(
    height=150  # Set the width of the chart (you can adjust this value)
)

text2 = bars2.mark_text(align='left', dx=2).encode(
    x=alt.X("percent:Q"),
    color=alt.Color(legend=None),
    text=alt.Text('labels'))

chart2 = (bars2 + text2).resolve_scale(color='independent')

In [25]:
(chart1 | chart2).save(plots_dir.joinpath('0_Operation.svg'))
chart1 | chart2

# 1 Data available in CSOs

MTavail	
5	
Added value through the use of measurement technology
Is measurement technology available in your combined sewer overflows?
Yes', 'No

MvarCSO[SQ001-5/other]	
Monitoring technology for CSOs	21	multiselect Measured variables and derived quantities in Combined sewer overflows (CSO) that the wastewater association records


In [26]:
code = 'MeasuresCSO_best'
title = 'Share of CSO operators with measurement technology'
title = wrap(title, width=70)
answers_sort = ['Yes', 'No']

source_df = csos[(~csos['MeasuresCSO_best'].isna()) & (csos['OperatesCSO'] == 'Yes')]
source_df = source_df[~source_df[code].isna()]
nlabel = f"N = {source_df.shape[0]}"

percent_df = get_percent_df(source_df, code)

title = title +  [nlabel]

coden = code+':N'

bars1 = alt.Chart(percent_df, title=title).mark_bar(color='#1f78b4').encode(
    x=alt.X(coden, scale=alt.Scale()).axis(labelLimit=500, title='', labelAngle=0).sort(answers_sort),
    y=alt.Y("percent:Q",scale=alt.Scale(domain=[0, 1])).axis(format='%', title=''),
    color=alt.Color(legend=None)
).properties(
    width=100,
    height=150 
)

text1 = bars1.mark_text(align='left', dx=-15, dy=-10).encode(
    x=alt.X(coden).sort(answers_sort),
    color=alt.Color(legend=None),
    text=alt.Text('labels'))

chart1 = (bars1 + text1).resolve_scale(color='independent')

In [27]:
question_df = questions_df[questions_df['Question_ID'] == 21]
question = question_df.iloc[0].to_dict()
question_cols = question_df['Code'].to_list()
question = "Measured variables and derived quantities in CSOs"
title = wrap(question, width=70)
answers_sort = question_df['Answers'].to_list()

source_df = survey_df[question_cols]
source_df = source_df[source_df.any(axis=1)]
source_df = source_df.replace('No', pd.NA)

percent_df = source_df.count()
percent_df = percent_df.reset_index(name='count')
percent_df = percent_df.rename(columns={'index': 'Code'})
percent_df['percent'] = percent_df['count'] / source_df.shape[0]
percent_df['labels'] = percent_df['percent'].apply(lambda x : str(round(100*x,1)) + '%')
percent_df = percent_df.merge(question_df[['Code', 'Answers']], how='left', on='Code')

nlabel = f"N = {source_df.shape[0]}"

title = title + [nlabel]

bars2 = alt.Chart(percent_df, title=title).mark_bar(color='#1f78b4').encode(
    x=alt.X("percent:Q", scale=alt.Scale(domain=[0, 1])).axis(format='%', title=''),
    y=alt.Y('Answers').axis(labelLimit=500, title='').sort(answers_sort),
    color=alt.Color(legend=None)
).properties(
    height=150 
)

text2 = bars2.mark_text(align='left', dx=2).encode(
    x=alt.X('percent:Q'),
    color=alt.Color(legend=None),
    text=alt.Text('labels'))

chart2 = (bars2 + text2).resolve_scale(color='independent')

In [28]:
(chart1 | chart2).save(plots_dir.joinpath('1_CSOMonitoring.svg'))
chart1 | chart2

# 2 Water level Data transmission

WLplants	4	CSO data transmission and management	25	singleselect	From how many installations are the water level/spill data transmitted directly to the process control system (DCS) of the WWTP?

WLmeas[SQ001]	4	CSO data transmission and management	24	multiselect	How is the water level/spill data transmitted by the measuring devices? [About fiber optics]

In [29]:
code = 'WLplants'
title = 'From how many installations are the data transmitted directly to the DCS of the WWTP?'
title = wrap(title, width=60)
answers_sort = ['Of none','From some','Of most','Of all']

source_df = survey_df
print(set(source_df['WLplants']))
source_df = source_df[~source_df[code].isna()]
nlabel = f"N = {source_df.shape[0]}"

percent_df = get_percent_df(source_df, code)

title = title +  [nlabel]

coden = code+':N'

bars1 = alt.Chart(percent_df, title=title).mark_bar(color='#1f78b4').encode(
    x=alt.X("percent:Q").axis(format='%', title=''),
    y=alt.Y(coden, scale=alt.Scale()).axis(labelLimit=500, title='', labelAngle=0).sort(answers_sort),
    color=alt.Color(legend=None)
).properties(
    height=150  # Set the width of the chart (you can adjust this value)
)

text1 = bars1.mark_text(align='left', dx=2).encode(
    x=alt.X("percent:Q"),
    color=alt.Color(legend=None),
    text=alt.Text('labels'))

chart1 = (bars1 + text1).resolve_scale(color='independent')

{'Of most', 'From some', nan, 'Of none', 'Of all'}


In [30]:
question_df = questions_df[questions_df['Question_ID'] == 24]
question = question_df.iloc[0].to_dict()
question_cols = question_df['Code'].to_list()
question = "How is the water level/spill data transmitted by the measuring devices?"
title = wrap(question, width=70)
answers_sort = question_df['Answers'].to_list()

source_df = survey_df[question_cols]
source_df = source_df[source_df.any(axis=1)]
source_df = source_df.replace('No', pd.NA)


percent_df = source_df.count()
percent_df = percent_df.reset_index(name='count')
percent_df = percent_df.rename(columns={'index': 'Code'})
print(percent_df)
percent_df['percent'] = percent_df['count'] / source_df.shape[0]
percent_df['labels'] = percent_df['percent'].apply(lambda x : str(round(x * 100,1)) + '%')
percent_df = percent_df.merge(question_df[['Code', 'Answers']], how='left', on='Code')

nlabel = f"N = {source_df.shape[0]}"

title = title + [nlabel]

bars2 = alt.Chart(percent_df, title=title).mark_bar(color='#1f78b4').encode(
    x=alt.X("percent:Q").axis(format='%', title=''),
    y=alt.Y('Answers').axis(labelLimit=500, title='').sort(answers_sort),
    color=alt.Color(legend=None)
).properties(
    height=150
)

text2 = bars2.mark_text(align='left', dx=2).encode(
    x=alt.X('percent:Q'),
    color=alt.Color(legend=None),
    text=alt.Text('labels'))

chart2 = (bars2 + text2).resolve_scale(color='independent')

            Code  count
0  WLmeas[SQ001]     50
1  WLmeas[SQ002]     50
2  WLmeas[SQ003]     92
3  WLmeas[SQ004]      8
4  WLmeas[SQ005]     21
5  WLmeas[other]     14


In [31]:
(chart1 | chart2).save(plots_dir.joinpath('2_Transmission.svg'))
chart1 | chart2

# 3 What is the data used for?

WLassess	4	CSO data transmission and management	26	singleselect	How often are the water level/spill data evaluated for the assessment of performance?	How often are the water level/spill data evaluated for the assessment of performance?	In near real-time', 'Daily', 'Once a month', 'Several times a year', 'Annually', 'The evaluation is carried out irregularly, e.g. during the planning or revision of the integrated drainage plan', 'Never

ORGshare[SQ001]	4	CSO data transmission and management	30	multiselect	Our organization shares the water level data with third parties …   [No, we do not]


In [32]:
code = 'WLassess'
title = 'How often are the data evaluated for performance assessment?'
title = wrap(title, width=100)
answers_sort = ['In near real-time', 'Daily', 'Once a month', 'Several times a year', 'Annually', 'Irregularily', 'Never']

source_df = survey_df
source_df[code] = source_df[code].replace('The evaluation is carried out irregularly, e.g. during the planning or revision of the integrated drainage plan', 'Irregularily')
print(set(source_df['WLplants']))
source_df = source_df[~source_df[code].isna()]
nlabel = f"N = {source_df.shape[0]}"

percent_df = get_percent_df(source_df, code)

title = title +  [nlabel]

coden = code+':N'

bars1 = alt.Chart(percent_df, title=title).mark_bar(color='#1f78b4').encode(
    x=alt.X("percent:Q").axis(format='%', title=''),
    y=alt.Y(coden, scale=alt.Scale()).axis(labelLimit=500, title='', labelAngle=0).sort(answers_sort),
    color=alt.Color(legend=None)
).properties(
    height=150  # Set the width of the chart (you can adjust this value)
)

text1 = bars1.mark_text(align='left', dx=2).encode(
    x=alt.X("percent:Q"),
    color=alt.Color(legend=None),
    text=alt.Text('labels'))

chart1 = (bars1 + text1).resolve_scale(color='independent')

{'Of most', 'From some', nan, 'Of none', 'Of all'}


In [33]:
question_df = questions_df[questions_df['Question_ID'] == 30]
question = question_df.iloc[0].to_dict()
question_cols = question_df['Code'].to_list()
question = "Our organization shares the water level data with third parties … "
title = wrap(question, width=70)
answers_sort = question_df['Answers'].to_list()

source_df = survey_df[question_cols]
source_df = source_df[source_df.any(axis=1)]
source_df = source_df.replace('No', pd.NA)


percent_df = source_df.count()
percent_df = percent_df.reset_index(name='count')
percent_df = percent_df.rename(columns={'index': 'Code'})
print(percent_df)
percent_df['percent'] = percent_df['count'] / source_df.shape[0] 
percent_df['labels'] = percent_df['percent'].apply(lambda x : str(round(x * 100 ,1)) + '%')
percent_df = percent_df.merge(question_df[['Code', 'Answers']], how='left', on='Code')

nlabel = f"N = {source_df.shape[0]}"

title = title + [nlabel]

bars2 = alt.Chart(percent_df, title=title).mark_bar(color='#1f78b4').encode(
    x=alt.X("percent:Q").axis(format='%', title=''),
    y=alt.Y('Answers').axis(labelLimit=500, title='').sort(answers_sort),
    color=alt.Color(legend=None)
).properties(
    height=150 
)

text2 = bars2.mark_text(align='left', dx=2).encode(
    x=alt.X('percent:Q'),
    color=alt.Color(legend=None),
    text=alt.Text('labels'))

chart2 = (bars2 + text2).resolve_scale(color='independent')

              Code  count
0  ORGshare[SQ001]     63
1  ORGshare[SQ002]     46
2  ORGshare[SQ003]     30
3  ORGshare[SQ004]     36
4  ORGshare[other]      8


In [34]:
(chart1 | chart2).save(plots_dir.joinpath('3_Datause.svg'))
chart1 | chart2

# 4 Motivation for measurement technology

Decided not to delete answers with more than 3 reasons because it would eliminate 17 responses

In [35]:
question_df = questions_df[questions_df['Question_ID'] == 30]
question = 'Motivations for use of measurement technology in plants'
question_cols1 = [ 'MEASplants[SQ001]', 'MEASplants[SQ002]', 'MEASplants[SQ003]', 'MEASplants[SQ004]', 'MEASplants[SQ005]', 'MEASplants[SQ006]','MEASplants[SQ007]', 'MEASplants[SQ008]', 'MEASplants[SQ009]', 'MEASplants[other]']
question_cols2 =['METex[SQ001]', 'METex[SQ002]', 'METex[SQ003]', 'METex[SQ004]', 'METex[SQ005]', 'METex[SQ006]', 'METex[SQ007]', 'METex[SQ008]', 'METex[SQ009]', 'METex[other]']
source_df = survey_df[[*question_cols1, *question_cols2]]

# Merge corresponding columns
for col1, col2 in zip(question_cols1, question_cols2):
    source_df[col1] = source_df[col1].combine_first(source_df[col2])

order = [ 'MEASplants[SQ001]', 'MEASplants[SQ002]', 'MEASplants[SQ004]', 'MEASplants[SQ005]', 'MEASplants[SQ006]', 'MEASplants[SQ003]','MEASplants[SQ007]', 'MEASplants[SQ008]', 'MEASplants[SQ009]', 'MEASplants[other]']

source_df = source_df[order]

source_df = source_df[source_df.any(axis=1)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  source_df[col1] = source_df[col1].combine_first(source_df[col2])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  source_df[col1] = source_df[col1].combine_first(source_df[col2])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  source_df[col1] = source_df[col1].combine_first(source_df[col2])
A value is

In [36]:
title = wrap(question, width=100)

question = question_df.iloc[0].to_dict()


source_df = source_df.replace('No', pd.NA)

print(source_df.shape[0])

percent_df = source_df.count()
percent_df = percent_df.reset_index(name='count')
percent_df = percent_df.rename(columns={'index': 'Code'})

percent_df['percent'] = percent_df['count'] / source_df.shape[0]
percent_df['labels'] = percent_df['percent'].apply(lambda x : str(round(x * 100,1)) + '%')
percent_df = percent_df.merge(questions_df[['Code', 'Answers']], how='left', on='Code')

print(percent_df)

# Add a Group column to assign colors
percent_df['Incentive'] = percent_df['Code'].apply(lambda x: (
    "Operation" if x in ["MEASplants[SQ001]", "MEASplants[SQ002]", "MEASplants[SQ004]", "MEASplants[SQ005]", "MEASplants[SQ006]"]
    else "Planning" if x in [ 'MEASplants[SQ003]',"MEASplants[SQ007]", "MEASplants[SQ008]"]
    else "Regulation" if x == "MEASplants[SQ009]"
    else "Other"
))

# Define colors for each group
color_scale = alt.Scale(
    domain=['Operation', 'Planning', 'Regulation', "Other"],
    range=["#1f78b4", "#b2df8a", "#33a02c", "#a6cee3"]  
)
#a6cee3
#1f78b4
#b2df8a
#33a02c

nlabel = f"N = {source_df.shape[0]}"

title = title + [nlabel]




bars=alt.Chart(percent_df,title=title).mark_bar().encode(
    y=alt.Y('Answers:N', sort=order).axis(labelLimit=500, title=''),
    x=alt.X('percent:Q', scale=alt.Scale(domain=[0, 1])).axis(format='%', title=''),
    color=alt.Color("Incentive:N", scale=color_scale)
)

text = bars.mark_text(align='left', dx=2).encode(
    x=alt.X('percent:Q'),
    text=alt.Text('labels'),
    color = alt.Color(legend=None))

chart = (bars + text).resolve_scale(color='independent').configure_legend(orient="bottom-right").properties(
    title=alt.TitleParams(
        text=title,
        anchor="start"  # Align the title to the left
    )
)

chart.save(plots_dir.joinpath('4_Motivation.svg'))
chart

106
                Code  count   percent labels  \
0  MEASplants[SQ001]     66  0.622642  62.3%   
1  MEASplants[SQ002]     23  0.216981  21.7%   
2  MEASplants[SQ004]     75  0.707547  70.8%   
3  MEASplants[SQ005]     47  0.443396  44.3%   
4  MEASplants[SQ006]     48  0.452830  45.3%   
5  MEASplants[SQ003]      9  0.084906   8.5%   
6  MEASplants[SQ007]     30  0.283019  28.3%   
7  MEASplants[SQ008]      7  0.066038   6.6%   
8  MEASplants[SQ009]     34  0.320755  32.1%   
9  MEASplants[other]      3  0.028302   2.8%   

                                             Answers  
0  Making the functioning of the sewage system vi...  
1     Costs and time savings for operating personnel  
2          Continuous monitoring to detect incidents  
3     Dynamic discharge control of wastewater plants  
4                Integrated sewer network management  
5     Preservation of the value of rainwater systems  
6            Basis of assessment for future planning  
7                      Prev

# 6 Governance

TSguide
REGapply
REGenf

Is the legal basis available for performance monitoring of CSOs (law?) 
Is a legally binding implementing ordinance/ regulation available?
Are there technical standards and guidelines available?
Are the existing regulations applied by the operator?
Are the existing regulations are enforced by all actors?



In [37]:
# Define the list of codes and their respective titles
codes_and_titles = {
    "CSOlaw": "Is the legal basis available for performance monitoring of CSOs (law?) ",
    "impREG": "Is a legally binding implementing ordinance/ regulation available?",
    "TSguide": "Are there technical standards and guidelines available?",
    "REGapply": "Are the existing regulations applied by the operator?",
    "REGenf": "Are the existing regulations enforced by all actors?"
}

# Define the answer order for sorting
answers_sort = ["Yes", "No"]

# Iterate through each code and generate a chart
charts = []

for code, title_text in codes_and_titles.items():
    # Wrapping the title for readability
    title_wrapped = wrap(title_text, width=40)

    # Filter the dataframe for non-null values in the current code column
    source_df = survey_df[~survey_df[code].isna()]

    # Calculate the number of responses for the title
    nlabel = f"N = {source_df.shape[0]}"

    # Generate percent_df using your custom function (ensure get_percent_df is defined)
    percent_df = get_percent_df(source_df, code)

    # Append N label to the title
    full_title = title_wrapped + [nlabel]

    # Define the x-axis encoding for the current code
    coden = code + ":N"

    # Create the bar chart
    bars = alt.Chart(percent_df, title=full_title).mark_bar(color='#1f78b4').encode(
        x=alt.X(coden, scale=alt.Scale()).axis(labelLimit=500, title='', labelAngle=0).sort(answers_sort),
        y=alt.Y("percent:Q", scale=alt.Scale(domain=[0, 1])).axis(format='%', title=''),
        color=alt.Color(legend=None)
    ).properties(
        width=100,
        height=150  # Adjust chart height if needed
    )

    # Add percentage labels to the bars
    text = bars.mark_text(align='left', dx=-15, dy=-10).encode(
        x=alt.X(coden).sort(answers_sort),
        color=alt.Color(legend=None),
        text=alt.Text('labels')
    )

    # Combine bars and text into a single chart
    chart = (bars + text).resolve_scale(color='independent')

    # Append the chart to the list
    charts.append(chart)

# Concatenate all charts horizontally
final_chart = alt.vconcat(alt.hconcat(*charts[0:3]), alt.hconcat(*charts[3:]))

final_chart.save(plots_dir.joinpath('6_Governance.svg'))
final_chart

# 7 Benefit of monitoring / public participation

A possible vision of the future:  "Stormwater systems and sewer networks are equipped with a large number of measuring devices. The measurement signals are transmitted live to a control system via a nationwide wireless network, automatically checked, evaluated and archived. This permanent monitoring of the systems is made possible by the advancing technological development in the areas of communication, control and automation technology."  What do you personally think of the vision described?

Vision 1: "Real-Time Monitoring and Evaluation of Stormwater and Sewer Systems"


I am in favor of this vision and am generally open to the use of such modern technologies for the facilities of our wastewater association$I reject this vision because I do not think much of the use of such modern technologies for the assets of our wastewater association$I support this vision on the following condition:

Vision 2: "Publicly Accessible CSO Data for Enhanced Collaboration and Sustainability"


Another possible vision of the future:   "In the future, monitoring data from CSOs are made publicly available on a transparent and easily accessible platform that empowers communities and stakeholders with real-time information about sewer systems and their impact on the environment. By openly sharing data on sewer infrastructure, performance, and maintenance, we aim to foster collaboration, innovation, and informed decision-making, ultimately enhancing the efficiency, sustainability, and resilience of our wastewater management practices."  What do you personally think of the vision described?


I am in favor of this vision, because I generally fin that transparency improves effectiveness and aids innovation$I reject this vision because I think data from wastewater infrastructures should not be openly available$I support this vision on the following condition:


In [38]:
# Define the list of codes and their respective titles
codes_and_titles = {
    "SSvision": "Vision 1: Real-Time Monitoring and Evaluation of Stormwater and Sewer Systems",
    "CSOpublic": "Vision 2: Publicly Accessible CSO Data for Enhanced Collaboration and Sustainability"
}

# Define the answer order for sorting
answers_sort = ["Yes", "No", "Yes, if..."]

replacements = {
    'I am in favor of this vision and am generally open to the use of such modern technologies for the facilities of our wastewater association': 'Yes',
    'I reject this vision because I do not think much of the use of such modern technologies for the assets of our wastewater association': 'No',
    'I support this vision on the following condition:': 'Yes, if...',
    'I am in favor of this vision, because I generally fin that transparency improves effectiveness and aids innovation': 'Yes',
    'I reject this vision because I think data from wastewater infrastructures should not be openly available': 'No',
    'I support this vision on the following condition:': 'Yes, if...',
}

# Iterate through each code and generate a chart
charts = []

for code, title_text in codes_and_titles.items():
    # Wrapping the title for readability
    title_wrapped = wrap(title_text, width=50)

    # Filter the dataframe for non-null values in the current code column
    source_df = survey_df[~survey_df[code].isna()]
    source_df = source_df.replace(replacements, regex=False)

    # Calculate the number of responses for the title
    nlabel = f"N = {source_df.shape[0]}"

    # Generate percent_df using your custom function (ensure get_percent_df is defined)
    percent_df = get_percent_df(source_df, code)

    # Append N label to the title
    full_title = title_wrapped + [nlabel]

    # Define the x-axis encoding for the current code
    coden = code + ":N"

    # Create the bar chart
    bars = alt.Chart(percent_df, title=full_title).mark_bar(color='#1f78b4').encode(
        x=alt.X(coden, scale=alt.Scale()).axis(labelLimit=500, title='', labelAngle=0).sort(answers_sort),
        y=alt.Y("percent:Q", scale=alt.Scale(domain=[0, 1])).axis(format='%', title=''),
        color=alt.Color(legend=None)
    ).properties(
        width=100,
        height=150  # Adjust chart height if needed
    )

    # Add percentage labels to the bars
    text = bars.mark_text(align='left', dx=-15, dy=-10).encode(
        x=alt.X(coden).sort(answers_sort),
        color=alt.Color(legend=None),
        text=alt.Text('labels')
    )

    # Combine bars and text into a single chart
    chart = (bars + text).resolve_scale(color='independent')

    # Append the chart to the list
    charts.append(chart)

# Concatenate all charts horizontally
final_chart = alt.hconcat(*charts)

final_chart.save(plots_dir.joinpath('7_Vision.svg'))
final_chart