# Auswertung der Rohdaten

Ergebnisse der Umfrage zum Klimaneutralen Stromsystem / Erneuerbaren Energiesystem.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm

In [None]:
# Plot settings
# Colour palette (can be replaced with custom colours later)
# https://matplotlib.org/stable/tutorials/colors/colormap-manipulation.html
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `pyplot.get_cmap` performs the same lookup and is still supported.
cmap = plt.get_cmap('viridis')

In [None]:
# Grouping of the results
links = [
    'E-Mailverteiler',
    'internes-Netzwerk',
    'Pressemitteilung',
]
states = ['vollständig', 'abgebrochen']
# Subplot grid for the figures; len(states) x len(links) is the disabled alternative
nrows, ncols = 1, 1

In [None]:
# Laden der Umfrageergebnisse
# Helper that removes unusable rows from the results
def cleanse(df):
    """Return only the rows of *df* that pass all three validity filters.

    A row is kept when ``duration`` and ``lastpage`` are both greater than
    zero and ``dispcode`` is one of 22, 31, 32, 33 or 34.
    """
    has_duration = df['duration'] > 0
    reached_page = df['lastpage'] > 0
    known_dispcode = df['dispcode'].isin([31, 32, 33, 34, 22])
    return df.loc[has_duration & reached_page & known_dispcode]

# Helper that loads, cleans and filters one survey export
def load_df(link, skipped):
    """Read the results CSV for *link* and return one completion group.

    The file is read with ``;`` as separator, ``header=2`` and only the
    columns 3..122, then passed through ``cleanse``. When *skipped* is
    truthy the rows with ``dispcode`` 22 are returned, otherwise the rows
    with ``dispcode`` 31, 32, 33 or 34.
    """
    path = 'data/2022_06_RLI_Klimaneutrales Stromsystem_{}_Ergebnisse.csv'.format(link)
    raw = pd.read_csv(path, sep=';', header=2, usecols=list(range(3, 123)))
    cleaned = cleanse(raw)
    if skipped:
        wanted = [22]
    else:
        wanted = [31, 32, 33, 34]
    return cleaned.loc[cleaned['dispcode'].isin(wanted)]

# Combined table of all responses
# For every link the rows from load_df(link, True) come first, followed by
# the rows from load_df(link, False); everything is stacked into one flat
# DataFrame. reset_index() gives the stacked table a fresh 0..n-1 index and
# keeps the previous row labels in an 'index' column.
# The groups can still be told apart through the 'dispcode' column.
data = pd.concat([load_df(link, skipped) for link in links for skipped in [True, False]]).reset_index()

In [None]:
# Import the codebook in dict format
from src import rls_umfrage_auswertung

codebook = rls_umfrage_auswertung.main_preprocessing_codebook(
    display_codebook=False,
)

In [None]:
# Codes that mean no answer was given
exclude_codes = [
    -77, -99, -66, 0,          # numeric form
    '0', '-66', '-99', '-77',  # the same codes as strings
]

In [None]:
def pie_plots(code, title=None, sub=1):
    """Draw a pie chart of the answer counts for one sub-question.

    Args:
        code: Key of the question in ``codebook``.
        title: Chart title. When falsy, the title is built from the first
            150 characters of the question text, a line break and the
            sub-question text.
        sub: Key of the sub-question in ``codebook[code]['subquestion']``.

    Side effects:
        Overwrites ``data[key]`` (the sub-question's column) with its
        string-converted values after replacing them via the sub-question's
        ``'multiple-choice-options'`` mapping, and creates a new figure.
    """
    question = codebook[code]
    subquestion = question['subquestion'][sub]
    key = subquestion['columns']
    codes = subquestion['multiple-choice-options']

    _, axis = plt.subplots(nrows, ncols, figsize=(5*ncols, 4*nrows))

    # The converted column is written back to the global table, so cells run
    # afterwards see the replaced values in this column.
    # No sort_values() before the assignment: column assignment aligns on the
    # index, so a prior sort never affected the stored order, and groupby
    # orders the groups itself.
    data[key] = data[key].astype(str).replace(codes)

    # NOTE(review): the comparison happens after astype(str), so only the
    # string entries of exclude_codes can match here, and missing values
    # would show up as 'nan' - confirm that this is intended.
    answered = data.loc[~data[key].isin(exclude_codes)]
    answered.groupby(key).count()['dispcode'].plot.pie(ax=axis, cmap=cmap)

    if not title:
        # Slicing already clamps to the string length.
        title = question['question'][:150] + '\n' + subquestion['question']
    axis.set_title(title)

In [None]:
def print_data(key, agg, title=None, codes=None, gap=False):
    """Print an aggregate (or the raw values) of one column and return the line.

    Args:
        key: Column of ``data`` to evaluate.
        agg: Aggregation passed to ``Series.agg``; the builtin ``list``
            collects the remaining values into a list instead.
        title: Optional heading printed before the result.
        codes: Optional mapping; when truthy, ``data[key]`` is overwritten
            with its string-converted values after replacing them via
            this mapping.
        gap: When true, print an empty line after the heading (only if a
            heading is printed) and after the result.

    Returns:
        The printed result line, formatted as ``'alle: <result>'``.
    """
    if title:
        print(str(title))
        if gap:
            print('')

    if codes:
        # Written back to the global table on purpose.
        data[key] = data[key].astype(str).replace(codes)

    # Rows whose value is one of the "no answer" codes are left out.
    answers = data.loc[~data[key].isin(exclude_codes), key]
    result = list(answers) if agg is list else answers.agg(agg)

    string = '{}: {}'.format('alle', result)
    print(string)
    if gap:
        print('')
    return string

## Generelle Angaben zu den Ergebnisgruppen

In [None]:
pie_plots(40)

In [None]:
# 'mean' instead of np.mean: pandas >= 2.1 warns when NumPy callables are
# passed to agg; the string selects the same Series.mean implementation.
print_data('v_306', 'mean', title='Durchschnittliche Jahre der Erfahrung')

# Teil 1

Beschreibung generell

In [None]:
# No prioritisation of the fields (1st, 2nd, 3rd)
rls_umfrage_auswertung.create_wordclouds(
    codebook,
    survey_data=data,
    survey_group='alle',
    question_number_list=[4],
    number_of_most_common_words_displayed=20,
)

In [None]:
pie_plots(5)

In [None]:
# Correlation of the CCS answer depending on the synonym question
data.plot.scatter(x='v_137', y='v_425', alpha=0.1)

In [None]:
# Nuclear power
data.plot.scatter(x='v_137', y='v_426', alpha=0.1)

In [None]:
string = print_data('v_138', list, title=codebook[6]['question'], gap=True)

In [None]:
# Visualization of word string above
drop = rls_umfrage_auswertung.get_lemma(string, number_of_most_common_words_displayed=20)

In [None]:
pie_plots(7)

In [None]:
# Correlation of the 1.5° answer depending on the synonym question
data.plot.scatter(x='v_137', y='v_139', alpha=0.05)

In [None]:
string = print_data('v_434', list, title=codebook[8]['question'], gap=True)

In [None]:
drop = rls_umfrage_auswertung.get_lemma(string, number_of_most_common_words_displayed=20)

In [None]:
pie_plots(9)

In [None]:
string = print_data('v_171', list, title=codebook[10]['question'], gap=True)

In [None]:
drop = rls_umfrage_auswertung.get_lemma(string, number_of_most_common_words_displayed=20)

In [None]:
# No prioritisation of the fields (1st, 2nd, 3rd)
rls_umfrage_auswertung.create_wordclouds(
    codebook,
    survey_data=data,
    survey_group='alle',
    question_number_list=[11],
    number_of_most_common_words_displayed=20,
)

In [None]:
# 'mean' instead of np.mean: pandas >= 2.1 warns when NumPy callables are
# passed to agg; the string selects the same Series.mean implementation.
print_data('v_148', 'mean', codebook[12]['subquestion'][1]['question'])

In [None]:
# 'mean' instead of np.mean: pandas >= 2.1 warns when NumPy callables are
# passed to agg; the string selects the same Series.mean implementation.
print_data('v_149', 'mean', codebook[12]['subquestion'][2]['question'])

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=13)

In [None]:
pie_plots(13)

In [None]:
pie_plots(13, sub=2)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=14)

In [None]:
pie_plots(14)

In [None]:
pie_plots(14,sub=2)

In [None]:
pie_plots(15)

In [None]:
# No prioritisation of the fields (1st, 2nd, 3rd)
rls_umfrage_auswertung.create_wordclouds(
    codebook,
    survey_data=data,
    survey_group='alle',
    question_number_list=[16],
    number_of_most_common_words_displayed=20,
)

# Teil Gesellschaft

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=17)

In [None]:
pie_plots(17)

In [None]:
pie_plots(17,sub=2)

In [None]:
pie_plots(17, sub=3)

In [None]:
pie_plots(17, sub=4)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=18)

In [None]:
pie_plots(18)

In [None]:
pie_plots(18, sub=2)

In [None]:
pie_plots(18,sub=3)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=19)

In [None]:
pie_plots(19)

In [None]:
pie_plots(19, sub=2)

In [None]:
pie_plots(19, sub=3)

In [None]:
pie_plots(19, sub=4)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=20)

In [None]:
pie_plots(20, sub=1)

In [None]:
pie_plots(20, sub=2)

In [None]:
pie_plots(20, sub=3)

In [None]:
pie_plots(20, sub=4)

In [None]:
pie_plots(20, sub=5)

In [None]:
pie_plots(20, sub=6)

In [None]:
pie_plots(20, sub=7)

In [None]:
# No prioritisation of the fields (1st, 2nd, 3rd)
rls_umfrage_auswertung.create_wordclouds(
    codebook,
    survey_data=data,
    survey_group='alle',
    question_number_list=[21],
    number_of_most_common_words_displayed=20,
)

# Teil Wirtschaft

In [None]:
# No prioritisation of the fields (1st, 2nd, 3rd)
rls_umfrage_auswertung.create_wordclouds(
    codebook,
    survey_data=data,
    survey_group='alle',
    question_number_list=[22],
    number_of_most_common_words_displayed=20,
)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=23)

In [None]:
pie_plots(23, sub=1)

In [None]:
pie_plots(23, sub=2)

In [None]:
pie_plots(23, sub=3)

In [None]:
pie_plots(23, sub=4)

In [None]:
pie_plots(23, sub=5)

In [None]:
pie_plots(23, sub=6)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=24)

In [None]:
pie_plots(24, sub=1)

In [None]:
pie_plots(24, sub=2)

In [None]:
pie_plots(24, sub=3)

In [None]:
pie_plots(24, sub=4)

In [None]:
pie_plots(24, sub=5)

In [None]:
pie_plots(24, sub=6)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=25)

In [None]:
pie_plots(25, sub=1)

In [None]:
pie_plots(25, sub=2)

In [None]:
pie_plots(25, sub=3)

In [None]:
pie_plots(25, sub=4)

In [None]:
pie_plots(25, sub=5)

In [None]:
pie_plots(25, sub=6)

In [None]:
pie_plots(25, sub=7)

In [None]:
# No prioritisation of the fields (1st, 2nd, 3rd)
rls_umfrage_auswertung.create_wordclouds(
    codebook,
    survey_data=data,
    survey_group='alle',
    question_number_list=[26],
    number_of_most_common_words_displayed=20,
)

# Teil Technik

In [None]:
# No prioritisation of the fields (1st, 2nd, 3rd)
rls_umfrage_auswertung.create_wordclouds(
    codebook,
    survey_data=data,
    survey_group='alle',
    question_number_list=[27],
    number_of_most_common_words_displayed=20,
)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=28)

In [None]:
pie_plots(28, sub=1)

In [None]:
pie_plots(28, sub=2)

In [None]:
pie_plots(28, sub=3)

In [None]:
pie_plots(28, sub=4)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=29)

In [None]:
pie_plots(29, sub=1)

In [None]:
pie_plots(29, sub=2)

In [None]:
pie_plots(29, sub=3)

In [None]:
pie_plots(29, sub=4)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=30)

In [None]:
pie_plots(30, sub=1)

In [None]:
pie_plots(30, sub=2)

In [None]:
pie_plots(30, sub=3)

In [None]:
pie_plots(30, sub=4)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=31)

In [None]:
pie_plots(31, sub=1)

In [None]:
pie_plots(31, sub=2)

In [None]:
pie_plots(31, sub=3)

In [None]:
pie_plots(31, sub=4)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=32)

In [None]:
pie_plots(32, sub=1)

In [None]:
pie_plots(32, sub=2)

In [None]:
pie_plots(32, sub=3)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=33)

In [None]:
pie_plots(33, sub=1)

In [None]:
pie_plots(33, sub=2)

In [None]:
pie_plots(33, sub=3)

In [None]:
# No prioritisation of the fields (1st, 2nd, 3rd)
rls_umfrage_auswertung.create_wordclouds(
    codebook,
    survey_data=data,
    survey_group='alle',
    question_number_list=[34],
    number_of_most_common_words_displayed=20,
)

# Teil Strommarktdesign

In [None]:
# No prioritisation of the fields (1st, 2nd, 3rd)
rls_umfrage_auswertung.create_wordclouds(
    codebook,
    survey_data=data,
    survey_group='alle',
    question_number_list=[35],
    number_of_most_common_words_displayed=20,
)

In [None]:
pie_plots(36)

In [None]:
string = print_data('v_205', list, title=codebook[37]['question'], gap=True)

In [None]:
drop = rls_umfrage_auswertung.get_lemma(string, number_of_most_common_words_displayed=20)

In [None]:
rls_umfrage_auswertung.create_stacked_bar_chart_percent(data, codebook, question_number=38)

In [None]:
pie_plots(38, sub=1)

In [None]:
pie_plots(38, sub=2)

In [None]:
pie_plots(38, sub=3)

In [None]:
pie_plots(38, sub=4)

In [None]:
pie_plots(38, sub=5)

In [None]:
pie_plots(38, sub=6)

In [None]:
string = print_data('v_273', list, title=codebook[39]['question'], gap=True)

In [None]:
drop = rls_umfrage_auswertung.get_lemma(string, number_of_most_common_words_displayed=20)