In [52]:
from pathlib import Path
from top2vec import Top2Vec
import pickle
import numpy as np

from gensim.models import Phrases
from gensim.models.phrases import Phraser
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import strip_tags

In [53]:
DATA_DIR = Path('./data')
def load(filename):
    f = open(DATA_DIR/filename,"rb")
    return pickle.load(f)
    
def save(data, filename):
    with open(DATA_DIR/filename, 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [54]:
def add_voor_tegen_index(df):
    print(len(df))
    stem_column = [c for c in df.columns if 'Stem_' in c and c != 'Stem_persoon']
    stem_column_adj = [c[5:] for c in df.columns if 'Stem_' in c and c != 'Stem_persoon']
    stem_array = df[stem_column].values.tolist()
    assert len(stem_array[0]) == len(stem_column)
    voor = [[stem_column_adj[i] for i, stem in enumerate(motie) if stem == 1] for motie in stem_array]
    tegen = [[stem_column_adj[i] for i, stem in enumerate(motie) if stem == 0] for motie in stem_array]
    df['Partijen_Voor'] = voor
    df['Partijen_Tegen'] = tegen
    df['Index']=list(range(len(df)))
    return df

In [55]:
def get_df():
    file = open("moties_processed_df.pickle","rb")
    df = pickle.load(file)
    print('before removal empty texts',len(df))

    # remove moties without text
    mask = (df['Text']=='') | (df['Text'].isna())
    df = df.loc[~mask]
    print('after removal empty texts',len(df))

    # df = df[df['Kabinet']=='Rutte IV']
    print('after selecting latest cabinet', len(df))
    df = add_voor_tegen_index(df)
    df['BesluitTekst'] = df['BesluitTekst'].str.replace('.','')
    return df, df['ClippedText'].values

## Model Training

In [56]:
df, documents = get_df()
# got the code for making the bigram part from https://lppier.github.io/
sentence_stream = [simple_preprocess(strip_tags(doc), deacc=True) for doc in documents]
bigram = Phrases(sentence_stream, min_count=30)
bigram_phraser = Phraser(bigram)

indieners = {indiener[-1].lower() for indiener in df['Indiener_persoon'].str.split() if indiener}
years = {word for doc in sentence_stream for word in doc if word.startswith('x') and not word.startswith('xin')}
manual_stopwords = {'faber', 'lacin', 'kroger', 'iv', 'beschikt', 'die', 'vaststelling', 'lid','vi', 'viii', 'iii', 'iv', 'ii', 'i', 'kamer', 'regering tevens', 'regering', 'gehoord','beraadslaging', 'overweegt', 'overwegende', "gehoord","regering","gesproken","horen","mondkapjes","spreken","regeringen","gesprekken","commissiedebat","discussies","discussie","hoort","regeringsbeleid","besprekingen","overheden","overheid","overheids","overheidsbeleid","gesprek","zeggen","dialoog","geluid","uitspreekt","bureaucratische","spreekt","openbare","audiovisuele","gehoor","bestuursakkoord","stil","minuten","parlement","parlementen","officieel","bureaucratie","democratie","officiele","batenanalyse","parlementair","begrotings","governance","democratische","budgetrecht","democratisch","kabinetsreactie","belastingplan","wetsvoorstellen","wetsvoorstel","thema","dicht","risicoanalyse","bestuursrecht","bestuursorganen","european","nederlandsche","europees","nederlandse","europa","nederlanden","eurogroep","nederlander","nederland","holland","hogescholen","nederlands","taal","no","kinder","po","nee","verzoek","verzoeken","gevraagde","gevraagd","aangevraagd","verzoekt","europese","eurozone","claims","euro","eurocommissaris","vergunning","instemming","educatieve","eisen","voorstel","aanvrager","afgewezen","toepassing","voorstelt","aanvragen","vereist","werkloosheid","energietoeslag","ongewenste","aanvraag","behoefte","for","voorgesteld","weigert","automatisch","wachten","binnenkort","graag","gat","dji","mond","non","imago","toch","makkelijk","gestuurd","onmogelijk","nu","pas","gemakkelijk","co","eenvoudig","dende","natuurlijk","vervolg","uitzetten","zeker","opriep","budgettaire","fiscale","gefinancierde","financieel","gefinancierd","duizenden","financiele","financierings","ongekend","whw","ja","nertsen","zo","inderdaad","gevaarlijk","budget","begrotingen","budgetten","begroting","overgebleven","uitgaven","kostenbesparing","subsidies","overtreden","overtreding","onverminderd","overtredingen","weren","obstakels","afgeschaft","thans","rulings","uitsluiting","subsidieren","over","reduceren","overschrijden","onwenselijk","hinder","to","preventief","overschrijding","beperking","overtuiging","voorkomen","bestedingen","promoten","sanctie","verhinderen","nr","je","leeftijdsgrens","kleinere","wel","nibud","niet","novi"}
                    
stopwords = indieners | years | manual_stopwords


def bigram_stopword_preprocess(doc):
    sentence_stream = simple_preprocess(strip_tags(doc), deacc=True)
    sentence_stream = [word for word in sentence_stream if word not in stopwords]
    return sentence_stream
    # return bigram_phraser[sentence_stream]

before removal empty texts 39245
after removal empty texts 39244
after selecting latest cabinet 39244
39244


In [57]:
df['Kamer'].value_counts()

Kamer
Rutte III        12178
Rutte II         10761
Rutte IV          9007
Rutte I           4766
Balkenende IV     2532
Name: count, dtype: int64

In [58]:
# https://github.com/scikit-learn-contrib/hdbscan/issues/607
df = df[df['Kamer']  == 'Rutte IV']
model = Top2Vec(documents, speed='deep-learn', embedding_model='distiluse-base-multilingual-cased',workers=8, min_count=20, tokenizer=bigram_stopword_preprocess, ngram_vocab=False)
model.get_num_topics()
model.save("data/doc2vec_deep_bigram_enhanced_stopwords_rutteIV")
model.save("data/doc2vec_deep_bigram_enhanced_stopwords_all")

2023-08-05 21:17:49,240 - top2vec - INFO - Pre-processing documents for training


2023-08-05 21:17:59,557 - top2vec - INFO - Downloading distiluse-base-multilingual-cased model
2023-08-05 21:18:01,250 - top2vec - INFO - Creating joint document/word embedding
2023-08-05 21:50:53,591 - top2vec - INFO - Creating lower dimension embedding of documents
2023-08-05 21:51:17,621 - top2vec - INFO - Finding dense areas of documents
2023-08-05 21:51:20,212 - top2vec - INFO - Finding topics


## Reduce topic number to something more manageable

In [59]:
# For the manual analysis it's necessary to reduce the amount of topics, otherwise you will have 50 or so topics to investigate

df, documents = get_df()
model = Top2Vec.load("data/doc2vec_deep_bigram_enhanced_stopwords_rutteIV")

before removal empty texts 39245
after removal empty texts 39244
after selecting latest cabinet 39244
39244


In [60]:
topic_sizes, topic_nums = model.get_topic_sizes()
print(f'{len(topic_nums)} topics. In total there are {sum(topic_sizes)} documents. These are the amount of documents per topic:\n{topic_sizes}')

170 topics. In total there are 39244 documents. These are the amount of documents per topic:
[2071 1275 1247  934  933  926  701  686  684  676  640  585  564  564
  555  554  531  516  488  472  466  438  422  417  414  406  404  391
  389  385  381  375  367  350  338  334  324  315  312  310  287  287
  286  283  274  273  272  271  269  266  261  255  228  226  226  225
  223  220  219  217  217  215  212  203  203  198  192  190  180  178
  176  175  173  171  165  164  163  157  157  157  154  152  147  145
  140  138  138  137  136  134  130  128  128  128  128  127  127  125
  120  118  117  116  116  113  111  108  107  106  102   98   98   93
   93   93   92   91   91   84   84   84   83   82   80   79   78   78
   77   74   73   72   68   67   67   66   65   64   62   62   61   61
   60   57   54   53   53   52   50   50   49   48   48   47   47   46
   45   44   40   40   40   39   38   36   35   35   35   33   32   30
   30   28]


In [61]:
# helper to check what happens if you reduce topics
def get_reduced_topics(num_topics):
    print(f'performing reduction to {num_topics} topics')
    reduced_topics = model.hierarchical_topic_reduction(num_topics)
    topic_words, word_scores, topic_nums = model.get_topics(reduced=True)
    reduced_topics = tuple(tuple(sorted(t)) for t in reduced_topics)
    return reduced_topics, topic_words

def find_diff(reduced1, reduced2, reverse=False):
    # find the topics that where merged and return their index
    if not reverse:
        changed = set(reduced1) - set(reduced2)
        return [index for index, topic in enumerate(reduced1) if topic in changed]
    else:
        changed = set(reduced2) - set(reduced1)
        return [index for index, topic in enumerate(reduced2) if topic in changed]


def print_merge(large, small, num_words=50):
    print(f'\ninspecting difference from {len(large[0])} to {len(small[0])} topics')
    print('old topics')
    for i in find_diff(large[0],small[0]):
        print(large[1][i][:num_words])
    print('new topic')
    for i in find_diff(large[0],small[0], reverse=True):
        print(small[1][i][:num_words])

def find_optimal_num_topics(minn, maxx):
    start = minn
    stop = maxx
    reductions = {i: get_reduced_topics(i) for i in range(start, stop, -1)}
    for i in range(start, stop +1,-1):
        print(i)
        print_merge(reductions[i], reductions[i-1])
find_optimal_num_topics(19,14)

performing reduction to 19 topics
performing reduction to 18 topics
performing reduction to 17 topics
performing reduction to 16 topics
performing reduction to 15 topics
19

inspecting difference from 19 to 18 topics
old topics
['rechtsstaat' 'wetswijzigingen' 'legale' 'overgangsrecht' 'juridische'
 'mijnbouwwet' 'wettelijk' 'grondwettelijke' 'juridisch' 'grondwet'
 'wetboek' 'verblijfsrecht' 'omgevingsrecht' 'oorlogsrecht' 'illegaal'
 'wettelijke' 'constitutionele' 'rechtspositie' 'kiesrecht' 'wetten'
 'legaal' 'legaliseren' 'jurisprudentie' 'herstelrecht' 'advocatuur'
 'barendrecht' 'makkelijk' 'advocaat' 'trekkingsrecht' 'rechtsorde'
 'adviesrecht' 'rechtszekerheid' 'imams' 'gerechtshof' 'gerechtelijke'
 'gemeentewet' 'wetswijziging' 'hof' 'eenvoudig' 'gereguleerd'
 'gemakkelijk' 'reguleren' 'geleden' 'regelgeving' 'nu' 'onmogelijk'
 'meteen' 'imago' 'terecht' 'rechts']
['januari' 'maanden' 'maandelijkse' 'november' 'maand' 'april'
 'maandelijks' 'kalenderjaar' 'december' 'juni' 'ju

In [62]:
# Make a choice to how many topics to reduce
num_topics = 14
reduced_topics = model.hierarchical_topic_reduction(num_topics)


In [63]:
topic_words, word_scores, topic_nums = model.get_topics(reduced=True)
for t in topic_words:
    print(t)

['makkelijk' 'imams' 'straks' 'gemakkelijk' 'nu' 'brzo' 'toch' 'ongekend'
 'imago' 'meteen' 'laat' 'quick' 'geleden' 'snel' 'snelle' 'non' 'mond'
 'goed' 'zo' 'eenvoudig' 'klaar' 'natuurlijk' 'vervolg' 'openlijk' 'ja'
 'inmiddels' 'spoedig' 'comite' 'dende' 'teneinde' 'drukke' 'tenzij'
 'zodra' 'onmogelijk' 'geacht' 'teniet' 'zeker' 'btw' 'apk' 'mooie'
 'schone' 'gestuurd' 'hbo' 'nertsen' 'opriep' 'uitzetten' 'zitten' 'pas'
 'dak' 'vindt']
['milieubeheer' 'natuurbeheer' 'landbouwbeleid' 'ecologische'
 'landbouwakkoord' 'landbouwsector' 'ecosysteem' 'natuurbeleid'
 'ecologisch' 'afvalbeheerplan' 'ecosystemen' 'landbouw' 'landbouwgronden'
 'biodiversiteit' 'dierenwelzijn' 'landbouwgrond' 'natuurgebieden'
 'natuurgebied' 'milieuraad' 'biomassa' 'visserijbeleid' 'dierenleed'
 'milieueffecten' 'dieren' 'milieuwinst' 'milieuschade' 'biologisch'
 'mond' 'visserijsector' 'omgevingsrecht' 'fauna' 'klimaatbeleid'
 'gestuurd' 'boeren' 'natuurwaarden' 'omgevingswet' 'biologische'
 'natuurlijke' 'k

In [65]:
topic_words, word_scores, topic_nums = model.get_topics(reduced=True)
# for t in topic_words:
#     print(t)

# get some more stopwords to filter out
from collections import defaultdict
sums = defaultdict(int)

for t, s in zip(topic_words, word_scores):
    for word, score in zip(t,s):
        sums[word] += score + 1
    
scores = [(-score, word)  for word, score in sums.items()]
scores.sort()
'","'.join(s for w,s in scores if '_' not in s)

'imams","mond","non","imago","toch","afvalbeheerplan","makkelijk","gestuurd","onmogelijk","hbo","nu","pas","rijksbegroting","begrotingssteun","gemakkelijk","co","eenvoudig","dende","natuurlijk","vervolg","uitzetten","zeker","opriep","begrotingsbehan","energiekosten","budgettaire","migranten","vluchtelingen","vluchteling","basisonderwijs","illegalen","milieubeheer","immigranten","macrobudget","politieacademie","onderwijstijd","basisschool","ecologische","preventiebeleid","zorgkosten","illegale","illegaliteit","ecologisch","arbeidsmigratie","schooljaar","scholieren","fiscale","milieuraad","gefinancierde","klimaatbeleid","klimaatakkoord","financieel","milieueffecten","belastinggeld","biobrandstoffen","gefinancierd","duizenden","financiele","financierings","biobrandstof","luchtvervuiling","tienduizenden","veilig","kleine","mondkapje","ongekend","whw","ja","nertsen","zo","inderdaad","gevaarlijk","budget","begrotingen","budgetten","begroting","onderwijsaanbod","onderwijsgeld","amnesty","budg

In [45]:
topic_words, word_scores, topic_scores, topic_nums = model.search_topics(keywords= ['politie'] , num_topics=4, reduced=True)


In [26]:
topics = {
  0: 'Onderwijs',  
  1: 'Buitenlandse zaken',  
  2: 'Algemene zaken',  
  3: 'Natuur & gaswinning',  
  4: 'Landbouw & dierenwelzijn',  
  5: 'Zorg',  
  6: 'Sociale zaken',  
  7: 'Justitie',  
  8: 'Pensioenstelsel',  
  9: 'Europese Unie',  
  10: 'Klimaat & energie',  
  11: 'Milieu & regelgeving',  
  12: 'Zorg',  
  13: 'Openbaar vervoer',  
  14: 'Financiele sector',  
  15: 'Wonen'
  }
topic_words, word_scores, topic_nums = model.get_topics()
topics = {i:' ,'.join(topic_words[i][:3]) for i in range(len(topic_words))}
doc_ids = list(range(len(documents)))
topic_nums, topic_score, topic_words, word_scores = model.get_documents_topics(doc_ids,reduced=True)
topic_names = [topics[t] for t in topic_nums]
assert len(topic_nums) == len(df)
df['Topic'] = topic_names
topic_nums, topic_score, topic_words, word_scores = model.get_documents_topics(doc_ids,reduced=False)
df['Topic_initial'] = topic_nums
df['Topic_score'] = topic_score
# df.sort_values(['Topic_initial', 'Topic_score'], ascending=False, inplace=True)


## Optional add climate deepdive

In [27]:
topic_sizes, topic_nums = model.get_topic_sizes(reduced=False)
topic_words, word_scores, topic_nums = model.get_topics(reduced=False)
climate_idx = [topic_nums for t in topic_nums if t in reduced_topics[10]]
climate_subtopics = {128: 'Afhankelijkheid fossiele brandstoffen',
 165: 'CO2 reductie',
 5: 'Voldoen aan Parijs',
 141: 'Electriciteit',
 126: 'Groningen',
 205: 'Zonnepanelen',
 96: 'Energierekening betalen',
 236: 'Biomassa',
 105: 'Kolencentrales',
 239: 'Windturbines - overlast',
 29: 'Windturbines - subsidie'}

In [28]:
df['Klimaat'] = df.loc[df['Topic']=='Klimaat & energie', 'Topic_initial'].map(climate_subtopics)

In [29]:
save(df, 'df_including_topics_full.pickle')
save(model, 'doc2vec_deep_bigram_enhanced_stopwords_rutteIV_reduced')

# Prepare slimmed down versions for production

In [43]:
df = load("df_including_topics_full.pickle")
df['BesluitTekst'] = df['BesluitTekst'].str.replace('.','')

print(len(df))

model = Top2Vec.load("data/doc2vec_deep_bigram_enhanced_stopwords_rutteIV_reduced")
doc_ids = list(range(len(df)))
# don't sort the df before this operation
topic_nums, topic_score, topic_words, word_scores = model.get_documents_topics(doc_ids,reduced=True)
topics = [', '.join(topic_words[i][:3]) for i in range(len(topic_words))]

df['Topic_initial'] = topics
df['Topic_score'] = topic_score
df.sort_values(['Topic_initial', 'Topic_score'], ascending=False, inplace=True)
# important do this only after all row filters have been set
df = df[df['Kamer']=='Rutte IV']
df.index = df['Index']
print(len(df))
stem_column = [c for c in df.columns if 'Stem_' in c and c != 'Stem_persoon']
required_cols = ['Kamer', 'Jaar','Indienende_partij', 'BesluitSoort','BesluitTekst','Topic_initial', 'Topic_score','Indienende_persoon_partij','Partijen_Voor', 'Partijen_Tegen', 'Text']

# streamlit has problems with category type: 
# https://github.com/streamlit/streamlit/issues/47
for col in stem_column + required_cols[:-4]:
    df[col] = df[col].astype('category')

df = df[stem_column + required_cols]
save(df, "df_production.pickle")


39244
9007


In [44]:
def delete_documents(self, doc_ids):
    """
    Delete documents from current model.
    Warning: If document ids were not used in original model, deleting
    documents will change the indexes and therefore doc_ids.
    The documents will be deleted from the current model without changing
    existing document, word and topic vectors. Topic sizes will be updated.
    If deleting a large quantity of documents relative to the current model
    size a new model should be trained for best results.
    Parameters
    ----------
    doc_ids: List of str, int
        A unique value per document that is used for referring to documents
        in search results.
    """
    # make sure documents exist
    self._validate_doc_ids(doc_ids, doc_ids_neg=[])

    # update index
    if self.documents_indexed:
        # delete doc_ids from index
        index_ids = [self.doc_id2index_id(doc_id) for doc_id in doc_ids]
        for index_id in index_ids:
            self.document_index.mark_deleted(index_id)
        # update index_id and doc_ids
        for doc_id in doc_ids:
            self.doc_id2index_id.pop(doc_id)
        for index_id in index_ids:
            self.index_id2doc_id.pop(index_id)

    # get document indexes from ids
    doc_indexes = self._get_document_indexes(doc_ids)

    # delete documents
    if self.documents is not None:
        self.documents = np.delete(self.documents, doc_indexes, 0)

    # delete document ids
    if self.document_ids is not None:
        for doc_id in doc_ids:
            self.doc_id2index.pop(doc_id)
        keys = list(self.doc_id2index.keys())
        self.document_ids = np.array(keys)
        values = list(range(0, len(self.doc_id2index.values())))
        self.doc_id2index = dict(zip(keys, values))

    # delete document vectors
    self._set_document_vectors(np.delete(self._get_document_vectors(norm=False), doc_indexes, 0))

    if self.embedding_model == 'doc2vec':
        num_docs = len(doc_indexes)
        self.model.docvecs.count -= num_docs
        self.model.docvecs.max_rawint -= num_docs
        self.model.docvecs.vectors_docs_norm = None
        self.model.docvecs.init_sims()

    # update topics
    # self._unassign_documents_from_topic(doc_indexes, hierarchy=False)

    if self.hierarchy is not None:
        self._unassign_documents_from_topic(doc_indexes, hierarchy=True)
model = Top2Vec.load("data/doc2vec_deep_bigram_enhanced_stopwords_rutteIV_reduced")

# model.delete_documents = delete_documents
df = load("df_including_topics_full.pickle")


In [45]:
topic_sizes, topic_nums = model.get_topic_sizes()
print(f'{len(topic_nums)} topics. In total there are {sum(topic_sizes)} documents. These are the amount of documents per topic:\n{topic_sizes}')

topic_words, word_scores, topic_nums = model.get_topics(reduced=True)
    
for num, size, word in zip(topic_nums, topic_sizes, topic_words):
    print(num, word[:3], size)
print(sum(topic_sizes))

161 topics. In total there are 39244 documents. These are the amount of documents per topic:
[1912 1543 1382 1310  991  932  839  737  686  683  680  568  555  548
  522  514  489  475  463  451  451  442  429  420  413  408  401  397
  393  390  383  377  373  361  356  355  333  326  315  298  296  286
  283  282  279  277  267  264  260  248  243  243  237  235  228  225
  225  224  221  216  211  210  208  203  201  199  194  191  188  187
  183  181  181  177  177  176  174  171  166  162  161  150  148  142
  139  135  132  130  129  128  125  125  122  120  119  118  117  116
  111  110  109  107  106  100   98   98   93   92   91   91   90   90
   90   88   87   81   81   80   80   77   75   75   74   72   71   70
   69   69   68   68   67   66   65   64   63   61   60   59   55   55
   54   53   53   51   49   47   46   43   43   40   40   38   36   36
   35   33   31   29   28   26   21]
0 ['wet langdurig_toezicht' 'onderhavige_wetsvoorstel'
 'voorliggende_wetsvoorstel'] 1912

In [46]:
model.delete_documents(list(range(len(df))))
model.delete_documents = 1
model.save("data/doc2vec_production")

## Check app

In [54]:
df = load("df_production.pickle")
model = Top2Vec.load("data/doc2vec_production")
topic_words, word_scores, topic_scores, topic_nums = model.search_topics(keywords= ['politie'] , num_topics=4, reduced=True)
topic_nums

array([44, 23, 13, 15], dtype=int64)

In [55]:
topic_sizes, topic_nums = model.get_topic_sizes()
print(f'{len(topic_nums)} topics. In total there are {sum(topic_sizes)} documents. These are the amount of documents per topic:\n{topic_sizes}')

47 topics. In total there are 0 documents. These are the amount of documents per topic:
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0]


In [315]:
topic_words, word_scores, topic_scores, topic_nums = model.search_topics(keywords= ['politie'] , num_topics=4, reduced=True)
topic_nums

array([44, 23, 13, 15], dtype=int64)

In [122]:
topic_words, word_scores, topic_nums = model.get_topics(num_topics=47)


In [125]:
topic_words[45]

array(['politieagenten', 'politietop', 'politiemensen', 'politiewet',
       'politieacademie', 'polisaanbod', 'politie', 'polissen', 'cop',
       'zedenpolitie', 'overheidsbeleid', 'regeringsbeleid',
       'kabinetsbeleid', 'georganiseerde_criminaliteit',
       'openbaar_ministerie', 'asielbeleid', 'nationale_veiligheid',
       'bureaucratische', 'autoriteiten', 'landenbeleid', 'beleidskader',
       'rijksbeleid', 'bureaucratie', 'beleidsopties',
       'ministeriele_regeling', 'politici', 'loonbeleid', 'beleidsinzet',
       'administratieve_lasten', 'sanctiebeleid', 'decentrale_overheden',
       'politieke_partijen', 'politiek', 'politieke', 'beleids',
       'agenten', 'beleid', 'beleidskeuze', 'industriebeleid',
       'beleidskeuzes', 'preventiebeleid', 'beleidsnota', 'agentschap',
       'beleidsregels', 'bestuursorganen', 'beleidsmatige',
       'beleidsvrijheid', 'openbare_orde', 'nederlandse_zorgautoriteit',
       'beleidslijn'], dtype='<U28')

In [141]:
df = load("df_production.pickle")
len(df)

6017

In [144]:
df[(df['Topic_initial']=='verduurzamen') & (df['Kamer']=='Rutte IV')]['Text'].iloc[0]

'2\nTweede Kamer der Staten-Generaal\nVergaderjaar 2021–2022\n32 813 Kabinetsaanpak Klimaatbeleid\nNr. 1013 MOTIE VAN DE LEDEN BONTENBAL EN GRINWIS\nVoorgesteld 19 april 2022\nDe Kamer,\ngehoord de beraadslaging,\noverwegende dat de verduurzaming van de gebouwde omgeving de\nkomende jaren vooral gefocust moet zijn op het verduurzamen van de\nwarmtevoorziening in de gebouwde omgeving, resulterend in een\nreductie van het aardgasverbruik;\nverzoekt de regering om in het beleid voor de verduurzaming van de\ngebouwde omgeving ook een subdoelstelling voor aardgasreductie op te\nnemen,\nen gaat over tot de orde van de dag.\nBontenbal\nGrinwis\nkst-32813-1013ISSN\n0921 - 7371\n’s-Gravenhage 2022 Tweede Kamer, vergaderjaar 2021–2022, 32 813, nr. 1013'

In [None]:
df = load("df_including_topics_full.pickle")
print(len(df))
df['BesluitTekst'].value_counts()

6017


Aangenomen    3296
Verworpen     2721
Name: BesluitTekst, dtype: int64