## Parse

In [1]:
%load_ext autoreload
%autoreload 2
from gesetz import Gesetz, progressBar
import pickle

In [2]:
import sys
sys.path.append('..')
import collections
import statistics
import pandas as pd
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
%matplotlib inline
from typing import List, Dict
import re


In [3]:
# Restore the previously pickled law collection onto the Gesetz class.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open("pickles/laws_links_topics.pickle", "rb") as pickle_file:
    Gesetz.collected_laws = pickle.load(pickle_file)

In [4]:
# Spot-check one entry: display the (topic, score) pairs stored for the law keyed "SEBG".
Gesetz.collected_laws["SEBG"].topics

[('OrganR', 0.47706366),
 ('WahlR', 0.30715984),
 ('ArbR', 0.085194804),
 ('GesR', 0.06330281),
 ('BerufsR', 0.029223077),
 ('BahnR', 0.019881079)]

In [5]:
# Display the topic -> count mapping (presumably the number of laws assigned to each topic; confirm in gesetz.py).
Gesetz.get_topic_count()

{'OrganR': 116,
 'BeamtenR': 85,
 'VersorgungsR': 158,
 'ArbR': 90,
 'MedR': 80,
 'KommR': 19,
 'UmweltR': 219,
 'KriegsfolgenR': 22,
 'StatistikR': 44,
 'BankR': 56,
 'GebührenR': 10,
 'EntsorgungsR': 21,
 'FinMarktR': 16,
 'InsolvenzR': 17,
 'WohnR': 5,
 'GesR': 174,
 'BahnR': 66,
 'SachenR': 71,
 'WahlR': 12,
 'ProduktR': 14,
 'ProzessR': 106,
 'StrafR': 106,
 'SteuerR': 70,
 'PatentR': 19,
 'BauR': 23,
 'AgrarR': 12,
 'VerkehrsR': 19,
 'BerufsR': 10,
 'AsylR': 3}

In [6]:
import plotly.graph_objects as go

# Query the topic counts once so that names and values are guaranteed to come
# from the same mapping (and the count is not computed twice).
topic_counts = Gesetz.get_topic_count()
tops = list(topic_counts.keys())
values = list(topic_counts.values())

# (count, topic) pairs, largest count first; equal counts are ordered by
# topic name, descending, because the tuples are compared element-wise.
zipped = sorted(zip(values, tops), reverse=True)
print(zipped)

fig = go.Figure(data=[
    go.Bar(
        name='Topic Distribution',
        x=[topic for _, topic in zipped],
        y=[count for count, _ in zipped],
    ),
])
# barmode only matters once more traces are added; the titles make the
# figure readable on its own.
fig.update_layout(
    barmode='stack',
    title='Topic Distribution',
    xaxis_title='Topic',
    yaxis_title='Count',
)
fig.show()

[(219, 'UmweltR'), (174, 'GesR'), (158, 'VersorgungsR'), (116, 'OrganR'), (106, 'StrafR'), (106, 'ProzessR'), (90, 'ArbR'), (85, 'BeamtenR'), (80, 'MedR'), (71, 'SachenR'), (70, 'SteuerR'), (66, 'BahnR'), (56, 'BankR'), (44, 'StatistikR'), (23, 'BauR'), (22, 'KriegsfolgenR'), (21, 'EntsorgungsR'), (19, 'VerkehrsR'), (19, 'PatentR'), (19, 'KommR'), (17, 'InsolvenzR'), (16, 'FinMarktR'), (14, 'ProduktR'), (12, 'WahlR'), (12, 'AgrarR'), (10, 'GebührenR'), (10, 'BerufsR'), (5, 'WohnR'), (3, 'AsylR')]


In [7]:
def dump_topic_to_txt(topic_name):
    """Write a ranked listing of all laws of one topic to ``<topic_name>.txt``.

    A law is included when ``law.get_topic()`` equals ``topic_name``. Its
    score is taken from ``law.topics[0][1]`` (assumes the first entry of
    ``topics`` belongs to the law's main topic -- TODO confirm in gesetz.py).
    Lines are ordered by descending score, then descending short name, and
    have the form ``<score> - <short name> - '<full name>'``.

    Args:
        topic_name: Topic label to export. Also used as the file name stem;
            the file is created in the current working directory (UTF-8)
            and overwritten if it already exists.
    """
    matching = [
        (law.topics[0][1], law.name_short, law)
        for law in Gesetz.collected_laws.values()
        if law.get_topic() == topic_name
    ]
    # Sort on (score, short name) only. Sorting the full tuples would fall
    # through to comparing the law objects themselves whenever score and
    # short name tie, which raises TypeError if they do not define ordering.
    matching.sort(key=lambda entry: (entry[0], entry[1]), reverse=True)

    # The context manager closes the file even if a write fails.
    with open(topic_name + ".txt", "w", encoding="utf8") as out_file:
        for prob, law_name, law in matching:
            out_file.write(str(round(prob, 3)).ljust(5) + " - " + law_name.ljust(15) + " - '")
            out_file.write(law.name_full + "'\n")

In [9]:
# Export one ranked "<topic>.txt" listing per topic of interest.
for topic_name in (
    "GesR",
    "FinMarktR",
    "OrganR",
    "ArbR",
    "WahlR",
    "ProzessR",
    "SachenR",
    "UmweltR",
    "InsolvenzR",
    "StrafR",
):
    dump_topic_to_txt(topic_name)