In [1]:
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import time
import sys
import os

import seaborn as sns
import gensim
from datetime import datetime, date

import django
import platform

if platform.node() == "srv-mcc-apsis":
    sys.path.append('/home/muef/tmv/BasicBrowser/')
else:
    # local paths
    sys.path.append('/Documents/Data/tmv/BasicBrowser/')

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "BasicBrowser.settings")
django.setup()

# import from appended path
import parliament.models as pm
from parliament.tasks import do_search, run_tm
import cities.models as cmodels
import scoping.models as sm
from django.contrib.auth.models import User
from tmv_app.models import *
from utils.tm_mgmt import update_topic_scores
from utils.text import *
from django.db.models import Q, Count, Func, F, Sum, Value, Case, When, IntegerField

from parliament.utils import merge_utterance_paragraphs

# export search results

In [2]:
def get_interjection_content(interjections):
    text = []
    for ij in interjections:

        for party in ij.parties.all():
            text.append(party.name.upper())

        for person in ij.persons.all():
            text.append(person.clean_name)

        text.append("[" + ij.get_type_display() + "]")
        if ij.type == 2:
            text.append('[Speech: "' + ij.text + '"]')
        else:
            text.append("[" + ij.get_type_display() + "]")


    return " ".join(text)

from IPython.display import display, clear_output

In [4]:
uts = pm.Utterance.objects.filter(search_matches=89).order_by('document__date','id')
uts_values = uts.values('id', 'document__parlperiod__n', 'document__sitting', 'document__date', 'document__id',
                        'speaker__clean_name', 'speaker__party__name')

data = []

# add text without interjections
for i, ut in enumerate(uts):
    vals = uts_values[i]
    vals['text'] = ut.paragraph_texts.replace("\n", " ").strip()
    vals['paragraph_count'] = ut.paragraph_set.count()
    interjections = pm.Interjection.objects.filter(paragraph__utterance=ut)
    vals['interjection_count'] =  interjections.count()
    vals['interjection_content'] = get_interjection_content(interjections)
    data.append(vals)
    clear_output(wait=True)
    print(i, "of", uts.count(), "done")
    
df = pd.DataFrame(data)
df.columns = ["Date", "Document DB ID", "Parliamentary period", "Sitting", "Speech DB ID",
              "Interjection content", "Interjection count", "Paragraph count", "Speaker", "Speaker party", "Speech text"]
df = df[["Speech DB ID","Date", "Parliamentary period", "Sitting", "Document DB ID", "Speaker", "Speaker party",
    "Interjection count", "Interjection content", "Paragraph count", "Speech text"]]
df

9166 of 9167 done


Unnamed: 0,Speech DB ID,Date,Parliamentary period,Sitting,Document DB ID,Speaker,Speaker party,Interjection count,Interjection content,Paragraph count,Speech text
0,2804930,1949-09-22,1,7,11592,Dr. Franz Richter (Niedersachsen),wav,25,[Applause] [Applause] [Other] [Other] [Amuseme...,22,Meine Damen und Herren! Der Herr Bundeskanzler...
1,2804965,1949-09-28,1,9,11594,Dr. Konrad Adenauer,cducsu,6,[Other] [Other] [Other] [Other] [Other] [Other...,6,Ich habe dem Hohen Hause im Namen der Bundesre...
2,2804968,1949-09-28,1,9,11594,Dr. Kurt Schumacher,spd,4,[Applause] [Applause] [Other] [Other] [Other] ...,4,Meine Damen und Herren! Die sozialdemokratisch...
3,2804970,1949-09-28,1,9,11594,Dr. Gerd Bucerius,cducsu,8,[Other] [Other] [Other] [Other] [Outcry] [Outc...,5,Meine Damen und Herren! Der Erklärung der CDU/...
4,2804972,1949-09-28,1,9,11594,Dr. Hermann Höpker-Aschoff,fdp,11,[Outcry] [Outcry] [Other] [Other] [Other] [Oth...,11,Meine Damen und Herren! Als uns vor einigen Mo...
5,2804974,1949-09-28,1,9,11594,Dr. Hans-Joachim Merkatz,cducsu,2,"[Speech] [Speech: ""Gestern haben Sie den Antra...",2,Herr Präsident! Meine Damen und Herren! Die Fr...
6,2804978,1949-09-28,1,9,11594,Friedrich Rische,kpd,22,"[Speech] [Speech: ""Das hat er so nicht gesagt!...",19,Meine Damen und Herren! Seit einigen Tagen wir...
7,2804980,1949-09-28,1,9,11594,Alfred Loritz,wav,4,"[Speech] [Speech: ""Das muß man oft genug sagen...",4,Meine sehr verehrten Damen und Herren! Ich gla...
8,2804982,1949-09-28,1,9,11594,Dr. Bernhard Reismann,dzp,2,[Other] [Other] [Applause] [Applause],2,"Meine sehr verehrten ,Damen und Herren! Im Nam..."
9,2804983,1949-09-28,1,9,11594,Dr. Erich Köhler,cducsu,7,"[Objection] [Objection] [Speech] [Speech: ""Sie...",7,Das Wort hat der Herr Abgeordnete von Thadden....


In [5]:
df.to_csv("data/database_export_search_89.csv")

In [None]:
# speeches without speaker
counter = 0
for i, ut in enumerate(uts):
    if not ut.speaker.clean_name:
        counter += 1
print("Speeches without speaker:", counter)

In [7]:
dfr = pd.read_csv("data/database_export_search_89.csv", index_col=0)

In [8]:
print(len(dfr[dfr["Speaker"].isna()]))
dfr[dfr["Speaker"].isna()]

58


Unnamed: 0,Speech DB ID,Date,Parliamentary period,Sitting,Document DB ID,Speaker,Speaker party,Interjection count,Interjection content,Paragraph count,Speech text
2414,3077260,1976-01-23,7,216,13122,,,5,"Dr. Hans Apel [Speech] [Speech: ""Falsch!""] CDU...",4,"Ich weiß sehr wohl, daß zum Schluß des Vermitt..."
2421,3079501,1976-03-11,7,227,13133,,,9,CDUCSU [Applause] [Applause] Dr. Graf Lambsdor...,9,"Gut, dieses Wort „soziale Demontage"" ist in di..."
2422,3079505,1976-03-11,7,227,13133,,,0,,1,Das zweite kann ich Ihnen nicht so pauschal be...
2456,3087866,1977-01-20,8,8,13173,,,33,"[Speech] [Speech: ""Sehr wohl!""] [Speech] [Spee...",26,"Ich bitte Sie um Verständnis, wenn ich nun bei..."
2552,3098206,1977-10-26,8,51,13216,,,26,CDUCSU [Applause] [Applause] Dr. Helmut Kohl [...,21,"Herr Abgeordneter, die Politik und die Sache g..."
2831,3126348,1979-07-04,8,167,13332,,,83,"Herbert Wehner [Speech] [Speech: ""... vor dem ...",68,Herr Präsident! Meine sehr verehrten Damen und...
2834,3126356,1979-07-04,8,167,13332,,,6,[Other] [Other] CDUCSU [Applause] [Applause] C...,6,Frau Präsidentin! Meine sehr verehrten Damen u...
2886,3130651,1979-11-28,8,188,13353,,,15,CDUCSU [Applause] [Applause] CDUCSU [Applause]...,15,Herr Präsident! Meine Damen und Herren! Diese ...
2887,3130658,1979-11-28,8,188,13353,,,22,CDUCSU [Applause] [Applause] SPD [Outcry] [Out...,21,"Ich begrüße es zunächst, daß Sie das in Ihren ..."
2890,3130668,1979-11-28,8,188,13353,,,16,CDUCSU [Applause] [Applause] CDUCSU [Applause]...,15,Also ich bin grundsätzlich damit einverstanden...


In [10]:
# Probably wrong attribution
dfr[dfr["Speech text"].str.match("Das Wort")]

Unnamed: 0,Speech DB ID,Date,Parliamentary period,Sitting,Document DB ID,Speaker,Speaker party,Interjection count,Interjection content,Paragraph count,Speech text
9,2804983,1949-09-28,1,9,11594,Dr. Erich Köhler,cducsu,7,"[Objection] [Objection] [Speech] [Speech: ""Sie...",7,Das Wort hat der Herr Abgeordnete von Thadden....
18,2805170,1949-09-30,1,11,11596,Dr. Erich Köhler,cducsu,7,[Other] [Other] [Other] [Other] [Other] [Other...,7,Das Wort hat der Herr Abgeordnete Müller. Müll...
19,2805287,1949-10-20,1,12,11597,Dr. Carlo Schmid (Frankfurt),spd,6,[Other] [Other] [Other] [Other] [Other] [Other...,6,Das Wort zur Begründung des Antrages Drucksach...
153,2813181,1950-12-07,1,105,11690,Dr. Hermann Schäfer,fdp,10,[Other] [Other] SPD [Applause] [Applause] [Oth...,4,Das Wort hat der Abgeordnete Bertram. Dr. Bert...
157,2813188,1950-12-07,1,105,11690,Dr. Hermann Ehlers,cducsu,2,"[Speech] [Speech: ""Beweise!""] [Applause] [Appl...",2,Das Wort hat der Herr Bundesminister für Wirts...
175,2813618,1950-12-15,1,108,11693,Dr. Carlo Schmid (Frankfurt),spd,2,"[Speech] [Speech: ""Das ist ja übertrieben!""] [...",3,Das Wort hat der Abgeordnete Paul zur Begründu...
206,2814238,1951-02-14,1,117,11702,Dr. Carlo Schmid (Frankfurt),spd,2,[Other] [Other] [Applause] [Applause],2,Das Wort zur Begründung des nach Punkt 7 b der...
320,2817856,1951-06-14,1,152,11737,Dr. Hermann Ehlers,cducsu,1,[Other] [Other],2,Das Wort hat der Abgeordnete Dr. Kreyssig.. Dr...
366,2819206,1951-09-18,1,163,11748,Dr. Hermann Schäfer,fdp,10,[Outcry] [Outcry] [Other] [Other] [Outcry] [Ou...,10,Das Wort hat der Abgeordnete Dr. Müller. Dr. D...
401,2820060,1951-10-17,1,169,11754,Dr. Hermann Schäfer,fdp,10,"[Laughter] [Laughter] [Speech] [Speech: ""Sehr ...",9,Das Wort hat der Abgeordnete Fisch. Fisch (KPD...
