In [90]:
import pandas as pd
import os
from spacy.lemmatizer import Lemmatizer, ADJ, NOUN, VERB
import spacy
import numpy as np
import distance

In [91]:
def precision(y_true, y_pred):
    i = set()
    len1 = len(y_pred)
    for pred in y_pred:
        for true in y_true:
            if pred == true:
                i.add(pred)
                break
            if len(list(distance.lcsubstrings(pred, true))) != 0:
                anteil = len(list(distance.lcsubstrings(pred, true))[0])/len(true)
                if anteil > 0.3:
                    i.add(pred)
    if len1 == 0:
        return 0.0
    elif float(len(i) / len1) > 1:
        return 1.0
    else:
        return float(len(i) / len1)


def recall(y_true, y_pred):
    i = set()
    if len(y_true) == 0:
        return 0.0
    else:
        for pred in y_pred:
            for true in y_true:
                if pred == true:
                    i.add(pred)
                    break
                if len(list(distance.lcsubstrings(pred, true))) != 0:
                    anteil = len(list(distance.lcsubstrings(pred, true))[0])/len(true)
                    if anteil > 0.3:
                        i.add(pred)
        if float(len(i) / len(y_true)) > 1:
            return 1.0
        else:
            return float(len(i) / len(y_true))


def f1(y_true, y_pred):
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    if p + r == 0:
        return 0.0
    else:
        return 2 * (p * r) / (p + r)
    
def get_PI(filename):
    getFilename = os.path.basename(filename)
    filename, ext = getFilename.split(".")
    if "_" in filename:
        filename, rest = filename.split("_", 1)
    return filename

In [92]:
os.chdir(r"C:\Users\Goegg\OneDrive\Desktop\Durchgänge")

In [93]:
# create df out of goldstandard
goldstandard = pd.ExcelFile(r"GOLDSTANDARD.xlsx")
# choose model-combinations for predictions:
directory = r"7. YAKE - SPACY NE\MI_Schlagworte_Composed"
extension = ".xlsx"
nlp = spacy.load("de_core_news_lg")
lemmatizer = nlp.vocab.morphology.lemmatizer

In [94]:
#Geographischer Index

all_geo_precision = []
all_geo_recall = []
all_geo_f1 = []
for filename in os.listdir(r"C:\Users\Goegg\OneDrive\Desktop\zu annotieren"):
    
    pi_name = get_PI(filename)
    print("Presseinformation:", pi_name)
    #get geo-pred
    df_pred = pd.read_excel(os.path.join(directory, pi_name + extension), header=0)
    pred_geo_out = df_pred['Geographischer Index:'].tolist()
    pred_geo = [x for x in pred_geo_out if x == x]
    #get geo-gold
    df_gold = pd.read_excel(goldstandard, pi_name)
    gold_geo_out = df_gold['Geographischer Index'].tolist()
    gold_geo = [lemmatizer(x, NOUN)[0].lower() for x in gold_geo_out if x == x]
    print("Goldstandard:", gold_geo)
    print("Predicted:", pred_geo, "\n")
    if len(gold_geo) == 0 and len(pred_geo) == 0:
        print("both nothing\n __________________________")
        all_geo_precision.append(1.0)
        all_geo_recall.append(1.0)
        all_geo_f1.append(1.0)
        continue
    else:
        print("Precision:", precision(gold_geo, pred_geo))
        print("Recall:", recall(gold_geo, pred_geo))
        print("F1-Score:", f1(gold_geo, pred_geo), "\n___________________________")
        all_geo_precision.append(precision(gold_geo, pred_geo))
        all_geo_recall.append(recall(gold_geo, pred_geo))
        all_geo_f1.append(f1(gold_geo, pred_geo))
        
mean_geo_precision = sum(all_geo_precision) / len(all_geo_precision)
mean_geo_recall = sum(all_geo_recall) / len(all_geo_recall)
mean_geo_f1 = sum(all_geo_f1) / len(all_geo_f1)    
print(mean_geo_precision, mean_geo_recall, mean_geo_f1)
    
    



Presseinformation: 2000 0310
Goldstandard: []
Predicted: ['stuttgart', 'technikpresse'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2000 0702
Goldstandard: ['backnang']
Predicted: ['zöw', 'backnang', 'stripline', 'aui', 'bnsismaterialien', 'dielektrikumsznhlen', 'zwei-oder', 'standardoberflächen', 'a 11', 'poatfach', 'stuttgart', 'borm1nn', 'kru••', 'stalb', 'sn60pb', 'indium', 'alumini•'] 

Precision: 0.058823529411764705
Recall: 1.0
F1-Score: 0.1111111111111111 
___________________________
Presseinformation: 2000 1009
Goldstandard: []
Predicted: ['zöw', 'bosch-platln', 'zundkerzen', 'oper', 'ma.iseelektrode', 'journalisten-rückfragen', 'u.-malte', 'd-70049 stuttgart', 'oflentlichkeitoarbi', 'bosch-platin-zündkerzen', 'stuttgart', 'technikpresse'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2001 0350
Goldstandard: ['genf']
Predicted: ['bosch-silver-plus', 'rückzündungen', 'europa'] 

Precisio

Goldstandard: ['detmold']
Predicted: ['detmold', 'dornrad-paketieranlagen', 'd-70049 stuttgart', 'dornrades', 'rüttelstrecke', 'transportbecher', 'waiblingen', 'stuttgart'] 

Precision: 0.125
Recall: 1.0
F1-Score: 0.2222222222222222 
___________________________
Presseinformation: 2012 0113
Goldstandard: []
Predicted: ['mems-sensormarkt', 'sensordatenfusion', 'd-70049 stuttgart'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2012 0808
Goldstandard: []
Predicted: ['d-70049 stuttgart', 'bosch-kraftfahrzeugtechnik'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2013 0201
Goldstandard: []
Predicted: ['d-70049 stuttgart', 'autolock-schnellspann-bohrfutter', 'multischleifer'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2013 0618
Goldstandard: ['berlin', 'europa', 'südeuropa']
Predicted: ['berlin', 'europa', 'd-70049 stuttgart', 'deutschlandforum', 'stuttgart

In [95]:
#Körperschaftsindex

all_org_precision = []
all_org_recall = []
all_org_f1 = []
for filename in os.listdir(r"C:\Users\Goegg\OneDrive\Desktop\zu annotieren"):
    
    pi_name = get_PI(filename)
    print("Presseinformation:", pi_name)
    #get org-pred
    df_pred = pd.read_excel(os.path.join(directory, pi_name + extension), header=0)
    pred_org_out = df_pred['Körperschaftsindex:'].tolist()
    pred_org = [x for x in pred_org_out if x == x]
    #get org-gold
    df_gold = pd.read_excel(goldstandard, pi_name)
    gold_org_out = df_gold['Körperschaftsindex'].tolist()
    gold_org = [lemmatizer(x, NOUN)[0].lower() for x in gold_org_out if x == x]
    print("Goldstandard:", gold_org)
    print("Predicted:", pred_org, "\n")
    if len(gold_org) == 0 and len(pred_org) == 0:
        print("both nothing\n __________________________")
        all_org_precision.append(1.0)
        all_org_recall.append(1.0)
        all_org_f1.append(1.0)
        continue
    else:
        print("Precision:", precision(gold_org, pred_org))
        print("Recall:", recall(gold_org, pred_org))
        print("F1-Score:", f1(gold_org, pred_org), "\n___________________________")
        all_org_precision.append(precision(gold_org, pred_org))
        all_org_recall.append(recall(gold_org, pred_org))
        all_org_f1.append(f1(gold_org, pred_org))
        
mean_org_precision = sum(all_org_precision) / len(all_org_precision)
mean_org_recall = sum(all_org_recall) / len(all_org_recall)
mean_org_f1 = sum(all_org_f1) / len(all_org_f1)    
print(mean_org_precision, mean_org_recall, mean_org_f1)

Presseinformation: 2000 0310
Goldstandard: []
Predicted: ['robert bosch gmbh', 'bosch', 'rtf-datei', 'ttl'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2000 0702
Goldstandard: []
Predicted: ['robert', 'zöt', 'zoef', 'bosch-produktbereich raumfahrttechnik', 'mikrowelllen-streifen', 'bosch', 'iwei microvia', 'zot', 'zof', 'robert bosch', 'vda', 'din', 'esa', 'qnes', 'fur kundemmfrngen'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2000 1009
Goldstandard: ['daimler-chrysler', 'audi', 'bmw', 'cadillac', 'citroen', 'daewoo', 'fiat', 'lancia', 'mitsubishi', 'opel', 'peugeot', 'porsche', 'renault', 'suzuki', 'vauxhall', 'volvo', 'vw']
Predicted: ['robert', 'zöt', 'zoef', 'mercedes', 'daimler-chrysler', 'elus1izität', 'bosch', 'audi', 'bmw', 'cadillac', 'citroen', 'daewoo', 'fiat', 'lancia', 'mitsubishi', 'opel', 'peugeot', 'porsche', 'renault', 'seal', 'skodn', 'ssangyong', 'suzuki', 'vauxhall', 'volv

Goldstandard: ['gesellschaft für technische überwachung']
Predicted: ['presse-information', 'gesellschaft für technische  überwachung', 'gtü', 'bosch', 'bremsregelsysteme', 'robert bosch gmbh'] 

Precision: 0.16666666666666666
Recall: 1.0
F1-Score: 0.2857142857142857 
___________________________
Presseinformation: 2002 0710
Goldstandard: ['assel gmbh']
Predicted: ['robert', 'zoef', 'robert bosch gmbh', 'bosch', 'sensotronic brake', 'esp', 'assel gmbh', 'roberl bosch gmbh marlln', 'zör', 'preaseforum', 'darun', 'bosch rexroth', 'bosch-gruppe', 'boschgeschäftsbereichen'] 

Precision: 0.21428571428571427
Recall: 1.0
F1-Score: 0.35294117647058826 
___________________________
Presseinformation: 2002 1008
Goldstandard: ['junker']
Predicted: ['presse-information', 'junker', 'bosch-thermotechnik', 'bosch thermotechnik', 'robert bosch gmbh'] 

Precision: 0.8
Recall: 1.0
F1-Score: 0.888888888888889 
___________________________
Presseinformation: 2003 0212
Goldstandard: ['skil']
Predicted: ['pres

Goldstandard: ['bosch sensortec']
Predicted: ['robert bosch gmbh', 'brand management', 'bosch', 'bosch sensortec gmbh', 'robert bosch gmbh.', 'bosch gruppe', 'bosch-gruppe'] 

Precision: 0.8571428571428571
Recall: 1.0
F1-Score: 0.923076923076923 
___________________________
Presseinformation: 2012 0808
Goldstandard: ['chassis systems control']
Predicted: ['presse-information', 'bosch', 'elektronische stabilitäts-programm', 'brand management', 'berganfahrhilfe hill hold control', 'bosch-gruppe', 'robert bosch gmbh'] 

Precision: 0.14285714285714285
Recall: 1.0
F1-Score: 0.25 
___________________________
Presseinformation: 2013 0201
Goldstandard: ['bosch power tools', 'skil', 'dremel']
Predicted: ['bosch pi', 'bosch', 'robert bosch gmbh', 'brand management', 'grüne leds', 'ecp', 'bosch akku-geräten', 'schnell-ladegerät', 'akkuspannung/-kapazität', 'robert bosch gmbh tel', 'd-70745 leinfelden-echterdingen', 'bosch-gruppe', 'skil', 'dremel', 'bosch power tools'] 

Precision: 0.666666666666

In [96]:
#Personen-Index

all_per_precision = []
all_per_recall = []
all_per_f1 = []
for filename in os.listdir(r"C:\Users\Goegg\OneDrive\Desktop\zu annotieren"):
    
    pi_name = get_PI(filename)
    print("Presseinformation:", pi_name)
    #get per-pred
    df_pred = pd.read_excel(os.path.join(directory, pi_name + extension), header=0)
    pred_per_out = df_pred['Personen-Index'].tolist()
    pred_per = [x for x in pred_per_out if x == x]
    #get per-gold
    df_gold = pd.read_excel(goldstandard, pi_name)
    gold_per_out = df_gold['Personen-Index'].tolist()
    gold_per = [lemmatizer(x, NOUN)[0].lower() for x in gold_per_out if x == x]
    print("Goldstandard:", gold_per)
    print("Predicted:", pred_per, "\n")
    if len(gold_per) == 0 and len(pred_per) == 0:
        print("both nothing\n __________________________")
        all_per_precision.append(1.0)
        all_per_recall.append(1.0)
        all_per_f1.append(1.0)
        continue
    else:
        print("Precision:", precision(gold_per, pred_per))
        print("Recall:", recall(gold_per, pred_per))
        print("F1-Score:", f1(gold_per, pred_per), "\n___________________________")
        all_per_precision.append(precision(gold_per, pred_per))
        all_per_recall.append(recall(gold_per, pred_per))
        all_per_f1.append(f1(gold_per, pred_per))
        
mean_per_precision = sum(all_per_precision) / len(all_per_precision)
mean_per_recall = sum(all_per_recall) / len(all_per_recall)
mean_per_f1 = sum(all_per_f1) / len(all_per_f1)    
print(mean_per_precision, mean_per_recall, mean_per_f1)

Presseinformation: 2000 0310
Goldstandard: []
Predicted: ['gerhard ketterer'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2000 0702
Goldstandard: []
Predicted: ['ftir hochfrequeniunwendungcn', 'zuverlässigkeits-und lebensdauerprüfungen', 'liefer-und abnahmespezifi•', 'jouma', 'gerhard ketterer'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2000 1009
Goldstandard: []
Predicted: ['eme  ', 'bosch-zündkerzen', 'ersatzteilgeschtlft', 'frank·ulnch breit•prechar', 'robert bosch', 'ulf-malte wünsch'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2001 0350
Goldstandard: []
Predicted: [] 

both nothing
 __________________________
Presseinformation: 2001 0909
Goldstandard: ['dr. marco lang']
Predicted: ['marco lang', 'hawera', 'bohrfortschritt', 'beschlag-und schlangenbohrer', 'handwerkeralltag', 'heavy-duty-metall'] 

Precision: 0.16666666666666666
Recall: 1.0

Presseinformation: 2010 0701
Goldstandard: []
Predicted: ['robert bosch', 'uta-micaela dürig', 'divar', 'erika görge'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2010 1019
Goldstandard: []
Predicted: ['diamant-riff-segmentsägeblatt', 'titannitrid', 'robert bosch', 'uta-micaela dürig', 'karin heinlein telefon', 'gerhard ketterer'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2011 0326
Goldstandard: ['dr. wolfgang malchow']
Predicted: ['dr. malchow', 'wolfgang malchow', 'uta-micaela dürig', 'karriereberatungsseminare', 'journalistenkontakt', 'dirk haushalter', 'robert bosch'] 

Precision: 0.2857142857142857
Recall: 1.0
F1-Score: 0.4444444444444445 
___________________________
Presseinformation: 2011 0807
Goldstandard: ['michael voeth']
Predicted: ['müllereitagung', 'uta-micaela dürig', 'antriebsbereich', 'produktreste', 'michael voeth', 'journalistenkontakt', 'johanna bauer', 'robert bosch'] 

P

In [97]:
#Themen-Index

all_thema_precision = []
all_thema_recall = []
all_thema_f1 = []
for filename in os.listdir(r"C:\Users\Goegg\OneDrive\Desktop\zu annotieren"):
    
    pi_name = get_PI(filename)
    print("Presseinformation:", pi_name)
    #get thema-pred
    df_pred = pd.read_excel(os.path.join(directory, pi_name + extension), header=0)
    pred_thema_out = df_pred['Themen-Index'].tolist()
    pred_thema = [x for x in pred_thema_out if x == x]
    #get thema-gold
    df_gold = pd.read_excel(goldstandard, pi_name)
    gold_thema_out = df_gold['Themen-Index'].tolist()
    gold_thema = [lemmatizer(x, NOUN)[0].lower() for x in gold_thema_out if x == x]
    print("Goldstandard:", gold_thema)
    print("Predicted:", pred_thema, "\n")
    if len(gold_thema) == 0 and len(pred_thema) == 0:
        print("both nothing\n __________________________")
        all_thema_precision.append(1.0)
        all_thema_recall.append(1.0)
        all_thema_f1.append(1.0)
        continue
    else:
        print("Precision:", precision(gold_thema, pred_thema))
        print("Recall:", recall(gold_thema, pred_thema))
        print("F1-Score:", f1(gold_thema, pred_thema), "\n___________________________")
        all_thema_precision.append(precision(gold_thema, pred_thema))
        all_thema_recall.append(recall(gold_thema, pred_thema))
        all_thema_f1.append(f1(gold_thema, pred_thema))
        
mean_thema_precision = sum(all_thema_precision) / len(all_thema_precision)
mean_thema_recall = sum(all_thema_recall) / len(all_thema_recall)
mean_thema_f1 = sum(all_thema_f1) / len(all_thema_f1)    
print(mean_thema_precision, mean_thema_recall, mean_thema_f1)

Presseinformation: 2000 0310
Goldstandard: ['elektrowerkzeug']
Predicted: ['pro', 'internet', 'browser'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2000 0702
Goldstandard: ['raumfahrttechnik']
Predicted: ['gmbh'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2000 1009
Goldstandard: []
Predicted: ['gmbh', 'werk', 'bild', 'leistung', 'modell', 'platin', 'mercedes c-kla11e', 'die fließgepresste mittelelektrode', 'internet', 'mercedes c-klasse', 'browser'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2001 0350
Goldstandard: []
Predicted: ['prozent', 'anspruch', 'silver plus', 'die spitzenbatterie von bosch'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2001 0909
Goldstandard: ['hammerbohrer', 'diamant']
Predicted: ['september', 'leiter', 'hersteller', 'pressekonferenz', 'marktführer', 'handwerker'] 

Precisi

Precision: 0.052
Recall: 1.0
F1-Score: 0.0988593155893536 
___________________________
Presseinformation: 2002 0403
Goldstandard: []
Predicted: ['prozent'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2002 0710
Goldstandard: ['chassissysteme']
Predicted: ['richtfest', 'gmbh', 'entwicklung', 'milliarde', 'ingenieur', 'mitarbeiter', 'stuttgart', 'by wire', 'vergangenen jahr', 'euro', 'die jpeg-bilder', 'browser'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2002 1008
Goldstandard: ['junkers/bosch-thermotechnik', 'junkers-website', 'heizung']
Predicted: ['tipps', 'auftritt', 'konzept', 'design', 'rubrik', 'neuer junkers-internetauftritt', 'internet'] 

Precision: 0.14285714285714285
Recall: 0.3333333333333333
F1-Score: 0.2 
___________________________
Presseinformation: 2003 0212
Goldstandard: ['elektrowerkzeugen']
Predicted: ['februar', 'kompetenz', 'anbieter', 'saw  line visor'] 

Precision: 0.0


Goldstandard: ['rallye', 'bosch boxberg klassik rallye']
Predicted: ['juni', 'klassik', 'märz', 'höhepunkt', 'information', 'lösung', 'aa dr', 'karlsruhe', 'oldtimer-rallye bosch boxberg klassik', 'rallye', 'bosch boxberg klassik', 'internet'] 

Precision: 0.25
Recall: 1.0
F1-Score: 0.4 
___________________________
Presseinformation: 2018 0204
Goldstandard: ['fahrrad-leasing', 'mitarbeiter', 'gesundheit', 'sport']
Predicted: ['mitarbeiter', 'förderung', 'unternehmen', 'stadt', 'gesundheit', 'bosch ebike-marktstudie', 'britische'] 

Precision: 0.2857142857142857
Recall: 0.5
F1-Score: 0.36363636363636365 
___________________________
Presseinformation: 2019 0501
Goldstandard: ['online', 'computer aided service']
Predicted: ['online', 'information', 'mai', 'zugriff', 'karlsruhe', 'zeit', 'd-76225 karlsruhe twitter', 'computer aided service', 'cas plus'] 

Precision: 0.3333333333333333
Recall: 1.0
F1-Score: 0.5 
___________________________
Presseinformation: 2019 1103
Goldstandard: ['landwi

In [98]:
#Produkt-Index

all_prod_precision = []
all_prod_recall = []
all_prod_f1 = []
for filename in os.listdir(r"C:\Users\Goegg\OneDrive\Desktop\zu annotieren"):
    
    pi_name = get_PI(filename)
    print("Presseinformation:", pi_name)
    #get prod-pred
    df_pred = pd.read_excel(os.path.join(directory, pi_name + extension), header=0)
    pred_prod_out = df_pred['Produkt-Index'].tolist()
    pred_prod = [x for x in pred_prod_out if x == x]
    #get prod-gold
    df_gold = pd.read_excel(goldstandard, pi_name)
    gold_prod_out = df_gold['Produkt-Index'].tolist()
    gold_prod = [lemmatizer(x, NOUN)[0].lower() for x in gold_prod_out if x == x]
    print("Goldstandard:", gold_prod)
    print("Predicted:", pred_prod, "\n")
    if len(gold_prod) == 0 and len(pred_prod) == 0:
        print("both nothing\n __________________________")
        all_prod_precision.append(1.0)
        all_prod_recall.append(1.0)
        all_prod_f1.append(1.0)
        continue
    else:
        print("Precision:", precision(gold_prod, pred_prod))
        print("Recall:", recall(gold_prod, pred_prod))
        print("F1-Score:", f1(gold_prod, pred_prod), "\n___________________________")
        all_prod_precision.append(precision(gold_prod, pred_prod))
        all_prod_recall.append(recall(gold_prod, pred_prod))
        all_prod_f1.append(f1(gold_prod, pred_prod))
        
mean_prod_precision = sum(all_prod_precision) / len(all_prod_precision)
mean_prod_recall = sum(all_prod_recall) / len(all_prod_recall)
mean_prod_f1 = sum(all_prod_f1) / len(all_prod_f1)    
print(mean_prod_precision, mean_prod_recall, mean_prod_f1)

Presseinformation: 2000 0310
Goldstandard: ['ahs 6000 pro', 'ahs 7000']
Predicted: ['maßstab', 'messer'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2000 0702
Goldstandard: ['leiterplatten']
Predicted: ['anforderung', 'leiterplatten', 'fax'] 

Precision: 0.3333333333333333
Recall: 1.0
F1-Score: 0.5 
___________________________
Presseinformation: 2000 1009
Goldstandard: ['slk 320', 'zündkerze']
Predicted: ['anforderung', 'elektrode'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
Presseinformation: 2001 0350
Goldstandard: ['silver plus', 'powercontrol system', 'esp', 'common rail', 'bea 350', 'zündkerze', 'funline']
Predicted: ['start', 'produktvorteil powercontrol system'] 

Precision: 1.0
Recall: 0.2857142857142857
F1-Score: 0.4444444444444445 
___________________________
Presseinformation: 2001 0909
Goldstandard: ['quadro-x', 'sägeblätter']
Predicted: ['qualität', 'ausführung'] 

Precision: 0.5
Recall: 0.5
F1-Sco

Goldstandard: ['esitronic', 'festplatte']
Predicted: ['datum', 'festplatte', 'internet der dinge', 'iot cloud'] 

Precision: 0.25
Recall: 0.5
F1-Score: 0.3333333333333333 
___________________________
Presseinformation: 2019 1103
Goldstandard: ['nevonex']
Predicted: ['effizienz', 'gerät', 'internet der dinge', 'iot cloud'] 

Precision: 0.0
Recall: 0.0
F1-Score: 0.0 
___________________________
0.38913043478260867 0.4100573339703775 0.3706513896731288


In [99]:
print("Standard Deviation of Precision/Products is ", np.std(all_thema_precision))

Standard Deviation of Precision/Products is  0.22662733593975792


### 

###### 