### Get positive facts, and corresponding negative facts with the same subject, from DBpedia

In [None]:
pip install rdflib
pip install SPARQLWrapper

In [2]:
import pandas as pd
from numpy.random import RandomState
from rdflib import Graph, plugin, URIRef
from SPARQLWrapper import SPARQLWrapper, RDF, JSON, XML
import os

In [None]:
# Shared SPARQL client for the public DBpedia endpoint; the cells below pass
# it to every query function as the `sparql` argument.
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
# Register http://dbpedia.org as the default graph for queries sent by this client.
sparql.addDefaultGraph("http://dbpedia.org")

In [None]:
def read_data(file_path, sep=None):
    """Load a fact CSV and return its subject/predicate/object columns.

    The file is expected to have a header row followed by four fields per
    line: subject, predicate, object, truth_value. The truth_value column is
    discarded.

    Args:
        file_path: Path of the CSV file to read.
        sep: Field delimiter. When None (default) it is detected from the
            header line: ';' if the header contains more semicolons than
            commas, otherwise ','. This lets the function read both the
            comma-separated files written by ``convert_to_csv`` and
            semicolon-separated files.

    Returns:
        A DataFrame with columns ``subject``, ``predicate`` and ``object``.
    """
    cols = ['subject', 'predicate', 'object', 'truth_value']

    if sep is None:
        # Only the header is inspected: URIs in the data rows may themselves
        # contain commas, so sniffing the body would be unreliable.
        with open(file_path, encoding='utf-8') as handle:
            header = handle.readline()
        sep = ';' if header.count(';') > header.count(',') else ','

    # header=0 consumes the header row so it is not returned as a data row.
    df = pd.read_csv(file_path, sep=sep, names=cols, header=0)

    return df.drop(columns='truth_value')

Common variable names in notebook:
s = subject
p = predicate
o = object

In [None]:
# fetch query results and return single triple

def get_result(sparql, query):
    """Run a CONSTRUCT query and return a single triple as three strings.

    The dbo/dbp/dbr prefix declarations are prepended to ``query`` before it
    is sent. As a side effect the return format of ``sparql`` is set to RDF.

    Args:
        sparql: SPARQLWrapper client to send the query with.
        query: SPARQL query text without the prefix declarations.

    Returns:
        ``(subject, predicate, object)`` as strings. If the result holds
        several triples, the last one iterated is returned. Three empty
        strings are returned when the result is empty or the query fails;
        failures are printed rather than raised.
    """
    prefix = """PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbp: <http://dbpedia.org/property/>
    PREFIX dbr: <http://dbpedia.org/resource/>
    """
    s, p, o = '', '', ''
    sparql.setQuery(prefix + query)
    try:
        sparql.setReturnFormat(RDF)
        graph = sparql.query().convert()
        # Walk the whole result so that s, p, o end up holding the last triple.
        for s, p, o in graph:
            pass
    except Exception as e:
        # Deliberately best-effort: callers treat empty strings as
        # "no negative found" and move on to the next subject.
        print(e)
    return str(s), str(p), str(o)

In [None]:
def convert_to_csv(df, prop_val, truth_val, path):
    """Turn a two-column (subject, object) frame into a fact table and save it.

    A constant predicate column is inserted at position 1 and a constant
    truth-value column at position 3; the first four columns are then renamed
    to subject, predicate, object, truth_value and the frame is written to
    ``path`` without the index.

    Note that ``df`` is modified in place (columns inserted and renamed).

    Args:
        df: Frame whose first column is the subject and second the object.
        prop_val: Predicate value stored in every row.
        truth_val: Truth value stored in every row.
        path: Destination CSV path; a missing parent directory is created.
    """
    df.insert(1, 'property', prop_val)
    df.insert(3, 'truth_value', truth_val)

    df.rename(
        columns={
            df.columns[0]: "subject",
            df.columns[1]: "predicate",
            df.columns[2]: "object",
            df.columns[3]: "truth_value",
        },
        inplace=True,
    )

    # to_csv does not create directories, so make sure the target folder
    # (e.g. 'positive/' or 'harder_negative/') exists first.
    if isinstance(path, (str, os.PathLike)):
        out_dir = os.path.dirname(path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    df.to_csv(path, index=False)

In [None]:
def positive_university(sparql):
    """Query DBpedia for positive university facts and write them to CSV.

    Selects up to 500 (university, city, chancellor, affiliation) rows, then
    writes one CSV per relation (chancellor, affiliation, city) under
    ``positive/``, keeping a single row per university and truth value 1.
    As a side effect the return format of ``sparql`` is set to JSON.

    Args:
        sparql: SPARQLWrapper client to send the query with.

    Returns:
        DataFrame of all fetched rows with columns uni, chancellor,
        affiliation and city. It is empty when the query fails; the error is
        printed rather than raised.
    """
    columns = ['uni', 'chancellor', 'affiliation', 'city']
    query = """PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbp: <http://dbpedia.org/property/>
    PREFIX dbr: <http://dbpedia.org/resource/>
    SELECT  ?uni ?city ?chancellor ?affiliation
        WHERE {
        ?uni dbo:chancellor ?chancellor;
            dbo:city ?city;
            dbo:affiliation ?affiliation.
        ?city a owl:Thing.
        ?affiliation a owl:Thing.
        } LIMIT 500
    """
    sparql.setQuery(query)

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    rows = []
    try:
        sparql.setReturnFormat(JSON)
        response = sparql.query().convert()
        for binding in response["results"]["bindings"]:
            rows.append({col: binding[col]['value'] for col in columns})
    except Exception as e:
        print(e)
    df = pd.DataFrame(rows, columns=columns)

    # (object column, predicate URI stored in the CSV, output path)
    exports = [
        ('chancellor', 'https://dbpedia.org/ontology/chancellor', 'positive/uni_chancellor.csv'),
        ('affiliation', 'https://dbpedia.org/ontology/affiliation', 'positive/uni_affiliation.csv'),
        ('city', 'https://dbpedia.org/ontology/city', 'positive/uni_city.csv'),
    ]
    for column, predicate, path in exports:
        facts = (
            df[['uni', column]]
            .sort_values('uni', ascending=False)
            .drop_duplicates(subset='uni', keep='first')
        )
        convert_to_csv(facts, predicate, 1, path)

    return df



In [None]:
# The functions below query DBpedia to find a related negative example for the given subject.
def negative_university_chancellor(sparql, university_name):
    """Look up one Person linked to the university who is not its chancellor.

    ``university_name`` is spliced into the query verbatim, so it must
    already be a SPARQL term (callers pass an IRI in angle brackets).
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {university_name} ?p2 ?o2.
    }} WHERE {{
    {university_name} dbo:chancellor ?o1;
        ?p2 ?o2.
    ?o2 rdf:type dbo:Person.
    FILTER ( ?o1 != ?o2 )
    FILTER not exists {{ {university_name} dbo:chancellor ?o2 }}
    }} LIMIT 1"""

    negative_triple = get_result(sparql, construct_query)
    return negative_triple

def negative_university_city(sparql, university_name):
    """Look up one City linked to the university that is not its dbo:city.

    ``university_name`` is spliced into the query verbatim, so it must
    already be a SPARQL term (callers pass an IRI in angle brackets).
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {university_name} ?p2 ?o2.
    }} WHERE {{
    {university_name} dbo:city ?o1;
        ?p2 ?o2.
    ?o2 rdf:type dbo:City.
    FILTER ( ?o1 != ?o2 )
    FILTER not exists {{ {university_name} dbo:city ?o2 }}
    }} LIMIT 1"""

    negative_triple = get_result(sparql, construct_query)
    return negative_triple

def negative_university_affiliation(sparql, university_name):
    """Look up one Organisation linked to the university that is not an affiliation.

    The query only matches subjects whose dbp:type is a public, research or
    private university. ``university_name`` is spliced into the query
    verbatim, so it must already be a SPARQL term.
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {university_name} ?p2 ?o2.
    }} WHERE {{
    {university_name} dbo:affiliation ?o1;
        ?p2 ?o2.

    VALUES ?uni_type {{ dbr:Public_university dbr:Research_university dbr:Private_university }}
    {university_name} dbp:type ?uni_type.

    ?o2 rdf:type dbo:Organisation.

    FILTER ( ?o1 != ?o2 ).
    FILTER not exists {{ {university_name} dbo:affiliation ?o2 }}
    }} LIMIT 1"""

    subj, pred, obj = get_result(sparql, construct_query)
    return subj, pred, obj

In [None]:
# Fetch the positive university facts; this also writes the positive/uni_*.csv
# files that the negative-example cells below read back.
df_uni_positiv = positive_university(sparql)

In [None]:
# Get negative examples for university chancellor

# For each positive university, ask DBpedia for one negative chancellor
# candidate; stop once 100 negatives have been collected.
df_uni_chan_positiv = read_data('positive/uni_chancellor.csv')
negative_rows = []
for subject, obj in zip(df_uni_chan_positiv['subject'], df_uni_chan_positiv['object']):
    _, p2, o2 = negative_university_chancellor(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})
    if len(negative_rows) >= 100:
        break

# Built from a list because DataFrame.append was removed in pandas 2.0.
df_uni_chan = pd.DataFrame(negative_rows, columns=['subject', 'o1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_uni_chan[['subject', 'o2']].copy(), 'https://dbpedia.org/ontology/chancellor', 0, 'harder_negative/university_chancellor_negative.csv')

In [None]:
# Get negative examples for university affiliation

# For each positive university, ask DBpedia for one negative affiliation
# candidate; stop once 100 negatives have been collected.
df_uni_aff_positiv = read_data('positive/uni_affiliation.csv')
negative_rows = []
for subject, obj in zip(df_uni_aff_positiv['subject'], df_uni_aff_positiv['object']):
    _, p2, o2 = negative_university_affiliation(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})
    if len(negative_rows) >= 100:
        break

# Built from a list because DataFrame.append was removed in pandas 2.0.
df_uni_aff = pd.DataFrame(negative_rows, columns=['subject', 'o1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_uni_aff[['subject', 'o2']].copy(), 'https://dbpedia.org/ontology/affiliation', 0, 'harder_negative/university_affiliation_negative.csv')

In [None]:
# Get negative examples for university city

# For each positive university, ask DBpedia for one negative city candidate;
# stop once 100 negatives have been collected.
df_uni_city_positiv = read_data('positive/uni_city.csv')
negative_rows = []
for subject, obj in zip(df_uni_city_positiv['subject'], df_uni_city_positiv['object']):
    _, p2, o2 = negative_university_city(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})
    if len(negative_rows) >= 100:
        break

# Built from a list because DataFrame.append was removed in pandas 2.0.
# The 'p1' column is never filled and stays empty (NaN).
df_uni_city = pd.DataFrame(negative_rows, columns=['subject', 'p1', 'o1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_uni_city[['subject', 'o2']].copy(), 'https://dbpedia.org/ontology/city', 0, 'harder_negative/university_city_negative.csv')

In [None]:
def positive_movie(sparql):
    """Query DBpedia for positive movie facts and write them to CSV.

    Selects up to 500 (movie, director, producer, production company, star)
    rows, then writes one CSV per relation (producer, director, starring,
    productionCompany) under ``positive/``, keeping a single row per movie
    and truth value 1. As a side effect the return format of ``sparql`` is
    set to JSON.

    Args:
        sparql: SPARQLWrapper client to send the query with.

    Returns:
        DataFrame of all fetched rows with columns movie, producer, director,
        starring and productionCompany. It is empty when the query fails; the
        error is printed rather than raised.
    """
    columns = ['movie', 'producer', 'director', 'starring', 'productionCompany']
    # NOTE(review): director/producer are selected via dbp: here but exported
    # below under dbpedia.org/ontology/ predicate URIs -- confirm this is intended.
    query = """PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbp: <http://dbpedia.org/property/>
    PREFIX dbr: <http://dbpedia.org/resource/>
    SELECT  ?movie ?director ?producer ?pCompany ?star
        WHERE {
            ?movie dbp:director ?director;
                dbp:producer ?producer;
                dbo:productionCompany ?pCompany;
                dbo:starring ?star.
            ?producer a owl:Thing.
            ?director a owl:Thing.
            ?pCompany a owl:Thing.
        } LIMIT 500
    """
    sparql.setQuery(query)

    # Frame column -> SPARQL variable holding its value.
    variable_for = {
        'movie': 'movie',
        'producer': 'producer',
        'director': 'director',
        'starring': 'star',
        'productionCompany': 'pCompany',
    }

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    rows = []
    try:
        sparql.setReturnFormat(JSON)
        response = sparql.query().convert()
        for binding in response["results"]["bindings"]:
            rows.append({col: binding[var]['value'] for col, var in variable_for.items()})
    except Exception as e:
        print(e)
    df = pd.DataFrame(rows, columns=columns)

    # (object column, predicate URI stored in the CSV, output path)
    exports = [
        ('producer', 'https://dbpedia.org/ontology/producer', 'positive/movie_producer.csv'),
        ('director', 'https://dbpedia.org/ontology/director', 'positive/movie_director.csv'),
        ('starring', 'https://dbpedia.org/ontology/starring', 'positive/movie_starring.csv'),
        ('productionCompany', 'https://dbpedia.org/ontology/productionCompany', 'positive/movie_productionCompany.csv'),
    ]
    for column, predicate, path in exports:
        facts = (
            df[['movie', column]]
            .sort_values('movie', ascending=False)
            .drop_duplicates(subset='movie', keep='first')
        )
        convert_to_csv(facts, predicate, 1, path)

    return df



In [None]:
# The functions below query DBpedia to find a related negative example for the given subject.
def negative_movie_producer(sparql, movie_name):
    """Look up one Person linked to the film who is not its dbo:producer.

    ``movie_name`` is spliced into the query verbatim, so it must already be
    a SPARQL term (callers pass an IRI in angle brackets).
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {movie_name} ?p2 ?o2.
    }} WHERE {{
    {movie_name} dbo:producer ?o1;
        ?p2 ?o2;
         rdf:type dbo:Film.

    ?o2 rdf:type dbo:Person.

    FILTER ( ?o1 != ?o2 ).
    FILTER not exists {{ {movie_name} dbo:producer ?o2 }}
    }} LIMIT 1"""

    negative_triple = get_result(sparql, construct_query)
    return negative_triple

def negative_movie_director(sparql, movie_name):
    """Look up one Person linked to the film who is not its dbo:director.

    ``movie_name`` is spliced into the query verbatim, so it must already be
    a SPARQL term (callers pass an IRI in angle brackets).
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {movie_name} ?p2 ?o2.
    }} WHERE {{
    {movie_name} dbo:director ?o1;
        ?p2 ?o2;
         rdf:type dbo:Film.

    ?o2 rdf:type dbo:Person.

    FILTER ( ?o1 != ?o2 ).
    FILTER not exists {{ {movie_name} dbo:director ?o2 }}
    }} LIMIT 1"""

    negative_triple = get_result(sparql, construct_query)
    return negative_triple

def negative_movie_pc(sparql, movie_name):
    """Look up one Company linked to the film that is not its production company.

    ``movie_name`` is spliced into the query verbatim, so it must already be
    a SPARQL term (callers pass an IRI in angle brackets).
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {movie_name} ?p2 ?o2.
    }} WHERE {{
    {movie_name} dbo:productionCompany ?o1;
        ?p2 ?o2;
         rdf:type dbo:Film.
    ?o2 rdf:type dbo:Company.

    FILTER ( ?o1 != ?o2 ).
    FILTER not exists {{ {movie_name} dbo:productionCompany ?o2 }}
    }} LIMIT 1"""

    negative_triple = get_result(sparql, construct_query)
    return negative_triple

def negative_movie_starring(sparql, movie_name):
    """Look up one Person linked to the film who is not listed under dbo:starring.

    ``movie_name`` is spliced into the query verbatim, so it must already be
    a SPARQL term (callers pass an IRI in angle brackets).
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {movie_name} ?p2 ?o2.
    }} WHERE {{
    {movie_name} dbo:starring ?o1;
        ?p2 ?o2;
         rdf:type dbo:Film.
    ?o2 rdf:type dbo:Person.

    FILTER ( ?o1 != ?o2 ).
    FILTER not exists {{ {movie_name} dbo:starring ?o2 }}
    }} LIMIT 1"""

    negative_triple = get_result(sparql, construct_query)
    return negative_triple

In [None]:
# Fetch the positive movie facts; this also writes the positive/movie_*.csv
# files that the negative-example cells below read back.
df_movie_positiv = positive_movie(sparql)

In [None]:
# Get negative examples for movie producer

# For each positive movie, ask DBpedia for one negative producer candidate;
# stop once 100 negatives have been collected.
df_movie_producer_positiv = read_data('positive/movie_producer.csv')
negative_rows = []
for subject, obj in zip(df_movie_producer_positiv['subject'], df_movie_producer_positiv['object']):
    _, p2, o2 = negative_movie_producer(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})
    if len(negative_rows) >= 100:
        break

# Built from a list because DataFrame.append was removed in pandas 2.0.
# The 'p1' column is never filled and stays empty (NaN).
df_movie_producer = pd.DataFrame(negative_rows, columns=['subject', 'o1', 'p1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_movie_producer[['subject', 'o2']].copy(), 'https://dbpedia.org/ontology/producer', 0, 'harder_negative/movie_producer_negative.csv')

In [None]:
# Get negative examples for movie director

# For each positive movie, ask DBpedia for one negative director candidate;
# stop once 100 negatives have been collected.
df_movie_director_positiv = read_data('positive/movie_director.csv')
negative_rows = []
for subject, obj in zip(df_movie_director_positiv['subject'], df_movie_director_positiv['object']):
    _, p2, o2 = negative_movie_director(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})
    if len(negative_rows) >= 100:
        break

# Built from a list because DataFrame.append was removed in pandas 2.0.
# The 'p1' column is never filled and stays empty (NaN).
df_movie_director = pd.DataFrame(negative_rows, columns=['subject', 'p1', 'o1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_movie_director[['subject', 'o2']].copy(), 'https://dbpedia.org/ontology/director', 0, 'harder_negative/movie_director_negative.csv')

In [None]:
# Get negative examples for movie production company

# For each positive movie, ask DBpedia for one negative production-company
# candidate; stop once 100 negatives have been collected.
df_movie_pc_positiv = read_data('positive/movie_productionCompany.csv')
negative_rows = []
for subject, obj in zip(df_movie_pc_positiv['subject'], df_movie_pc_positiv['object']):
    _, p2, o2 = negative_movie_pc(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})
    if len(negative_rows) >= 100:
        break

# Built from a list because DataFrame.append was removed in pandas 2.0.
# The 'p1' column is never filled and stays empty (NaN).
df_movie_pc = pd.DataFrame(negative_rows, columns=['subject', 'p1', 'o1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_movie_pc[['subject', 'o2']].copy(), 'https://dbpedia.org/ontology/productionCompany', 0, 'harder_negative/movie_productionCompany_negative.csv')

In [None]:
# Get negative examples for movie starring

# For each positive movie, ask DBpedia for one negative starring candidate;
# stop once 100 negatives have been collected.
# The positives come from the starring file (the original cell read the
# production-company file here, a copy-paste slip).
df_movie_starring_positiv = read_data('positive/movie_starring.csv')
negative_rows = []
for subject, obj in zip(df_movie_starring_positiv['subject'], df_movie_starring_positiv['object']):
    _, p2, o2 = negative_movie_starring(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})
    if len(negative_rows) >= 100:
        break

# Built from a list because DataFrame.append was removed in pandas 2.0.
# The 'p1' column is never filled and stays empty (NaN).
df_movie_starring = pd.DataFrame(negative_rows, columns=['subject', 'p1', 'o1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_movie_starring[['subject', 'o2']].copy(), 'https://dbpedia.org/ontology/starring', 0, 'harder_negative/movie_starring_negative.csv')

In [None]:
# Sanity check: number of production-company negatives collected
# (rows whose p2 is not the empty string).
len(df_movie_pc[df_movie_pc['p2']!=''])

In [None]:
def positive_scientist(sparql):
    """Query DBpedia for positive scientist facts and write them to CSV.

    Selects up to 5000 distinct (scientist, discipline, award, birth place,
    death place) rows, then writes one CSV per relation under ``positive/``,
    keeping a single row per scientist and truth value 1. As a side effect
    the return format of ``sparql`` is set to JSON.

    Args:
        sparql: SPARQLWrapper client to send the query with.

    Returns:
        DataFrame of all fetched rows with columns scientist, acadDiscipline,
        award, birthPlace and deathPlace. It is empty when the query fails;
        the error is printed rather than raised.
    """
    columns = ['scientist', 'acadDiscipline', 'award', 'birthPlace', 'deathPlace']
    # NOTE(review): the ?education/?university pattern shares no variable with
    # ?scientist, so it does not appear to constrain the result -- confirm
    # whether it was meant to be joined to the scientist.
    query = """PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbp: <http://dbpedia.org/property/>
    PREFIX dbr: <http://dbpedia.org/resource/>
    SELECT  DISTINCT ?scientist ?disc ?award ?birthPlace ?deathPlace
        WHERE {
            VALUES ?university { dbr:Public_university dbr:Research_university dbr:Private_university }
            ?education dbp:type ?university.
            ?scientist a dbo:Person;
                dbo:academicDiscipline ?disc;
                dbo:award ?award;
                dbo:birthPlace ?birthPlace;
                dbo:deathPlace ?deathPlace.
        } LIMIT 5000
    """
    sparql.setQuery(query)

    # Frame column -> SPARQL variable holding its value.
    variable_for = {
        'scientist': 'scientist',
        'acadDiscipline': 'disc',
        'award': 'award',
        'birthPlace': 'birthPlace',
        'deathPlace': 'deathPlace',
    }

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    rows = []
    try:
        sparql.setReturnFormat(JSON)
        response = sparql.query().convert()
        for binding in response["results"]["bindings"]:
            rows.append({col: binding[var]['value'] for col, var in variable_for.items()})
    except Exception as e:
        print(e)
    df = pd.DataFrame(rows, columns=columns)

    # (object column, predicate URI stored in the CSV, output path)
    # NOTE(review): birthPlace is exported under dbpedia.org/property/ while
    # the query selects dbo:birthPlace -- confirm this is intended.
    exports = [
        ('acadDiscipline', 'https://dbpedia.org/ontology/academicDiscipline', 'positive/scientist_acadDiscipline.csv'),
        ('award', 'https://dbpedia.org/ontology/award', 'positive/scientist_award.csv'),
        ('birthPlace', 'https://dbpedia.org/property/birthPlace', 'positive/scientist_birthplace.csv'),
        ('deathPlace', 'https://dbpedia.org/ontology/deathPlace', 'positive/scientist_deathPlace.csv'),
    ]
    for column, predicate, path in exports:
        facts = (
            df[['scientist', column]]
            .sort_values('scientist', ascending=False)
            .drop_duplicates(subset='scientist', keep='first')
        )
        convert_to_csv(facts, predicate, 1, path)

    return df



In [None]:
# The functions below query DBpedia to find a related negative example for the given subject.
def negative_scientist_acad(sparql, scientist_name):
    """Look up one Book linked to the person that is not an academic discipline of theirs.

    ``scientist_name`` is spliced into the query verbatim, so it must already
    be a SPARQL term (callers pass an IRI in angle brackets).
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {scientist_name} ?p2 ?o2.
    }} WHERE {{
    {scientist_name} dbo:academicDiscipline ?o1;
        ?p2 ?o2;
        rdf:type dbo:Person.
    ?o2 a dbo:Book.
    FILTER ( ?o1 != ?o2 ).
    FILTER not exists {{ {scientist_name} dbo:academicDiscipline ?o2 }}
    }} LIMIT 1"""

    negative_triple = get_result(sparql, construct_query)
    return negative_triple

def negative_scientist_award(sparql, scientist_name):
    """Look up one Award linked to the person that is not a dbo:award of theirs.

    Triples whose predicate is rdf:type are excluded. ``scientist_name`` is
    spliced into the query verbatim, so it must already be a SPARQL term.
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {scientist_name} ?p2 ?o2.
    }} WHERE {{
    {scientist_name} dbo:award ?o1;
        ?p2 ?o2;
        rdf:type dbo:Person.
    ?o2 a dbo:Award.
    FILTER ( ?o1 != ?o2 ).
    FILTER not exists {{ {scientist_name} dbo:award ?o2 }}
    FILTER ( ?p2 != rdf:type)
    }} LIMIT 1"""

    negative_triple = get_result(sparql, construct_query)
    return negative_triple

# def negative_scientist_edu(sparql, scientist_name):
#     query = f"""CONSTRUCT {{
#     {scientist_name} ?p2 ?o2.
#     }} WHERE {{
#     VALUES ?university {{ dbr:Public_university dbr:Research_university dbr:Private_university }}
#     ?o1 dbp:type ?university.
#     ?o2 dbp:type ?university.
#     {scientist_name} dbp:education ?o1;
#         ?p2 ?o2;
#         rdf:type dbo:Person.
#     FILTER ( ?o1 != ?o2 ).
#     FILTER not exists {{ {scientist_name} dbp:education ?o2 }}
#     FILTER ( ?p2 != rdf:type)
#     }} LIMIT 1"""
    
#     return get_result(sparql, query)

def negative_scientist_birth(sparql, scientist_name):
    """Look up one Place linked to the person that is not their dbo:birthPlace.

    ``scientist_name`` is spliced into the query verbatim, so it must already
    be a SPARQL term (callers pass an IRI in angle brackets).
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {scientist_name} ?p2 ?o2.
    }} WHERE {{
    {scientist_name} dbo:birthPlace ?o1;
        ?p2 ?o2;
        rdf:type dbo:Person.
    ?o2 a dbo:Place.
    FILTER ( ?o1 != ?o2 ).
    FILTER not exists {{ {scientist_name} dbo:birthPlace ?o2 }}
    }} LIMIT 1"""

    negative_triple = get_result(sparql, construct_query)
    return negative_triple

def negative_scientist_death(sparql, scientist_name):
    """Look up one Place linked to the person that is not their dbo:deathPlace.

    ``scientist_name`` is spliced into the query verbatim, so it must already
    be a SPARQL term (callers pass an IRI in angle brackets).
    Returns the (subject, predicate, object) strings from ``get_result``.
    """
    construct_query = f"""CONSTRUCT {{
    {scientist_name} ?p2 ?o2.
    }} WHERE {{
    {scientist_name} dbo:deathPlace ?o1;
        ?p2 ?o2;
        rdf:type dbo:Person.
    ?o2 a dbo:Place.
    FILTER ( ?o1 != ?o2 ).
    FILTER not exists {{ {scientist_name} dbo:deathPlace ?o2 }}
    }} LIMIT 1"""

    negative_triple = get_result(sparql, construct_query)
    return negative_triple

In [None]:
# Fetch the positive scientist facts; this also writes the
# positive/scientist_*.csv files that the negative-example cells below read back.
df_scientist_positiv = positive_scientist(sparql)

In [None]:
# Get negative examples for scientist academic discipline

# For each positive scientist, ask DBpedia for one negative academic
# discipline candidate; stop once 100 negatives have been collected.
df_scientist_acad_positiv = read_data('positive/scientist_acadDiscipline.csv')
negative_rows = []
for subject, obj in zip(df_scientist_acad_positiv['subject'], df_scientist_acad_positiv['object']):
    _, p2, o2 = negative_scientist_acad(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})
    if len(negative_rows) >= 100:
        break

# Built from a list because DataFrame.append was removed in pandas 2.0.
# The 'p1' column is never filled and stays empty (NaN).
df_scientist_acad = pd.DataFrame(negative_rows, columns=['subject', 'p1', 'o1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_scientist_acad[['subject', 'o2']].copy(), 'https://dbpedia.org/ontology/academicDiscipline', 0, 'harder_negative/scientist_academicDiscipline_negative.csv')

In [None]:
# Get negative examples for scientist academic awards

# For each positive scientist, ask DBpedia for one negative award candidate;
# stop once 100 negatives have been collected.
df_scientist_award_positiv = read_data('positive/scientist_award.csv')
negative_rows = []
for subject, obj in zip(df_scientist_award_positiv['subject'], df_scientist_award_positiv['object']):
    _, p2, o2 = negative_scientist_award(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})
    if len(negative_rows) >= 100:
        break

# Built from a list because DataFrame.append was removed in pandas 2.0.
# The 'p1' column is never filled and stays empty (NaN).
df_scientist_award = pd.DataFrame(negative_rows, columns=['subject', 'p1', 'o1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_scientist_award[['subject', 'o2']].copy(), 'https://dbpedia.org/ontology/award', 0, 'harder_negative/scientist_award_negative.csv')

In [None]:
# Get negative examples for scientist academic birthplace

# For each positive scientist, ask DBpedia for one negative birth place
# candidate; stop once 100 negatives have been collected.
df_scientist_birthplace_positiv = read_data('positive/scientist_birthplace.csv')
negative_rows = []
for subject, obj in zip(df_scientist_birthplace_positiv['subject'], df_scientist_birthplace_positiv['object']):
    _, p2, o2 = negative_scientist_birth(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})
    if len(negative_rows) >= 100:
        break

# Built from a list because DataFrame.append was removed in pandas 2.0.
# The 'p1' column is never filled and stays empty (NaN).
df_scientist_birthplace = pd.DataFrame(negative_rows, columns=['subject', 'p1', 'o1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_scientist_birthplace[['subject', 'o2']].copy(), 'https://dbpedia.org/property/birthPlace', 0, 'harder_negative/scientist_birthPlace_negative.csv')

In [None]:
# Get negative examples for scientist academic deathplace

# For each positive scientist, ask DBpedia for one negative death place
# candidate. Unlike the other cells, no 100-row cap is applied here: every
# positive subject is queried.
df_scientist_deathplace_positiv = read_data('positive/scientist_deathPlace.csv')
negative_rows = []
for subject, obj in zip(df_scientist_deathplace_positiv['subject'], df_scientist_deathplace_positiv['object']):
    _, p2, o2 = negative_scientist_death(sparql, '<' + subject + '>')
    if o2 != '':  # empty strings mean no negative was found for this subject
        negative_rows.append({'subject': subject, 'o1': obj, 'o2': o2, 'p2': p2})

# Built from a list because DataFrame.append was removed in pandas 2.0.
# The 'p1' column is never filled and stays empty (NaN).
df_scientist_deathplace = pd.DataFrame(negative_rows, columns=['subject', 'p1', 'o1', 'p2', 'o2'])

# convert_to_csv mutates its argument, so hand it an independent copy.
convert_to_csv(df_scientist_deathplace[['subject', 'o2']].copy(), 'https://dbpedia.org/ontology/deathPlace', 0, 'harder_negative/scientist_deathPlace_negative.csv')

In [None]:
# Display the positive scientist rows without exact duplicates. The result is
# not assigned, so df_scientist_positiv itself is left unchanged.
df_scientist_positiv.drop_duplicates()

In [None]:
# Display the collected death-place negatives for inspection.
df_scientist_deathplace

In [None]:
# Reload the positive death-place facts from disk and display them for inspection.
df_scientist_deathplace_positiv = read_data('positive/scientist_deathPlace.csv')
df_scientist_deathplace_positiv