In [1]:
import os
import re

import pandas as pd
from neo4j import GraphDatabase
from selenium import webdriver

# Selenium-controlled Chrome browser used to load the transcript pages.
driver = webdriver.Chrome()

# Neo4j connection settings. Each one can be overridden with an environment
# variable so that real credentials do not have to be written into the notebook;
# the fallbacks are the values that were previously hard-coded here.
NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', 'letmein')
neo4j_driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [87]:
# (title, transcript URL) pairs, one per film transcript to scrape.
# The title is stored verbatim as the Book node title, so it is written the way
# it should appear in the graph (spaces, full name).
# NOTE(review): Goblet of Fire and Deathly Hallows – Part 1 are not listed here —
# confirm whether that is intentional.
books = [("Harry Potter and the Philosopher's Stone", 'https://warnerbros.fandom.com/wiki/Harry_Potter_and_the_Philosopher%27s_Stone/Transcript'),
         ('Harry Potter and the Chamber of Secrets', 'https://warnerbros.fandom.com/wiki/Harry_Potter_and_the_Chamber_of_Secrets/Transcript'),
         ('Harry Potter and the Prisoner of Azkaban','https://warnerbros.fandom.com/wiki/Harry_Potter_and_the_Prisoner_of_Azkaban/Transcript'),
         ('Harry Potter and the Order of the Phoenix', 'https://warnerbros.fandom.com/wiki/Harry_Potter_and_the_Order_of_the_Phoenix/Transcript'),
         ('Harry Potter and the Half-Blood Prince', 'https://warnerbros.fandom.com/wiki/Harry_Potter_and_the_Half-Blood_Prince/Transcript'),
         ('Harry Potter and the Deathly Hallows – Part 2', 'https://warnerbros.fandom.com/wiki/Harry_Potter_and_the_Deathly_Hallows_%E2%80%93_Part_2/Transcript')]

In [89]:
def _xpath_literal(value):
    """Return `value` as an XPath 1.0 string literal, whatever quotes it contains."""
    if "'" not in value:
        return f"'{value}'"
    if '"' not in value:
        return f'"{value}"'
    # XPath 1.0 has no escape character, so a string holding both kinds of quote
    # has to be assembled from single-quoted pieces joined by a quoted apostrophe.
    pieces = ", \"'\", ".join(f"'{part}'" for part in value.split("'"))
    return f"concat({pieces})"


def _parse_dialogue(raw_text):
    """Return the (character, text) pairs found in the text of one transcript element."""
    # Skip scene setting written in (...) or [...] in the middle of the text
    cleaned = re.sub(r"[\(\[].*?[\)\]]", "", raw_text)
    pairs = []
    for row in cleaned.split('\n'):
        # Split on the first colon only, so colons inside the speech are kept.
        character, colon, text = row.partition(':')
        if not colon:
            # Blank row or narration without a "Speaker:" prefix: nothing to store.
            continue
        pairs.append((character.strip(), text.strip()))
    return pairs


def store_to_neo4j(book):
    """Scrape one transcript page and store its dialogue in Neo4j.

    The page is loaded with the notebook-level Selenium `driver`. Every <h2>
    heading except the first and the last is treated as a scene; the <p>/<dl>
    elements located between a heading and the next one are parsed as
    "Character: text" rows. All rows are then written with a single Cypher
    statement through the notebook-level `neo4j_driver`.

    Args:
        book: sequence whose item 0 is the title stored on the Book node and
            whose item 1 is the URL of the transcript page.

    Returns:
        None.

    Scraping is best-effort: a scene whose lookup or parsing raises is reported
    with `print` and skipped, and the remaining scenes are still processed.
    """
    title, url = book[0], book[1]
    driver.get(url)
    elem = driver.find_element_by_class_name("mw-parser-output")
    # Read every heading text once; each `.text` access goes through the browser.
    headings = [heading.text for heading in elem.find_elements_by_tag_name('h2')]

    data = []
    for i in range(1, len(headings) - 1):
        scene = headings[i]
        # Quote the heading texts safely: an apostrophe in a heading would
        # otherwise produce an invalid XPath expression.
        between = (f"[./preceding-sibling::h2[.={_xpath_literal(scene)}]]"
                   f"[./following-sibling::h2[.={_xpath_literal(headings[i + 1])}]]")
        try:
            content = elem.find_elements_by_xpath(f"//p{between} | dl{between}")
            for section in content:
                for character, text in _parse_dialogue(section.text):
                    data.append({'book':title, 'scene': scene, 'character':character, 'text':text})
        except Exception as exc:
            # Keep going with the other scenes, but do not hide the failure.
            print(f"Skipping scene {scene!r} of {title!r}: {exc!r}")

    if not data:
        # Nothing was scraped, so there is nothing to write.
        return

    with neo4j_driver.session() as session:
        session.run("""
        UNWIND $data as row
        MERGE (b:Book{title:row.book})
        MERGE (s:Scene{title:row.scene})
        MERGE (c:Character{name:row.character})
        CREATE (l:Line{text:row.text})
        MERGE (c)-[:SPOKE]->(l)
        MERGE (l)-[:IN_SCENE]->(s)
        MERGE (s)-[:IN_BOOK]->(b)
        """, {'data':data})

In [90]:
# Scrape each listed transcript and load it into Neo4j, one entry at a time.
for title_and_url in books:
    store_to_neo4j(title_and_url)

In [3]:
def read_query(query, params=None):
    """Run a Cypher statement and return its result as a pandas DataFrame.

    Args:
        query: Cypher text to execute.
        params: optional dict of query parameters forwarded to `session.run`,
            so values can be passed as `$name` placeholders instead of being
            spliced into the query text. Defaults to None (no parameters).

    Returns:
        A DataFrame with one row per returned record and one column per
        result key.
    """
    with neo4j_driver.session() as session:
        result = session.run(query, params)
        # Pull every record while the session is still open.
        rows = [record.values() for record in result]
        return pd.DataFrame(rows, columns=result.keys())

In [96]:
# List pairs of distinct Character nodes whose names differ only by the word
# 'Professor' (c1 is the bare name, c2 the titled one).
professor_duplicates_query = """
MATCH (c1:Character),(c2:Character)
WHERE id(c1) <> id(c2) AND c1.name = trim(replace(c2.name, 'Professor', ''))
RETURN c1.name, c2.name
"""
read_query(professor_duplicates_query)

Unnamed: 0,c1.name,c2.name
0,Dumbledore,Professor Dumbledore
1,McGonagall,Professor McGonagall


In [100]:
# For every bare-name / 'Professor ...' pair of Character nodes, merge the two
# nodes into one with apoc.refactor.mergeNodes.
merge_professors_query = """
MATCH (c1:Character),(c2:Character)
WHERE id(c1) <> id(c2) AND c1.name = trim(replace(c2.name, 'Professor', ''))
CALL apoc.refactor.mergeNodes([c2,c1]) YIELD node
RETURN distinct 'done'
"""
read_query(merge_professors_query)

Unnamed: 0,'distinct done'
0,distinct done
1,distinct done


In [101]:
# Show every Character node whose name contains "Neville".
find_neville_query = """
MATCH (c:Character)
WHERE c.name CONTAINS "Neville"
RETURN c.name as character
"""
read_query(find_neville_query)

Unnamed: 0,character
0,Neville
1,Neville Longbottom


In [102]:
# Collect all Character nodes whose name contains "Neville" and merge them
# into a single node.
merge_neville_query = """
MATCH (c:Character)
WHERE c.name CONTAINS "Neville"
WITH collect(c) as nodes
CALL apoc.refactor.mergeNodes(nodes) YIELD node
RETURN distinct 'done'
"""
read_query(merge_neville_query)

Unnamed: 0,'done'
0,done


In [103]:
# Collect all Character nodes whose name contains "Man in" and merge them
# into a single node.
merge_man_in_query = """
MATCH (c:Character)
WHERE c.name CONTAINS "Man in"
WITH collect(c) as nodes
CALL apoc.refactor.mergeNodes(nodes) YIELD node
RETURN distinct 'done'
"""
read_query(merge_man_in_query)

Unnamed: 0,'done'
0,done


In [104]:
# Collect all Character nodes whose name contains "Death Eater" and merge them
# into a single node.
merge_death_eaters_query = """
MATCH (c:Character)
WHERE c.name CONTAINS "Death Eater"
WITH collect(c) as nodes
CALL apoc.refactor.mergeNodes(nodes) YIELD node
RETURN distinct 'done'
"""
read_query(merge_death_eaters_query)

Unnamed: 0,'done'
0,done


In [108]:
# Remove Character nodes (and their relationships) whose name is longer than
# 100 characters — presumably parsing artefacts rather than real speakers.
delete_long_names_query = """
MATCH (c:Character)
WHERE size(c.name) > 100
DETACH DELETE c
"""
read_query(delete_long_names_query)

In [110]:
# Remove Character nodes (and their relationships) whose name contains "All" —
# presumably group speakers; note the match is a plain substring test.
delete_all_speakers_query = """
MATCH (c:Character)
WHERE c.name contains "All"
DETACH DELETE c
"""
read_query(delete_all_speakers_query)

In [114]:
# Remove Character nodes (and their relationships) whose name contains "and " —
# presumably rows attributed to several speakers at once.
delete_joint_speakers_query = """
MATCH (c:Character)
WHERE c.name contains "and " 
DETACH DELETE c
"""
read_query(delete_joint_speakers_query)

In [None]:
# Link every pair of characters that have lines in the same scene with an
# INTERACTS relationship. The weight is the number of (line, line) pairs the
# two share across scenes; id(c) < id(c1) makes each pair appear only once.
build_interactions_query = """
MATCH (c:Character)-[:SPOKE]->()-[:IN_SCENE]->()<-[:IN_SCENE]-()<-[:SPOKE]-(c1:Character)
WHERE id(c) < id(c1)
WITH c,c1,count(*) as total_interactions
MERGE (c)-[r:INTERACTS]-(c1)
SET r.weight = total_interactions"""
read_query(build_interactions_query)

In [117]:
# Run weighted PageRank over Character nodes connected by undirected INTERACTS
# relationships and write the score to each node's 'pagerank' property.
pagerank_query = """
CALL gds.pageRank.write({
    nodeProjection:'Character',
    relationshipProjection:{INTERACTS:{orientation:'UNDIRECTED', properties:'weight'}},
    writeProperty:'pagerank',
    relationshipWeightProperty:'weight'
})
"""
read_query(pagerank_query)

Unnamed: 0,writeMillis,nodePropertiesWritten,ranIterations,didConverge,centralityDistribution,postProcessingMillis,createMillis,computeMillis,configuration
0,116,75,20,False,"{'p99': 8.108580589294434, 'min': 0.1521606445...",64,27,55,"{'maxIterations': 20, 'writeConcurrency': 4, '..."


In [118]:
# Run weighted Louvain community detection over the same Character / INTERACTS
# projection and write the community id to each node's 'louvain' property.
louvain_query = """
CALL gds.louvain.write({
    nodeProjection:'Character',
    relationshipProjection:{INTERACTS:{orientation:'UNDIRECTED', properties:'weight'}},
    writeProperty:'louvain',
    relationshipWeightProperty:'weight'
})
"""
read_query(louvain_query)

Unnamed: 0,writeMillis,nodePropertiesWritten,modularity,modularities,ranLevels,communityCount,communityDistribution,postProcessingMillis,createMillis,computeMillis,configuration
0,16,75,0.150615,"[0.1476574385046393, 0.15061480279678685]",2,6,"{'p99': 33, 'min': 4, 'max': 33, 'mean': 12.5,...",10,31,200,"{'maxIterations': 10, 'writeConcurrency': 4, '..."


In [7]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

In [4]:
# Load every Line node's internal id and text into a DataFrame for the
# sentiment analysis cells.
line_text_query = """
MATCH (l:Line)
RETURN id(l) as id, l.text as text
"""
sentiment_data = read_query(line_text_query)

In [8]:
# Download NLTK's 'vader_lexicon' resource (presumably what the
# SentimentIntensityAnalyzer used in this notebook needs — confirm).
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/tomaz/nltk_data...


True

In [9]:
# Shared sentiment analyzer; its polarity_scores() is applied to each line of dialogue.
sia = SentimentIntensityAnalyzer()

In [12]:
# Print each line of dialogue followed by the polarity scores sia assigns to it.
for line_text in sentiment_data['text']:
    scores = sia.polarity_scores(line_text)
    print(line_text, scores)

I should have known that you would be here...Professor McGonagall {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Good evening, Professor Dumbledore. Are the rumors true, Albus? {'neg': 0.0, 'neu': 0.551, 'pos': 0.449, 'compound': 0.6908}
I'm afraid so, Professor. The good, and the bad {'neg': 0.256, 'neu': 0.512, 'pos': 0.232, 'compound': -0.0865}
And the boy? {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Hagrid is bringing him. {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Do you think it wise to trust Hagrid with something as important as this? {'neg': 0.0, 'neu': 0.573, 'pos': 0.427, 'compound': 0.802}
Ah, Professor, I would trust Hagrid with my life. {'neg': 0.0, 'neu': 0.68, 'pos': 0.32, 'compound': 0.5106}
Professor Dumbledore, Sir. Professor McGonagall. {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
No problems, I trust, Hagrid? {'neg': 0.533, 'neu': 0.109, 'pos': 0.359, 'compound': -0.1531}
No, sir. Little tyke fell asleep just as we were flying ov

W-what do you m-mean? {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
You know perfectly well what I mean.  We'll have another chat soon...when you've had time to decide where your loyalties lie. {'neg': 0.0, 'neu': 0.649, 'pos': 0.351, 'compound': 0.8481}
Oh, Professors. I found this, in the Restricted Section. It's still hot. That means there's a student out of bed. {'neg': 0.133, 'neu': 0.867, 'pos': 0.0, 'compound': -0.3818}
Mum?  Dad? {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Ron! You've really got to see this! Ron! You've got to see this!  Ron, Ron, come on. Get out of bed! {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Why? {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
There's something you've got to see. Now, come on! {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Come on. Come. Come look, it's my parents! {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
I only see me. {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Look in pro

I killed Sirius Black. Ha ha ha! You coming to get me? {'neg': 0.237, 'neu': 0.369, 'pos': 0.394, 'compound': 0.2481}
Crucio! {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
You've got to mean it, Harry. She killed him. She deserves it. You know the spell, Harry.  Do it.  So weak. {'neg': 0.288, 'neu': 0.712, 'pos': 0.0, 'compound': -0.8268}
It was foolish of you to come here tonight, Tom. The Aurors are on their way. {'neg': 0.123, 'neu': 0.877, 'pos': 0.0, 'compound': -0.2732}
By which time I shall be gone. And you... shall be dead. {'neg': 0.301, 'neu': 0.699, 'pos': 0.0, 'compound': -0.6486}
They're here! Search everywhere. Look down by the stables! You two, come with me! Any sign?  Potter! {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Many of you are surely wondering why I have summoned you at this hour. It has come to my attention that earlier this evening, Harry Potter was sighted in Hogsmeade.  Now, should anyone, student or staff, attempt to aid Mr. Potter, they 

In [16]:
from hatesonar import Sonar
# Shared hatesonar classifier; its ping() is applied to each line of dialogue.
sonar = Sonar()



In [23]:
# Classify every line of dialogue with sonar. The raw result is always printed;
# when the top class is not 'neither', every class scored above 0.5 is also
# printed next to the line's text.
for line_text in sentiment_data['text']:
    verdict = sonar.ping(text=line_text)
    print(verdict)
    if verdict['top_class'] != 'neither':
        for label in verdict['classes']:
            if label['confidence'] > 0.5:
                print(line_text, label['class_name'], label['confidence'])

{'text': 'I should have known that you would be here...Professor McGonagall', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.10641463747328841}, {'class_name': 'offensive_language', 'confidence': 0.4015076538661645}, {'class_name': 'neither', 'confidence': 0.49207770866054706}]}
{'text': 'Good evening, Professor Dumbledore. Are the rumors true, Albus?', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.054301561822280006}, {'class_name': 'offensive_language', 'confidence': 0.2899966179163342}, {'class_name': 'neither', 'confidence': 0.6557018202613858}]}
{'text': "I'm afraid so, Professor. The good, and the bad", 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.03445009552215627}, {'class_name': 'offensive_language', 'confidence': 0.30890789300002847}, {'class_name': 'neither', 'confidence': 0.6566420114778152}]}
{'text': 'And the boy?', 'top_class': 'neither', 'classes': [{'class_name':

{'text': "He killed my parents, didn't he? The one who gave me this. You know, Hagrid, I know you do.", 'top_class': 'offensive_language', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.04082479172101252}, {'class_name': 'offensive_language', 'confidence': 0.5320232100688999}, {'class_name': 'neither', 'confidence': 0.42715199821008765}]}
He killed my parents, didn't he? The one who gave me this. You know, Hagrid, I know you do. offensive_language 0.5320232100688999
{'text': "First, and understand this, Harry, 'cause it's very important. Not all wizards are good. Some of them go bad. A few years ago, there was one wizard who went as bad as you can go. And his name was V-...his name was V-...", 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.06231021952064968}, {'class_name': 'offensive_language', 'confidence': 0.38274678402305684}, {'class_name': 'neither', 'confidence': 0.5549429964562935}]}
{'text': 'Maybe if you wrote it down?', 'top_clas

{'text': "Harry, no way! You heard what Madam Hooch said! Besides, you don't even know how to fly.  What an idiot.", 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.06618853024569611}, {'class_name': 'offensive_language', 'confidence': 0.3808565614063555}, {'class_name': 'neither', 'confidence': 0.5529549083479484}]}
{'text': "Give it here, Malfoy, or I'll knock you off your broom!", 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.12489176760156499}, {'class_name': 'offensive_language', 'confidence': 0.322476905531093}, {'class_name': 'neither', 'confidence': 0.552631326867342}]}
{'text': 'Is that so?  Have it your way, then!', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.07896225652569824}, {'class_name': 'offensive_language', 'confidence': 0.4200341635929939}, {'class_name': 'neither', 'confidence': 0.5010035798813078}]}
{'text': 'Good job, Harry!', 'top_class': 'neither', 'classe

{'text': "Look in properly. Go on. Stand there. There. You see them, don't you? Thats my dad", 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.059022200860052734}, {'class_name': 'offensive_language', 'confidence': 0.38770244812524546}, {'class_name': 'neither', 'confidence': 0.5532753510147019}]}
{'text': "That's me! Only, I'm head boy...and I'm holding the Quidditch cup! And bloody hell, I'm Quidditch Captain too! I look good. Harry, do you think this mirror shows the future?", 'top_class': 'offensive_language', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.03970262307214962}, {'class_name': 'offensive_language', 'confidence': 0.497931874777989}, {'class_name': 'neither', 'confidence': 0.46236550214986155}]}
{'text': 'How can it? Both my parents are dead.', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.04619560305062565}, {'class_name': 'offensive_language', 'confidence': 0.3618275532172072}, {'class_na

{'text': 'fifty points!', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.03945320844208391}, {'class_name': 'offensive_language', 'confidence': 0.33037608806864915}, {'class_name': 'neither', 'confidence': 0.630170703489267}]}
{'text': 'Good job.', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.03847136223752934}, {'class_name': 'offensive_language', 'confidence': 0.3307137900015447}, {'class_name': 'neither', 'confidence': 0.6308148477609259}]}
{'text': 'Second, to Mr. Ronald Weasley, for the best played game of chess that Hogwarts has seen these many years', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.040067119386598314}, {'class_name': 'offensive_language', 'confidence': 0.3240514853576187}, {'class_name': 'neither', 'confidence': 0.635881395255783}]}
{'text': "We're tied with Slytherin!", 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.043144

{'text': 'This is it.  This is it, Ron. I think this is the entrance to the Chamber of Secrets.', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.05286007626171228}, {'class_name': 'offensive_language', 'confidence': 0.3077618401766955}, {'class_name': 'neither', 'confidence': 0.6393780835615922}]}
{'text': 'Say something. Harry, say something in Parseltongue.', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.03945320844208391}, {'class_name': 'offensive_language', 'confidence': 0.33037608806864915}, {'class_name': 'neither', 'confidence': 0.630170703489267}]}
{'text': 'Hesha- Hassah.', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.03945320844208391}, {'class_name': 'offensive_language', 'confidence': 0.33037608806864915}, {'class_name': 'neither', 'confidence': 0.630170703489267}]}
{'text': "Excellent, Harry. Ah, good work. Well, then, I'll just be, uh... there's no need for me to s

{'text': 'Sirius, what are you doing here? If somebody sees you...', 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.13571644987562878}, {'class_name': 'offensive_language', 'confidence': 0.37160492236290243}, {'class_name': 'neither', 'confidence': 0.4926786277614688}]}
{'text': "I had to see you off, didn't I?  What's life without a little risk?", 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.04314393711944203}, {'class_name': 'offensive_language', 'confidence': 0.42858860959610573}, {'class_name': 'neither', 'confidence': 0.5282674532844521}]}
{'text': "I don't want to see you get chucked back in Azkaban.", 'top_class': 'neither', 'classes': [{'class_name': 'hate_speech', 'confidence': 0.06993460389878954}, {'class_name': 'offensive_language', 'confidence': 0.3852715527632747}, {'class_name': 'neither', 'confidence': 0.5447938433379359}]}
{'text': "I'm surprised the Ministry's still letting you walk around free, P