# Scrap notebook for testing and debugging

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from data_loader import Data_Loader
from annotator import Annotator
import time
from scraper import Reddit

In [None]:
# URLs of the Reddit submissions to add to the graph.
# Each entry must be a top-level post, not a link to an individual comment.
submissions = [
    "https://www.reddit.com/r/sanfrancisco/comments/7r3cy3/how_the_san_francisco_school_lottery_works_and/",
]
# Full list of submissions currently used for the school choice project
# (uncomment to load all of them instead of the single post above):
# submissions = [
#     'https://www.reddit.com/r/sanfrancisco/comments/bs5f69/just_had_the_elementary_school_lottery_explained/',
#     'https://www.reddit.com/r/sanfrancisco/comments/7r3cy3/how_the_san_francisco_school_lottery_works_and/',
#     'https://www.reddit.com/r/sanfrancisco/comments/4ah4no/fuck_the_sf_school_lottery_thats_all/',
#     'https://www.reddit.com/r/sanfrancisco/comments/b5kbse/how_the_student_assignment_system_works_sfusd/',
#     'https://www.reddit.com/r/sanfrancisco/comments/9hh9z8/two_sf_school_board_members_to_introduce/',
#     'https://www.reddit.com/r/sanfrancisco/comments/4646v8/experience_with_enrolling_in_sfusd_school/',
#     'https://www.reddit.com/r/sanfrancisco/comments/a5nrej/sf_school_board_plans_to_replace_muchcriticized/',
#     'https://www.reddit.com/r/sanfrancisco/comments/bhcxhb/san_francisco_had_an_ambitious_plan_to_tackle/',
#     'https://www.reddit.com/r/sanfrancisco/comments/5e5834/i_made_a_website_of_sf_elementary_school_test/',
#     'https://www.reddit.com/r/sanfrancisco/comments/cg5coh/sfusd_kindergarten/'
# ]

In [4]:
# Create the loader object that the rest of the notebook drives
# (its .graph, clear_graph, load_from_comment and load_submissions are used below).
dl = Data_Loader()

In [3]:
# Reset the graph before loading test data.
# NOTE(review): presumably this deletes everything in the connected database —
# confirm in data_loader.py before running against a graph you want to keep.
dl.clear_graph()

In [4]:
# Permalink to a single comment (id "dstzrg6") inside the 7r3cy3 submission.
# Kept as a one-element list because that is what gets passed to load_from_comment.
comment_url = [
    "https://www.reddit.com/r/sanfrancisco/comments/7r3cy3/how_the_san_francisco_school_lottery_works_and/dstzrg6?utm_source=share&utm_medium=web2x",
]

In [5]:
# Load the comment permalink(s) defined above into the graph.
# NOTE(review): the argument is a list of URLs; whether a bare string is also
# accepted is not visible from this notebook — check Data_Loader.load_from_comment.
dl.load_from_comment(comment_url)

In [3]:
from nodes import Comment

In [4]:
# Fetch the Comment node whose id is "dstzrg6" (the comment loaded from comment_url).
# NOTE(review): this looks like the py2neo OGM match API; .first() presumably
# yields None when nothing matches — confirm before relying on `c` below.
c = Comment.match(dl.graph, "dstzrg6").first()

In [5]:
# Show the body text of the matched comment as the cell output.
c.text

"I'm a millionaire, but I live in a CTIP block.  That's San Francisco for you. "

In [6]:
# Create the annotator used to attach codes to the comment in the next cell.
a = Annotator()

In [8]:
# Annotate comment `c`, passing its full text as the annotated span.
# NOTE(review): the first argument appears to be ';'-separated entries of
# ':'-separated code levels — confirm the expected format in annotator.py.
a.annotate(
    "algorithm: tie-breakers: CTIP1; "
    "another code: subcode; "
    "algorithm: somethingelse",
    c,
    c.text,
)

# Text indexing

In [5]:
# Build a full-text index named "contentIndex" over the `title` and `text`
# properties of Submission and Comment nodes.
# NOTE(review): this procedure call is likely to raise if an index with the same
# name already exists, so the cell may not be safe to re-run — confirm for the
# Neo4j version in use.
dl.graph.evaluate(
    'CALL db.index.fulltext.createNodeIndex("contentIndex",["Submission", "Comment"],["title", "text"])'
)

In [10]:
# Search the "contentIndex" full-text index for "the lottery" and materialise
# the hits as a list of dicts (each with a 'node' and a relevance 'score').
# The query has no interpolated values, so it is a plain string, not an f-string.
# NOTE: this rebinds `c`, which previously held the matched Comment node.
c = dl.graph.run(
    'CALL db.index.fulltext.queryNodes("contentIndex", "the lottery")'
).data()

In [11]:
# Display the full-text search hits (list of {'node': ..., 'score': ...} dicts).
c

[{'node': (_595:Submission {created_time: 1516215974.0, id: '7r3cy3', link: '/r/sanfrancisco/comments/7r3cy3/how_the_san_francisco_school_lottery_works_and/', score: 32, text: '', title: 'How the San Francisco School Lottery Works, And How It Doesn\u2019t', upvote_ratio: 0.86, url: 'https://ww2.kqed.org/news/2018/01/11/how-the-san-francisco-school-lottery-works-and-how-it-doesnt-2/'}),
  'score': 1.0679931640625},
 {'node': (_108:Comment {created_time: 1516251944.0, id: 'dsus810', link: '/r/sanfrancisco/comments/7r3cy3/how_the_san_francisco_school_lottery_works_and/dsus810/', score: 3, submission: '7r3cy3', text: "> The lottery is absolutely not random.\n\nIf there's a (random) chance that you won't get your first preference, then the lottery *is* random. ", top_level: false}),
  'score': 0.2881859540939331},
 {'node': (_675:Comment {created_time: 1516249579.0, id: 'dsuqip6', link: '/r/sanfrancisco/comments/7r3cy3/how_the_san_francisco_school_lottery_works_and/dsuqip6/', score: 3, subm

# Timing experiments

In [None]:
# Start the first timing run from a cleared graph.
# NOTE(review): presumably destructive for the connected database — confirm in data_loader.py.
dl.clear_graph()

In [None]:
# Time how long it takes to load every URL in `submissions` into the graph.
start = time.perf_counter()
dl.load_submissions(submissions)
elapsed = time.perf_counter() - start
print(f"Added submission in {elapsed:0.4f} seconds")

In [None]:
# Clear the graph again so the second timing run starts from the same state as the first.
# NOTE(review): presumably destructive for the connected database — confirm in data_loader.py.
dl.clear_graph()

In [None]:
# Remove the uniqueness constraint on `id` for each of these node labels,
# in the same order as before, ahead of the second timing run.
for label in ("User", "Submission", "Subreddit"):
    dl.graph.schema.drop_uniqueness_constraint(label, "id")

In [None]:
# Second timing run: load the same submissions after the uniqueness constraints
# were dropped in the previous cell, for comparison with the first run.
# The message previously read "Downloaded the tutorial", a copy-paste leftover
# that did not describe what is being timed.
tic = time.perf_counter()
dl.load_submissions(submissions)
toc = time.perf_counter()
print(f"Added submission without uniqueness constraints in {toc - tic:0.4f} seconds")