# Getting "context chunks"

For a specified character index range, let's retrieve some of the context left and right of that range.


# Initial setup

In [27]:
import pandas as pd
from text_matcher.matcher import Text, Matcher

In [28]:
# ACTION: copy path to results JSONL file here (filename should end "_results_[hyperparameters].jsonl")

startData = "/Users/milan/Library/CloudStorage/GoogleDrive-mtt2126@columbia.edu/My Drive/iAnnotate/MIT/Quotable Content/Data/Barthes/1977_DeathAuthorHeath/Results/Barthes_1977_DeathAuthorHeath_results_t2-c3-n2-m3-nostops.jsonl"

In [29]:
# Infer naming variables from path

textTitle = startData.rsplit("_", 4)[-3]
publicationYear = startData.rsplit("_", 4)[-4]
authorSurname = startData.rsplit("_", 4)[-5]
authorSurname = authorSurname.rsplit("/", 1)[-1]
hyperparSuffix = startData.rsplit("_", 4)[-1]
hyperparSuffix = f"_{hyperparSuffix[:-6]}"
dataDir = startData.rsplit("/", 4)[0]

print(f"Author surname: {authorSurname}\nPublication year: {publicationYear}\nText title: {textTitle}\nHyperparameters suffix: {hyperparSuffix}\nData directory:{dataDir}")

projectName = f"{authorSurname}_{publicationYear}_{textTitle}"
sourceDir = f"{dataDir}/{authorSurname}/{publicationYear}_{textTitle}/SourceText"
corpusDir = f"{dataDir}/{authorSurname}/{publicationYear}_{textTitle}/TargetCorpus"
resultsDir = f"{dataDir}/{authorSurname}/{publicationYear}_{textTitle}/Results"

Author surname: Barthes
Publication year: 1977
Text title: DeathAuthorHeath
Hyperparameters suffix: _t2-c3-n2-m3-nostops
Data directory:/Users/milan/Library/CloudStorage/GoogleDrive-mtt2126@columbia.edu/My Drive/iAnnotate/MIT/Quotable Content/Data


In [4]:
# Load source text

with open(f"{sourceDir}/{projectName}_plaintext.txt") as f: 
    rawText = f.read()

mm = Text(rawText, 'Middlemarch')

In [5]:
# Load in the JSONL file with the full text of JSTOR articles

fulltextDF = pd.read_json(f"{corpusDir}/{projectName}_fulltext.jsonl", lines=True)
print(f"Loaded {len(fulltextDF)} full-text items")

In [14]:
# Load in the JSONL file with the results of text matcher

resultsDF = pd.read_json(f"{startData}", lines=True)
print(f"Loaded {len(resultsDF)} results from text-matcher")

In [15]:
len(resultsDF)

20256

In [18]:
df = pd.merge(fulltextDF, resultsDF, on="id")

In [19]:
df.head

<bound method NDFrame.head of                                                creator_x datePublished_x  \
0                       [ROSEMARIE MORGAN, T. R. Wright]      1990-06-02   
1                                    [Karen L. Thornber]      2009-08-01   
2                                        [MARTIN THOMAS]      2009-12-01   
3                          [Michael Allen, Paul Muldoon]      1995-12-01   
4                                         [SAAM TRIVEDI]      2015-10-01   
...                                                  ...             ...   
20251                           [Daniel Chávez Landeros]      2023-01-01   
20252                                   [MICHAEL THORPE]      1995-10-01   
20253  [Stephen Steele, André Breton, Jean-Pierre Sak...      2004-04-01   
20254                                 [Gaynor Macdonald]      2003-06-01   
20255                                     [Bruce Horner]      1994-10-01   

           docSubType_x docType_x                doi_x  \

In [22]:
# Free up memory from initial separate dataframes

del fulltextDF
del resultsDF

In [23]:
# Drop items with no matches from results dataframe

print(f"Total number of items from JSTOR: {len(df)}")
df = df[df['numMatches']>=1]
print(f"Total number of items with at least one quotation detected: {len(df)}")

Total number of items from JSTOR: 676
Total number of items with at least one quotation detected: 676


In [32]:
df.head

<bound method NDFrame.head of                                         creator_x datePublished_x  \
43                                  [Frank Burke]      1989-10-01   
50                            [Lars-Olof Åhlberg]      1999-04-01   
79                         [Leila Rahimi Bahmany]      2015-01-01   
93                                 [SEÁN TRAVERS]      2018-01-01   
104                              [Kinereth Meyer]      1989-07-01   
...                                           ...             ...   
20138          [Fred Misurella, Alvin J. Seltzer]      1975-10-01   
20172                        [Margarita Tupitsyn]      1993-12-01   
20187                      [Judith May Fathallah]      2017-01-01   
20210           [MARK L. SAMPLE, Matthew K. Gold]      2012-01-01   
20233  [Marshall W. Alcorn, <suffix>Jr.</suffix>]      1987-02-01   

           docSubType_x docType_x                   doi_x  \
43     research-article   article         10.2307/1212711   
50     research-art

In [35]:
df = df.explode(['Locations in A', 'Locations in B'])

In [55]:
len(df)

932

In [58]:
# Replace old indices with new indices
df = df.reset_index()

In [59]:
# Specify chunk size left and right
num_characters_before_quote = 250
num_characters_after_quote = 750

# Create an empty list that we will populate with the contexts for quotations
context_chunks_for_quotations = []
context_chunks_ids = []

In [61]:
# Loop over each of the start and end locations to produce a x-character chunk of context
for item in range(len(df)):
    article_URL = df['id'].iloc[item]
    startandEndLocations = df['Locations in B'].iloc[item]
    print(article_URL)
    print(startandEndLocations[0])
    print(startandEndLocations[1])
    article_index = df[df['id'] == article_URL].index[0]
    print(article_index)
    article_title = df['title_y'].iloc[article_index]
    print(article_title)
#    article_decade = df_with_fulltexts['Decade'].loc[article_index]
#    print(article_decade)
    article_journal = df['isPartOf_x'].iloc[article_index]
    article_text = df['fullText'].iloc[article_index]

    cleaned_article_text = Text(article_text, article_title)
#    if article_journal in list_of_VS_journals:
    #if article_decade in decades_to_check:
    context_chunks_for_quotations.append((cleaned_article_text.text[startandEndLocations[0]-num_characters_before_quote:startandEndLocations[0]]) + "[...]" + (cleaned_article_text.text[startandEndLocations[1]:startandEndLocations[1]+num_characters_after_quote]))
    context_chunks_ids.append(df['id'].iloc[item])

http://www.jstor.org/stable/1212711
500
572
0
Fellini: Changing the Subject
http://www.jstor.org/stable/3333732
3946
4075
1
Understanding and Appreciating Art: The Relevance of Experience
http://www.jstor.org/stable/3333732
4380
4623
1
Understanding and Appreciating Art: The Relevance of Experience
http://www.jstor.org/stable/j.ctv312d0np
815979
816134
3
Mirrors of Entrapment and Emancipation
http://www.jstor.org/stable/26489194
4737
4777
4
EMPTY CONSTRUCTS
http://www.jstor.org/stable/41153396
13252
13493
5
'It Is Written': Tom Stoppard and the Drama of the Intertext
http://www.jstor.org/stable/j.ctt1sq5v63.6
5811
5893
6
‘Golden-Mouthed Anna of All the Russias’:
http://www.jstor.org/stable/45116752
13903
14144
7
Shakespearean Authorship in Popular British Cinema
http://www.jstor.org/stable/20057782
22701
22738
8
Mixing Impossible Genres: David Achkar and African Autobiographical Documentary
http://www.jstor.org/stable/40356588
21617
21696
9
En torno a la autoridad narrativa en Casa de 

http://www.jstor.org/stable/j.ctv2rh2bzp
656510
656667
80
Palimpsests of Themselves
http://www.jstor.org/stable/3513448
3767
3834
81
Birth. Life. "A Morta". de Andrade
http://www.jstor.org/stable/3513448
3845
4158
81
Birth. Life. "A Morta". de Andrade
http://www.jstor.org/stable/374772
15373
15396
83
Toward a Syntax of Fiction
http://www.jstor.org/stable/1478019
42092
42133
84
Understanding Interpretation
http://www.jstor.org/stable/44504960
4231
4413
85
"The nameless something": Authorial Suicide and the True Body of the "Autobiography of Mark Twain"
http://www.jstor.org/stable/44504960
12126
12366
85
"The nameless something": Authorial Suicide and the True Body of the "Autobiography of Mark Twain"
http://www.jstor.org/stable/44504960
27207
27323
85
"The nameless something": Authorial Suicide and the True Body of the "Autobiography of Mark Twain"
http://www.jstor.org/stable/468660
41847
41916
88
New Criticism and Deconstructive Criticism, or What's New?
http://www.jstor.org/stable/468

http://www.jstor.org/stable/j.ctt5vjtj7
86574
86667
159
How to Read a Folktale
http://www.jstor.org/stable/40002682
7474
7549
160
"Every Man Who Is Hanged Leaves a Poem":Criminal Poets in Victorian Street Balladas
http://www.jstor.org/stable/10.7591/j.ctt1g69x25.11
20412
20561
161
The Death of the Author:
http://www.jstor.org/stable/10.7591/j.ctt1g69x25.11
20568
20647
161
The Death of the Author:
http://www.jstor.org/stable/10.7591/j.ctt1g69x25.11
20783
20993
161
The Death of the Author:
http://www.jstor.org/stable/10.7591/j.ctt1g69x25.11
21087
21173
161
The Death of the Author:
http://www.jstor.org/stable/10.7591/j.ctt1g69x25.11
21433
21737
161
The Death of the Author:
http://www.jstor.org/stable/10.7591/j.ctt1g69x25.11
21755
21815
161
The Death of the Author:
http://www.jstor.org/stable/4335525
47291
47452
167
The Habitations of the Word
http://www.jstor.org/stable/40139115
21437
21552
168
Distant Relations: Chronicle of Various Close Readings
http://www.jstor.org/stable/40139115
215

http://www.jstor.org/stable/26365352
11670
11724
235
"AFTER THE / UNAUTHOR": FRAGMENTED AUTHOR FUNCTIONS IN TOM PHILLIPS'S "A HUMUMENT"
http://www.jstor.org/stable/26365352
13998
14263
235
"AFTER THE / UNAUTHOR": FRAGMENTED AUTHOR FUNCTIONS IN TOM PHILLIPS'S "A HUMUMENT"
http://www.jstor.org/stable/3734508
5672
5724
238
Review Article
http://www.jstor.org/stable/479969
3650
3722
239
Exploding the Intertextual: Buzzati and His [?] Reader of "I Sette Messaggeri"
http://www.jstor.org/stable/3513743
6352
6467
240
Machado de Assis: A obra entreaberta
http://www.jstor.org/stable/1290988
7915
7966
241
Dance Analysis in Performance
http://www.jstor.org/stable/3738750
4223
4265
242
Foucault on the "Question of the Author": A Critical Exegesis
http://www.jstor.org/stable/3738750
4345
4492
242
Foucault on the "Question of the Author": A Critical Exegesis
http://www.jstor.org/stable/3738750
4493
4589
242
Foucault on the "Question of the Author": A Critical Exegesis
http://www.jstor.org/stable/3738

http://www.jstor.org/stable/10.7591/j.ctt1g69x25
21755
21815
309
Clarissa's Ciphers
http://www.jstor.org/stable/10.1086/375037
16764
16821
315
Divining an Author: The Idea of Authorship in an Indian Religious Tradition
http://www.jstor.org/stable/23105072
17865
17913
316
Ordinary Readers, Extraordinary Texts and Ludmilla: Part One
http://www.jstor.org/stable/23105072
17918
18635
316
Ordinary Readers, Extraordinary Texts and Ludmilla: Part One
http://www.jstor.org/stable/4174498
14848
14915
318
Ignoto in the Age of Print: The Manipulation of Anonymity in Early Modern England
http://www.jstor.org/stable/10.7591/j.ctt207g6vm
531047
531079
319
Fictions of Authority
http://www.jstor.org/stable/24476211
54408
54441
320
"This Is Not a Parade, It's a Protest March": Intertextuality, Citation, and Political Action on the Streets of Bolivia and Argentina
http://www.jstor.org/stable/1500994
13678
13730
321
The Power of Silence: An Enquiry through Fictional Writing
http://www.jstor.org/stable/10.2

http://www.jstor.org/stable/3051087
17487
17645
387
Conflicting Logics: Twentieth-Century Studies at the Crossroads
http://www.jstor.org/stable/3051087
17653
17982
387
Conflicting Logics: Twentieth-Century Studies at the Crossroads
http://www.jstor.org/stable/26283639
4567
4640
390
The Incomplete Joyce
http://www.jstor.org/stable/24304348
34455
34521
391
Recuperating the Author: Consuming Fictions of the 1990S
http://www.jstor.org/stable/10.5149/9780807863237_strathausen
446723
446842
392
The Look of Things
http://www.jstor.org/stable/10.7591/j.ctt207g60p
11985
12214
393
Signature Pieces
http://www.jstor.org/stable/10.7591/j.ctt207g60p
12667
13223
393
Signature Pieces
http://www.jstor.org/stable/10.7591/j.ctt207g60p
13326
13399
393
Signature Pieces
http://www.jstor.org/stable/10.7591/j.ctt207g60p
13663
13736
393
Signature Pieces
http://www.jstor.org/stable/10.7591/j.ctt207g60p
14030
14269
393
Signature Pieces
http://www.jstor.org/stable/10.7591/j.ctt207g60p
14293
14409
393
Signature Pi

http://www.jstor.org/stable/778251
25111
25200
464
Looking Myself in the Mouth
http://www.jstor.org/stable/26237298
4746
4818
466
Sovereignty of the Dead
http://www.jstor.org/stable/26237298
5123
5206
466
Sovereignty of the Dead
http://www.jstor.org/stable/26237298
5522
5628
466
Sovereignty of the Dead
http://www.jstor.org/stable/3300118
5204
5345
469
Misusing Canonical Intertexts: Jamaica Kincaid, Wordsworth and Colonialism's "Absent Things"
http://www.jstor.org/stable/23925187
23436
23523
470
GOD IN THEORY: MILTON, LITERATURE AND THEODICY
http://www.jstor.org/stable/23925187
23597
23636
470
GOD IN THEORY: MILTON, LITERATURE AND THEODICY
http://www.jstor.org/stable/23925187
29101
29295
470
GOD IN THEORY: MILTON, LITERATURE AND THEODICY
http://www.jstor.org/stable/23925187
30132
30205
470
GOD IN THEORY: MILTON, LITERATURE AND THEODICY
http://www.jstor.org/stable/30001567
710
794
474
Rewriting 'Radical Innocence': Poetic Formations of Memory and Presence in Yeats, Mahon and Muldoon
http

http://www.jstor.org/stable/10.5325/jaynrandstud.17.2.0153
21555
21704
537
Philosophical Problems in Contemporary Art Criticism: Objectivism, Poststructuralism, and the Axiom of Authorship
http://www.jstor.org/stable/10.5325/jaynrandstud.17.2.0153
21861
22013
537
Philosophical Problems in Contemporary Art Criticism: Objectivism, Poststructuralism, and the Axiom of Authorship
http://www.jstor.org/stable/10.5325/jaynrandstud.17.2.0153
22024
22215
537
Philosophical Problems in Contemporary Art Criticism: Objectivism, Poststructuralism, and the Axiom of Authorship
http://www.jstor.org/stable/10.5325/jaynrandstud.17.2.0153
30075
30147
537
Philosophical Problems in Contemporary Art Criticism: Objectivism, Poststructuralism, and the Axiom of Authorship
http://www.jstor.org/stable/10.5325/jaynrandstud.17.2.0153
30218
30408
537
Philosophical Problems in Contemporary Art Criticism: Objectivism, Poststructuralism, and the Axiom of Authorship
http://www.jstor.org/stable/10.5325/jaynrandstud.17.2.0

http://www.jstor.org/stable/41482896
11050
11104
609
Critical Literacy in the Elementary Classroom
http://www.jstor.org/stable/20057369
22801
22918
611
Literary History and the Search for Certainty
http://www.jstor.org/stable/3481150
23077
23154
612
Lawyers, Law & the Movies: The Hitchcock Cases
http://www.jstor.org/stable/27172812
97829
97906
613
Anxieties of Authorship, Critique of Readership
http://www.jstor.org/stable/24586600
41081
41107
614
Dubuffet avec Damisch
http://www.jstor.org/stable/10.5699/modelangrevi.112.1.0257
4208
4279
615
Fashioning Spaces: Mode and Modernity in Late-Nineteenth-Century Paris
http://www.jstor.org/stable/20134517
18602
18669
616
An Interview with Larry McCaffery
http://www.jstor.org/stable/4539822
79183
79266
617
Between Humanism and Late Style
http://www.jstor.org/stable/24694584
30012
30169
618
Becoming Mad Bio-graphically: The Styling Body in Modern Japanese Literature
http://www.jstor.org/stable/26413685
22542
22696
619
VARIATIONS ON THE AUTHOR
htt

http://www.jstor.org/stable/3194904
10845
10883
682
"Little Corks That Mark a Sunken Net": Virginia Woolf's "Sketch of the past" as a Fictional Memoir
http://www.jstor.org/stable/j.ctv2z0vtrq
725016
725168
683
Materia-autore | Author-Matter
http://www.jstor.org/stable/43854546
15835
16000
684
A Contradictory Assemblage of Self: James Frey, Creative Nonfiction, and the Empire of Oprah
http://www.jstor.org/stable/j.ctvcszzjm
441711
441838
685
Chivalry, Reading, and Women's Culture in Early Modern Spain
http://www.jstor.org/stable/41348749
2323
2552
686
Theory, Disciplinarity, and the Study of Religion: Lessons from a Publishing Nightmare
http://www.jstor.org/stable/j.ctt1sq5v63
341427
341509
687
Twentieth-Century Russian Poetry
http://www.jstor.org/stable/40238974
14171
14490
688
Writing from the Periphery: The Case of Ngugi and Conrad
http://www.jstor.org/stable/488215
11141
11410
689
In Pursuit of Invisible Tracks: Photographs of a Dead Author
http://www.jstor.org/stable/488215
11141
1

http://www.jstor.org/stable/43307983
6684
6785
757
Postmodern Narrative and the Limits of Fantasy
http://www.jstor.org/stable/43307983
6991
7268
757
Postmodern Narrative and the Limits of Fantasy
http://www.jstor.org/stable/41304877
32386
32473
759
Writing the Vanishing Real: Hyperreality and Magical Realism
http://www.jstor.org/stable/44372198
17829
18048
760
Reading and Repeating "Our Mutual Friend"
http://www.jstor.org/stable/10.7591/j.ctt207g60p.4
11985
12214
761
INTRODUCTION
http://www.jstor.org/stable/10.7591/j.ctt207g60p.4
12667
13223
761
INTRODUCTION
http://www.jstor.org/stable/10.7591/j.ctt207g60p.4
13326
13399
761
INTRODUCTION
http://www.jstor.org/stable/10.7591/j.ctt207g60p.4
13663
13736
761
INTRODUCTION
http://www.jstor.org/stable/10.7591/j.ctt207g60p.4
14030
14269
761
INTRODUCTION
http://www.jstor.org/stable/10.7591/j.ctt207g60p.4
14293
14409
761
INTRODUCTION
http://www.jstor.org/stable/10.7591/j.ctt207g60p.4
14521
14688
761
INTRODUCTION
http://www.jstor.org/stable/10.7591

http://www.jstor.org/stable/2739278
90
181
832
Reading Differences: The Case of Letter 141 in Les Liaisons Dangereuses
http://www.jstor.org/stable/44325405
2848
2940
833
DEATH TO THE AUTHOR! EXPUNGING THE AUTHORIAL PRESENCE FROM TENNESSEE WILLIAMS'S SHORT STORIES
http://www.jstor.org/stable/44325405
6280
6395
833
DEATH TO THE AUTHOR! EXPUNGING THE AUTHORIAL PRESENCE FROM TENNESSEE WILLIAMS'S SHORT STORIES
http://www.jstor.org/stable/26867581
11590
11642
835
An Analysis of the Problematic Discourse Surrounding “Authentic Texts”
http://www.jstor.org/stable/42968079
6948
7076
836
Roland Barthes's Resurrection of the Author and Redemption of Biography
http://www.jstor.org/stable/42968079
7453
7587
836
Roland Barthes's Resurrection of the Author and Redemption of Biography
http://www.jstor.org/stable/42968079
19255
19378
836
Roland Barthes's Resurrection of the Author and Redemption of Biography
http://www.jstor.org/stable/468617
13684
13801
839
A Re-Vision of Literature
http://www.jstor.or

http://www.jstor.org/stable/777161
18727
18865
906
Where's the Artist? Feminist Practice and Poststructural Theories of Authorship
http://www.jstor.org/stable/777161
18874
19103
906
Where's the Artist? Feminist Practice and Poststructural Theories of Authorship
http://www.jstor.org/stable/777161
19230
19288
906
Where's the Artist? Feminist Practice and Poststructural Theories of Authorship
http://www.jstor.org/stable/777161
19293
19465
906
Where's the Artist? Feminist Practice and Poststructural Theories of Authorship
http://www.jstor.org/stable/jj.8665551.9
53458
53621
910
Truth, Lies, Education, Politics
http://www.jstor.org/stable/jj.8665550.6
25671
25779
911
Making Superhero Texts Mean:
http://www.jstor.org/stable/jj.8665550.6
25775
25991
911
Making Superhero Texts Mean:
http://www.jstor.org/stable/3176606
79504
79562
913
The Torment of Secrecy: Ethical and Epistemological Problems in the Study of Esoteric Traditions
http://www.jstor.org/stable/j.ctv1fxgbs
6
183
914
Liebe und Macht

In [64]:
print(context_chunks_for_quotations)
#len(context_chunks_ids)



In [None]:
Write output to a single text file

In [65]:
output_file = open(f'{resultsDir}/{projectName}-quotation-contexts.txt', mode='w', encoding='utf-8')

for context in context_chunks_for_quotations:
     output_file.write(context)
     output_file.write('\n')
output_file.close()

# Generate word frequency list for all context chunks