In [None]:
# New env (NER)
# required versions in 'requirements.txt'

# 0. Read Source Data & Preprocessing

In [1]:
import pandas as pd

In [29]:
data = pd.read_excel('./data/3415_All_STORIES_Updated.xlsx', index_col = None, header = 0)

In [30]:
data.head()

Unnamed: 0,STORY ID,STORY TEXT
0,1.0,"Wörl, Ludwig \n\nLudwig Wörl (1906-1967) spent..."
1,2.0,"Pscheidt, Johann_x000D_\n_x000D_\nJohann Psche..."
2,3.0,"Czeżowski, Tadeusz_x000D_\nCzeżowska, Antonina..."
3,4.0,"Kowalski, Władysław_x000D_\n_x000D_\nDuring th..."
4,6.0,"Choms, Władysława_x000D_\n_x000D_\nWładysława ..."


In [31]:
# remove '\n' and '_x000D_'
data['STORY TEXT'] = data['STORY TEXT'].str.replace("_x000D_", " ")
data['STORY TEXT'] = data['STORY TEXT'].replace(r'\s+|\\n', ' ', regex=True)

In [32]:
# we have
# 3414 of stories
data

Unnamed: 0,STORY ID,STORY TEXT
0,1.0,"Wörl, Ludwig Ludwig Wörl (1906-1967) spent 11 ..."
1,2.0,"Pscheidt, Johann Johann Pscheidt was born in t..."
2,3.0,"Czeżowski, Tadeusz Czeżowska, Antonina Czeżows..."
3,4.0,"Kowalski, Władysław During the occupation, Wła..."
4,6.0,"Choms, Władysława Władysława Choms, the wife o..."
...,...,...
3409,13524.0,"Kosek, Julian Kosek, Janina Mordechai Wulkan (..."
3410,13611.0,"Sajowski, Mikołaj Sajowska, Helena Dembińska (..."
3411,5257.0,"File 5257, 5257a ZIENOWICZ, HELENA KUKOLEWSKI,..."
3412,3053.0,"Files 3053; 3053a Badowski, Stefan Franciszek/..."


In [120]:
data = data[:20]

# 1. Milestone 1: Find Names - NER & Co-reference Resolution

## 1.1 Find all Names: Simple NER: Start with SPACY Package and model “en_core_web_md” + "en_coreference_web_trf"

In [6]:
import spacy
import spacy_experimental

In [33]:
# !python -m spacy download en_core_web_md

Collecting en-core-web-md==3.4.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-3.4.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')


In [7]:
# Load basic Spacy model for splitting sentences and simple NER
nlp_base = spacy.load('en_core_web_md')
nlp_coref = spacy.load('en_coreference_web_trf')

# Combine two different models (Coref + NER)

# use replace_listeners for the coref components
nlp_coref.replace_listeners("transformer", "coref", ["model.tok2vec"])
nlp_coref.replace_listeners("transformer", "span_resolver", ["model.tok2vec"])

# we won't copy over the span cleaner
nlp_base.add_pipe("coref", source=nlp_coref)
nlp_base.add_pipe("span_resolver", source=nlp_coref)

  from .autonotebook import tqdm as notebook_tqdm


<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x7fbf0381a220>

## Coreference Resolution

In [8]:
# since Spacy doesn't have direct coreference resolver
# we define lightweight function for resolving references in text
def resolve_references(doc) -> str:
    """
    Function for resolving references with the coref ouput
    doc (Doc): The Doc object processed by the coref pipeline
    RETURNS (str): The Doc string with resolved references
    """
    # token.idx : token.text
    token_mention_mapper = {}
    output_string = ""
    clusters = [
    val for key, val in doc.spans.items() if key.startswith("coref_cluster")
        ]

    # Iterate through every found cluster
    for cluster in clusters:
        first_mention = cluster[0]
        # Iterate through every other span in the cluster
        for mention_span in list(cluster)[1:]:
            # Set first_mention as value for the first token in mention_span in the token_mention_mapper
            token_mention_mapper[mention_span[0].idx] = first_mention.text + mention_span[0].whitespace_
            for token in mention_span[1:]:
                # Set empty string for all the other tokens in mention_span
                token_mention_mapper[token.idx] = ""
    # Iterate through every token in the Doc
    for token in doc:
        # Check if token exists in token_mention_mapper
        if token.idx in token_mention_mapper:
            output_string += token_mention_mapper[token.idx]
        # Else add original token text
        else:
            output_string += token.text + token.whitespace_
    return output_string

In [121]:
coref_data = pd.DataFrame(columns = ['STORY ID', 'STORY TEXT'])

from spacy import displacy
from tqdm import tqdm

for row in tqdm(data.itertuples(), total = data.shape[0]):
    resolved_doc = resolve_references(nlp_base(row._2))
    # output = nlp_base(resolved_doc)
    # displacy.render(output, style="ent")
    new_row = {'STORY ID' : row._1, 'STORY TEXT' : resolved_doc}
    coref_data.loc[len(coref_data)] = new_row

100%|██████████| 20/20 [01:25<00:00,  4.27s/it]


In [126]:
coref_data

Unnamed: 0,STORY ID,STORY TEXT
0,1.0,"Wörl, Ludwig Ludwig Wörl (1906-1967) spent 11 ..."
1,2.0,"Pscheidt, Johann Johann Pscheidt was born in t..."
2,3.0,"Czeżowski, Czeżowski , Antonina Czeżowska, Ter..."
3,4.0,"Kowalski, Władysław During the occupation, Kow..."
4,6.0,"Choms, Władysława Władysława Choms, the wife o..."
5,7.0,"Hencel, Hencel , Roman At various times during..."
6,8.0,"Hartmann, Hans Hauptmann Hartmann , was one of..."
7,11.0,Gertruda Babilinska Gertruda Babilinska was bo...
8,13.0,"Chacza, Edward Edward Chacza, who lived in Bar..."
9,14.0,"Semenyuk, Domna Loseva (Semenyuk), Nadezhda Du..."


In [291]:
coref_data['STORY TEXT'][1]

'Pscheidt, Johann Johann Pscheidt was born in the city of Radauti near Czernowitz, in an area that was in Romania at the time and later became part of the Ukraine. Pscheidt, Johann Johann Pscheidt was a Volksdeutsche, or ethnic German. At the beginning of the 1930s, Pscheidt, Johann Johann Pscheidt worked as a building contractor in Czernowitz. Without revealing Pscheidt, Johann Johann Pscheidt identity, Pscheidt, Johann Johann Pscheidt helped poor Jews in Czernowitz with food and money. Pscheidt, Johann Johann Pscheidt also gave the Jewish workers that Pscheidt, Johann Johann Pscheidt employed money over and above the Jewish workers that he employed salaries, and provided the money for two of the Jewish workers that he employed to immigrate to Israel. In 1940 Czernowitz was annexed to the Soviet Union. As part of the arrangement with Germany the Volksdeutsche of the area were forced to move to areas belonging to Germany. In 1941 Pscheidt, Johann Johann Pscheidt moved to Zagłębie provi

### Separate into sentences, find Named Entity (PERSON) and make a dataframe

In [132]:
table_1 = pd.DataFrame(columns = ['storynumber', 'name', 'sentence'])

In [133]:
for id, story in zip(coref_data['STORY ID'], coref_data['STORY TEXT']):
    sentences = [i for i in nlp_base(story).sents]
    for sentence in sentences:
        for entity in sentence.ents:
            if entity.label_ == 'PERSON':
                stc = str(sentence)
                new_row = {'storynumber' : id, 'name' : entity.text, 'sentence' : stc}
                table_1.loc[len(table_1)] = new_row

KeyboardInterrupt: 

In [110]:
# Extract Output
table_1.to_csv('./output/3415_SPACY_All_Names.xlsx', index = False)

In [68]:
table_1 = pd.read_csv('./output/3415_SPACY_All_Names.xlsx')

## 1.2 Find Rescuers Names: Filtering Only Rescuers (Righteous Among the Nations) Names

### 1.2.1.1 Step 1: Preparing a table of filtered sentences: “Rescuing Phrases-Name” - Lexicon-based

In [134]:
# Table 1
table_1
# table1 = pd.read_excel('./output/3415_SPACY_All_Names.xlsx', index = False)

Unnamed: 0,storynumber,name,sentence
0,1.0,Ludwig Ludwig Wörl,"Wörl, Ludwig Ludwig Wörl (1906-1967) spent 11 ..."
1,1.0,Wörl,"Wörl, Ludwig Ludwig Wörl (1906-1967) spent 11 ..."
2,1.0,Ludwig Ludwig Wörl,"Wörl, Ludwig Ludwig Wörl (1906-1967) spent 11 ..."
3,1.0,Ludwig Ludwig Wörl,"Wörl, Ludwig Ludwig Wörl (1906-1967) was first..."
4,1.0,Wörl,After spending some nine months in a dark dete...
5,1.0,Ludwig Ludwig Wörl,After spending some nine months in a dark dete...
6,1.0,Wörl,"In 1942, Wörl, Ludwig Ludwig Wörl (1906-1967) ..."
7,1.0,Ludwig Ludwig Wörl,"In 1942, Wörl, Ludwig Ludwig Wörl (1906-1967) ..."
8,1.0,Lagerälteste,Appointed as the Lagerälteste (the camp elder)...
9,1.0,Wörl,Appointed as the Lagerälteste (the camp elder)...


In [70]:
# Table 2 (lexicon)
table_2 = pd.read_excel('./data/Final_Lexicon_Updated.xlsx')

In [71]:
table_2

Unnamed: 0,Story ID,Rescuing Verb,Rescuing Phrase
0,1.0,sent,his life as a political prisoner in nazi conce...
1,1.0,sent,"to the camp’s joinery and later, as a trained ..."
2,1.0,employed,personnel. appointed as the lagerälteste (the ...
3,1.0,saving,lagerälteste (the camp elder) of the hospital ...
4,1.0,put himself at risk,"barracks, wörl, against the express orders of ..."
...,...,...,...
8922,5257.0,looked after,"Helena looked after the children, obtaining ""A..."
8923,5257.0,looked after,"Helena looked after the children, obtaining ""A..."
8924,5257.0,sheltered,"Initially, they were only supposed to stay wit..."
8925,5257.0,obtained,Helenapresented the fugitives as her brother’s...


In [135]:
#Table3
table_3 = pd.DataFrame(columns = ['Story ID','Rescuing Verb', 'Rescuing_phrases-name'])

In [73]:
# input
# Table1
table_1['sentence'][0]
# check if rescuing verb appear in the sentence. (Lexicon reference - same story ID)
# If there is rescuing verb existed,
# (Also name)
# add this sentence to a new table Table3 

'Wörl, Ludwig Ludwig Wörl (1906-1967) spent 11 years of his life as a political prisoner in Nazi concentration camps.'

In [74]:
# sampling table_1
table_1_ = table_1[:50]

In [136]:
table_1.head()

Unnamed: 0,storynumber,name,sentence
0,1.0,Ludwig Ludwig Wörl,"Wörl, Ludwig Ludwig Wörl (1906-1967) spent 11 ..."
1,1.0,Wörl,"Wörl, Ludwig Ludwig Wörl (1906-1967) spent 11 ..."
2,1.0,Ludwig Ludwig Wörl,"Wörl, Ludwig Ludwig Wörl (1906-1967) spent 11 ..."
3,1.0,Ludwig Ludwig Wörl,"Wörl, Ludwig Ludwig Wörl (1906-1967) was first..."
4,1.0,Wörl,After spending some nine months in a dark dete...


In [137]:
for row in tqdm(table_1.itertuples(), total = table_1.shape[0]):
    # storyid = row.storynumber
    # 본 회차 스토리에서 찾아내야하는 verb 목록 불러오고
    verbs_to_find = list(set(table_2[table_2['Story ID'] == row.storynumber]['Rescuing Verb']))
    # row 한개를 nlp_base에 넣고 verb 추출
    doc = nlp_base(row.sentence)
    verbs_extracted = [token for token in doc if token.pos_ == 'VERB']
    for verb in verbs_extracted:
        # 존재하는 verb들 중 lexicon verbs 에 있는 verb를 발견하고
        if str(verb) in verbs_to_find:
            # 그 sentence의 사람 이름도 있다면(당연히 있지) 통과
            # 이 부분은 추후에 제거 가능 ( 나혼자 해본것.)
            # for entity in doc.ents:
            #     if entity.label_ == 'PERSON':
            #          print(entity.text)

            new_row = {'Story ID' : row.storynumber, 'Rescuing Verb' : str(verb), 'Rescuing_phrases-name' : row.sentence}
            table_3.loc[len(table_3)] = new_row

100%|██████████| 40/40 [00:16<00:00,  2.47it/s]


In [139]:
# Table 3
table_3

Unnamed: 0,Story ID,Rescuing Verb,Resuing_phrases-name
0,1.0,sent,"Wörl, Ludwig Ludwig Wörl (1906-1967) was first..."
1,1.0,sent,"In 1942, Wörl, Ludwig Ludwig Wörl (1906-1967) ..."
2,1.0,sent,"In 1942, Wörl, Ludwig Ludwig Wörl (1906-1967) ..."
3,1.0,employed,Appointed as the Lagerälteste (the camp elder)...
4,1.0,saving,Appointed as the Lagerälteste (the camp elder)...
5,1.0,employed,Appointed as the Lagerälteste (the camp elder)...
6,1.0,saving,Appointed as the Lagerälteste (the camp elder)...
7,1.0,employed,Appointed as the Lagerälteste (the camp elder)...
8,1.0,saving,Appointed as the Lagerälteste (the camp elder)...
9,1.0,obtain,Ludwig Ludwig Wörl (1906-1967) at risk in orde...


In [140]:
# Table_4 (Merge table_2 and table_3)
# table_4: Complete Lexicon Structure Example
table_4 = pd.merge(table_2, table_3, how = 'left', on = ['Story ID','Rescuing Verb'])

In [142]:
# drop duplicated rows
table_4.drop_duplicates(inplace = True)

In [143]:
table_4

Unnamed: 0,Story ID,Rescuing Verb,Rescuing Phrase,Resuing_phrases-name
0,1.0,sent,his life as a political prisoner in nazi conce...,"Wörl, Ludwig Ludwig Wörl (1906-1967) was first..."
1,1.0,sent,his life as a political prisoner in nazi conce...,"In 1942, Wörl, Ludwig Ludwig Wörl (1906-1967) ..."
3,1.0,sent,"to the camp’s joinery and later, as a trained ...","Wörl, Ludwig Ludwig Wörl (1906-1967) was first..."
4,1.0,sent,"to the camp’s joinery and later, as a trained ...","In 1942, Wörl, Ludwig Ludwig Wörl (1906-1967) ..."
6,1.0,employed,personnel. appointed as the lagerälteste (the ...,Appointed as the Lagerälteste (the camp elder)...
...,...,...,...,...
8938,4481.0,recognized,"On January 17, 1990, Yad Vashem recognized Józ...",
8939,5257.0,looked after,"Helena looked after the children, obtaining ""A...",
8941,5257.0,sheltered,"Initially, they were only supposed to stay wit...",
8942,5257.0,obtained,Helenapresented the fugitives as her brother’s...,


In [201]:
table_4.to_csv('./drafts/table_4.xlsx', index = False)

### 1.2.1.2 Step 2:“Rescuers Names filtering” - Step-by-Step Breakdown

In [265]:
table_4 = pd.read_csv('./drafts/table_4.xlsx')

In [266]:
table_4

Unnamed: 0,Story ID,Rescuing Verb,Rescuing Phrase,Resuing_phrases-name
0,1.0,sent,his life as a political prisoner in nazi conce...,
1,1.0,sent,"to the camp’s joinery and later, as a trained ...",
2,1.0,employed,personnel. appointed as the lagerälteste (the ...,Appointed as the Lagerälteste (the camp elder)...
3,1.0,employed,personnel. appointed as the lagerälteste (the ...,Appointed as the Lagerälteste (the camp elder)...
4,1.0,saving,lagerälteste (the camp elder) of the hospital ...,Appointed as the Lagerälteste (the camp elder)...
...,...,...,...,...
14856,5257.0,looked after,"Helena looked after the children, obtaining ""A...",
14857,5257.0,looked after,"Helena looked after the children, obtaining ""A...",
14858,5257.0,sheltered,"Initially, they were only supposed to stay wit...",
14859,5257.0,obtained,Helenapresented the fugitives as her brother’s...,


In [267]:
table_4[:20]

Unnamed: 0,Story ID,Rescuing Verb,Rescuing Phrase,Resuing_phrases-name
0,1.0,sent,his life as a political prisoner in nazi conce...,
1,1.0,sent,"to the camp’s joinery and later, as a trained ...",
2,1.0,employed,personnel. appointed as the lagerälteste (the ...,Appointed as the Lagerälteste (the camp elder)...
3,1.0,employed,personnel. appointed as the lagerälteste (the ...,Appointed as the Lagerälteste (the camp elder)...
4,1.0,saving,lagerälteste (the camp elder) of the hospital ...,Appointed as the Lagerälteste (the camp elder)...
5,1.0,saving,lagerälteste (the camp elder) of the hospital ...,Appointed as the Lagerälteste (the camp elder)...
6,1.0,put himself at risk,"barracks, wörl, against the express orders of ...",
7,1.0,obtain,"the ss, employed jewish doctors, thus saving t...",
8,1.0,treatment,himself at risk in order to obtain at least a ...,
9,1.0,forge,obtain at least a minimum of the required medi...,


In [275]:
# Function to find the full name of the rescuer, and split it into first name and last name
def extract_name_info(row):
    sentence = row['Resuing_phrases-name']  # Adjusted column name
    rescuing_verb = row['Rescuing Verb']    # Adjusted column name
    
    # Skip NaN values
    if pd.isna(sentence):
        return pd.Series([None, None, None])  # Return None for full name, first name, and last name
    
    # Process sentence with Spacy
    doc = nlp_base(sentence)
    
    # Find the full name of the rescuer using NER
    full_name = None
    for ent in doc.ents:
        if ent.label_ == "PERSON":
            full_name = ent.text  # Get the first PERSON entity (assuming it's the rescuer)
            break
    
    if not full_name:
        return pd.Series([None, None, None])  # Return None if no PERSON entity is found
    
    # Split the full name into first and last name
    name_parts = full_name.split()
    if len(name_parts) > 1:
        last_name = name_parts[-1]
        first_name = " ".join(name_parts[:-1])
    else:
        first_name = full_name
        last_name = None  # If no last name, leave it as None
    
    return pd.Series([full_name, first_name, last_name])

In [288]:
sample_table_4 = table_4[:40]

In [283]:
# Apply the function to the DataFrame and store the results in separate columns
sample_table_4[['Full Name', 'First Name', 'Last Name']] = sample_table_4.apply(extract_name_info, axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_table_4[['Full Name', 'First Name', 'Last Name']] = sample_table_4.apply(extract_name_info, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_table_4[['Full Name', 'First Name', 'Last Name']] = sample_table_4.apply(extract_name_info, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-

In [284]:
sample_table_4

Unnamed: 0,Story ID,Rescuing Verb,Rescuing Phrase,Resuing_phrases-name,Full Name,First Name,Last Name
0,1.0,sent,his life as a political prisoner in nazi conce...,,,,
1,1.0,sent,"to the camp’s joinery and later, as a trained ...",,,,
2,1.0,employed,personnel. appointed as the lagerälteste (the ...,Appointed as the Lagerälteste (the camp elder)...,Lagerälteste,Lagerälteste,
3,1.0,employed,personnel. appointed as the lagerälteste (the ...,Appointed as the Lagerälteste (the camp elder)...,Lagerälteste,Lagerälteste,
4,1.0,saving,lagerälteste (the camp elder) of the hospital ...,Appointed as the Lagerälteste (the camp elder)...,Lagerälteste,Lagerälteste,
5,1.0,saving,lagerälteste (the camp elder) of the hospital ...,Appointed as the Lagerälteste (the camp elder)...,Lagerälteste,Lagerälteste,
6,1.0,put himself at risk,"barracks, wörl, against the express orders of ...",,,,
7,1.0,obtain,"the ss, employed jewish doctors, thus saving t...",,,,
8,1.0,treatment,himself at risk in order to obtain at least a ...,,,,
9,1.0,forge,obtain at least a minimum of the required medi...,,,,


In [281]:
table_5 = sample_table_4[['Story ID','Full Name', 'Last Name', 'First Name', 'Resuing_phrases-name']]

Unnamed: 0,Story ID,Full Name,Last Name,First Name,Resuing_phrases-name
0,1.0,,,,
1,1.0,,,,
2,1.0,Lagerälteste,,Lagerälteste,Appointed as the Lagerälteste (the camp elder)...
3,1.0,Lagerälteste,,Lagerälteste,Appointed as the Lagerälteste (the camp elder)...
4,1.0,Lagerälteste,,Lagerälteste,Appointed as the Lagerälteste (the camp elder)...
5,1.0,Lagerälteste,,Lagerälteste,Appointed as the Lagerälteste (the camp elder)...
6,1.0,,,,
7,1.0,,,,
8,1.0,,,,
9,1.0,,,,


In [None]:
table_5.to_csv('./output/3415_SPACY_All_Rescuers.xlsx', index = False)

### Rescuing DATE

In [287]:
# Function to find the full name of the rescuer, first name, last name, and the rescued date
def extract_name_and_date(row):
    sentence = row['Resuing_phrases-name']  # Adjusted column name
    rescuing_verb = row['Rescuing Verb']    # Adjusted column name
    
    # Skip NaN values
    if pd.isna(sentence):
        return pd.Series([None, None, None, None])  # Return None for all if sentence is NaN
    
    # Process sentence with Spacy
    doc = nlp_base(sentence)
    
    # Extract full name (PERSON) and rescued date (DATE)
    full_name = None
    rescued_date = None
    for ent in doc.ents:
        if ent.label_ == "PERSON" and full_name is None:  # Get the first PERSON entity
            full_name = ent.text
        if ent.label_ == "DATE" and rescued_date is None:  # Get the first DATE entity
            rescued_date = ent.text

    if not full_name:
        return pd.Series([None, None, None, rescued_date])  # Return None for names if no PERSON found
    
    # Split the full name into first name and last name
    name_parts = full_name.split()
    if len(name_parts) > 1:
        last_name = name_parts[-1]
        first_name = " ".join(name_parts[:-1])
    else:
        first_name = full_name
        last_name = None  # If no last name, leave it as None
    
    return pd.Series([full_name, first_name, last_name, rescued_date])

In [289]:

# Apply the function to the DataFrame and store the results in separate columns
sample_table_4[['Full Name', 'First Name', 'Last Name', 'Rescued Date']] = sample_table_4.apply(extract_name_and_date, axis=1)

# Print the resulting DataFrame
print(sample_table_4)

    Story ID        Rescuing Verb  \
0        1.0                 sent   
1        1.0                 sent   
2        1.0             employed   
3        1.0             employed   
4        1.0               saving   
5        1.0               saving   
6        1.0  put himself at risk   
7        1.0               obtain   
8        1.0            treatment   
9        1.0                forge   
10       1.0            influence   
11       1.0           protecting   
12       1.0             exempted   
13       1.0            protected   
14       1.0               helped   
15       1.0            dedicated   
16       2.0                 sent   
17       2.0                 sent   
18       2.0                 sent   
19       2.0                 sent   
20       2.0                 sent   
21       2.0                 sent   
22       2.0             arranged   
23       2.0               rescue   
24       2.0             provided   
25       2.0             employed   
2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_table_4[['Full Name', 'First Name', 'Last Name', 'Rescued Date']] = sample_table_4.apply(extract_name_and_date, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample_table_4[['Full Name', 'First Name', 'Last Name', 'Rescued Date']] = sample_table_4.apply(extract_name_and_date, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guid

In [292]:
sample_table_4.iloc[29]

KeyError: 29

In [None]:
table_6.to_csv('3415_SPACY_All_Rescuers_Dates.xlsx', index = False)

In [219]:
# 문장 처리
# doc = nlp(sentence)

# 사람(Entity) 추출
people = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]
print("Detected people:", people)

# 의존성 파싱으로 주어-동사 관계 분석
for token in doc:
    if token.dep_ == "nsubj" and token.head.lemma_ == "help":
        print(f"Subject: {token.text}, Verb: {token.head.text}")

Detected people: ['Wörl', 'Ludwig Ludwig Wörl']
Subject: Wörl, Verb: helped


In [None]:
3415_SPACY_All_Rescuers.XLS

In [None]:
output_file_name = '3415_SPACY_All_Names.xlsx'
output.to_csv(f'./output/{output_file_name}')