In [1]:
import pandas as pd
from collections import Counter
import spacy

# Load the spaCy `en_core_web_sm` pipeline once at module level;
# add_nlp_analysis_to_df below calls this shared `nlp` object for every text it processes.
nlp = spacy.load("en_core_web_sm")

def add_nlp_analysis_to_df(df, text_column):
    """Annotate every text in ``df[text_column]`` with spaCy token statistics.

    Each string cell is run through the module-level ``nlp`` pipeline and the
    results are stored in five new columns that are added to ``df`` in place:

    * ``word_counts``    -- dict mapping token text to its frequency
    * ``pos_counts``     -- dict mapping ``token.pos_`` to its frequency
    * ``lemma_counts``   -- dict mapping ``token.lemma_`` to its frequency
    * ``word_pos_pairs`` -- list of ``(token.text, token.pos_)`` tuples
    * ``token_details``  -- list of dicts with the keys ``text``, ``pos``,
      ``lemma`` and ``morph``

    Cells that are not strings (for example NaN for a missing text) receive
    empty containers of the matching type instead of being analysed.

    Args:
        df: DataFrame holding the texts. It is modified in place.
        text_column: Name of the column in ``df`` that contains the texts.

    Returns:
        The same ``df`` object, now carrying the five analysis columns.
    """
    analysis_columns = ['word_counts', 'pos_counts', 'lemma_counts', 'word_pos_pairs', 'token_details']

    def nlp_analysis(text):
        # Missing / non-string cells: return one empty placeholder per analysis
        # column, with the same container type the real analysis produces
        # (three dicts, then two lists), so the per-column extraction below
        # never indexes past the end of the tuple.
        if not isinstance(text, str):
            return {}, {}, {}, [], []

        doc = nlp(text)
        word_counts = Counter(token.text for token in doc)
        pos_counts = Counter(token.pos_ for token in doc)
        lemma_counts = Counter(token.lemma_ for token in doc)
        word_pos_pairs = [(token.text, token.pos_) for token in doc]
        token_details = [
            {'text': token.text, 'pos': token.pos_, 'lemma': token.lemma_, 'morph': str(token.morph)}
            for token in doc
        ]

        return dict(word_counts), dict(pos_counts), dict(lemma_counts), word_pos_pairs, token_details

    # One tuple of five results per row, indexed like df.
    analysis_results = df[text_column].apply(nlp_analysis)

    for position, col in enumerate(analysis_columns):
        # Bind `position` as a default argument so the lambda does not depend
        # on the loop variable's value at call time.
        df[col] = analysis_results.apply(lambda result, position=position: result[position])

    return df

In [2]:
# Load the learner corpus; the file is tab-separated despite its .csv extension.
data = pd.read_csv('learners_all_frog_row.csv', sep='\t', encoding='utf-8')
# Preview the first three rows.
data.head(3)

Unnamed: 0,Subcorpus,Filename,Year data collection,Placement test score (raw),Placement test score (%),Proficiency,Sex,Age,School/University/Institution,Major,...,Proficiency (self-assessment) in additional language writing,Medium,Task number,Task title,Writing/audio details,Minutes taken to complete the task,Where the task was done,Resources used,Text,Original text
0,Learners,DE_SP_B1_26_13_13_TM,2016,30 / 60,50.0,B1 (lower intermediate),Male,26,Bremen,BiPEB: English Speaking Cultures,...,A1 (lower beginner),Spoken,13,13. Frog,spoken_offline_classroom,,Inside classroom,,One day Tommy found a frog in a forest and bro...,
1,Learners,DE_WR_B1_26_13_13_TM,2016,30 / 60,50.0,B1 (lower intermediate),Male,26,Bremen,BiPEB: English Speaking Cultures,...,A1 (lower beginner),Written,13,13. Frog,written_online,,Inside classroom,,One day Tommy found a frog in the Forest. He b...,
2,Learners,DE_SP_B1_19_11_13_RN,2016,37 / 60,61.7,B1 (lower intermediate),Female,19,Bremen,"BA English-Speaking Cultures: English, Slavoni...",...,A1 (lower beginner),Spoken,13,13. Frog,spoken_offline_classroom,,Inside classroom,,One day a little boy called John uh with his d...,


In [3]:
# Example usage
result = add_nlp_analysis_to_df(data, 'Text')
result

Unnamed: 0,Subcorpus,Filename,Year data collection,Placement test score (raw),Placement test score (%),Proficiency,Sex,Age,School/University/Institution,Major,...,Minutes taken to complete the task,Where the task was done,Resources used,Text,Original text,word_counts,pos_counts,lemma_counts,word_pos_pairs,token_details
0,Learners,DE_SP_B1_26_13_13_TM,2016,30 / 60,50.0,B1 (lower intermediate),Male,26,Bremen,BiPEB: English Speaking Cultures,...,,Inside classroom,,One day Tommy found a frog in a forest and bro...,,"{'One': 1, 'day': 1, 'Tommy': 4, 'found': 3, '...","{'NUM': 1, 'NOUN': 26, 'PROPN': 4, 'VERB': 19,...","{'one': 1, 'day': 1, 'Tommy': 4, 'find': 3, 'a...","[(One, NUM), (day, NOUN), (Tommy, PROPN), (fou...","[{'text': 'One', 'pos': 'NUM', 'lemma': 'one',..."
1,Learners,DE_WR_B1_26_13_13_TM,2016,30 / 60,50.0,B1 (lower intermediate),Male,26,Bremen,BiPEB: English Speaking Cultures,...,,Inside classroom,,One day Tommy found a frog in the Forest. He b...,,"{'One': 1, 'day': 1, 'Tommy': 3, 'found': 2, '...","{'NUM': 1, 'NOUN': 22, 'PROPN': 5, 'VERB': 22,...","{'one': 1, 'day': 1, 'Tommy': 3, 'find': 4, 'a...","[(One, NUM), (day, NOUN), (Tommy, PROPN), (fou...","[{'text': 'One', 'pos': 'NUM', 'lemma': 'one',..."
2,Learners,DE_SP_B1_19_11_13_RN,2016,37 / 60,61.7,B1 (lower intermediate),Female,19,Bremen,"BA English-Speaking Cultures: English, Slavoni...",...,,Inside classroom,,One day a little boy called John uh with his d...,,"{'One': 1, 'day': 1, 'a': 5, 'little': 2, 'boy...","{'NUM': 2, 'NOUN': 17, 'DET': 8, 'ADJ': 7, 'VE...","{'one': 1, 'day': 1, 'a': 5, 'little': 2, 'boy...","[(One, NUM), (day, NOUN), (a, DET), (little, A...","[{'text': 'One', 'pos': 'NUM', 'lemma': 'one',..."
3,Learners,DE_WR_B1_19_11_13_RN,2016,37 / 60,61.7,B1 (lower intermediate),Female,19,Bremen,"BA English-Speaking Cultures: English, Slavoni...",...,,Inside classroom,,"One day, there was a boy called John who wante...",,"{'One': 1, 'day': 1, ',': 5, 'there': 1, 'was'...","{'NUM': 2, 'NOUN': 14, 'PUNCT': 16, 'PRON': 17...","{'one': 2, 'day': 1, ',': 5, 'there': 1, 'be':...","[(One, NUM), (day, NOUN), (,, PUNCT), (there, ...","[{'text': 'One', 'pos': 'NUM', 'lemma': 'one',..."
4,Learners,DE_SP_B1_21_12_13_SE,2016,37 / 60,61.7,B1 (lower intermediate),Female,21,Bremen,"Teaching Gym, ESC, Romance Studies: French, En...",...,,Inside classroom,,One day a boy was sitting in his room / uh he ...,,"{'One': 1, 'day': 1, 'a': 8, 'boy': 1, 'was': ...","{'NUM': 1, 'NOUN': 37, 'DET': 19, 'AUX': 14, '...","{'one': 1, 'day': 1, 'a': 8, 'boy': 1, 'be': 9...","[(One, NUM), (day, NOUN), (a, DET), (boy, NOUN...","[{'text': 'One', 'pos': 'NUM', 'lemma': 'one',..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
754,Learners,ES_SP_C1_21_15_13_ETC,2016,114 / 120,95.0,C1 (lower advanced),Female,21,Universidad de Granada,Filología (excepto Filología Inglesa),...,,,,okay so one night / uh / this kid had uh was l...,,"{'okay': 1, 'so': 4, 'one': 1, 'night': 1, '/'...","{'INTJ': 9, 'ADV': 9, 'NUM': 1, 'NOUN': 23, 'P...","{'okay': 1, 'so': 4, 'one': 1, 'night': 1, '/'...","[(okay, INTJ), (so, ADV), (one, NUM), (night, ...","[{'text': 'okay', 'pos': 'INTJ', 'lemma': 'oka..."
755,Learners,ES_WR_C2_46_36_13_ARG,2017,55 / 60,91.7,C2 (upper advanced),Male,46,COLEGIO NTRA. SRA. DE LOS DOLORES,,...,4.0,Outside classroom,,"Once upon a time, there was a little boy who l...",,"{'Once': 1, 'upon': 1, 'a': 10, 'time': 1, ','...","{'ADV': 8, 'SCONJ': 5, 'DET': 22, 'NOUN': 30, ...","{'once': 1, 'upon': 1, 'a': 10, 'time': 1, ','...","[(Once, ADV), (upon, SCONJ), (a, DET), (time, ...","[{'text': 'Once', 'pos': 'ADV', 'lemma': 'once..."
756,Learners,ES_WR_C2_32_15_13_IAMT,2017,56 / 60,93.3,C2 (upper advanced),Female,32,La Laguna / UNED,Licenciatura Matemáticas / Grado Estudios Ingl...,...,8.0,Outside classroom,,"Once upon a time, a boy had caught a frog in t...",,"{'Once': 1, 'upon': 1, 'a': 12, 'time': 1, ','...","{'ADV': 5, 'SCONJ': 6, 'DET': 39, 'NOUN': 48, ...","{'once': 1, 'upon': 1, 'a': 12, 'time': 1, ','...","[(Once, ADV), (upon, SCONJ), (a, DET), (time, ...","[{'text': 'Once', 'pos': 'ADV', 'lemma': 'once..."
757,Learners,ES_WR_C2_20_18_13_JIFM,2017,57 / 60,95.0,C2 (upper advanced),Male,20,Universidad de Valladolid,Grado en Estudios Ingleses,...,5.0,Outside classroom,,"One day, there was a wee boy who lived with hi...",,"{'One': 1, 'day': 2, ',': 19, 'there': 4, 'was...","{'NUM': 2, 'NOUN': 42, 'PUNCT': 36, 'PRON': 38...","{'one': 2, 'day': 2, ',': 19, 'there': 4, 'be'...","[(One, NUM), (day, NOUN), (,, PUNCT), (there, ...","[{'text': 'One', 'pos': 'NUM', 'lemma': 'one',..."


In [4]:
# Save the annotated corpus as a tab-separated file without the row index.
# `data` already carries the analysis columns because the call in the previous
# cell added them to it in place.
# NOTE(review): the dict/list analysis columns are presumably written as their
# string representation and would need parsing when read back — confirm.
data.to_csv('learners_all_frog.csv', sep='\t', encoding='utf-8', index=False)