# PVM Analysis

## Set-up

In [40]:
# Dependencies
import pandas as pd
import numpy as np
import textdistance as td

# Make sure you can see all output
pd.options.display.max_rows = 4000
# pd.options.display.max_columns = 4000

In [41]:
# Store filepath in a variable
df = pd.read_csv("Resources/all_data_pvm_acc3.csv")

df.head()

Unnamed: 0,PID,Target,Production,Prod_Word_Dur,NOTES,Prod_Arpabet,Word_ID,Session_ID,Prod_Word_N,Prod_Phon_N,...,palatal_Acc,glottal_Acc,stop_Acc,fricative_Acc,affricate_Acc,glide_Acc,FeatureWeighted_PhonAcc,PVMWeighted_PhonAcc,Prod_N_Tot_Phonemes,Damerau_Levenshtein
0,15,book,B UH K,0.295646,Article (É) before word,B,1,0,1,1,...,1,1,1,1,1,1,1.0,1.0,3,0
1,15,book,B UH K,0.295646,Article (É) before word,UH,1,0,1,2,...,1,1,1,1,1,1,1.0,1.0,3,0
2,15,book,B UH K,0.295646,Article (É) before word,K,1,0,1,3,...,1,1,1,1,1,1,1.0,1.0,3,0
3,15,ball,B AO L,0.397365,,B,2,0,2,1,...,1,1,1,1,1,1,1.0,1.0,3,0
4,15,ball,B AO L,0.397365,,AO,2,0,2,2,...,1,1,1,1,1,1,1.0,1.0,3,0


## PVM Analyses

In [42]:
# Apply function to determine if change constituted gliding
# Gliding: When /r/ or /l/ are produced as a /w/ or /j/, such as “wabbit" for "rabbit" or "yeyow" for "yellow”)
# 1 = yes; 0 = no
df['gliding'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Manner_Acc'] == 0
        and x['Target_approximant'] == 1
        and x['Prod_glide'] == 1
        else 
        0, 
        axis=1
    )
)

In [43]:
# Apply function to determine if change constituted stopping
# Stopping: When a fricative (e.g., /f/ or /s/) or affricate (/ʧ/ or /ʤ/) is substituted with a stop consonant, such as “pan" for "fan" or "dump for "jump”)
# 1 = yes; 0 = no
df['stopping'] = (
    df
    .apply(
        lambda x: 
            1 
            if x['Manner_Acc'] == 0
            and (
                x['Target_affricate'] == 1 
                or 
                x['Target_fricative'] == 1
                )
            and x['Prod_stop'] == 1
            else 
            0, 
        axis=1
    )
)

In [44]:
# Apply function to determine if change constituted affrication
# Affrication: When a nonaffricate is replaced with an affricate, such as “joor" for "door” 
# 1 = yes; 0 = no
df['affrication'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Manner_Acc'] == 0
        and x['Prod_affricate'] == 1
        and x['Target_vowel'] == -1
        else 
        0, 
        axis=1
    )
)

In [45]:
# Apply function to determine if change constituted deaffrication
# Deaffrication: When an affricate is replaced with a stop or fricative, such as “ships” for “chips”
# 1 = yes; 0 = no
df['deaffrication'] = (
    df
    .apply(
        lambda x: 
            1 
            if x['Manner_Acc'] == 0
            and x['Target_affricate'] == 1 
            and (
                x['Prod_stop'] == 1
                or
                x['Prod_fricative'] == 1
                )
            else 
            0, 
        axis=1
    )
)

In [46]:
# Apply function to determine if change constituted denasalization
# Denasalization: When a nasal consonant changes to a non-nasal consonant, such as “doze” for “nose”
# 1 = yes; 0 = no
df['denasalization'] = (
    df
    .apply(
        lambda x: 
            1 
            if x['Manner_Acc'] == 0
            and x['Place_Acc'] == 1
            and x['Target_nasal'] == 1
            else 
            0, 
        axis=1
    )
)

In [47]:
# Apply function to determine if change constituted nasalization
# Nasalization: When a non-nasal consonant changes to a nasal consonant, such as "nose" for "doze”
# 1 = yes; 0 = no
df['nasalization'] = (
    df
    .apply(
        lambda x: 
            1 
            if x['Manner_Acc'] == 0
            and x['Place_Acc'] == 1
            and x['Prod_nasal'] == 1
            else 
            0, 
        axis=1
    )
)

In [48]:
# Apply function to determine if change constituted backing
# Backing: When sounds produced forward in the mouth are substituted with sounds produced farther back in the mouth; e.g., alveolar for velar, such as “got” for “dot”
# Note: Only accounts for when consonants replace consonants or vowels replace vowels. Not consonant becomes vowel or vice versa.
# 1 = yes; 0 = no
df['backing'] = (
    df
    .apply(
        lambda x: 
            1 
            if 
                (x['Target_vowel'] == 1
                and x['Prod_vowel'] == 1
                and x['Target_front'] == 1
                and x['Prod_front'] == -1)
            or
                (x['Target_vowel'] == -1
                and x['Prod_vowel'] == -1
                and x['Target_Place_N'] < x['Prod_Place_N'])
            else 
                0, 
        axis=1
    )
)

In [49]:
# Apply function to determine if change constituted fronting
# Fronting: When sounds produced in the backward in the mouth are substituted with sounds produced more forward in the mouth; e.g., alveolar for bilabial, such as "bot" for "dot”
# Note: Only accounts for when consonants replace consonants or vowels replace vowels. Not consonant becomes vowel or vice versa.
# 1 = yes; 0 = no
df['fronting'] = (
    df
    .apply(
        lambda x: 
            1 
            if 
                (x['Target_vowel'] == 1
                and x['Prod_vowel'] == 1
                and x['Target_back'] == 1
                and x['Prod_back'] == -1)
            or
                (x['Target_vowel'] == -1
                and x['Prod_vowel'] == -1
                and x['Target_Place_N'] > x['Prod_Place_N'])
            else 
                0, 
        axis=1
    )
)

In [50]:
# Apply function to determine if change constituted alveolarization
# Alveolarization: When a nonalveolar sound is substituted with an alveolar sound, such as “tu" for "shoe”
# 1 = yes; 0 = no
df['alveolarization'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Place_Acc'] == 0
        and x['Prod_alveolar'] == 1
        and x['Target_vowel'] == -1
        else 
        0, 
        axis=1
    )
)

In [51]:
# Apply function to determine if change constituted labialization
# Labialization: When a non-bilabial sound is replaced with a bilabial sound, such as “pie" for "tie”
# 1 = yes; 0 = no
df['labialization'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Place_Acc'] == 0
        and x['Prod_labial'] == 1
        and x['Target_vowel'] == -1
        else 
        0, 
        axis=1
    )
)

In [52]:
# Apply function to determine if change constituted velarization
# Velarization: When a non-velar sound is replaced with a velar sound, such as “kite” for “light”
# 1 = yes; 0 = no
df['velarization'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Place_Acc'] == 0
        and x['Prod_velar'] == 1
        and x['Target_vowel'] == -1
        else 
        0, 
        axis=1
    )
)

In [53]:
# Apply function to determine if change constituted post-alveolarization
# Post-alveolarization: When a non-post-alveolar sound is replaced with a post-alveolar sound, such as “chair” for “care
# 1 = yes; 0 = no
df['post-alveolarization'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Place_Acc'] == 0
        and x['Prod_post-alveolar'] == 1
        and x['Target_vowel'] == -1
        else 
        0, 
        axis=1
    )
)

In [54]:
# Apply function to determine if change constituted dentalization
# Dentalization: When a non-dental sound is replaced with a dental sound, such as “teeth” for “thief”
# 1 = yes; 0 = no
df['dentalization'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Place_Acc'] == 0
        and x['Prod_dental'] == 1
        and x['Target_vowel'] == -1
        else 
        0, 
        axis=1
    )
)

In [55]:
# Apply function to determine if change constituted palatalization
# Palatalization: When a non-palatal sound is replaced with a palatal sound, such as “year” for “rear”
# 1 = yes; 0 = no
df['palatalization'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Place_Acc'] == 0
        and x['Prod_palatal'] == 1
        and x['Target_vowel'] == -1
        else 
        0, 
        axis=1
    )
)

In [56]:
# Apply function to determine if change constituted glottalization
# Glottalization: When a non-glottal sound is replaced with a glottal sound, such as “here” for “fear”
# 1 = yes; 0 = no
df['glottalization'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Place_Acc'] == 0
        and x['Prod_glottal'] == 1
        and x['Target_vowel'] == -1
        else 
        0, 
        axis=1
    )
)

In [57]:
# Apply function to determine if change constituted devoicing
# Devoicing: When a voiced production is substituted for a voiceless production, such as “pin” for “bin”
# 1 = yes; 0 = no
df['devoicing'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Voicing_Acc'] == 0
        and x['Prod_voice'] == -1
        and x['Target_vowel'] == -1
        and x['Prod_vowel'] == -1
        else 
        0, 
        axis=1
    )
)

In [58]:
# Apply function to determine if change constituted voicing
# Voicing: When a voiceless production is substituted for a voiced production, such as “bin” for “pin”
# 1 = yes; 0 = no
df['voicing'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Voicing_Acc'] == 0
        and x['Prod_voice'] == 1
        and x['Target_vowel'] == -1
        and x['Prod_vowel'] == -1
        else 
        0, 
        axis=1
    )
)

In [59]:
# Apply function to determine if change constituted prevocalic voicing
# Prevocalic Voicing: When a voiceless consonant at the preceding a vowel in a syllable like /k/ or /f/ is substituted with a voiced consonant like /g/ or /v/, such as “gup” for "cup”
# 1 = yes; 0 = no
df['prevocalic_voicing'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Voicing_Acc'] == 0
        and x['Target_Syll_Env'] == '#_V'
        and x['Prod_vowel'] == -1
        and x['Prod_voice'] == 1
        else 
        0, 
        axis=1
    )
)

In [60]:
# Apply function to determine if change constituted postvocalic voicing
# Postvocalic Voicing: When a voiceless consonant following a vowel in a syllable like /k/ or /f/ is substituted with a voiced consonant like /g/ or /v/, such as “pod” for "pot”
# 1 = yes; 0 = no
df['postvocalic_voicing'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Voicing_Acc'] == 0
        and x['Target_Syll_Env'] == 'V_#'
        and x['Prod_vowel'] == -1
        and x['Prod_voice'] == 1
        else 
        0, 
        axis=1
    )
)

In [61]:
# Apply function to determine if change constituted final consonant devoicing
# Final Consonant Devoicing: When a voiced consonant at the end of a word like /b/ or /d/ is substituted with a voiceless consonant like /p/ or /t/, such as "pick" for "pig”
# 1 = yes; 0 = no
df['final_consonant_devoicing'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Voicing_Acc'] == 0
        and '_#' in x['Target_Syll_Env']
        and x['Prod_vowel'] == -1
        and x['Prod_voice'] == 1
        else 
        0, 
        axis=1
    )
)

In [62]:
# Apply function to determine if change constituted epenthesis
# Epenthesis: When an extra sound is added to a word, such as “bu-lue" for "blue”
# 1 = yes; 0 = no

df['epenthesis'] = (
    df
    .apply(
        lambda x: 
        1 
        if len(x['Prod_Word_IPA']) > len(x['Target_Word_IPA'])
        else 
        0, 
        axis=1
    )
)

In [63]:
# Apply function to determine if change constituted assimilation
# Assimilation: When a consonant sound starts to sound like another sound in the word, such as “bub" for "bus”
# 1 = yes; 0 = no

df['Target_Word_IPA']=df['Target_Word_IPA'].astype('str')
df['Prod_Word_IPA']=df['Prod_Word_IPA'].astype('str')
df['Target_Phon_IPA']=df['Target_Phon_IPA'].astype('str')

df['assimilation'] = (
    df
    .apply(
        lambda x: 
        1
        if (
            x['Target_Word_IPA'].count(x['Target_Phon_IPA']) <
            x['Prod_Word_IPA'].count(x['Target_Phon_IPA'])
        )
        else 
        0, 
        axis=1
    )
)

In [64]:
# Apply function to determine if change constituted postvocalic assimilation
# Postvocalic Assimilation: When a consonant borrows features from a vowel that follows it in the word production (e.g., becomes more fronted or backed due to frontness of the vowel), such as “school” for “spool”
# 1 = yes; 0 = no

# First, need to determine if the target was a consonant using the 'Target_consonantal' column
# And if it was produced in error based on the 'Phon_Acc' column
# And if its expected to have a vowel following it based on the 'Target_Syll_Env' column (C_V, #_V, or V_V)

# Then, need to determine if the produced consonant has any features that match the following target vowel based on the vowel's height and frontness


In [65]:
# Apply function to determine if change constituted prevocalic assimilation
# Prevocalic Assimilation: When a consonant borrows features from a vowel that precedes it in the word production (e.g., becomes more fronted or backed due to frontness of the vowel), such as “leap for “leak”
# 1 = yes; 0 = no

# First, need to determine if the target was a consonant using the 'Target_consonantal' column
# And if it was produced in error based on the 'Phon_Acc' column
# And if its expected to have a vowel preceding it based on the 'Target_Syll_Env' column (V_C, V_#, or V_V)

# Then, need to determine if the produced consonant has any features that match the preceding target vowel based on the vowel's height and frontness


In [66]:
# Apply function to determine if change constituted coalescence
# Coalescence: When two phonemes are substituted with a different phoneme that still has similar features, such as “fort” for “sport”
# 1 = yes; 0 = no

# Would need to see if target was produced inaccurately and if preceding or succeeding sound is deleted


In [67]:
# Apply function to determine if change constituted reduplication
# Reduplication: When a complete or incomplete syllable is repeated, such as “baba" for "battle”
# 1 = yes; 0 = no

# Not sure our current dataset could do this
# Would need to identify syllable boundaries for each word first


In [68]:
# Apply function to determine if change constituted cluster reduction
# Cluster Reduction: When a consonant cluster is reduced to a single consonant, such as “soon” for “spoon”
# 1 = yes; 0 = no

# First need to determine if target is a part of a cluster
# Then need to determine if the target was deleted


In [69]:
# Apply function to determine if change constituted weak syllable deletion
# Weak Syllable Deletion: When the weak syllable in a word is deleted, such as “nana" for "banana”
# 1 = yes; 0 = no

# Not sure if we can do this with the way the data is currently set up. 
# Would need to identify strong an weak syllables for each word, then tie those syllables to the phonemes

In [70]:
# Apply function to determine if change constituted anticipation
# Anticipation: When a speech sound that occurs later in a word/sentence is produced earlier, such as “cork” for “take my bike”
# 1 = yes; 0 = no

# Would need to identify each word based on its collection of sounds 
# (e.g., fork would be [10,30,2,13], cork would be [13,20,2,13])
# Then, you would need to determine if one of the sounds was repeated (e.g., /k/ is expected to occur once, but it occurs twice)
# And whether the repeated sound happens earlier in the list then expected (position 1, when it should be in position 4)


In [71]:
# Apply function to determine if change constituted preservation
# Preservation: When a speech sound that occurs earlier in a word/sentence is produced later, such as “nine” for “knife”
# 1 = yes; 0 = no

# Would need to identify each word based on its collection of sounds 
# (e.g., knife would be [16,40,10], nine would be [16,40,16])
# Then, you would need to determine if one of the sounds was repeated (e.g., /n/ is expected to occur once, but it occurs twice)
# And whether the repeated sound happens later in the list then expected (position 3, when it should be in position 1)


In [72]:
# Apply function to determine if change constituted a shift
# Shift: When a speech sound that is supposed to occur in one part of the word/sentence is produced at a different part of the word/sentence, such as “poons” for “spoon”
# 1 = yes; 0 = no

def identify_shift(row):
    target_phonemes = list(row['Target_Word_IPA'])  # Assuming IPA representation is a string where each character represents a phoneme
    prod_phonemes = list(row['Prod_Word_IPA'])

    if row['Phon_Acc'] == 0:  # Check if there is an error in production
        target_phoneme = row['Target_Phon_IPA']

        # Check if the target phoneme appears in the produced word
        if target_phoneme in prod_phonemes:
            target_position = row['Target_Phoneme_ID']
            prod_position = prod_phonemes.index(target_phoneme)

            # Check if the target phoneme appears at a different position in the produced word
            if target_position != prod_position:
                return 1

    return 0

df['shift'] = df.apply(identify_shift, axis=1)

# # Would need to identify each word based on its collection of sounds 
# (e.g., spoon would be [19,18,38,16], poons would be [18,38,16,19])
# Then, would need to see if all sounds that should be present are present regardless of position
# Then, would determine whether the order of sounds shifted position, so if +/- 1 position would result in a series of correct positions for more than one sound in the word

In [73]:
# Apply function to determine if change constituted an exchange
# Exchange: When a sound in one part of a word/sentence trades places with a sound in another part of the word/sentence, such as 'call' for 'lock'
# 1 = yes; 0 = no

# # Would need to identify each word based on its collection of sounds 
# (e.g., spoon would be [19,18,38,16], poons would be [18,38,16,19])
# Then, would need to see if all sounds that should be present are present regardless of position



In [74]:
# Apply function to determine if change constituted compound word or syllable reduction
# Compound Word or Syllable Reduction: When a compound word is reduced to a single root word or syllable, such as “lunch” for “lunchbox”
# 1 = yes; 0 = no


In [75]:
# Apply function to determine if change constituted initial consonant deletion
# Initial Consonant Deletion: When the initial consonant in a word is left off, such as “ode” for ”toad”
# 1 = yes; 0 = no

def initial_consonant_deletion(row):
    target_phonemes = list(row['Target_Word_IPA'])  # Get list of phonemes for target
    prod_phonemes = list(row['Prod_Word_IPA']) # Get list of phonemes for production

    # Check if the word-initial phoneme of the target does not appear as the word-initial phoneme in the produced word
    if target_phonemes[0] != prod_phonemes[0]:
        # Now check if that initial phoneme doesn't appear elsewhere in the produced word
        if target_phonemes[0] not in prod_phonemes[1:]:
            return 1

    return 0

df['initial_consonant_deletion'] = df.apply(initial_consonant_deletion, axis=1)

In [76]:
# Apply function to determine if change constituted final consonant deletion
# Final Consonant Deletion: When the final consonant in a word is left off, such as “toe” for ”toad”
# 1 = yes; 0 = no
df['final_consonant_deletion'] = (
    df
    .apply(
        lambda x: 
        1 
        if x['Prod_Last_Phon'] == 1
        and x['Target_Word_Pos'].find('_#') == -1  
        and x['Target_Word_Pos'].find('addition') == -1
        else 
        0, 
        axis=1
    )
)

In [77]:
# Create a column to calculate the total number of missing phonemes from the production based on the target
df['Tot_Deletions'] = (
    df
    .apply(
        lambda x: 
        len(x['Target_Word_IPA']) - len(x['Prod_Word_IPA'])
        if len(x['Prod_Word_IPA']) < len(x['Target_Word_IPA'])
        else 
        0, 
        axis=1
    )
)

In [78]:
# Testing center to make sure functions worked correctly in identifying the phonological process
df[df['shift']==1][['Prod_Word_IPA','Target_Word_IPA','Prod_Phon_IPA', 'Target_Phon_IPA']]

Unnamed: 0,Prod_Word_IPA,Target_Word_IPA,Prod_Phon_IPA,Target_Phon_IPA
20,sɐf.ti,sef.ti.pɪn,,.
26,sɐf.ti,sef.ti.pɪn,,.
31,hæ.mɝ,hæ.mɚ,,.
37,tuʃ.bon,tuθ.brəʃ,,.
55,tuθ.pis,tuθ.brəʃ,,.
71,tuθ.pis,tuθ.brəʃ,,.
81,rʌb,ɪ.re.sɚ,b,r
83,rʌ.bɚn,ɪ.re.sɚ,ʌ,.
84,rʌ.bɚn,ɪ.re.sɚ,,r
86,rʌ.bɚn,ɪ.re.sɚ,ɚ,.
