In [1]:
from phonecodes import phonecodes
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from typing import Iterable, List
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset
from timeit import default_timer as timer
from torch.nn import Transformer
from torch import Tensor
from sklearn.model_selection import train_test_split
import tqdm
import librosa
import seaborn as sns
import torch.nn as nn
import torch
import torch.nn.functional as F
import numpy as np
import math
import os
import pandas as pd
import matplotlib.pyplot as plt
import textgrid
from scipy.spatial.distance import euclidean
import copy

import jiwer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from transformers import AutoProcessor, AutoModelForCTC
from phonemizer.backend.espeak.wrapper import EspeakWrapper
import soundfile as sf

_ESPEAK_LIBRARY = r"C:\Program Files\eSpeak NG\libespeak-ng.dll"
EspeakWrapper.set_library(_ESPEAK_LIBRARY)
processor_P = AutoProcessor.from_pretrained("facebook/wav2vec2-lv-60-espeak-cv-ft")
model_P = AutoModelForCTC.from_pretrained("facebook/wav2vec2-lv-60-espeak-cv-ft")

from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")




Some weights of the model checkpoint at facebook/wav2vec2-lv-60-espeak-cv-ft were not used when initializing Wav2Vec2ForCTC: ['wav2vec2.encoder.pos_conv_embed.conv.weight_g', 'wav2vec2.encoder.pos_conv_embed.conv.weight_v']
- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-lv-60-espeak-cv-ft and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably

In [2]:
human_result_path=r"..\data\test.xlsx"
human_result = pd.read_excel(human_result_path)

In [3]:
human_result_1a=human_result[human_result["Experiment"]=="1a"]


In [54]:
def get_pathset(paths):
    return [os.path.join(dir, each_file) for dir, mid, files in os.walk(paths) for each_file in files if each_file.endswith(".wav")]

def CTC_index(processor,outind):
    meaningful_ids = []
    meaningful_indices = []
    previous_id = -1  
    blank_token_id = processor.tokenizer.pad_token_id  
    for i, token_id in enumerate(outind[0]):  
        if token_id != previous_id and token_id != blank_token_id:
            meaningful_ids.append(token_id.item())  
            meaningful_indices.append(i)  
        previous_id = token_id
    
    return meaningful_indices

def get_set_diphone(paths,model,processor):
    out_dict={}
    english_phonemes = ['<pad>', '<s>', '</s>', '<unk>', 'p', 'b', 't', 'd', 'k', 'g', 'f', 'v', 'θ', 'ð', 's', 'z', 'ʃ', 'ʒ', 
                    'h', 'm', 'n', 'ŋ', 'l', 'ɹ', 'w', 'j', 'tʃ', 'dʒ', 
                    'i', 'ɪ', 'eɪ', 'ɛ', 'æ', 'ɑ', 'ʌ', 'ɔ', 'oʊ', 'ʊ', 'u', 
                    'ɜː', 'ə', 'aɪ', 'aʊ', 'ɔɪ']
    english_phoneme_dict = {k: v for k, v in processor_P.tokenizer.get_vocab().items() if k in english_phonemes}
    #english_phoneme_dict.values()
    for each_sentence in paths:
        tg = textgrid.TextGrid.fromFile(each_sentence[:-3]+"TextGrid")
        tg_sentence = [i for i in tg[0] if i.mark!=""]
        #tg_word = [i for i in tg[1] if i.mark!="" and i.mark!="sp"]

        '''sentence16_end_time=tg_sentence[15].maxTime
        tg_sentence = [i for i in tg_sentence if i.maxTime<=sentence16_end_time]
        tg_word = [i for i in tg_word if i.maxTime<=sentence16_end_time]'''
        
        wave, sr = librosa.load(each_sentence)
        wave_res = librosa.resample(wave, orig_sr=sr, target_sr=16000)
        #wave_res = wave_res[:int(sentence16_end_time*16000)]
        for each_tg in tg_sentence:
            start=round(each_tg.minTime*16000)
            end=round(each_tg.maxTime*16000)
            input=processor(wave_res[start:end],sampling_rate=16000, return_tensors="pt").input_values
            input=input.to(device)
            model.to(device)
            with torch.no_grad():
                out_encoder1=model(input).logits
            
            selected=out_encoder1
            mask = np.ones(selected.shape[-1], dtype=bool)
            mask[list(english_phoneme_dict.values())] = False
            selected[:, :, mask] = 0
            outind=torch.argmax(selected,dim=-1).cpu().numpy()
            
            #outind=torch.argmax(out_encoder1,dim=-1).cpu().numpy()
            transcription = processor.batch_decode(outind)[0].split(" ")
            phonemeindex = CTC_index(processor,outind)
            out_FE=model.wav2vec2.feature_extractor(input)[0].transpose(1,0).cpu().detach().numpy()
            for i in range(len(transcription)-1):
                key = transcription[i] + transcription[i + 1]
                if key not in out_dict:
                    out_dict[key] = []
                out_dict[key].append(np.vstack((out_FE[phonemeindex[i]], out_FE[phonemeindex[i + 1]])))
            torch.cuda.empty_cache()
    torch.cuda.empty_cache()
    return out_dict


def get_training_paths(TrainingTalkerID,all_path):
    path_list=[]
    TalkerID=[]
    for each_ID in TrainingTalkerID.split(", "):
        if each_ID[:3]=="CMN":
            TalkerID.append(f"ALL_{each_ID[-3:]}_M_CMN_ENG_HT1")
        else:
            TalkerID.append(f"ALL_{each_ID[-3:]}_M_ENG_ENG_HT1")
    
    for each_path in TalkerID:
        for i in all_path:
            if each_path in i:
                path_list.append(i)
                break
    
    return path_list


def build_exposure_set(paths, native_dict, set_list, model,processor):
    english_phonemes = ['<pad>', '<s>', '</s>', '<unk>', 'p', 'b', 't', 'd', 
                        'k', 'g', 'f', 'v', 'θ', 'ð', 's', 'z', 'ʃ', 'ʒ', 
                        'h', 'm', 'n', 'ŋ', 'l', 'ɹ', 'w', 'j', 'tʃ', 'dʒ', 
                        'i', 'ɪ', 'eɪ', 'ɛ', 'æ', 'ɑ', 'ʌ', 'ɔ', 'oʊ', 'ʊ', 'u', 
                        'ɜː', 'ə', 'aɪ', 'aʊ', 'ɔɪ']
    english_phoneme_dict = {k: v for k, v in processor_P.tokenizer.get_vocab().items() if k in english_phonemes}
    english_phoneme_dict.values()
    for each_sentence in paths:
        tg = textgrid.TextGrid.fromFile(each_sentence[:-3]+"TextGrid")
        tg_sentence = [i for i in tg[0] if i.mark!=""]
        tg_word = [i for i in tg[1] if i.mark!="" and i.mark!="sp"]
        tg_sentence = [each for _,each in enumerate(tg_sentence) if _ in set_list]
        '''sentence16_end_time=tg_sentence[15].maxTime
        tg_sentence = [i for i in tg_sentence if i.maxTime<=sentence16_end_time]
        tg_word = [i for i in tg_word if i.maxTime<=sentence16_end_time]'''
        
        wave, sr = librosa.load(each_sentence)
        wave_res = librosa.resample(wave, orig_sr=sr, target_sr=16000)
        #wave_res = wave_res[:int(sentence16_end_time*16000)]
        for each_tg in tg_sentence:
            start=round(each_tg.minTime*16000)
            end=round(each_tg.maxTime*16000)
            input=processor(wave_res[start:end],sampling_rate=16000, return_tensors="pt").input_values
            input=input.to(device)
            model.to(device)
            with torch.no_grad():
                out_encoder1=model(input).logits
            selected=out_encoder1
            mask = np.ones(selected.shape[-1], dtype=bool)
            mask[list(english_phoneme_dict.values())] = False
            selected[:, :, mask] = 0
            outind=torch.argmax(selected,dim=-1).cpu().numpy()
            #outind=torch.argmax(out_encoder1,dim=-1).cpu().numpy()
            transcription = processor.batch_decode(outind)[0].split(" ")
            phonemeindex = CTC_index(processor,outind)
            out_FE=model.wav2vec2.feature_extractor(input)[0].transpose(1,0).cpu().detach().numpy()
            for i in range(len(transcription)-1):
                key = transcription[i] + transcription[i + 1]
                if key not in native_dict:
                    native_dict[key] = []
                native_dict[key].append(np.vstack((out_FE[phonemeindex[i]], out_FE[phonemeindex[i + 1]])))
        torch.cuda.empty_cache()
    torch.cuda.empty_cache()
    return native_dict
    #'..\\data\\raw\\ALL_CMN_ENG_HT1\\ALL_032_M_CMN_ENG_HT1.wav'
def get_test_list(file_path,key_word,sentenceID,model,processor):
    english_phonemes = ['<pad>', '<s>', '</s>', '<unk>', 'p', 'b', 't', 'd', 'k', 'g', 'f', 'v', 'θ', 'ð', 's', 'z', 'ʃ', 'ʒ', 
                    'h', 'm', 'n', 'ŋ', 'l', 'ɹ', 'w', 'j', 'tʃ', 'dʒ', 
                    'i', 'ɪ', 'eɪ', 'ɛ', 'æ', 'ɑ', 'ʌ', 'ɔ', 'oʊ', 'ʊ', 'u', 
                    'ɜː', 'ə', 'aɪ', 'aʊ', 'ɔɪ']
    english_phoneme_dict = {k: v for k, v in processor_P.tokenizer.get_vocab().items() if k in english_phonemes}

    sentenceID=int(sentenceID[-3:])-1
    #file_path= f'..\\data\\raw\\ALL_CMN_ENG_HT1\\{file_path[:-5]}.wav'
    
    tg = textgrid.TextGrid.fromFile(file_path[:-3]+"TextGrid")
    tg_sentence = [i for i in tg[0] if i.mark!=""][sentenceID]
    
    tg_word = [i for i in tg[1] if i.mark!="" and i.mark!="sp"]
    
    wave, sr = librosa.load(file_path)
    wave_res = librosa.resample(wave, orig_sr=sr, target_sr=16000)
    

    for each_word_tg in tg_word:
        if each_word_tg.minTime >= tg_sentence.minTime and each_word_tg.maxTime <= tg_sentence.maxTime:
            #print(each_word_tg.mark.lower(),key_word)
            if each_word_tg.mark.lower()==key_word:
                start=each_word_tg.minTime
                end=each_word_tg.maxTime
                break
                #print("start:",start,"end:",end)
    #word_length=len(wave_res)/16000
    out_list=[]
    
    sentence_total_length=tg_sentence.maxTime-tg_sentence.minTime
    word_cut_start=start-tg_sentence.minTime
    word_cut_end=end-tg_sentence.minTime
    
    input=processor(wave_res[int(tg_sentence.minTime*16000):round(tg_sentence.maxTime*16000)], sampling_rate=16000, return_tensors="pt").input_values.to(device)
    with torch.no_grad():
        out_encoder=model(input.to(device)).logits
        out_FE=model.wav2vec2.feature_extractor(input)[0].transpose(1,0).cpu().numpy()
    
    word_start=round(out_encoder.shape[1]*word_cut_start/sentence_total_length)
    word_end=round(out_encoder.shape[1]*word_cut_end/sentence_total_length)
    
    selected=out_encoder[:,word_start:word_end,:]
    mask = np.ones(selected.shape[-1], dtype=bool)
    mask[list(english_phoneme_dict.values())] = False
    selected[:, :, mask] = 0
    outind=torch.argmax(selected,dim=-1).cpu().numpy()
    phonemeindex = CTC_index(processor,outind)
    transcription = processor_P.batch_decode(outind)[0].split(" ")

    
    if len(phonemeindex)<2:
        each_FE = out_FE[word_start:,:]
        selected=out_encoder[:,word_start:,:]
        mask = np.ones(selected.shape[-1], dtype=bool)
        mask[list(english_phoneme_dict.values())] = False
        selected[:, :, mask] = 0
        outind=torch.argmax(selected,dim=-1).cpu().numpy()
        phonemeindex = CTC_index(processor,outind)
        transcription = processor_P.batch_decode(outind)[0].split(" ")
        
        diphone_key = transcription[0] + transcription[0 + 1]
        out_list.append((diphone_key, np.vstack((each_FE[phonemeindex[0]], each_FE[phonemeindex[0 + 1]]))))

    else:
        each_FE = out_FE[word_start:word_end,:]
        for i in range(len(transcription)-1):
            diphone_key = transcription[i] + transcription[i + 1]
            out_list.append((diphone_key, np.vstack((each_FE[phonemeindex[i]], each_FE[phonemeindex[i + 1]]))))
    torch.cuda.empty_cache()
    return out_list


In [42]:
ALL_ENG_ENG_path=r"..\data\raw_L1"
ALL_ENG_ENG_pathset=get_pathset(ALL_ENG_ENG_path)
ALL_ENG_ENG_dict = get_set_diphone(ALL_ENG_ENG_pathset, model_P, processor_P)

In [25]:
all_path=get_pathset(r"..\data\raw")
all_ENG_ENG_pathset=[s.replace("raw_L1", "raw") for s in get_pathset(r"..\data\raw_L1")]
training_talker_loc=human_result_1a.columns.get_loc("TrainingTalkerID")
TrainingTalkerID = human_result_1a.values[0][training_talker_loc]

if get_training_paths(TrainingTalkerID,all_path)[0] in all_ENG_ENG_pathset:
    print(1)

1


In [35]:
each_sentence=all_ENG_ENG_pathset[19]
tg = textgrid.TextGrid.fromFile(each_sentence[:-3]+"TextGrid")
set1_list=[0,1,2,3,4,5,6,7,8,9,10,12,13,14,15,16]
set2_list=[17,18,19,20,21,22,24,25,26,27,28,29,30,31,37,40]
tg_sentence = [i for i in tg[0] if i.mark!=""]
tg_sentence = [each for _,each in enumerate(tg_sentence) if _ in set1_list]

In [45]:
TrainingTalkerID

'ENG_M_070, ENG_M_133, ENG_M_066, ENG_M_131, ENG_M_055'

In [24]:
get_test_list(test_file[0], key_word, sentenceID, model, processor)

'..\\data\\raw_L1\\ALL_ENG_ENG_HT1\\ALL_070_M_ENG_ENG_HT1.wav'

In [22]:

filename_loc=df.columns.get_loc("Filename")
test_file = [each for each in all_path if os.path.split(human_result_1a.values[2423][filename_loc])[-1][:-5] in each]
training_files_path=get_training_paths(TrainingTalkerID,all_path)[0]
training_files_path=get_training_paths(TrainingTalkerID,all_path)
training_dict=build_exposure_set(training_files_path, all_eng_dict,train_set, model, processor)

'..\\data\\raw\\ALL_ENG_ENG_HT1\\ALL_070_M_ENG_ENG_HT1.wav'

In [57]:
df=human_result_1a
each_=df.values[2423]
filename_loc=df.columns.get_loc("Filename")
keyword_loc=df.columns.get_loc("Keyword")
training_talker_loc=df.columns.get_loc("TrainingTalkerID")

all_path=get_pathset(r"..\data\raw")
all_ENG_ENG_pathset=get_pathset(r"..\data\raw_L1")

set1_list=[0,1,2,3,4,5,6,7,8,9,10,12,13,14,15,16]
set2_list=[17,18,19,20,21,22,24,25,26,27,28,29,30,31,37,40]
if each_[df.columns.get_loc("TrainingTestSet")] == "set2,set1":
    train_set=set2_list
    test_set=set1_list
else:
    train_set=set1_list
    test_set=set2_list

#print(each_[filename_loc])
test_file = [each for each in all_path if os.path.split(each_[filename_loc])[-1][:-5] in each]
#print(test_file)
key_word = each_[keyword_loc] #string
TrainingTalkerID = each_[training_talker_loc] #list of string
sentenceID = each_[df.columns.get_loc("SentenceID")]
training_files_path=get_training_paths(TrainingTalkerID,all_path)
training_dict=build_exposure_set(training_files_path, copy.deepcopy(ALL_ENG_ENG_dict),train_set, model_P, processor_P)
test_list= get_test_list(test_file[0],key_word,sentenceID,model_P,processor_P)# word level, list

In [62]:
for i in [i[0] for i in test_list]:
    print(i in training_dict.keys())

False
True
True
True
True
True
True


In [56]:
human_result_1a.values[2423]

array(['Big dogs can be dangerous.', '2e2d58024ab7c3cd1300ffbfa25b8e08',
       'test', 6, 2, 'talker_CMN_M_035/ALL_035_M_CMN_ENG_HT1_S003',
       'big dogs can be dangerous', '5Acc_Diff1Acc', 'set2,set1', 'a',
       nan, 35, 'ALL_035_M_CMN', 5, 'at_work', 'no', nan, '5Acc_Diff1Acc',
       nan, 'Phillipines', 'monolingual', 'I have only lived in America.',
       'a', 41.0, 'NonHisp', 'RSRB00045955', 'white;', nan, 'Male', '5',
       '2017-02-06T15:30:45Z', 'yes', 373, 'HT1_S003',
       'big, dogs, be, dangerous', 4, 51, 1.0, 4,
       'Multi-talker\ntraining', 'Multi-talker', 'month', 'excellent',
       'headphones', 'in-ear', 35, 'CMN_M_035', False, nan,
       'CMN_M_016, CMN_M_043, CMN_M_037, CMN_M_021, CMN_M_032', 'no',
       'no', 'yes', 'no', 'Test talker 5', 'dangerous',
       'big dogs can be dangerous', 1, '1a', 'Multi-talker', 0, 1, 0,
       'Multi-talker', -0.25, 0.5, 0.25, 0.3682950401014196,
       0.2481983465853671], dtype=object)

In [49]:
filename_loc=human_result_1a.columns.get_loc("Filename")
test_file = [each for each in all_path if os.path.split(human_result_1a.values[2423][filename_loc])[-1][:-5] in each]
test_file

['..\\data\\raw\\ALL_CMN_ENG_HT1\\ALL_035_M_CMN_ENG_HT1.wav']

In [63]:
def sim_measure(df,all_eng_dict, model, processor):
    sim_mean_max_list,sim_mean_std_list,sim_mean_mean_list,isincluded_list,diphone_count =[], [], [], [], []
    
    train_set_dict={}
    test_word_dict={}
    
    for each_ in tqdm.tqdm(df.values):
        filename_loc=df.columns.get_loc("Filename")
        keyword_loc=df.columns.get_loc("Keyword")
        training_talker_loc=df.columns.get_loc("TrainingTalkerID")
        
        all_path=get_pathset(r"..\data\raw")
        all_ENG_ENG_pathset=[s.replace("raw_L1", "raw") for s in get_pathset(r"..\data\raw_L1")]
        
        set1_list=[0,1,2,3,4,5,6,7,8,9,10,12,13,14,15,16]
        set2_list=[17,18,19,20,21,22,24,25,26,27,28,29,30,31,37,40]
        if each_[df.columns.get_loc("TrainingTestSet")] == "set2,set1":
            train_set=set2_list
            test_set=set1_list
        else:
            train_set=set1_list
            test_set=set2_list
        
        #print(each_[filename_loc])
        test_file = [each for each in all_path if os.path.split(each_[filename_loc])[-1][:-5] in each]
        #print(test_file)
        key_word = each_[keyword_loc] #string
        TrainingTalkerID = each_[training_talker_loc] #list of string
        sentenceID = each_[df.columns.get_loc("SentenceID")]
        training_files_path=get_training_paths(TrainingTalkerID,all_path)
        
        if training_files_path[0] in all_ENG_ENG_pathset:
            training_dict=copy.deepcopy(all_eng_dict)
        else:
            if TrainingTalkerID not in train_set_dict:
                train_set_dict[TrainingTalkerID]={}
                
            if each_[df.columns.get_loc("TrainingTestSet")] not in train_set_dict[TrainingTalkerID]:
                training_dict=build_exposure_set(training_files_path, copy.deepcopy(all_eng_dict), train_set, model, processor)
                train_set_dict[TrainingTalkerID][each_[df.columns.get_loc("TrainingTestSet")]]=copy.deepcopy(training_dict)
            else:
                training_dict=train_set_dict[TrainingTalkerID][each_[df.columns.get_loc("TrainingTestSet")]]

    
        if test_file[0] not in test_word_dict:
            test_word_dict[test_file[0]]={}
        if sentenceID not in test_word_dict[test_file[0]]:
            test_word_dict[test_file[0]][sentenceID]={}
        if key_word not in test_word_dict[test_file[0]][sentenceID]:
            test_list = get_test_list(test_file[0], key_word, sentenceID, model, processor)
            test_word_dict[test_file[0]][sentenceID][key_word]=copy.deepcopy(test_list)
        else:
            test_list=test_word_dict[test_file[0]][sentenceID][key_word]
            
        # word level, list
        sim_max=[]
        sim_std=[]
        sim_mean=[]
        isincluded=[]
        #sim_count=[]
        for each_diphone in test_list:
            
            sims=[]
            if each_diphone[0] in training_dict.keys():
                isincluded.append(1)
                
                for each_vec in training_dict[each_diphone[0]]:
                    d=euclidean(each_diphone[1].ravel(),each_vec.ravel())
                    sim=np.exp(-0.1*d)
                    sims.append(sim)
                    #sims.append(0)
            else:
                isincluded.append(0)
                sims.append(0)
            #sim_count.append(len(sim))
            sim_max.append(np.max(sims))
            sim_std.append(np.std(sims))
            sim_mean.append(np.mean(sims))
            
            
        sim_mean_max=np.mean(sim_max)
        sim_mean_std=np.mean(sim_std)
        sim_mean_mean=np.mean(sim_mean)
        sim_mean_max_list.append(sim_mean_max)
        sim_mean_std_list.append(sim_mean_std)
        sim_mean_mean_list.append(sim_mean_mean)
        isincluded_list.append(np.count_nonzero(isincluded))
        diphone_count.append(len(isincluded))
        
    return sim_mean_max_list,sim_mean_std_list,sim_mean_mean_list,isincluded_list,diphone_count



In [64]:
sim_mean_max_list,sim_mean_std_list,sim_mean_mean_list,isincluded_list,diphone_count=sim_measure(human_result_1a,ALL_ENG_ENG_dict,model_P, processor_P)

100%|██████████| 16477/16477 [12:00<00:00, 22.88it/s] 


In [65]:
human_result_1a["sim_mean_max"]=sim_mean_max_list
human_result_1a["sim_mean_std"]=sim_mean_std_list
human_result_1a["sim_mean_mean"] = sim_mean_mean_list
human_result_1a["diphone_overlapped"]=isincluded_list
human_result_1a["NumDiphone_word"]=diphone_count
human_result_1a.to_excel('similarities.xlsx')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  human_result_1a["sim_mean_max"]=sim_mean_max_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  human_result_1a["sim_mean_std"]=sim_mean_std_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  human_result_1a["sim_mean_mean"] = sim_mean_mean_list
A value is trying to be set on a copy of a slice fr