In [72]:
import torch
import torchaudio
from datasets import load_dataset, load_metric
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import re
import unidecode

In [73]:
lang_code = "rw"
language = "kinyarwanda"
model = f"lucio/wav2vec2-large-xlsr-{language}"

In [74]:
import numpy as np



In [75]:

processor = Wav2Vec2Processor.from_pretrained(model)
model = Wav2Vec2ForCTC.from_pretrained(model)
model.to("cuda")

wer = load_metric("wer")


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=158.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=268.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=138.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=85.0, style=ProgressStyle(description_w…




Special tokens have been added in the vocabulary, make sure the associated word embedding are fine-tuned or trained.


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1600.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1262065047.0, style=ProgressStyle(descr…




In [76]:
## Text pre-processing


chars_to_ignore_regex = r'[!"#$%&()*+,./:;<=>?@\[\]\\_{}|~£¤¨©ª«¬®¯°·¸»¼½¾ðʺ˜˝ˮ‐–—―‚“”„‟•…″‽₋€™−√�]'
chars_to_ignore_pattern = re.compile(chars_to_ignore_regex)

def remove_special_characters(batch):
    batch["text"] = re.sub(r'[ʻʽʼ‘’´`]', r"'", batch["sentence"])
    batch["text"] = re.sub(chars_to_ignore_regex, "", batch["text"]).lower().strip()
    batch["text"] = re.sub(r"(-|' | '|  +)", " ", batch["text"])
    batch["text"] = unidecode.unidecode(batch["text"])
    batch["length"] = len(batch["text"])
    return batch


## Audio pre-processing
resampler = torchaudio.transforms.Resample(48_000, 16_000)

def speech_file_to_array_fn(batch):
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    batch["speech"] = resampler(speech_array).squeeze().numpy()
    batch["sampling_rate"] = 16_000
    return batch


# Text transformation and audio resampling
def cv_prepare(batch):
    batch = remove_special_characters(batch)
    batch = speech_file_to_array_fn(batch)

    return batch

# Number of CPUs or None
num_proc = 8



In [78]:
test_dataset = load_dataset("common_voice", lang_code, split="test", cache_dir=f"/workspace/raw_data/{lang_code}")

test_dataset = test_dataset.map(cv_prepare, num_proc=1)

Reusing dataset common_voice (/workspace/raw_data/rw/common_voice/rw/6.1.0/0041e06ab061b91d0a23234a2221e87970a19cf3a81b20901474cffffeb7869f)


HBox(children=(FloatProgress(value=0.0, max=15724.0), HTML(value='')))




In [79]:
def evaluate(batch):
    inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)

    with torch.no_grad():
        logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits

    pred_ids = torch.argmax(logits, dim=-1)
    batch["pred_strings"] = processor.batch_decode(pred_ids)
    return batch

result = test_dataset.map(evaluate, batched=True, batch_size=8)


HBox(children=(FloatProgress(value=0.0, max=1966.0), HTML(value='')))

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [84]:
Counter(zip(test_dataset['age'], test_dataset['gender']))

Counter({('', ''): 6831,
         ('twenties', 'male'): 3716,
         ('twenties', 'female'): 2670,
         ('thirties', 'male'): 336,
         ('fourties', 'male'): 5,
         ('fourties', 'female'): 7,
         ('teens', 'other'): 35,
         ('teens', 'female'): 1579,
         ('twenties', 'other'): 71,
         ('teens', 'male'): 474})

In [83]:
# WER Metric computation

import jiwer

def chunked_wer(targets, predictions, chunk_size=None):                                          
    if chunk_size is None: return jiwer.wer(targets, predictions)                                
    start = 0                                                                                    
    end = chunk_size                                                                             
    H, S, D, I = 0, 0, 0, 0                                                                      
    while start < len(targets):                                                                  
        chunk_metrics = jiwer.compute_measures(targets[start:end], predictions[start:end])       
        H = H + chunk_metrics["hits"]                                                            
        S = S + chunk_metrics["substitutions"]                                                   
        D = D + chunk_metrics["deletions"]                                                       
        I = I + chunk_metrics["insertions"]                                                      
        start += chunk_size                                                                      
        end += chunk_size                                                                        
    return float(S + D + I) / float(H + S + D)

print("WER: {:2f}".format(100 * chunked_wer(result["text"], result["pred_strings"], chunk_size=4000)))


WER: 46.040170


In [81]:
female_result = result.filter(lambda example: example["gender"] == "female")
print("Female data WER: {:2f}".format(100 * chunked_wer(predictions=female_result["pred_strings"], targets=female_result["text"], chunk_size=4000)))

male_result = result.filter(lambda example: example["gender"] == "male")
print("Male data WER: {:2f}".format(100 * chunked_wer(predictions=male_result["pred_strings"], targets=male_result["text"], chunk_size=4000)))

unk_result = result.filter(lambda example: example["gender"] == "")
print("UNK gender data WER: {:2f}".format(100 * chunked_wer(predictions=unk_result["pred_strings"], targets=unk_result["text"], chunk_size=4000)))

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))


Female data WER: 36.208978


HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))


Male data WER: 46.595275


HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))


UNK gender data WER: 52.137815


In [82]:
list(zip(result[:10]['text'], result[:10]['pred_strings']))

[("yaherukaga gukora igitaramo nk'iki mu mujyi wa namur mu bubiligi ",
  'yaherukagu gukora igitaramo yuki mugi wa na nkurmu biliki'),
 ('ibi rero ntibizashoboka kandi nawe arabizi ',
  "inyo rero ntibizashoboka guhandi n'unkurabizi"),
 ('yakomeje agira ati turateganya ko hazakomeza kubaho no mu myaka izakurikira ',
  'yakomeje agira ati turateganya ko azakomeza kubaho no mu myaka izakurikira'),
 ('iki kigo cyakira abaturage bo mu turere twa gisagara mu mirenge ya kigembe na nyanza ',
  'iti kiga cyakira abaturage bo mu turere twagisagara umirinde ya kigemde na nyanza'),
 ('mama yambyukije murukerera ', 'nyuma yapyukije mu ikerera'),
 ('uwo muhungu wamubikiraga aravuga ati komera ',
  'uwo muhungu wamubikiraga aravuga ati komera k'),
 ('friso wamaze igihe kingana gityo ataragarura ubwenge ',
  'firiso wamaze igihe kingana gutyo ataragarura ubwenge'),
 ('ndetse ngo namwifurize umunsi mwiza ',
  'ndetse ngo namwifurije umunsi mwiza'),
 ("kw'ibumoso ", "w'ibukozo"),
 ('kanda hano wumve in

In [86]:
unk_result = result.filter(lambda example: example["down_votes"] == 0)
print("good data WER: {:2f}".format(100 * chunked_wer(predictions=unk_result["pred_strings"], targets=unk_result["text"], chunk_size=4000)))

HBox(children=(FloatProgress(value=0.0, max=16.0), HTML(value='')))


good data WER: 43.545790


In [22]:


def extract_all_chars(batch):
    all_text = " ".join(batch["text"])
    vocab = list(set(all_text))
    return {"vocab": [vocab], "all_text": [all_text]}

def strip_accents(batch):
   batch["sentence"] = unidecode.unidecode(batch["sentence"])


In [7]:
valid_dataset = load_dataset("common_voice", "rw", split="validation", cache_dir="/workspace/raw_data/rw")
valid_dataset = valid_dataset.map(remove_special_characters, remove_columns=['path'], num_proc=4)


Reusing dataset common_voice (/workspace/raw_data/rw/common_voice/rw/6.1.0/0041e06ab061b91d0a23234a2221e87970a19cf3a81b20901474cffffeb7869f)


    

HBox(children=(FloatProgress(value=0.0, description='#2', max=3758.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='#3', max=3758.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='#1', max=3758.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='#0', max=3758.0, style=ProgressStyle(description_width='i…







In [9]:
len(valid_dataset), valid_dataset[0]

(15032,
 {'accent': '',
  'age': 'teens',
  'client_id': '5e85947436bbc805e503bf926beb533b07da07cef77c491a476c8cd1d9bce7b9a4cdc8171564447ea1d2c9c591a203b2e37545a4c8a8f7d0b2881e9e64441e5b',
  'down_votes': 1,
  'gender': 'male',
  'locale': 'rw',
  'segment': "''",
  'sentence': 'Umuntungo we ntabwo aba agomba kuwumarira mu gushakisha imyenda yo kwambara.',
  'text': 'umuntungo we ntabwo aba agomba kuwumarira mu gushakisha imyenda yo kwambara ',
  'up_votes': 2})

In [87]:
train_dataset = load_dataset("common_voice", "rw", split="train", cache_dir="/workspace/raw_data/rw")
train_dataset = train_dataset.map(remove_special_characters, remove_columns=['path'], num_proc=4)

Reusing dataset common_voice (/workspace/raw_data/rw/common_voice/rw/6.1.0/0041e06ab061b91d0a23234a2221e87970a19cf3a81b20901474cffffeb7869f)


 

Loading cached processed dataset at /workspace/raw_data/rw/common_voice/rw/6.1.0/0041e06ab061b91d0a23234a2221e87970a19cf3a81b20901474cffffeb7869f/cache-f26b4b6f0d41515a.arrow


 

Loading cached processed dataset at /workspace/raw_data/rw/common_voice/rw/6.1.0/0041e06ab061b91d0a23234a2221e87970a19cf3a81b20901474cffffeb7869f/cache-2064fc16cd638593.arrow


 

Loading cached processed dataset at /workspace/raw_data/rw/common_voice/rw/6.1.0/0041e06ab061b91d0a23234a2221e87970a19cf3a81b20901474cffffeb7869f/cache-ce61ebe019cae2d3.arrow


 

Loading cached processed dataset at /workspace/raw_data/rw/common_voice/rw/6.1.0/0041e06ab061b91d0a23234a2221e87970a19cf3a81b20901474cffffeb7869f/cache-8cec50e0a7e00cde.arrow


In [90]:
clean_data = train_dataset.filter(lambda x: x['down_votes'] == 0)
len(clean_data)

HBox(children=(FloatProgress(value=0.0, max=516.0), HTML(value='')))




442503

In [93]:
len(train_dataset), clean_data[100]

(515197,
 {'accent': '',
  'age': 'twenties',
  'client_id': 'd4439c64c8f13b84cd2ce31d5d9eeae2a81147d89abb00cebaaf11b60b7166c24dd257a44e73c72c73c93cae29d904bed135824aca06e5970e001e9406e8a891',
  'down_votes': 0,
  'gender': 'male',
  'length': 38,
  'locale': 'rw',
  'segment': "''",
  'sentence': 'kugira ngo inama za komisiyo ziterane',
  'text': 'kugira ngo inama za komisiyo ziterane ',
  'up_votes': 2})

In [None]:
len(test_dataset), max(test_dataset['length']), len(test_dataset[0]['speech']), test_dataset[0]

In [58]:
len(test_dataset), sum(test_dataset['length'])/len(test_dataset), test_dataset[0]

(515197,
 50.70219352985363,
 {'accent': '',
  'age': 'twenties',
  'client_id': 'd4439c64c8f13b84cd2ce31d5d9eeae2a81147d89abb00cebaaf11b60b7166c24dd257a44e73c72c73c93cae29d904bed135824aca06e5970e001e9406e8a891',
  'down_votes': 1,
  'gender': 'male',
  'length': 53,
  'locale': 'rw',
  'segment': "''",
  'sentence': 'akunda u rwanda cyane cyane ku byerekeye isuku ihaba',
  'text': 'akunda u rwanda cyane cyane ku byerekeye isuku ihaba ',
  'up_votes': 2})

In [19]:
from collections import Counter 

Counter(zip(valid_dataset['age'], valid_dataset['gender']))

NameError: name 'valid_dataset' is not defined

In [92]:
Counter(zip(clean_data['age'], clean_data['gender']))

Counter({('twenties', 'male'): 117836,
         ('teens', 'female'): 32882,
         ('thirties', 'male'): 49188,
         ('twenties', ''): 39584,
         ('thirties', 'female'): 18498,
         ('twenties', 'female'): 128450,
         ('teens', 'male'): 53338,
         ('', ''): 2727})

In [55]:
vocab = train_dataset.map(
    extract_all_chars,
    batched=True,
    batch_size=-1,
    keep_in_memory=True,
    remove_columns=train_dataset.column_names,
)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [19]:
vocab_test = test_dataset.map(
    extract_all_chars,
    batched=True,
    batch_size=-1,
    keep_in_memory=True,
    remove_columns=test_dataset.column_names,
)

NameError: name 'test_dataset' is not defined

In [56]:
from collections import Counter
cnts = Counter(vocab[0]['all_text'])
cnts

Counter({'a': 3648720,
         'k': 991786,
         'u': 1878946,
         'n': 1462878,
         'd': 287040,
         ' ': 4185822,
         'r': 1332884,
         'w': 654944,
         'c': 179634,
         'y': 998491,
         'e': 1489157,
         'b': 1266316,
         'i': 2363305,
         's': 555158,
         'h': 569783,
         'm': 1030066,
         'l': 74865,
         'z': 499214,
         'g': 847924,
         'f': 100863,
         'j': 135886,
         'o': 1129970,
         't': 619214,
         "'": 155269,
         'v': 97299,
         'p': 78286,
         'x': 1681,
         'q': 1413})

In [46]:
import unidecode
[(unidecode.unidecode(k), cnts[k]) for k in cnts if cnts[k] < 100]

[('c', 11),
 ('e', 53),
 ('a', 34),
 ('o', 80),
 ('i', 62),
 ('e', 32),
 ('i', 75),
 ('', 1),
 ('oe', 15),
 ('s', 6),
 ('s', 5),
 ('n', 6),
 ('', 7),
 ('c', 93),
 ('a', 26),
 ('e', 3),
 ('a', 5),
 ('v', 4),
 ('o', 20),
 ('i', 69),
 ('u', 29),
 ('u', 84),
 ('o', 45),
 ('y', 4),
 ('i', 15),
 ("s'", 2),
 ('a', 98),
 ('g', 5),
 ('u', 34),
 ('u', 1),
 ('e', 30),
 ('l', 3),
 ('u', 7),
 ('o', 57),
 ('o', 5),
 ('h', 6),
 ('i', 2),
 ('i', 3),
 ('o', 2),
 ('ss', 1),
 ('ae', 4),
 ('z', 2),
 ('k', 5),
 ('c', 4),
 ('z', 2),
 ('i', 9),
 ('r', 1),
 ('', 1),
 ('m', 1),
 ('s', 1),
 ('a', 3),
 ('u', 7),
 ('o', 2),
 ('ie', 1),
 ('e', 1),
 ('i', 2),
 ('a', 1),
 ('t', 1),
 ('b', 1),
 ('ae', 1),
 ('sh', 1),
 ('a', 1),
 ('o', 2),
 ('u', 1),
 ('z', 1),
 ('f', 1),
 ('fi', 1),
 ('', 2)]

In [57]:
list(zip(train_dataset['sentence'][:100], train_dataset['text'][:100]))

[('akunda u rwanda cyane cyane ku byerekeye isuku ihaba',
  'akunda u rwanda cyane cyane ku byerekeye isuku ihaba '),
 ('cy icyuma kandi libani izagwa f igushijwe n umunyambaraga',
  'cy icyuma kandi libani izagwa f igushijwe n umunyambaraga '),
 ('mu nkuge babijugunya mu nyanja ngo boroshye',
  'mu nkuge babijugunya mu nyanja ngo boroshye '),
 ('ubwoko bw ibikorwa isosiyete ifitemo',
  'ubwoko bw ibikorwa isosiyete ifitemo '),
 ('uti «ubutungane bwanjye ni bwo bwateye uhoraho kunzana muri iki gihugu ngo nkigarurire. ubwigomeke bw’ayo mahanga ni bwo bwateye uhoraho kubanyaga ibyabo ngo abiguhe.',
  "uti ubutungane bwanjye ni bwo bwateye uhoraho kunzana muri iki gihugu ngo nkigarurire ubwigomeke bw'ayo mahanga ni bwo bwateye uhoraho kubanyaga ibyabo ngo abiguhe "),
 ('maso hanjye ho nta wushobora kuhabona',
  'maso hanjye ho nta wushobora kuhabona '),
 ('nsenga maze umumarayika arambwira ngo ngutumeho',
  'nsenga maze umumarayika arambwira ngo ngutumeho '),
 ('mbere ku isoko ry imari n 

In [47]:
import unicodedata
unicodedata.name('́')

'COMBINING ACUTE TONE MARK'

In [67]:
processor.tokenizer.get_vocab()

{'y': 0,
 'd': 1,
 's': 2,
 'n': 3,
 'c': 4,
 'l': 5,
 'h': 6,
 'w': 7,
 'u': 8,
 'z': 9,
 't': 11,
 'g': 12,
 'j': 13,
 'm': 14,
 'o': 15,
 'e': 16,
 'k': 17,
 'r': 18,
 'i': 19,
 'b': 20,
 'f': 21,
 'a': 22,
 'v': 23,
 'p': 24,
 '|': 10,
 '[UNK]': 25,
 '[PAD]': 26,
 '<s>': 27,
 '</s>': 28}

In [77]:
from transformers.trainer_utils import get_last_checkpoint
import os
import logging

logger = logging.getLogger(__name__)

In [71]:
get_last_checkpoint("/workspace/checkpoints/lg/wav2vec2-large-xlsr-lg-augment")

'/workspace/checkpoints/lg/wav2vec2-large-xlsr-lg-augment/checkpoint-4096'

In [75]:
output_dir="/workspace/checkpoints/lg/wav2vec2-large-xlsr-lg-augment/" 

In [82]:
last_checkpoint = None
if os.path.isdir(output_dir) and True and not False:
    print(f"Looking for last checkpoint in {output_dir}")
    last_checkpoint = get_last_checkpoint(output_dir)
    if last_checkpoint is None and len(os.listdir(output_dir)) > 0:
        raise ValueError(
            f"Output directory ({output_dir}) already exists and is not empty. "
            "Use --overwrite_output_dir to overcome."
        )
    elif last_checkpoint is not None:
        print(
            f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
            "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
        )

Looking for last checkpoint in /workspace/checkpoints/lg/wav2vec2-large-xlsr-lg-augment/
Checkpoint detected, resuming training at /workspace/checkpoints/lg/wav2vec2-large-xlsr-lg-augment/checkpoint-4096. To avoid this behavior, change the `--output_dir` or add `--overwrite_output_dir` to train from scratch.


In [81]:
os.path.isdir(output_dir) and True and not False

True

In [152]:
import librosa
import torchaudio
import soundfile
from audiomentations import Compose, AddGaussianNoise, Gain, PitchShift, Shift

resampler = torchaudio.transforms.Resample(48_000, 16_000)

augment = Compose([
    AddGaussianNoise(min_amplitude=0.0001, max_amplitude=0.01, p=1),
    PitchShift(min_semitones=-3, max_semitones=3, p=0.8),
    Gain(min_gain_in_db=-6, max_gain_in_db=6, p=0.8),
    Shift(min_fraction=-0.5, max_fraction=0.5, p=0.8),
])

def speech_file_to_array_fn_torchaudio(batch):
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    batch["speech"] = resampler(speech_array).squeeze().numpy()
    batch["sampling_rate"] = 16_000
    return batch

def speech_file_to_array_fn_librosa(batch):
    speech_array, sample_rate = torchaudio.load(batch["path"])
    batch["speech"] = librosa.resample(speech_array.squeeze().numpy(), sample_rate, 16_000)
    return batch

def augmented_speech_file_to_array_fn(batch):
    try:
        speech_array, sampling_rate = soundfile.read(batch["path"] + "-augmented.wav")
    except:
        speech_array, sampling_rate = torchaudio.load(batch["path"])
        speech_array = resampler(speech_array)
        speech_array = augment(samples=speech_array, sample_rate=sampling_rate).squeeze()
        soundfile.write(batch["path"]+"-augmented.wav", speech_array, sampling_rate, subtype='PCM_24')

    batch["speech"] = speech_array
    batch["sampling_rate"] = 16_000
    return batch

In [153]:
speech_file_to_array_fn_librosa(test_dataset[23])

{'accent': '',
 'age': '',
 'client_id': 'a0c75a2b3ef19a055ebcc7587ebf341614373519ceb63f67e67e4eae0eb4858dc2031cd42b08b389168e6c84bce198dc8f7405fdb443195eaaf6cd7dc981425c',
 'down_votes': 1,
 'gender': '',
 'locale': 'rw',
 'path': '/workspace/raw_data/rw/downloads/extracted/6755b117c96a17977cf94d8a118d06e13daa8d123092dfaa6ccb5bd13f06e60f/cv-corpus-6.1-2020-12-11/rw/clips/common_voice_rw_22948194.mp3',
 'segment': "''",
 'sentence': 'ni uko batari bagiha agaciro isanduku n’ibyo yavuze ku ri bo',
 'up_votes': 2,
 'speech': array([ 0.        ,  0.        ,  0.        , ..., -0.00116267,
        -0.00045365, -0.00068105], dtype=float32)}

In [154]:
augmented_speech_file_to_array_fn(test_dataset[24])

{'accent': '',
 'age': '',
 'client_id': 'a0c75a2b3ef19a055ebcc7587ebf341614373519ceb63f67e67e4eae0eb4858dc2031cd42b08b389168e6c84bce198dc8f7405fdb443195eaaf6cd7dc981425c',
 'down_votes': 1,
 'gender': '',
 'locale': 'rw',
 'path': '/workspace/raw_data/rw/downloads/extracted/6755b117c96a17977cf94d8a118d06e13daa8d123092dfaa6ccb5bd13f06e60f/cv-corpus-6.1-2020-12-11/rw/clips/common_voice_rw_22948195.mp3',
 'segment': "''",
 'sentence': 'amata kwera zari zikeye mu maso kurusha',
 'up_votes': 2,
 'speech': array([-0.43122205, -0.42071682, -0.4279239 , ..., -0.4282133 ,
        -0.4141648 , -0.3871291 ], dtype=float32),
 'sampling_rate': 16000}

In [155]:
augmented_speech_file_to_array_fn(test_dataset[24])['speech'].squeeze()

array([-0.05630261, -0.03208497, -0.05551476, ...,  0.00743804,
       -0.00030689, -0.08274259], dtype=float32)

In [156]:
test_dataset.map(augmented_speech_file_to_array_fn)

HBox(children=(FloatProgress(value=0.0, max=4717.0), HTML(value='')))




KeyboardInterrupt: 

In [40]:
set('ʻʽʼ‘’´`')

{'`', '´', 'ʻ', 'ʼ', 'ʽ', '‘', '’'}

In [52]:
''.join(sorted(set('\[\],?.!;:%*½¾¼+=$€£™&©·°"“”(){}‟ˮ˝ʺ″«»/…‽�–—¨@#¬_•®\\₋<>~˜√ð‚¸‐ª¯„−―|¤')))

'!"#$%&()*+,./:;<=>?@[\\]_{|}~£¤¨©ª«¬®¯°·¸»¼½¾ðʺ˜˝ˮ‐–—―‚“”„‟•…″‽₋€™−√�'