In [0]:
import numpy as np
import torch
torch.set_printoptions(edgeitems=2, threshold=50)

In [0]:
import scipy.io.wavfile as wavfile

(44100, array([ -388, -3387, -4634, ...,  2289,  1327,    90], dtype=int16))

In [0]:
freq, waveform_arr = wavfile.read('https://raw.githubusercontent.com/deep-learning-with-pytorch/dlwpt-code/master/data/p1ch4/audio-chirp/1-100038-A-14.wav')
freq, waveform_arr

In [0]:
waveform = torch.from_numpy(waveform_arr).float()
waveform.shape



torch.Size([220500])

In [0]:
from scipy import signal

f_arr, t_arr, sp_arr = signal.spectrogram(waveform_arr, freq)

sp_mono = torch.from_numpy(sp_arr)
sp_mono.shape

torch.Size([129, 984])

In [0]:
sp_left = sp_right = sp_arr
sp_left_t = torch.from_numpy(sp_left)
sp_right_t = torch.from_numpy(sp_right)
sp_left_t.shape, sp_right_t.shape

(torch.Size([129, 984]), torch.Size([129, 984]))

In [0]:
sp_t = torch.stack((sp_left_t, sp_right_t), dim=0)
sp_t.shape

torch.Size([2, 129, 984])

Jane Austen

In [0]:
import numpy as np
import torch
torch.set_printoptions(edgeitems = 2, threshold=50)

In [0]:
with open('https://raw.githubusercontent.com/microsoft/SynthaCorpus/master/ProjectGutenberg/1342-0.txt', encoding='utf8') as f:
    text = f.read()

In [0]:
lines = text.split('\n')
line = lines[200]
line


'“Impossible, Mr. Bennet, impossible, when I am not acquainted with him'

In [0]:
letter_t = torch.zeros(len(line),128)
letter_t.shape

torch.Size([70, 128])

In [0]:
for i,letter in enumerate(line.lower().strip()):
    letter_index = ord(letter) if ord(letter) < 128 else 0
    letter_t[i][letter_index] = 1

In [0]:
def clean_words(input_str):
    punctuation = '.,;:"!?”“_-'
    word_list = input_str.lower().replace('\n','').split()
    word_list = [word.strip(punctuation) for word in word_list]
    return word_list

In [0]:

words_in_line = clean_words(line)
line, words_in_line


('“Impossible, Mr. Bennet, impossible, when I am not acquainted with him',
 ['impossible',
  'mr',
  'bennet',
  'impossible',
  'when',
  'i',
  'am',
  'not',
  'acquainted',
  'with',
  'him'])

In [0]:
word_list = sorted(set(clean_words(text)))
word2index_dict = {word: i for (i, word) in enumerate(word_list)}

In [0]:
len(word2index_dict), word2index_dict['impossible']

(15514, 6925)

In [0]:

word_t = torch.zeros(len(words_in_line), len(word2index_dict))
for i, word in enumerate(words_in_line):
    word_index = word2index_dict[word]
    word_t[i][word_index] = 1
    print('{:2} {:4} {}'.format(i, word_index, word))

 0 6925 impossible
 1 8832 mr
 2 1906 bennet
 3 6925 impossible
 4 14844 when
 5 6769 i
 6  714 am
 7 9198 not
 8  312 acquainted
 9 15085 with
10 6387 him


In [0]:
    
print(word_t.shape)

torch.Size([11, 15514])


In [0]:

word_t = word_t.unsqueeze(1)
word_t.shape


torch.Size([11, 1, 15514])

In [0]:
[(c, ord(c)) for c in sorted(set(text))]

[('\n', 10),
 (' ', 32),
 ('!', 33),
 ('#', 35),
 ('$', 36),
 ('%', 37),
 ("'", 39),
 ('(', 40),
 (')', 41),
 ('*', 42),
 (',', 44),
 ('-', 45),
 ('.', 46),
 ('/', 47),
 ('0', 48),
 ('1', 49),
 ('2', 50),
 ('3', 51),
 ('4', 52),
 ('5', 53),
 ('6', 54),
 ('7', 55),
 ('8', 56),
 ('9', 57),
 (':', 58),
 (';', 59),
 ('?', 63),
 ('@', 64),
 ('A', 65),
 ('B', 66),
 ('C', 67),
 ('D', 68),
 ('E', 69),
 ('F', 70),
 ('G', 71),
 ('H', 72),
 ('I', 73),
 ('J', 74),
 ('K', 75),
 ('L', 76),
 ('M', 77),
 ('N', 78),
 ('O', 79),
 ('P', 80),
 ('Q', 81),
 ('R', 82),
 ('S', 83),
 ('T', 84),
 ('U', 85),
 ('V', 86),
 ('W', 87),
 ('X', 88),
 ('Y', 89),
 ('Z', 90),
 ('[', 91),
 (']', 93),
 ('_', 95),
 ('a', 97),
 ('b', 98),
 ('c', 99),
 ('d', 100),
 ('e', 101),
 ('f', 102),
 ('g', 103),
 ('h', 104),
 ('i', 105),
 ('j', 106),
 ('k', 107),
 ('l', 108),
 ('m', 109),
 ('n', 110),
 ('o', 111),
 ('p', 112),
 ('q', 113),
 ('r', 114),
 ('s', 115),
 ('t', 116),
 ('u', 117),
 ('v', 118),
 ('w', 119),
 ('x', 120),
 ('y',