### SPEECH CLASSIFIER (audio -> spectrogram -> CNN) ###

In [None]:
#importing packages
import matplotlib.pyplot as plt
from matplotlib.backend_bases import RendererBase
from scipy import signal
from scipy.io import wavfile
import os
import numpy as np
from PIL import Image
from scipy.fftpack import fft
%matplotlib inline
from pydub import AudioSegment
import librosa
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.preprocessing import image
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import PIL

In [None]:
#reading files containing audio and the path where the image has to be stored 
# audio_path is scanned for sub-directories of audio files; pict_Path is the
# root under which the generated spectrogram images are written.
# NOTE(review): '..input/...' names a directory literally called '..input'
# relative to the working directory. If '../input/...' was intended, confirm
# and change it in every cell that uses an '..input' path.
audio_path = '..input/kan_audio_2'
pict_Path = '..input/picturedata'

# loading audio 

In [None]:
# Names of every sub-directory found directly under audio_path.
subFolderList = [
    entry
    for entry in os.listdir(audio_path)
    if os.path.isdir(audio_path + '/' + entry)
]

In [None]:
# Mirror the audio folder layout under pict_Path: one output directory per
# audio sub-directory. exist_ok=True keeps the cell safe to re-run and avoids
# the check-then-create race of os.path.exists() followed by os.makedirs().
os.makedirs(pict_Path, exist_ok=True)
subFolderList = []
for x in os.listdir(audio_path):
    if os.path.isdir(audio_path + '/' + x):
        subFolderList.append(x)
        os.makedirs(pict_Path + '/' + x, exist_ok=True)

In [None]:
# Collect the path of every audio file found in each sub-folder.
# The previous version also called find_files() on each individual file and
# discarded the result; that extra call is removed.
sample_audio = []
for i in subFolderList:
    sample_audio.extend(librosa.util.find_files(audio_path + "/" + i))

# converting audio to spectrogram

In [None]:
def log_specgram(audio, sample_rate, window_size=20,
                 step_size=10, eps=1e-10):
    """Return the frequency bins and the log-spectrogram of ``audio``.

    ``window_size`` and ``step_size`` are given in milliseconds and converted
    to sample counts with ``sample_rate``. The window length is passed to
    ``scipy.signal.spectrogram`` as ``nperseg`` and the step length as
    ``noverlap``, using a Hann window and no detrending.

    Returns:
        ``(freqs, log_spec)`` where ``log_spec`` is the transposed spectrogram
        cast to float32, with ``eps`` added before the natural log so that
        zero-valued bins stay finite.
    """
    def ms_to_samples(duration_ms):
        return int(round(duration_ms * sample_rate / 1e3))

    freqs, _times, power = signal.spectrogram(
        audio,
        fs=sample_rate,
        window='hann',
        nperseg=ms_to_samples(window_size),
        noverlap=ms_to_samples(step_size),
        detrend=False,
    )
    log_power = np.log(power.T.astype(np.float32) + eps)
    return freqs, log_power

In [None]:
fig = plt.figure(figsize=(10,10))

# Plot the log-spectrogram of the first nine audio files on a 3x3 grid.
for i, filepath in enumerate(sample_audio[:9]):
    plt.subplot(3,3,i+1)

    # Title each subplot with the name of the folder containing the file.
    # The previous version passed the whole list of path components to
    # plt.title(), which rendered the full list as the title.
    label = os.path.basename(os.path.dirname(filepath))
    plt.title(label)

    # wavfile.read returns (sample rate, samples)
    samplerate, test_sound  = wavfile.read(filepath)
    _, spectrogram = log_specgram(test_sound, samplerate)

    # Transpose back so frequency runs along the vertical axis.
    plt.imshow(spectrogram.T, aspect='auto', origin='lower')
    plt.axis('off')

In [None]:
def wav2img(wav_path, targetdir='', figsize=(4,4)):
    """Save the log-spectrogram of the WAV file at ``wav_path`` as a JPEG.

    The image is written to ``<targetdir>/<name>.jpg`` where ``<name>`` is the
    last ``/``-separated component of ``wav_path`` up to its first ``.wav``.

    ``figsize`` is accepted for backward compatibility only: ``plt.imsave``
    writes the array directly and does not draw on a figure.
    """
    # Read the file that was passed in. The previous version read the global
    # ``filepath`` left over from the plotting loop, so every call converted
    # the same file regardless of ``wav_path``.
    samplerate, test_sound = wavfile.read(wav_path)
    _, spectrogram = log_specgram(test_sound, samplerate)

    ## create output path
    output_file = wav_path.split('/')[-1].split('.wav')[0]
    output_file = targetdir + '/' + output_file
    plt.imsave('%s.jpg' % output_file, spectrogram)
    

In [None]:
# Write spectrogram images for up to 30 .wav files from each of the first
# 10 sub-folders, into the matching sub-folder of pict_Path.
for i, x in enumerate(subFolderList[:10]):
    print(i, ':', x)
    all_files = [y for y in os.listdir(audio_path +'/'+ x) if '.wav' in y]
    for file in all_files[:30]:
        # The separator between audio_path and the sub-folder was missing,
        # producing a path such as '<audio_path><x>/<file>' that does not
        # match the directory listed just above.
        wav2img(audio_path + '/' + x + '/' + file, pict_Path +'/'+ x)

# create a .csv containing the images and name of labels. 

In [None]:
# Load the table of images; later cells read its 'id' column to locate each
# .jpg and use every column other than 'id' and 'label' as the targets.
train=pd.read_csv("..input/image_class.csv")

In [None]:
# Load every image named in the 'id' column as an array scaled by 1/255.
train_image = []
for i in tqdm(range(train.shape[0])):
    jpg_path = '..input/picturedata' + '/' + train['id'][i] + '.jpg'
    img = image.img_to_array(image.load_img(jpg_path)) / 255
    train_image.append(img)

In [None]:
#splitting train & test
# Features are the stacked image arrays; targets are every CSV column other
# than 'id' and 'label'. 20% of the rows are held out, with a fixed seed.
X = np.array(train_image)
y = np.array(train.drop(columns=['id', 'label']))
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
#defining conv2d model
# Four Conv2D -> MaxPooling2D -> Dropout stages followed by two dense layers.
# The first layer declares the input shape so the model is built right away;
# without it the model has no weights yet and model.summary() raises.
input_shape = X_train.shape[1:]
# One output unit per target column instead of a hard-coded 10.
num_classes = y_train.shape[1]

model = Sequential()
model.add(Conv2D(filters=16, kernel_size=(5, 5), activation="relu",
                 input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters=32, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters=64, kernel_size=(5, 5), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters=64, kernel_size=(5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='sigmoid'))

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
#compiling the model
# Adam optimizer with binary cross-entropy loss, reporting accuracy.
# NOTE(review): binary cross-entropy on a sigmoid output scores each class
# column independently. If every sample carries exactly one label, confirm
# this is intended rather than softmax + categorical cross-entropy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
#fitting model
# Train for 10 epochs in batches of 100; the held-out split from
# train_test_split is passed as the validation data.
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), batch_size=100)

# testing the model

In [None]:
# Convert the test recording to a spectrogram image, then read the image(s)
# in the output folder scaled by 1/255. After the loop ``img`` holds the last
# image that was read.
voice_file = '..input/test_audio_file'
image_file = '..input/test_image'
wav2img(voice_file, image_file)
for i in os.listdir(image_file):
    img = image.img_to_array(Image.open(image_file + "/" + i)) / 255

In [None]:
# Print the (up to) three most probable classes for the test image.
classes = np.array(train.columns[2:])
# Add a leading batch axis instead of reshaping to a hard-coded image size.
proba = model.predict(img[np.newaxis, ...])
# Indices of the three largest probabilities, highest first.
top3 = np.argsort(proba[0])[:-4:-1]
# The previous loop referenced the undefined name ``top_3`` (NameError).
for class_index in top3:
    print("{}".format(classes[class_index])+" ({:.3})".format(proba[0][class_index]))

### TRANSLATOR ###

In [None]:
import numpy as np
import pandas as pd
from numpy import array
from numpy import argmax
import re
from unicodedata import normalize
import string
from pickle import load
from pickle import dump
from numpy.random import rand
from numpy.random import shuffle
import keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from keras.layers import Dense
from keras.utils import model_to_dot
from keras.utils.vis_utils import plot_model
from keras.utils import plot_model
from keras.callbacks import ModelCheckpoint
from keras .models import load_model
from nltk.translate.bleu_score import corpus_bleu

In [None]:
##defining functions##
#loading document
def load_doc(filename):
    """Return the whole content of the text file ``filename``.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default encoding, and it is closed even if reading fails.
    """
    with open(filename, mode='rt', encoding='utf-8') as file:
        return file.read()
# split a loaded document into sentences
def to_pairs(doc):
    """Split ``doc`` into lines and each line into its tab-separated fields.

    Leading and trailing whitespace of the whole document is stripped before
    splitting on newlines. Returns a list with one list of strings per line.
    """
    return [row.split('\t') for row in doc.strip().split('\n')]
#preprocessing data
def clean_pairs(lines):
    """Normalise every sentence of every pair and return them as an array.

    Each sentence is NFD-normalised and reduced to ASCII, split on whitespace,
    lower-cased, stripped of punctuation and non-printable characters, and
    only purely alphabetic tokens are kept and re-joined with single spaces.

    NOTE(review): the ASCII step drops every non-ASCII character, so text
    written in a non-Latin script is removed entirely. Confirm the corpus is
    romanised if both sides are expected to survive cleaning.
    """
    non_printable = re.compile('[^%s]' % re.escape(string.printable))
    strip_punct = str.maketrans('', '', string.punctuation)

    def clean_sentence(sentence):
        ascii_bytes = normalize('NFD', sentence).encode('ascii', 'ignore')
        kept = []
        for token in ascii_bytes.decode('UTF-8').split():
            token = non_printable.sub('', token.lower().translate(strip_punct))
            if token.isalpha():
                kept.append(token)
        return ' '.join(kept)

    return array([[clean_sentence(sentence) for sentence in pair]
                  for pair in lines])
def save_clean_data(sentences, filename):
    """Pickle ``sentences`` to ``filename`` and print a confirmation line."""
    # The context manager flushes and closes the file; the previous version
    # never closed the handle it opened.
    with open(filename, 'wb') as handle:
        dump(sentences, handle)
    print('Saved: %s' % filename)
    
    
    
# Load the tab-separated corpus, clean it and save the cleaned pairs.
filename = "..input/kan_eng.txt"
doc = load_doc(filename)
pairs = to_pairs(doc)
# Bind the result to its own name: assigning it to ``clean_pairs`` replaced
# the function with an array.
cleaned_pairs = clean_pairs(pairs)
save_clean_data(cleaned_pairs,'kannada-english.pkl')

# Spot-check the first few cleaned pairs. The previous ``range()`` call had
# no argument (TypeError); the bound is capped at the number of pairs.
for i in range(min(10, len(cleaned_pairs))):
    print('[%s] => [%s]' % (cleaned_pairs[i,0],cleaned_pairs[i,1]))

In [None]:
def load_clean_sentences(filename):
    """Return the object unpickled from ``filename``.

    NOTE: unpickling can execute arbitrary code; only load files this
    notebook wrote itself.
    """
    with open(filename, 'rb') as handle:
        return load(handle)


# The stray leading space before this ``def`` (an IndentationError) is removed.
def save_clean_data(sentences, filename):
    """Pickle ``sentences`` to ``filename`` and print a confirmation line."""
    with open(filename, 'wb') as handle:
        dump(sentences, handle)
    print('Saved: %s' % filename)
    
# Keep the first n_sentences pairs, shuffle their order in place, then use
# the first 800 for training and the remainder for testing.
n_sentences = 1000
n_train = 800
raw_dataset = load_clean_sentences('kannada-english.pkl')
dataset = raw_dataset[:n_sentences, :]
shuffle(dataset)
#splitting data set
train, test = dataset[:n_train], dataset[n_train:]
for subset, target in (
    (dataset, 'kannada-english-both.pkl'),
    (train, 'kannada-english-train.pkl'),
    (test, 'kannada-english-test.pkl'),
):
    save_clean_data(subset, target)

In [None]:
def load_clean_sentences(filename):
    """Return the object unpickled from ``filename``.

    NOTE: unpickling can execute arbitrary code; only load files this
    notebook wrote itself.
    """
    # The context manager closes the handle; the previous version leaked it.
    with open(filename, 'rb') as handle:
        return load(handle)
# Reload the combined, training and test sets written by the previous cell.
dataset, train, test = map(
    load_clean_sentences,
    (
        'kannada-english-both.pkl',
        'kannada-english-train.pkl',
        'kannada-english-test.pkl',
    ),
)

In [None]:
# fit a tokenizer
def create_tokenizer(lines):
    """Return a new ``Tokenizer`` fitted on the texts in ``lines``."""
    fitted = Tokenizer()
    fitted.fit_on_texts(lines)
    return fitted
def max_length(lines):
    """Return the largest whitespace-separated word count among ``lines``.

    Returns 0 when ``lines`` is empty instead of raising ``ValueError``.
    """
    return max((len(line.split()) for line in lines), default=0)

In [None]:
# prepare kannada tokenizer
# Kannada sentences are in column 1 of each pair. The maximum length must be
# measured on that same column; it was previously taken from column 0.
kan_tokenizer = create_tokenizer(dataset[:, 1])
kan_vocab_size = len(kan_tokenizer.word_index) + 1
kan_length = max_length(dataset[:, 1])
print('Kannada Vocabulary Size: %d' % kan_vocab_size)
print('Kannada Max Length: %d' % (kan_length))

In [None]:
#prepare english tokenizer
# English sentences are in column 0 of each pair. The maximum length must be
# measured on that same column; it was previously taken from column 1.
eng_tokenizer = create_tokenizer(dataset[:, 0])
eng_vocab_size = len(eng_tokenizer.word_index) + 1
eng_length = max_length(dataset[:, 0])
print('English Vocabulary Size: %d' % eng_vocab_size)
print('English Max Length: %d' % (eng_length))

In [None]:
# encode and pad sequences
def encode_sequences(tokenizer, length, lines):
    """Integer-encode ``lines`` with ``tokenizer`` and post-pad to ``length``."""
    return pad_sequences(
        tokenizer.texts_to_sequences(lines),
        maxlen=length,
        padding='post',
    )
# one hot encode target sequence
def encode_output(sequences, vocab_size):
    """One-hot encode every integer sequence over ``vocab_size`` classes.

    The result is reshaped to
    ``(sequences.shape[0], sequences.shape[1], vocab_size)``.
    """
    one_hot = array([to_categorical(seq, num_classes=vocab_size)
                     for seq in sequences])
    n_samples, n_steps = sequences.shape[0], sequences.shape[1]
    return one_hot.reshape(n_samples, n_steps, vocab_size)

In [None]:
# Source (X) is the Kannada column, target (Y) is the English column.
trainX = encode_sequences(kan_tokenizer, kan_length, train[:, 1])
trainY = encode_sequences(eng_tokenizer, eng_length, train[:, 0])
# The target ids come from eng_tokenizer, so they must be one-hot encoded
# over the English vocabulary (this previously used kan_vocab_size).
trainY = encode_output(trainY, eng_vocab_size)
testX = encode_sequences(kan_tokenizer, kan_length, test[:, 1])
testY = encode_sequences(eng_tokenizer, eng_length, test[:, 0])
testY = encode_output(testY, eng_vocab_size)

In [None]:
#define model
def define_model(src_vocab, tar_vocab, src_timesteps, tar_timesteps, n_units):
    """Build the (uncompiled) encoder-decoder model.

    Layers, in order: a masked ``Embedding`` over ``src_vocab`` with input
    length ``src_timesteps``; an ``LSTM`` encoder; ``RepeatVector`` repeating
    the encoding ``tar_timesteps`` times; an ``LSTM`` decoder returning
    sequences; and a time-distributed softmax ``Dense`` over ``tar_vocab``.
    Every recurrent and embedding layer uses ``n_units`` units.
    """
    layers = [
        Embedding(src_vocab, n_units, input_length=src_timesteps, mask_zero=True),
        LSTM(n_units),
        RepeatVector(tar_timesteps),
        LSTM(n_units, return_sequences=True),
        TimeDistributed(Dense(tar_vocab, activation='softmax')),
    ]
    seq2seq = Sequential()
    for layer in layers:
        seq2seq.add(layer)
    return seq2seq
# The source side is Kannada (trainX is encoded with kan_tokenizer) and the
# target side is the one-hot encoded trainY; the previous call passed the
# source and target arguments the wrong way round. The sequence lengths and
# output width are read from the encoded arrays so the model always matches
# the data it is trained on.
model = define_model(
    src_vocab=kan_vocab_size,
    tar_vocab=trainY.shape[2],
    src_timesteps=trainX.shape[1],
    tar_timesteps=trainY.shape[1],
    n_units=256,
)
model.compile(optimizer='adam', loss='categorical_crossentropy')
# summary() prints by itself; wrapping it in print() only added a "None" line.
model.summary()

In [None]:
# fit model
# NOTE(review): this is the same path the corpus was read from with load_doc
# earlier in this file, so saving a checkpoint here would overwrite the
# corpus. It is also the path load_model reads later, so if a dedicated model
# path is introduced both places must change together.
filename = '..input/kan_eng.txt'
# Save the model only when the validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
# Train for 30 epochs in batches of 500, validating on the test split.
model.fit(trainX, trainY, epochs=30, batch_size=500, validation_data=(testX, testY), callbacks=[checkpoint], verbose=2)

In [None]:
# map an integer to a word
def word_for_id(integer, tokenizer):
    """Return the first word in ``tokenizer.word_index`` mapped to ``integer``.

    Returns ``None`` when no word has that index.
    """
    matches = (
        word
        for word, index in tokenizer.word_index.items()
        if index == integer
    )
    return next(matches, None)

In [None]:
#generate target given source sequence
def predict_sequence(model, tokenizer, source):
    """Return the words predicted for ``source``, joined by single spaces.

    The first prediction of ``model.predict(source)`` is decoded step by step
    by taking the arg-max index and looking it up with ``word_for_id``.
    Decoding stops at the first index that has no word.
    """
    step_probabilities = model.predict(source, verbose=0)[0]
    words = []
    for token_id in map(argmax, step_probabilities):
        token = word_for_id(token_id, tokenizer)
        if token is None:
            break
        words.append(token)
    return ' '.join(words)

In [None]:
def evaluate_model(model, tokenizer, sources, raw_dataset):
    """Translate every row of ``sources`` and print corpus BLEU-1..4 scores.

    Args:
        model: trained model passed on to ``predict_sequence``.
        tokenizer: tokenizer used to turn predicted indices back into words.
        sources: 2-D array of encoded source sequences, one per row.
        raw_dataset: rows of ``(target, source)`` text, aligned with
            ``sources``.

    The first 10 source/target/prediction triples are printed as a sample.
    """
    actual, predicted = list(), list()
    for i, source in enumerate(sources):
        # predict() expects a batch axis, so reshape to (1, timesteps).
        source = source.reshape((1, source.shape[0]))
        # Decode with the tokenizer that was passed in; the previous version
        # ignored the parameter and always used the global kan_tokenizer.
        translation = predict_sequence(model, tokenizer, source)
        raw_target, raw_src = raw_dataset[i]
        if i < 10:
            print('src=[%s], target=[%s], predicted=[%s]' % (raw_src, raw_target, translation))
        # corpus_bleu takes a list of reference token lists per hypothesis.
        actual.append([raw_target.split()])
        # This append used to sit outside the loop, so only the last
        # translation was kept and the two lists had different lengths.
        predicted.append(translation.split())
    #calculate BLEU score
    print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
    print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
    print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))

#testing the model

In [None]:
# Reload the saved splits and rebuild the tokenizers and encoded sources.
# The first assignment was misspelled ``ataset``, so ``dataset`` was never
# refreshed by this cell.
dataset = load_clean_sentences('kannada-english-both.pkl')
train = load_clean_sentences('kannada-english-train.pkl')
test = load_clean_sentences('kannada-english-test.pkl')
eng_tokenizer = create_tokenizer(dataset[:, 0])
eng_vocab_size = len(eng_tokenizer.word_index) + 1
eng_length = max_length(dataset[:, 0])
kan_tokenizer = create_tokenizer(dataset[:, 1])
kan_vocab_size = len(kan_tokenizer.word_index) + 1
kan_length = max_length(dataset[:, 1])
# The sources are Kannada sentences, so they are padded to the Kannada
# maximum length (this previously used eng_length).
trainX = encode_sequences(kan_tokenizer, kan_length, train[:, 1])
testX = encode_sequences(kan_tokenizer, kan_length, test[:, 1])

In [None]:
# Load the saved model and report BLEU scores on the train and test splits.
# NOTE(review): the model is loaded from a '.txt' path that is the same
# string the corpus is read from earlier in this file - confirm.
model = load_model('..input/kan_eng.txt')
for split_name, split_sources, split_pairs in (
    ('train', trainX, train),
    ('test', testX, test),
):
    print(split_name)
    evaluate_model(model, eng_tokenizer, split_sources, split_pairs)