# Model Use: *GRU*

# 1. Environement Setup

In [72]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense
from sklearn.model_selection import train_test_split

file_path = 'normalized_output.csv'

# 2. Data Processing

In [35]:
# Load the data
data = pd.read_csv(file_path)

# Function to convert string representation of a list to an actual list
def convert_string_to_list(string):
    return np.fromstring(string[1:-1], sep=',') 

X = np.array([convert_string_to_list(row) for row in data['ph_seq_encoded']])
y_note = np.array([convert_string_to_list(row) for row in data['note_seq_encoded']])
y_dur = np.array([convert_string_to_list(row) for row in data['ph_dur']])

# Reshape the arrays
X_reshaped = X.reshape((X.shape[0], X.shape[1], 1))
y_note_reshaped = y_note.reshape((y_note.shape[0], y_note.shape[1], 1))
y_dur_reshaped = y_dur.reshape((y_dur.shape[0], y_dur.shape[1], 1))

# print(X_reshaped[0].dtype, y_note_reshaped[0].dtype, y_dur_reshaped[0].dtype)

# Split the data into training and test sets
X_train, X_test, y_note_train, y_note_test, y_dur_train, y_dur_test = train_test_split(
    X_reshaped, y_note_reshaped, y_dur_reshaped, test_size=0.3, random_state=42)

y_dur_train.shape

(2815, 283, 1)

# 3. Model Training

In [88]:
# Input layer
input_layer = Input(shape=(X_train.shape[1], X_train.shape[2]))

# GRU layers
gru1 = GRU(64, return_sequences=True)(input_layer)
gru2 = GRU(32, return_sequences=True)(gru1)

# Output layers for y_note
y_note_output = Dense(10, activation='relu')(gru2)
y_note_output = Dense(1, activation='sigmoid', name='y_note_output')(y_note_output)

# Output layers for y_dur
y_dur_output = Dense(10, activation='relu')(gru2)
y_dur_output = Dense(1, activation='sigmoid', name='y_dur_output')(y_dur_output)

# Building the model
model = Model(inputs=input_layer, outputs=[y_note_output, y_dur_output])

# Compiling the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Training the model
history = model.fit(X_train, [y_note_train, y_dur_train], epochs=200, batch_size=64, validation_split=0.2)

# Evaluate the model on the test set
test_loss = model.evaluate(X_test, [y_note_test, y_dur_test])

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [89]:
# Save the model
model.save('./models/model_rnn_gru.h5')

  saving_api.save_model(


# 4. Validation
Put user input and get output

### 4.1 Input

In [90]:
import json
import joblib
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the encoding dictionary from the ph_token_to_int.json file
with open('ph_token_to_int.json', 'r') as file:
    ph_token_to_int = json.load(file)

# Input sequence
input_sequence = "AP n ei f a g e n a j i f u y u a p u AP n ei f a g e n a l e y u d ao en AP"

# Split the input sequence into individual phonemes
input_phonemes = input_sequence.split()

# Convert the phonemes to their corresponding integers using the encoding dictionary
encoded_sequence = [ph_token_to_int[phoneme] for phoneme in input_phonemes]

# Convert the sequence to a numpy array and pad it to the right length
ph_seq_encoded = np.array([encoded_sequence]) 

user_input = pad_sequences(ph_seq_encoded, maxlen=283, padding='post', truncating='post', dtype='float64')


### 4.2 Inference

In [91]:
# Prediction
y_note_pred, y_dur_pred = model.predict(user_input)
print(y_note_pred.shape, y_note_pred.shape)

(1, 283, 1) (1, 283, 1)


### 4.3 Decoding

In [92]:
# Resize the result
input_length = len(encoded_sequence)
y_note_pred = y_note_pred[:, :input_length, :]
y_dur_pred = y_dur_pred[:, :input_length, :]

# print(y_note_pred.shape, y_dur_pred.shape)

# Load the saved scaler
note_scaler = joblib.load('note_scaler.pkl')

y_note_pred_flattened = y_note_pred.flatten()

# Inverse transform the predictions
y_note_pred_original = note_scaler.inverse_transform(y_note_pred_flattened.reshape(-1, 1))

# Reshape back to the original shape if required
y_note_pred_original = y_note_pred_original.reshape(y_note_pred.shape)
y_note_pred_original = np.round(y_note_pred_original).astype(int)

f0_timestep = 0.005
f0_seq_constant = 250.1

total_time = np.sum(y_dur_pred)
print(total_time)
f0_size = int(total_time / f0_timestep)
# print(f0_size)

f0_seq = [f0_seq_constant] * f0_size


13.599577


### 4.5 Make it .ds file

In [93]:
y_note_pred_flattened = y_note_pred_original[0]
y_note_pred_output = [int(i) for i in y_note_pred_flattened]

y_dur_pred_flattened = y_dur_pred[0]
y_dur_pred_output = [float(i) for i in y_dur_pred_flattened]

# Load the token-to-int mappings from the JSON files
def load_mapping(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        token_to_int = json.load(f)
    # Invert the dictionary to create an int-to-token mapping
    int_to_token = {v: k for k, v in token_to_int.items()}
    return int_to_token

# Decoding function using the mappings
def decode_predictions(prediction_integers, mapping):
    return [mapping.get(i, 'Unknown') for i in prediction_integers]

note_int_to_token = load_mapping('note_token_to_int.json')

decoded_note_seq = decode_predictions(y_note_pred_output, note_int_to_token)

# Print or return the decoded sequences
# print("Decoded Phonetic Sequence:", input_phonemes)
# print("Decoded Note Sequence:", decoded_note_seq)

ph_dur = ' '.join(map(str, y_dur_pred_output))
note_seq = ' '.join(map(str, decoded_note_seq))
f0_seq = ' '.join(map(str, f0_seq))

file = {
    'ph_seq': input_sequence,
    'ph_dur': ph_dur,
    'note_seq': note_seq,
    'f0_seq': f0_seq,
    'f0_timestep': f0_timestep
}

with open('rnn_gru_output.ds', 'w') as json_file:
    json.dump(file, json_file, indent=4)

  y_note_pred_output = [int(i) for i in y_note_pred_flattened]
  y_dur_pred_output = [float(i) for i in y_dur_pred_flattened]
