In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/auto-pian

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1jn_llb-3OnamAo89wMXsdDKi__iyjswV/auto-pian


In [2]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import os

In [3]:
directory_path = 'PianoFingeringDataset_v1.2/PianoFingeringDataset_v1.2/FingeringFiles/'

# Loads in ONE big CONCATENATED dataset from ALL dataframes
# my_dfs = []

# for filename in os.listdir(directory_path):
#     file_path = os.path.join(directory_path, filename)
#     if os.path.isfile(file_path):
#         df = pd.read_table(file_path, sep="\t", skiprows=1, names=["noteID", "onset_time", "offset_time", "spelled_pitch", "onset_velocity", "offset_velocity", "channel", "finger_number"])
#         my_dfs.append(df)
# #         print(df)

# big_df = pd.concat(my_dfs)
# print(big_df)

# load in just the fingering for 014-3: Mozart Piano Sonata K 330 in C major, 2nd mov.
for filename in os.listdir(directory_path):
    file_path = os.path.join(directory_path, filename)
    if os.path.isfile(file_path) and filename == os.listdir(directory_path)[0]:
        df = pd.read_table(file_path, sep="\t", skiprows=1, names=["noteID", "onset_time", "offset_time", "spelled_pitch", "onset_velocity", "offset_velocity", "channel", "finger_number"])
        print(filename)
        print(df.head(10))



117-1_fingering.txt
   noteID  onset_time  offset_time spelled_pitch  onset_velocity  \
0       0     1.50000      1.66650            A4              62   
1       1     1.50000      1.66650            A5              62   
2       2     1.66650      1.83349           Bb4              62   
3       3     1.66650      1.83349           Bb5              62   
4       4     1.83349      2.00000            A4              62   
5       5     1.83349      2.00000            A5              62   
6       6     2.00000      2.66650           G#5              62   
7       7     2.00000      3.49999           G#4              62   
8       8     2.00000      2.16650            D2              62   
9       9     2.16650      2.33349           Bb2              62   

   offset_velocity  channel  finger_number  
0               80        0              1  
1               80        0              5  
2               80        0              1  
3               80        0              4  
4     

In [4]:
df.isna().sum()

Unnamed: 0,0
noteID,0
onset_time,0
offset_time,0
spelled_pitch,0
onset_velocity,0
offset_velocity,0
channel,0
finger_number,0


# Idea 1: simple linear regression to predict finger_number

In [5]:
spelled_pitch_values = set()

for filename in os.listdir(directory_path):
    file_path = os.path.join(directory_path, filename)
    if os.path.isfile(file_path):
        df = pd.read_table(file_path, sep="\t", skiprows=1, names=["noteID", "onset_time", "offset_time", "spelled_pitch", "onset_velocity", "offset_velocity", "channel", "finger_number"])
        spelled_pitch_values.update(df['spelled_pitch'].unique())



# convert "spelled pitch" field to a number: create the mapping in the first place
spelled_pitch_values = sorted(spelled_pitch_values)

pitch_to_int_mapping = {p:i for i, p in enumerate(spelled_pitch_values)}

# print(len(spelled_pitch_values))
# print(len(pitch_to_int_mapping))
print(pitch_to_int_mapping)

{'A1': 0, 'A2': 1, 'A3': 2, 'A4': 3, 'A5': 4, 'A6': 5, 'B1': 6, 'B2': 7, 'B3': 8, 'B4': 9, 'B5': 10, 'B6': 11, 'Bb1': 12, 'Bb2': 13, 'Bb3': 14, 'Bb4': 15, 'Bb5': 16, 'Bb6': 17, 'C#1': 18, 'C#2': 19, 'C#3': 20, 'C#4': 21, 'C#5': 22, 'C#6': 23, 'C#7': 24, 'C1': 25, 'C2': 26, 'C3': 27, 'C4': 28, 'C5': 29, 'C6': 30, 'C7': 31, 'D1': 32, 'D2': 33, 'D3': 34, 'D4': 35, 'D5': 36, 'D6': 37, 'D7': 38, 'E1': 39, 'E2': 40, 'E3': 41, 'E4': 42, 'E5': 43, 'E6': 44, 'E7': 45, 'Eb1': 46, 'Eb2': 47, 'Eb3': 48, 'Eb4': 49, 'Eb5': 50, 'Eb6': 51, 'Eb7': 52, 'F#1': 53, 'F#2': 54, 'F#3': 55, 'F#4': 56, 'F#5': 57, 'F#6': 58, 'F#7': 59, 'F1': 60, 'F2': 61, 'F3': 62, 'F4': 63, 'F5': 64, 'F6': 65, 'F7': 66, 'G#1': 67, 'G#2': 68, 'G#3': 69, 'G#4': 70, 'G#5': 71, 'G#6': 72, 'G1': 73, 'G2': 74, 'G3': 75, 'G4': 76, 'G5': 77, 'G6': 78}


In [6]:
for filename in os.listdir(directory_path):
    file_path = os.path.join(directory_path, filename)
    if os.path.isfile(file_path) and filename == os.listdir(directory_path)[0]:
        df = pd.read_table(file_path, sep="\t", skiprows=1, names=["noteID", "onset_time", "offset_time", "spelled_pitch", "onset_velocity", "offset_velocity", "channel", "finger_number"])

num_data, num_features = df.shape
x = df.iloc[:, 0:num_features - 1]
y = df.iloc[:, num_features - 1]


# convert "spelled pitch" field to a number
x['spelled_pitch'] = x['spelled_pitch'].map(pitch_to_int_mapping)


x = torch.tensor(x.values.tolist(), dtype=torch.float32)
y = torch.tensor(y.values.astype(float).tolist())
y = y.unsqueeze(1)   # conver from size [289] to size [289, 1]

print(f"x's shape is {x.shape}")
print(f"y's shape is {y.shape}")

x's shape is torch.Size([293, 7])
y's shape is torch.Size([293, 1])


In [7]:
print(f"num features is {num_features} and num data is {num_data}")

num features is 8 and num data is 293


In [8]:
# Pytorch code that does the same least squares fitting, but nn.Module-ized. Iterative regression

class LinearRegressionBaseline(nn.Module):
    def __init__(self, input_dims, output_dims):
        super(LinearRegressionBaseline, self).__init__()
        self.linear = nn.Linear(input_dims, output_dims)

    def forward(self, x):
        return self.linear(x)

In [9]:
my_linear_model = LinearRegressionBaseline(num_features - 1, 1)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(my_linear_model.parameters(), lr=1e-2)    # Needs a VERY small learning rate, else NaN.


# normalize inputs before feeding to model
x = (x - x.mean()) / x.std()
y = (y - y.mean()) / y.std()


print(x.shape)
print(y.shape)
print(my_linear_model(x).shape)


torch.Size([293, 7])
torch.Size([293, 1])
torch.Size([293, 1])


In [10]:

epochs = 5000
for epoch in range(epochs):
    # Forward pass
    y_pred = my_linear_model(x)
    loss = criterion(y_pred, y)
#     print(x.shape)
#     print(y.shape)
#     print(y_pred)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 5000 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [5000/5000], Loss: 0.6820


In [11]:
# Check on testing set

x_test = x
y_test = y




# Ensure your model is in evaluation mode
my_linear_model.eval()

# Disable gradient computation for evaluation
with torch.no_grad():
    # Forward pass: Predict on the test set
    y_test_pred = my_linear_model(x_test)

    # Calculate the loss on the test set
    test_loss = criterion(y_test_pred, y_test)

    # Optional: Convert predictions to numpy for further analysis if needed
    y_test_pred_np = y_test_pred.cpu().numpy() if y_test_pred.is_cuda else y_test_pred.numpy()
    y_test_np = y_test.cpu().numpy() if y_test.is_cuda else y_test.numpy()

# Print the results
print("Test Results:")
print(f"Test Loss: {test_loss.item():.4f}")
print("Sample Predictions:")
for i in range(min(5, len(y_test_pred))):  # Display up to 5 predictions
    print(f"Predicted: {y_test_pred_np[i]}, Actual: {y_test_np[i]}")

Test Results:
Test Loss: 0.6820
Sample Predictions:
Predicted: [0.2264247], Actual: [0.17805079]
Predicted: [0.20039988], Actual: [1.4822726]
Predicted: [0.22577], Actual: [0.17805079]
Predicted: [0.19974422], Actual: [1.1562172]
Predicted: [0.22935295], Actual: [0.17805079]


In [12]:
# Compare with Scikit Normal Equation Result

import numpy as np
from sklearn.linear_model import LinearRegression
import torch

# Assuming x and y are your training tensors
x_np = x.numpy() if isinstance(x, torch.Tensor) else x
y_np = y.numpy() if isinstance(y, torch.Tensor) else y
y_np = y_np.reshape(-1)

print(x_np.shape)
print(y_np.shape)


# Validate with Scikit-Learn
lr = LinearRegression(fit_intercept=True)
lr.fit(x_np, y_np)

# Print Scikit-Learn weights for comparison
print("Weights (Scikit-Learn):", np.hstack([lr.intercept_, lr.coef_]))


x_test_np = x_np
y_test_np = y_np

# Predict on the test set
y_test_pred = lr.predict(x_test_np)

# Compute Mean Squared Error (MSE) loss
test_loss = np.mean((y_test_pred - y_test_np) ** 2)

# Print the loss
print("Test Loss (Scikit-Learn):", test_loss)    # About the same loss: 0.53!

(293, 7)
(293,)
Weights (Scikit-Learn): [ -87.728645      0.22731297   12.838359    -15.811432      0.13658929
    0.22497249    0.22756538 -104.933     ]
Test Loss (Scikit-Learn): 0.233975


# Idea 2: Add more layers

In [13]:
# deep neural net with more layers

class DeepNeuralNet(nn.Module):
    def __init__(self, input_dims, output_dims):
        super(DeepNeuralNet, self).__init__()
        self.mlp_layer = nn.Sequential(
            nn.Linear(input_dims, 4 * input_dims),
            nn.ReLU(),
            nn.Linear(4 * input_dims, output_dims),
        )


    def forward(self, x):
        return self.mlp_layer(x)

In [14]:
dnn_model = DeepNeuralNet(num_features - 1, 1)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(dnn_model.parameters(), lr=1e-2)    # Needs a VERY small learning rate, else NaN.


# normalize inputs before feeding to model
x = (x - x.mean()) / x.std()
y = (y - y.mean()) / y.std()


print(x.shape)
print(y.shape)
print(dnn_model(x).shape)


# Adding more layers helps a lot!

epochs = 1000
for epoch in range(epochs):
    # Forward pass
    y_pred = dnn_model(x)
    loss = criterion(y_pred, y)
#     print(x.shape)
#     print(y.shape)
#     print(y_pred)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 5000 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

torch.Size([293, 7])
torch.Size([293, 1])
torch.Size([293, 1])


# Testing code

Testing on some random other piece

In [15]:
for filename in os.listdir(directory_path):
    file_path = os.path.join(directory_path, filename)
    if os.path.isfile(file_path) and filename == os.listdir(directory_path)[1]:
        test_df = pd.read_table(file_path, sep="\t", skiprows=1, names=["noteID", "onset_time", "offset_time", "spelled_pitch", "onset_velocity", "offset_velocity", "channel", "finger_number"])
        print(filename)

x_test = test_df.iloc[:, 0:num_features - 1]
y_test = test_df.iloc[:, num_features - 1]

x_test['spelled_pitch'] = x_test['spelled_pitch'].map(pitch_to_int_mapping)


# print(x_test)

# NORMALIZE FIRST
# Normalize x_test
for col in x_test.columns:
    std = x_test[col].std()
    if std == 0:  # Check if all values are the same
        x_test[col] = 0  # Assign all entries in this column to 0
    else:
        x_test[col] = (x_test[col] - x_test[col].mean()) / std

# Normalize y_test
y_test_std = y_test.std()
if y_test_std == 0:  # Check if all values are the same
    y_test = 0  # Assign all entries in y_test to 0
else:
    y_test = (y_test - y_test.mean()) / y_test_std



x_test = torch.tensor(x_test.values.tolist(), dtype=torch.float32)
y_test = torch.tensor(y_test.values.astype(float).tolist())
y_test = y_test.unsqueeze(1)   # conver from size [289] to size [289, 1]


# print(y_test)


# print(x_test.isna().sum())
# print(y_test.shape)

002-5_fingering.txt


In [16]:

# Check on testing set

# x_test = x
# y_test = y

# print(x_test)
# print(y_test)


# Ensure your model is in evaluation mode
dnn_model.eval()

# Disable gradient computation for evaluation
with torch.no_grad():
    # Forward pass: Predict on the test set
    y_test_pred = dnn_model(x_test)

    # Calculate the loss on the test set
    test_loss = criterion(y_test_pred, y_test)

    # Optional: Convert predictions to numpy for further analysis if needed
    y_test_pred_np = y_test_pred.cpu().numpy() if y_test_pred.is_cuda else y_test_pred.numpy()
    y_test_np = y_test.cpu().numpy() if y_test.is_cuda else y_test.numpy()

# Print the results
print("Test Results:")
print(f"Test Loss: {test_loss.item():.4f}")
print("Sample Predictions:")
for i in range(min(5, len(y_test_pred))):  # Display up to 5 predictions
    print(f"Predicted: {y_test_pred_np[i]}, Actual: {y_test_np[i]}")

Test Results:
Test Loss: 4124.6953
Sample Predictions:
Predicted: [-24.858881], Actual: [0.3052198]
Predicted: [-25.188915], Actual: [0.64844805]
Predicted: [-23.112537], Actual: [0.3052198]
Predicted: [-24.228292], Actual: [0.99167633]
Predicted: [-21.366205], Actual: [0.3052198]


# Testing if Chinese reward model works

In [38]:
# ASK WILLY HOW TO WRITE THE PREDICTION CODE TO PRINT OUT THE FINGERINGS

from reward import PianoFingeringModel, Hand, Finger, Note, NoteFingerPair # see reward.py file
reward_model = PianoFingeringModel()

# WHY THE FUCK IS THERE A 10TH FINGER

def convert_finger_systems(finger_num):
    """
    Convert between:
    Original: 0-9 (0-4 right hand, 5-9 left hand)
    Reward model: 1-5 for each hand, with hand direction indicated separately
    """
    # Extract hand and normalize finger number
    if finger_num >= 5:  # Left hand
        hand = Hand.LEFT
        finger = Finger(finger_num - 4)  # Convert 5-9 to 1-5
    else:  # Right hand
        hand = Hand.RIGHT
        finger = Finger(finger_num + 1)  # Convert 0-4 to 1-5

    return finger, hand

def compare_fingering_sequences(pred_sequence, true_sequence, spelled_pitches):
    def convert_finger_number(f):
        # Convert from [0,4],[6,10] to [-5,-1],[1,5]
        if f >= 6:  # Right hand
            return f - 5
        return -(5 - f)  # Left hand

    pred_pairs = []
    true_pairs = []

    for i in range(len(pred_sequence)):
        note = Note(pitch=spelled_pitches[i], position=i)
        pred_finger = convert_finger_number(pred_sequence[i])
        true_finger = convert_finger_number(true_sequence[i])
        pred_pairs.append(NoteFingerPair(note=note, finger=Finger(abs(pred_finger))))
        true_pairs.append(NoteFingerPair(note=note, finger=Finger(abs(true_finger))))

    pred_rewards = []
    true_rewards = []

    for i in range(1, len(pred_pairs)):
        hand = Hand.RIGHT if spelled_pitches[i] > spelled_pitches[i-1] else Hand.LEFT

        pred_reward = reward_model.calculate_reward([pred_pairs[i-1]], [pred_pairs[i]], hand)
        true_reward = reward_model.calculate_reward([true_pairs[i-1]], [true_pairs[i]], hand)

        pred_rewards.append(pred_reward)
        true_rewards.append(true_reward)

    return {
        'pred_total': sum(pred_rewards),
        'pred_avg': sum(pred_rewards)/len(pred_rewards),
        'true_total': sum(true_rewards),
        'true_avg': sum(true_rewards)/len(true_rewards)
    }

spelled_pitches = df['spelled_pitch'].map(pitch_to_int_mapping).tolist()
metrics = compare_fingering_sequences(predicted_fingering, labels, spelled_pitches)
print("Predicted fingering rewards:", metrics['pred_total'], metrics['pred_avg'])
print("Ground truth fingering rewards:", metrics['true_total'], metrics['true_avg'])

ValueError: 4 is not in list