<h3 style="color:rgb(0,120,170)">RCE Sample:</h3>

The dataset consists of images of musical notes labeled as Eight, Half, Quarter, Sixteenth, and Whole. The code carries out the following steps:

Import necessary libraries: The script imports required libraries such as os, numpy, pandas, matplotlib, PIL (Python Imaging Library), torch, and scikit-learn.

Load the dataset and create labels: The code defines a dictionary for label encoding and decoding

Split the dataset: The dataset is split into training and testing sets using scikit-learn's train_test_split function.

Load and preprocess images: The images are loaded using the PIL library, resized, and normalized to have values between -1 and 1.

Define RCE functions: Two functions, euclidean_distance and rce, are defined to compute the Euclidean distance between two points and calculate the RCE value

Further split the dataset: The preprocessed images and their corresponding labels are split again into training and testing sets.

Set the theta parameter: The theta parameter is set to a value of 0.1. This can be adjusted based on the dataset.

Train the RCE classifier: The RCE values for each image in the training set are computed and stored in a list.

Classify the images in the testing set: The RCE values for each image in the testing set are computed and compared with the stored RCE values from the training set. 



In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import torch
from sklearn.model_selection import train_test_split

# Load dataset and create labels
label_code = {'Eight': 0, 'Half': 1, 'Quarter': 2, 'Sixteenth': 3, 'Whole': 4}
label_decode = ['Eight', 'Half', 'Quarter', 'Sixteenth', 'Whole']
df = pd.DataFrame(columns=['path', 'label'])

for dirname, _, filenames in os.walk('filesDir'):
    for filename in filenames:
        path = os.path.join(dirname, filename)
        name = dirname.split('\\')[-1]  # Change the separator to '\\'
        label = label_code[name]
        new_row = pd.DataFrame({'path': [path], 'label': [label]})
        df = pd.concat([df, new_row], ignore_index=True)

df.head()

# Split the dataset into training and testing sets
train, test = train_test_split(df, test_size=0.2, random_state=77)

# Load and preprocess images
image_size = (100, 100)
images = []
labels = []

for index, row in df.iterrows():
    img = Image.open(row['path']).convert('RGB').resize(image_size)
    img = np.array(img, dtype='float32')
    img = 1 - img / 127.5
    images.append(img)
    labels.append(row['label'])

images = np.array(images)

# RCE functions
def euclidean_distance(a, b):
    return torch.sqrt(torch.sum((a - b) ** 2))

def rce(a, b, theta):
    distance = euclidean_distance(a, b)
    return (1 / distance) * torch.exp(-distance / theta)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Move data to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
X_train = torch.tensor(X_train, device=device)
X_test = torch.tensor(X_test, device=device)

theta = 0.1  # You can adjust this parameter based on your dataset
RCE_values = []

# Train the RCE classifier
for i in range(len(X_train)):
    rce_val = rce(X_train[i], X_train, theta)
    RCE_values.append(rce_val)

RCE_values = torch.stack(RCE_values)

# Classify the images in the testing set
predictions = []

for i in range(len(X_test)):
    rce_val = rce(X_test[i], X_train, theta)
    closest_class = torch.argmin(torch.abs(RCE_values - rce_val))
    predictions.append(y_train[closest_class])

# Calculate the classification accuracy
accuracy = np.sum(np.array(predictions) == np.array(y_test)) / len(y_test)
print("Classification accuracy:", accuracy)


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [None]:
from PIL import Image

# Load and preprocess new unlabeled images
unlabeled_images_path = 'filesDir'
unlabeled_image_size = (100, 100)
unlabeled_images = []

for filename in os.listdir(unlabeled_images_path):
    if filename.endswith('.bmp'):
        path = os.path.join(unlabeled_images_path, filename)
        img = Image.open(path).convert('RGB').resize(unlabeled_image_size)
        img = np.array(img, dtype='float32')
        img = 1 - img / 127.5
        unlabeled_images.append(img)

unlabeled_images = np.array(unlabeled_images)


# Move unlabeled images to GPU if available
unlabeled_images = torch.tensor(unlabeled_images, device=device)

# Classify the unlabeled images
unlabeled_predictions = []

for i in range(len(unlabeled_images)):
    rce_val = rce(unlabeled_images[i], X_train, theta)
    closest_class = torch.argmin(torch.abs(RCE_values - rce_val))
    unlabeled_predictions.append(y_train[closest_class])

# # Print the predicted labels for the new images
# for i, prediction in enumerate(unlabeled_predictions):
#     print(f"Image {i+1}: {label_decode[prediction]}")


ground_truth_labels = []  # Replace this with the actual labels for the new images

if ground_truth_labels:
    accuracy = np.sum(np.array(unlabeled_predictions) == np.array(ground_truth_labels)) / len(ground_truth_labels)
    print("Classification accuracy for unlabeled images:", accuracy)


In [1]:
import re
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from vars import filename, f_reprize_end, f_msimbol_39_end, out_path, f_8va15ma, f_Accent, f_Pedal8va

# Read the file content
with open(f_Pedal8va, "r") as f:
    content = f.read()
    
#doesnt recognize 
#Name of ID...Unknown_zUnknown ID 457Score  0 9.bmp

# Extract the weights and class IDs
weights_and_classes = re.findall(r"ClassID\s+=\s+(\d+)\s+:.*?:\s+Weights\s+=\s+([\d\.,\s]+)", content)
classes, weights = zip(*weights_and_classes)
classes = list(map(int, classes))
weights = [list(map(float, w.strip().split(", "))) for w in weights]

# Convert to a numpy array
weights = np.array(weights)

# Create a DataFrame with the weights
df_weights = pd.DataFrame(weights)
df_weights['Class'] = classes

# Plot a separate heatmap for each class
for class_id in sorted(set(classes)):
    class_weights = df_weights[df_weights['Class'] == class_id].drop('Class', axis=1)
    #plt.figure(figsize=(15, 100))
    plt.figure(figsize=(15, 15))
    sns.set()
    ax = sns.heatmap(class_weights, annot=True, fmt=".2f", cmap="coolwarm")
    #ax = sns.heatmap(class_weights.head(100), annot=True, fmt=".2f", cmap="coolwarm")
    
    output_path = f'{out_path}f_Pedal8va_{class_id}.png'
    ax.get_figure().savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close()
    print(f"Image for class {class_id} saved to {output_path}")



Image for class 0 saved to C:\Users\azatv\JupyterProjects\RCE\first_test\Outside stave\f_Pedal8va_0.png
Image for class 1 saved to C:\Users\azatv\JupyterProjects\RCE\first_test\Outside stave\f_Pedal8va_1.png


In [3]:
import os
import re
from collections import defaultdict
from vars import input_directory, output_file_path

# Count the samples for each class
def count_samples(file_path):
    with open(file_path, "r") as file:
        data = file.read()
        class_counts = defaultdict(int)
        midlayer_pattern = re.compile(r"ClassID = (\d+) : Ramda")
        matches = midlayer_pattern.findall(data)
        for class_id in matches:
            class_counts[class_id] += 1
    return class_counts

# Read dat files from the input directory
dat_files = [f for f in os.listdir(input_directory) if f.endswith(".dat")]

# Write the output to a file
with open(output_file_path, "w") as output_file:
    for dat_file in dat_files:
        file_path = os.path.join(input_directory, dat_file)
        counts = count_samples(file_path)
        output_file.write(f"File: {dat_file}\n")
        for class_id, count in counts.items():
            output_file.write(f"Class {class_id}: {count} samples\n")
        output_file.write("\n")
