In [1]:
import os
from music21 import converter, note, chord
import pandas as pd

def _mean_or_nan(values):
    """Return the arithmetic mean of ``values``, or NaN when the list is empty."""
    return sum(values) / len(values) if values else float('nan')


def calculate_descriptive_stats(folder_paths):
    """Summarise the ``.mid`` files of each composer folder as one DataFrame row.

    Parameters
    ----------
    folder_paths : iterable of str
        One directory per composer; the directory name is used as the
        composer label.

    Returns
    -------
    pandas.DataFrame
        One row per folder with the columns

        * ``Composer`` - base name of the folder.
        * ``Mean Note Duration`` - mean quarter length over all notes and
          chords (a chord contributes the mean duration of its notes).
        * ``Tempo`` - mean over files of the quarter-note BPM given by the
          file's MetronomeMark objects.
        * ``Key Signatures`` - sorted, ``", "``-joined distinct key signatures.
        * ``Chord Complexity`` - mean over files of the average number of
          pitches per chord of the chordified score.
        * ``Pitch Range`` - mean over files of (highest - lowest MIDI pitch).

        A statistic is NaN when no file in the folder provides data for it
        (for example an empty folder), so the composer row is still present.
    """
    columns = ['Composer', 'Mean Note Duration', 'Tempo', 'Key Signatures', 'Chord Complexity', 'Pitch Range']
    data = {column: [] for column in columns}

    for folder_path in folder_paths:
        # normpath drops a trailing separator, which would make basename() return ''.
        composer = os.path.basename(os.path.normpath(folder_path))

        durations = []
        tempos = []
        key_signatures = set()
        chord_complexities = []
        pitch_ranges = []

        # Sorted so that the processing order does not depend on the file system.
        for file_name in sorted(os.listdir(folder_path)):
            if not file_name.endswith(".mid"):
                continue

            midi = converter.parse(os.path.join(folder_path, file_name))
            flat = midi.flatten()  # flattened once and reused by every statistic below

            # Note durations; float() keeps the column numeric even for fractional lengths.
            for element in flat.notesAndRests:
                if isinstance(element, note.Note):
                    durations.append(float(element.duration.quarterLength))
                elif isinstance(element, chord.Chord):
                    member_lengths = [float(n.duration.quarterLength) for n in element]
                    durations.append(sum(member_lengths) / len(member_lengths))

            # Tempo comes from the tempo marks of the file. A time signature
            # only describes the metre, so it cannot be converted to BPM.
            bpms = [mark.getQuarterBPM() for mark in flat.getElementsByClass('MetronomeMark')]
            bpms = [bpm for bpm in bpms if bpm is not None]
            if bpms:
                tempos.append(sum(bpms) / len(bpms))

            key_signatures.update(str(k) for k in flat.getElementsByClass('KeySignature'))

            # Average number of pitches per vertical slice of the chordified score.
            chord_sizes = [len(c.pitches) for c in midi.chordify().flatten().getElementsByClass(chord.Chord)]
            if chord_sizes:
                chord_complexities.append(sum(chord_sizes) / len(chord_sizes))

            # A file without pitched notes has no range; skip it instead of calling max() on [].
            all_pitches = [p.midi for p in flat.pitches]
            if all_pitches:
                pitch_ranges.append(max(all_pitches) - min(all_pitches))

        data['Composer'].append(composer)
        data['Mean Note Duration'].append(_mean_or_nan(durations))
        data['Tempo'].append(_mean_or_nan(tempos))
        # Sorted because set iteration order is not stable between runs.
        data['Key Signatures'].append(", ".join(sorted(key_signatures)))
        data['Chord Complexity'].append(_mean_or_nan(chord_complexities))
        data['Pitch Range'].append(_mean_or_nan(pitch_ranges))

    return pd.DataFrame(data, columns=columns)

# Root folder that holds one sub-folder of MIDI files per composer. Set the
# MIDI_DATASET_DIR environment variable to run the notebook on another
# machine; the default is the original local path.
DATASET_DIR = os.environ.get(
    "MIDI_DATASET_DIR",
    "C:\\Users\\MEEVAL ANNA SAJI\\Desktop\\DATASET",
)
COMPOSER_FOLDERS = ["chopin", "beeth", "haydn", "liszt"]

folder_paths = [os.path.join(DATASET_DIR, name) for name in COMPOSER_FOLDERS]

# One row of descriptive statistics per composer folder.
composer_stats_df = calculate_descriptive_stats(folder_paths)
print(composer_stats_df)


ModuleNotFoundError: No module named 'music21'

In [None]:
pip install pandas

In [None]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Check for Missing Values
print("Missing Values:")
print(composer_stats_df.isnull().sum())

# 'Composer' labels the rows; it is not a feature. One-hot encoding it would
# give every composer a private column and distort the similarities.
feature_df = composer_stats_df.drop(columns=['Composer'])

# Split the features by dtype: numeric columns are imputed, the rest are one-hot encoded.
numeric_data = feature_df.select_dtypes(include='number')
categorical_cols = [col for col in feature_df.columns if col not in numeric_data.columns]
# NOTE(review): 'Key Signatures' holds one ", "-joined string per composer, so
# its one-hot columns are close to per-composer identifiers - consider
# splitting it into one flag per key signature.

# Handle missing values for numeric data
imputer = SimpleImputer(strategy='mean')
composer_stats_df_imputed = pd.DataFrame(
    imputer.fit_transform(numeric_data),
    columns=numeric_data.columns,
    index=feature_df.index,
)

# Encode categorical columns next to the *imputed* numeric columns.
# sparse_threshold=0 forces a dense array so it can be wrapped in a DataFrame.
encoder = OneHotEncoder()
transformers = [('encoder', encoder, categorical_cols)]
column_transformer = ColumnTransformer(transformers, remainder='passthrough', sparse_threshold=0)
encoder_input = pd.concat([feature_df[categorical_cols], composer_stats_df_imputed], axis=1)
composer_stats_df_encoded = pd.DataFrame(
    column_transformer.fit_transform(encoder_input),
    columns=column_transformer.get_feature_names_out(),
    index=feature_df.index,
)

# Standardize the data so that no feature dominates because of its unit.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(composer_stats_df_encoded)

# Per-feature means that the scaler removed
centralized_means = dict(zip(composer_stats_df_encoded.columns, scaler.mean_))

# Centered, unit-variance features: this is what the similarity is computed on.
centralized_data = pd.DataFrame(
    scaled_data,
    columns=composer_stats_df_encoded.columns,
    index=feature_df.index,
)

# Calculate cosine similarity matrix between composers
cosine_sim_matrix = cosine_similarity(centralized_data)

# Convert the cosine similarity matrix to a DataFrame for better visualization
composers = composer_stats_df['Composer'].tolist()
cosine_sim_df = pd.DataFrame(cosine_sim_matrix, index=composers, columns=composers)

print("\nCentralized Cosine Similarity Matrix:")
print(cosine_sim_df)

ModuleNotFoundError: No module named 'pandas'

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# NOTE(review): these values are typed in by hand rather than taken from a
# computed variable; re-check them whenever the similarity calculation changes.
cosine_similarity_matrix = [
    [1.000000, 0.955355, -0.599510, -0.416699 ],
    [0.955355 ,1.000000 ,-0.694818, -0.305791 ],
    [-0.599510 ,-0.694818 , 1.000000, -0.464440 ],
    [-0.416699, -0.305791, -0.464440,  1.000000 ]
]

# Define the row and column labels
composers = ['Chopin','Beethoven', 'Haydn', 'Liszt']

# Create a heatmap. Cosine similarity lies in [-1, 1]; fixing the colour range
# and centring the diverging colormap on 0 keeps "no similarity" at the neutral
# colour instead of wherever the data midpoint happens to fall.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cosine_similarity_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    center=0,
    xticklabels=composers,
    yticklabels=composers,
    ax=ax,
)
ax.set_title('Centralized Cosine Similarity Matrix')
ax.set_xlabel('Composers')
ax.set_ylabel('Composers')
ax.tick_params(axis='x', labelrotation=45)
ax.tick_params(axis='y', labelrotation=0)
plt.show()

: 

In [None]:
import os
from shutil import copyfile

# Define your musician names and similarity matrix as before
musician_names = ['chopin',  'beeth', 'haydn', 'liszt']
cosine_similarity_matrix = [
    [1.000000, 0.955355, -0.599510, -0.416699 ],
    [0.955355 ,1.000000 ,-0.694818, -0.305791 ],
    [-0.599510 ,-0.694818 , 1.000000, -0.464440 ],
    [-0.416699, -0.305791, -0.464440,  1.000000 ]
]

# Define a similarity threshold
similarity_threshold = 0.7

# Dataset root: overridable through MIDI_DATASET_DIR, defaulting to the original local path.
dataset_dir = os.environ.get('MIDI_DATASET_DIR', 'C:\\Users\\MEEVAL ANNA SAJI\\Desktop\\DATASET')

# Union-find over the musician indices, so that groups are transitive:
# if A~B and B~C they end up in ONE group instead of two overlapping ones.
group_parent = list(range(len(cosine_similarity_matrix)))


def _find_group(index):
    """Return the representative (smallest index) of the group containing ``index``."""
    while group_parent[index] != index:
        index = group_parent[index]
    return index


# Iterate through the upper triangle of the similarity matrix and merge similar datasets
for i in range(len(cosine_similarity_matrix)):
    for j in range(i + 1, len(cosine_similarity_matrix[i])):
        if cosine_similarity_matrix[i][j] >= similarity_threshold:
            root_i, root_j = _find_group(i), _find_group(j)
            if root_i != root_j:
                # Always attach to the smaller root so the group key is its first member.
                group_parent[max(root_i, root_j)] = min(root_i, root_j)

# Dictionary of grouped MIDI datasets: first member index -> ascending member indices.
# Musicians without any similar partner are not part of a group.
members_by_root = {}
for index in range(len(group_parent)):
    members_by_root.setdefault(_find_group(index), []).append(index)
grouped_datasets = {root: members for root, members in members_by_root.items() if len(members) > 1}

# Create combined folder and copy MIDI files from corresponding musician folders
output_folder = os.path.join(dataset_dir, 'combined')
os.makedirs(output_folder, exist_ok=True)

# Destination file name -> musician that supplied it, to detect name clashes within this run.
copied_from = {}

# Copy MIDI files to the combined folder based on grouped indices
for group_indices in grouped_datasets.values():
    group_names = [musician_names[i] for i in group_indices]
    print("Group Indices:", group_indices)
    print("Group Names:", group_names)
    for musician_name in group_names:
        musician_folder = os.path.join(dataset_dir, musician_name)
        for file_name in os.listdir(musician_folder):
            if file_name.endswith(".mid"):
                source_file = os.path.join(musician_folder, file_name)
                destination_name = file_name
                # A file name already used by another musician would be silently
                # overwritten; disambiguate it with the musician's name instead.
                if copied_from.get(destination_name, musician_name) != musician_name:
                    destination_name = f"{musician_name}_{file_name}"
                copied_from[destination_name] = musician_name
                destination_file = os.path.join(output_folder, destination_name)
                copyfile(source_file, destination_file)

# Print the name pairs of combined musicians
for group_indices in grouped_datasets.values():
    group_names = [musician_names[i] for i in group_indices]
    print("Combined musicians:", group_names)


: 

In [None]:
import os
from shutil import copyfile

# Define your musician names and similarity matrix as before
musician_names = ['chopin', 'beeth', 'haydn', 'liszt']
cosine_similarity_matrix = [
    [1.000000, 0.955355, -0.599510, -0.416699],
    [0.955355, 1.000000, -0.694818, -0.305791],
    [-0.599510, -0.694818, 1.000000, -0.464440],
    [-0.416699, -0.305791, -0.464440, 1.000000]
]

# Define a similarity threshold
similarity_threshold = 0.7

# Dataset root: overridable through MIDI_DATASET_DIR, defaulting to the original local path.
dataset_dir = os.environ.get('MIDI_DATASET_DIR', 'C:\\Users\\MEEVAL ANNA SAJI\\Desktop\\DATASET')

# Union-find over the musician indices, so that groups are transitive:
# if A~B and B~C they end up in ONE group instead of two overlapping ones.
group_parent = list(range(len(cosine_similarity_matrix)))


def _find_group(index):
    """Return the representative (smallest index) of the group containing ``index``."""
    while group_parent[index] != index:
        index = group_parent[index]
    return index


# Iterate through the upper triangle of the similarity matrix and merge similar datasets
for i in range(len(cosine_similarity_matrix)):
    for j in range(i + 1, len(cosine_similarity_matrix[i])):
        if cosine_similarity_matrix[i][j] >= similarity_threshold:
            root_i, root_j = _find_group(i), _find_group(j)
            if root_i != root_j:
                # Always attach to the smaller root so the group key is its first member.
                group_parent[max(root_i, root_j)] = min(root_i, root_j)

# Dictionary of grouped MIDI datasets: first member index -> ascending member indices.
# Musicians without any similar partner are not part of a group.
members_by_root = {}
for index in range(len(group_parent)):
    members_by_root.setdefault(_find_group(index), []).append(index)
grouped_datasets = {root: members for root, members in members_by_root.items() if len(members) > 1}

# Create folders for combined MIDI files based on grouped indices
output_folder = os.path.join(dataset_dir, 'combined')
os.makedirs(output_folder, exist_ok=True)

# Copy MIDI files to the combined folders based on grouped indices
for group_indices in grouped_datasets.values():
    group_names = [musician_names[i] for i in group_indices]
    group_folder_name = '_'.join(group_names)
    group_folder_path = os.path.join(output_folder, group_folder_name)
    os.makedirs(group_folder_path, exist_ok=True)

    # Destination file name -> musician that supplied it, to detect name clashes inside this group.
    copied_from = {}

    for musician_name in group_names:
        musician_folder = os.path.join(dataset_dir, musician_name)
        for file_name in os.listdir(musician_folder):
            if file_name.endswith(".mid"):
                source_file = os.path.join(musician_folder, file_name)
                destination_name = file_name
                # A file name already used by another musician would be silently
                # overwritten; disambiguate it with the musician's name instead.
                if copied_from.get(destination_name, musician_name) != musician_name:
                    destination_name = f"{musician_name}_{file_name}"
                copied_from[destination_name] = musician_name
                destination_file = os.path.join(group_folder_path, destination_name)
                copyfile(source_file, destination_file)

# Print the name pairs of combined musicians and their corresponding folders
for group_indices in grouped_datasets.values():
    group_names = [musician_names[i] for i in group_indices]
    print("Combined musicians:", group_names)
    group_folder_name = '_'.join(group_names)
    print("Group Folder:", group_folder_name)


: 

In [None]:
from music21 import instrument, note, chord

def extract_notes(file):
    """Turn parsed scores into one flat list of note / chord tokens.

    Parameters
    ----------
    file : iterable
        Parsed music21 streams (one per MIDI file).

    Returns
    -------
    list of str
        One token per element, in traversal order: a note becomes the string
        of its pitch, a chord becomes its normal-order pitch classes joined
        with ``"."``. Every other element is ignored.
    """
    notes = []
    for score in file:
        partitioned = instrument.partitionByInstrument(score)
        # partitionByInstrument may return None (no instrument to partition
        # by); walk the score itself in that case instead of failing on `.parts`.
        parts = partitioned.parts if partitioned is not None else [score]
        for part in parts:
            for element in part.recurse():
                if isinstance(element, note.Note):
                    notes.append(str(element.pitch))
                elif isinstance(element, chord.Chord):
                    notes.append(".".join(str(n) for n in element.normalOrder))

    return notes

# Per-folder report of how many note/chord tokens the MIDI files contain
def display_total_notes(folder_paths):
    """Print the number of extracted note/chord tokens for every folder.

    Each folder is loaded with ``combine_datasets`` and tokenised with
    ``extract_notes``; the lower-cased folder name is used as the label.
    """
    for path in folder_paths:
        label = os.path.basename(path).lower()
        token_count = len(extract_notes(combine_datasets([path])))
        print(f"Total notes in {label} midis in the dataset: {token_count}")

# Print the token total for every composer folder listed in folder_paths.
display_total_notes(folder_paths)

: 

In [None]:
def display_first_10_notes(folder_paths):
    """Print the first ten extracted note/chord tokens of every folder.

    Each folder is loaded with ``combine_datasets`` and tokenised with
    ``extract_notes``; the lower-cased folder name is used as the label.
    """
    for path in folder_paths:
        label = os.path.basename(path).lower()
        tokens = extract_notes(combine_datasets([path]))
        print(f"First 10 values in {label} midis in the dataset:", tokens[:10])

# Show a ten-token preview for every composer folder listed in folder_paths.
display_first_10_notes(folder_paths)

: 

In [None]:
from collections import Counter

# Function to create a count dictionary for each musician's folder
def count_notes(folder_paths):
    all_notes = []
    
    for folder_path in folder_paths:
        musician = os.path.basename(folder_path).lower()
        all_midis = combine_datasets([folder_path])
        notes = extract_notes(all_midis)
        all_notes.extend(notes)

        # Display total unique notes for each musician's folder
        count_num = Counter(notes)
        print(f"Total unique notes in {musician} midis in the dataset: {len(count_num)}")

    # Display total unique notes for the entire dataset
    count_num_all = Counter(all_notes)
    print("Total unique notes in the entire dataset:", len(count_num_all))
    
    
# Print the unique-token counts for every composer folder and for the pooled dataset.
count_notes(folder_paths)

: 

In [None]:
def _print_recurrence_stats(count_num):
    """Print unique count and mean / max / min recurrence for one Counter of tokens."""
    print("Total unique notes:", len(count_num))
    if not count_num:
        # Nothing was extracted: there is no recurrence to average and
        # max()/min() would fail on an empty sequence.
        print("No notes found; recurrence statistics are unavailable.")
        return
    print("Average recurrence for a note:", sum(count_num.values()) / len(count_num))
    print("Most frequent note appeared:", max(count_num.values()), "times")
    print("Least frequent note appeared:", min(count_num.values()), "time")


# Function to explore the notes dictionary and calculate average recurrence
def explore_notes_dictionary(folder_paths):
    """Print token recurrence statistics per folder and for all folders pooled.

    Each folder is loaded with ``combine_datasets`` and tokenised with
    ``extract_notes``. For every folder, and then for the pooled tokens, the
    number of distinct tokens and the average, highest and lowest number of
    occurrences are printed. A folder (or dataset) without tokens gets a short
    notice instead of raising ``ZeroDivisionError``.
    """
    all_notes = []

    for folder_path in folder_paths:
        musician = os.path.basename(folder_path).lower()
        notes = extract_notes(combine_datasets([folder_path]))
        all_notes.extend(notes)

        # Display statistics for each musician's folder
        print(f"\nStatistics for {musician} midis in the dataset:")
        _print_recurrence_stats(Counter(notes))

    # Display overall statistics for the entire dataset
    print("\nStatistics for the entire dataset:")
    _print_recurrence_stats(Counter(all_notes))
    
# Print the recurrence statistics (unique count, average, most and least frequent)
# for each composer folder in folder_paths and for the entire dataset.
explore_notes_dictionary(folder_paths)


: 

In [None]:
# Function to get a list of rare chords and display statistics
def get_rare_chords(folder_paths, threshold=100):
    """Return the tokens that occur fewer than ``threshold`` times in the whole dataset.

    Each folder is loaded with ``combine_datasets`` and tokenised with
    ``extract_notes``. Per-folder rare-token counts are printed for
    information only.

    Parameters
    ----------
    folder_paths : iterable of str
        One directory per musician.
    threshold : int, default 100
        A token is rare when its number of occurrences is below this value.

    Returns
    -------
    list of str
        Each dataset-wide rare token exactly once. Tokens that are rare in a
        single folder but frequent overall are NOT included, so removing the
        returned tokens never deletes a token that is common in the corpus.
    """
    all_notes = []

    for folder_path in folder_paths:
        musician = os.path.basename(folder_path).lower()
        notes = extract_notes(combine_datasets([folder_path]))
        all_notes.extend(notes)

        # Display statistics for each musician's folder
        count_num = Counter(notes)
        rare_in_folder = [token for token, count in count_num.items() if count < threshold]
        print(f"\nStatistics for {musician} midis in the dataset:")
        print("Total unique notes:", len(count_num))
        print("Total number of notes that occur less than", threshold, "times:", len(rare_in_folder))

    # Display overall statistics for the entire dataset
    count_num_all = Counter(all_notes)
    rare_notes_all = [token for token, count in count_num_all.items() if count < threshold]
    print("\nStatistics for the entire dataset:")
    print("Total unique notes:", len(count_num_all))
    print("Total number of notes that occur less than", threshold, "times:", len(rare_notes_all))

    return rare_notes_all

# Get a list of rare chords and display statistics for each folder and the entire dataset.
# get_rare_chords is called with its default threshold (100), which is the
# number hard-coded in the message printed below.
rare_chords = get_rare_chords(folder_paths)
print("Total number of notes that occur less than 100 times in the entire dataset:", len(rare_chords))


: 

In [None]:
# Function to eliminate rare chords and display updated statistics
def eliminate_rare_chords(folder_paths, rare_notes):
    """Drop the given rare tokens from the corpus and print the remaining vocabulary sizes.

    Each folder is loaded with ``combine_datasets`` and tokenised with
    ``extract_notes``.

    Parameters
    ----------
    folder_paths : iterable of str
        One directory per musician.
    rare_notes : iterable of str
        Tokens to remove.

    Returns
    -------
    list of str
        The tokens of all folders, in folder order, without the rare ones.
    """
    # Set lookup: the membership test runs once per token of the corpus, so a
    # list here would make the filtering quadratic.
    rare_lookup = set(rare_notes)
    all_notes = []

    for folder_path in folder_paths:
        musician = os.path.basename(folder_path).lower()
        notes = extract_notes(combine_datasets([folder_path]))
        all_notes.extend(notes)

        # Eliminate rare notes for each musician's folder
        kept_in_folder = {token for token in notes if token not in rare_lookup}
        print(f"\nStatistics for {musician} midis in the dataset after eliminating rare notes:")
        print("Updated total unique notes:", len(kept_in_folder))

    # Eliminate rare notes for the entire dataset
    notes_filtered_all = [token for token in all_notes if token not in rare_lookup]
    print("\nStatistics for the entire dataset after eliminating rare notes:")
    print("Updated total unique notes:", len(set(notes_filtered_all)))

    return notes_filtered_all

# Remove the rare tokens; the call also prints the per-folder and overall vocabulary sizes
updated_notes = eliminate_rare_chords(folder_paths, rare_chords)

# Vocabulary: every distinct remaining token, in sorted order
symb = sorted(set(updated_notes))

L_symb = len(symb)  # number of distinct tokens
L_corpus = len(updated_notes)  # number of tokens in the corpus

# Two-way lookup between tokens and their integer ids
reverse_mapping = dict(enumerate(symb))
mapping = {token: index for index, token in reverse_mapping.items()}

print("Total number of characters:", L_corpus)
print("Number of unique characters:", L_symb)


: 

In [None]:
# Cut each folder's token stream into fixed-length windows, each paired with the token that follows it
length = 40
features_list, targets_list = [], []

for folder_path in folder_paths:
    musician = os.path.basename(folder_path).lower()
    all_midis = combine_datasets([folder_path])
    notes = extract_notes(all_midis)

    features, targets = [], []

    for start in range(len(notes) - length):
        window = notes[start:start + length]
        following = notes[start + length]

        # Skip any window that touches a token missing from the mapping dictionary
        if following not in mapping or any(token not in mapping for token in window):
            continue

        features.append([mapping[token] for token in window])
        targets.append(mapping[following])

    features_list.append(features)
    targets_list.append(targets)


: 

In [None]:
import numpy as np
import tensorflow.keras.utils

# Combine features and targets from all folders
all_features = [window for folder_features in features_list for window in folder_features]
all_targets = [target for folder_targets in targets_list for target in folder_targets]

# Reshape X to (samples, length, 1) and scale the token ids by the vocabulary size
X = np.reshape(all_features, (len(all_features), length, 1)) / float(L_symb)

# One hot encode the output variable. num_classes pins the width to the
# vocabulary size; without it the width is inferred from the largest target
# present, so the softmax layer could end up with fewer outputs than there
# are tokens in the vocabulary.
y = tensorflow.keras.utils.to_categorical(all_targets, num_classes=L_symb)

: 

In [None]:
from sklearn.model_selection import train_test_split

# Assuming X and y are your input features and target variable
# Hold out 20% of the windows with a fixed random_state. The held-out part is
# named *_seed because it is used both as validation data for model.fit and as
# the pool of starting sequences in Melody_Generator.
# NOTE(review): the windows are cut with a stride of 1, so neighbouring windows
# overlap almost entirely; a shuffled split therefore places near-duplicates of
# training windows in the held-out set and val_loss is probably optimistic -
# confirm whether a per-piece split is wanted.
X_train, X_seed, y_train, y_seed = train_test_split(X, y, test_size=0.2, random_state=42)

: 

In [None]:
#MODEL BUILDING
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adamax
import seaborn as sns

# Stacked-LSTM next-token classifier: two LSTM layers, a dense layer and a
# softmax over as many classes as y has columns; dropout after the first LSTM
# and after the dense layer.
model = Sequential([
    LSTM(512, input_shape=(X.shape[1], X.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(512),
    Dense(512),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])

# Compile for training with Adamax and categorical cross-entropy
opt = Adamax(learning_rate=0.001)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

: 

In [None]:
# Print the layer-by-layer summary of the network built above.
model.summary()

: 

In [None]:
# Imported from tensorflow.keras, the same package the model and optimizer were
# built from, so the callback and the model always come from one Keras implementation.
from tensorflow.keras.callbacks import EarlyStopping

# Define early stopping: stop once val_loss has not improved for 10 epochs and
# roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Training the Model with Early Stopping (the held-out *_seed split is the validation data)
history = model.fit(X_train, y_train, epochs=200, validation_data=(X_seed, y_seed), callbacks=[early_stopping])


: 

In [None]:
# Persist the trained model to an .h5 file; the generation cell reloads it from this file name.
model.save('LSTM_MODEL_MAIN.h5')

: 

In [None]:
from music21 import stream, note, chord
# Imported from tensorflow.keras, the same package the model was built and
# trained with, so saving and loading use one Keras implementation.
from tensorflow.keras.models import load_model

# Reload the trained network from the file written by the save cell.
model = load_model('LSTM_MODEL_MAIN.h5')

# Turn a sequence of note / chord tokens into a music21 stream
def chords_n_notes(snippet):
    """Build a stream with one note or chord per token of ``snippet``.

    A token that contains ``"."`` or consists only of digits is read as a
    chord: its ``"."``-separated parts are converted to ``int`` and each
    becomes one note of the chord. Any other token is passed to ``note.Note``
    unchanged. The element built from token ``k`` gets offset ``k``, so the
    elements follow each other instead of stacking.
    """
    melody = []

    for offset, token in enumerate(snippet):
        if "." in token or token.isdigit():
            element = chord.Chord([note.Note(int(part)) for part in token.split(".")])
        else:
            element = note.Note(token)

        element.offset = offset
        melody.append(element)

    return stream.Stream(melody)

def Melody_Generator(Note_Count, model, length, L_symb, reverse_mapping):
    """Generate ``Note_Count`` tokens with the model and wrap them in a stream.

    A random window from the module-level ``X_seed`` array is used as the
    starting context. At every step the model's most probable token is
    appended to the result and, divided by ``L_symb``, shifted into the
    context window.

    Parameters
    ----------
    Note_Count : int
        Number of tokens to generate.
    model : object
        Anything with ``predict(batch, verbose=0)`` that returns one row of
        class probabilities per sample.
    length : int
        Length of the context window the model expects.
    L_symb : int
        Vocabulary size, used to scale token ids as in the training data.
    reverse_mapping : dict
        Token id -> token string.

    Returns
    -------
    tuple
        ``(Music, Melody_midi)``: the list of generated token strings (empty
        when ``Note_Count`` is 0) and the stream built from them.
    """
    # randint's upper bound is exclusive: len(X_seed) lets every seed be drawn,
    # including the last one, and also works when only one seed exists.
    seed = X_seed[np.random.randint(0, len(X_seed))]
    Notes_Generated = []

    for _ in range(Note_Count):
        seed = seed.reshape(1, length, 1)
        prediction = model.predict(seed, verbose=0)[0]
        prediction = np.log(prediction) / 1.0  # diversity
        exp_preds = np.exp(prediction)
        prediction = exp_preds / np.sum(exp_preds)
        index = np.argmax(prediction)
        Notes_Generated.append(index)
        # Slide the window: drop the oldest value, append the scaled new token id.
        seed = np.append(seed.ravel()[1:], index / float(L_symb))

    # Translate ids to tokens once, after generation, instead of on every step.
    Music = [reverse_mapping[index] for index in Notes_Generated]

    # Now, we have music in the form of a list of chords and notes, and we want to create a MIDI file.
    Melody = chords_n_notes(Music)
    Melody_midi = stream.Stream(Melody)

    return Music, Melody_midi

# Interactive prompt for choosing one of the available musicians
def select_musician():
    """List the musicians found in ``folder_paths`` and return what the user types.

    The lower-cased base name of every folder is printed, one per line. The
    entered text is returned as typed; it is not checked against the list.
    """
    print("Available musicians:")
    names = [os.path.basename(path).lower() for path in folder_paths]
    for name in names:
        print(name)

    return input("Enter the musician's name: ")

# Example usage: ask for a musician, generate 250 tokens and write them to a MIDI file.
# NOTE(review): selected_musician is not passed to Melody_Generator, so the
# choice has no effect on the generated melody - confirm the intended use.
selected_musician = select_musician()
Music_notes, Melody = Melody_Generator(250, model, length, L_symb, reverse_mapping)

# Save the generated melody to a MIDI file
Melody.write('midi', 'generated_music.mid')


: 

In [None]:
from IPython.display import Audio, display

# Specify the path to your audio file
# NOTE(review): this is a fixed local .mp3 path, not the generated_music.mid
# written by the generation cell - presumably a separately rendered recording; confirm.
audio_path = "C:\\Users\\MEEVAL ANNA SAJI\\Downloads\\haydn.mp3"

# Create an Audio object and display it as an inline player
audio = Audio(filename=audio_path)
display(audio)


: 