## Imports and File Reads

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')
sns.set_theme(style='whitegrid', palette='deep')

import pretty_midi as pm

import tensorflow as tf
import torch as pt

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

In [13]:
import os

def get_size(start_path, extensions=('.mid',)):
    """Recursively total the size and number of files with matching suffixes.

    Parameters
    ----------
    start_path : str
        Directory to walk; every nested sub-directory is visited as well.
    extensions : str or iterable of str, default ('.mid',)
        File-name suffix(es) to include. Matching is done with
        ``str.endswith`` and is therefore case-sensitive: pass e.g.
        ``('.mid', '.MID', '.midi')`` to widen the selection.

    Returns
    -------
    tuple of (int, int)
        ``(total_size, file_count)`` where ``total_size`` is the sum of
        ``os.path.getsize`` (bytes) over the matching files.
    """
    # str.endswith only accepts a str or a tuple, so normalise lists/sets
    # (and a lone string) into a tuple once, outside the loops.
    suffixes = (extensions,) if isinstance(extensions, str) else tuple(extensions)

    total_size = 0
    file_count = 0
    for dirpath, _dirnames, filenames in os.walk(start_path):
        for f in filenames:
            if f.endswith(suffixes):
                fp = os.path.join(dirpath, f)
                total_size += os.path.getsize(fp)
                file_count += 1
    return total_size, file_count

def human_readable_size(size, decimal_places=2):
    """Format a byte count as a string using 1024-based units (B to TB).

    Parameters
    ----------
    size : int or float
        Size in bytes.
    decimal_places : int, default 2
        Number of digits shown after the decimal point.

    Returns
    -------
    str
        The scaled value followed by a space and its unit, e.g. ``"1.50 KB"``.
        Values of 1024 TB or more stay expressed in TB.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    # Only the units *before* the last one may trigger a division. Dividing
    # while already on the largest unit would shrink the number without there
    # being a bigger label to attach (1024**5 bytes used to print "1.00 TB").
    for unit in units[:-1]:
        if size < 1024.0:
            break
        size /= 1024.0
    else:
        # Every smaller unit was exhausted, so the value is now in TB.
        unit = units[-1]
    return f"{size:.{decimal_places}f} {unit}"

# Root of the MIDI corpus, given relative to the current working directory.
# Each immediate sub-directory is treated as one composer in the breakdown.
root_dir = '../aai-511_group1/midiclassics'

# Corpus-wide totals (all matching files at any depth under root_dir).
total_size, file_count = get_size(root_dir)

print(f"Total number of MIDI files: {file_count}")
print(f"Total size of MIDI files: {human_readable_size(total_size)}")

# Print breakdown by composer
print("\nBreakdown by composer:")
# os.listdir() returns entries in arbitrary order; sort them so the report is
# identical on every run and every machine.
for composer in sorted(os.listdir(root_dir)):
    composer_dir = os.path.join(root_dir, composer)
    # Skip plain files sitting directly in root_dir: they are included in the
    # totals above but do not belong to any composer folder.
    if os.path.isdir(composer_dir):
        composer_size, composer_file_count = get_size(composer_dir)
        print(f"  {composer}: {human_readable_size(composer_size)} ({composer_file_count} files)")

Total number of MIDI files: 1530
Total size of MIDI files: 38.56 MB

Breakdown by composer:
  Mozart: 10.58 MB (257 files)
  Chopin: 2.83 MB (136 files)
  Beethoven: 13.25 MB (212 files)
  Bach: 11.89 MB (925 files)


#### References
1. https://machinelearningmastery.com/cnn-long-short-term-memory-networks/