From 6e7101b1e9ffc01e437c4018f2f100b856d5d384 Mon Sep 17 00:00:00 2001
From: Steven Atkinson <steven@atkinson.mn>
Date: Fri, 9 May 2025 18:31:54 -0700
Subject: [PATCH 1/5] Formatting

---
 dsp/NoiseGate.h             |  2 +-
 dsp/RecursiveLinearFilter.h | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/dsp/NoiseGate.h b/dsp/NoiseGate.h
index 691e3cd..c3af01c 100644
--- a/dsp/NoiseGate.h
+++ b/dsp/NoiseGate.h
@@ -60,7 +60,7 @@ class TriggerParams
   , mRatio(ratio)
   , mOpenTime(openTime)
   , mHoldTime(holdTime)
-  , mCloseTime(closeTime){};
+  , mCloseTime(closeTime) {};
 
   double GetTime() const { return this->mTime; };
   double GetThreshold() const { return this->mThreshold; };
diff --git a/dsp/RecursiveLinearFilter.h b/dsp/RecursiveLinearFilter.h
index 737d297..291c268 100644
--- a/dsp/RecursiveLinearFilter.h
+++ b/dsp/RecursiveLinearFilter.h
@@ -56,7 +56,7 @@ class LevelParams : public dsp::Params
 public:
   LevelParams(const double gain)
   : Params()
-  , mGain(gain){};
+  , mGain(gain) {};
   double GetGain() const { return this->mGain; };
 
 private:
@@ -68,7 +68,7 @@ class Level : public Base
 {
 public:
   Level()
-  : Base(1, 0){};
+  : Base(1, 0) {};
   // Invalid usage: require a pointer to recursive_linear_filter::Params so
   // that SetCoefficients() is defined.
   void SetParams(const LevelParams& params) { this->mInputCoefficients[0] = params.GetGain(); };
@@ -85,7 +85,7 @@ class BiquadParams : public dsp::Params
   , mFrequency(frequency)
   , mGainDB(gainDB)
   , mQuality(quality)
-  , mSampleRate(sampleRate){};
+  , mSampleRate(sampleRate) {};
 
   // Parameters defined in
   // https://webaudio.github.io/Audio-EQ-Cookbook/audio-eq-cookbook.html
@@ -105,7 +105,7 @@ class Biquad : public Base
 {
 public:
   Biquad()
-  : Base(3, 3){};
+  : Base(3, 3) {};
   virtual void SetParams(const BiquadParams& params) = 0;
 
 protected:
@@ -139,7 +139,7 @@ class HighPassParams : public dsp::Params
   HighPassParams(const double sampleRate, const double frequency)
   : dsp::Params()
   , mFrequency(frequency)
-  , mSampleRate(sampleRate){};
+  , mSampleRate(sampleRate) {};
 
   double GetAlpha() const
   {
@@ -156,7 +156,7 @@ class HighPass : public Base
 {
 public:
   HighPass()
-  : Base(2, 2){};
+  : Base(2, 2) {};
   void SetParams(const HighPassParams& params)
   {
     const double alpha = params.GetAlpha();
@@ -174,7 +174,7 @@ class LowPassParams : public dsp::Params
   LowPassParams(const double sampleRate, const double frequency)
   : dsp::Params()
   , mFrequency(frequency)
-  , mSampleRate(sampleRate){};
+  , mSampleRate(sampleRate) {};
 
   double GetAlpha() const
   {
@@ -191,7 +191,7 @@ class LowPass : public Base
 {
 public:
   LowPass()
-  : Base(1, 2){};
+  : Base(1, 2) {};
   void SetParams(const LowPassParams& params)
   {
     const double alpha = params.GetAlpha();

From e08ebd3523959b291d804041eb89174b94a2d05f Mon Sep 17 00:00:00 2001
From: Steven Atkinson <steven@atkinson.mn>
Date: Fri, 9 May 2025 18:32:34 -0700
Subject: [PATCH 2/5] Refactored wav.cpp. Should function the same as before.

---
 dsp/wav.cpp | 380 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 272 insertions(+), 108 deletions(-)

diff --git a/dsp/wav.cpp b/dsp/wav.cpp
index eca231e..1ebfdc0 100644
--- a/dsp/wav.cpp
+++ b/dsp/wav.cpp
@@ -16,27 +16,92 @@
 
 #include "wav.h"
 
+struct WaveFileData // Header fields of each RIFF/WAVE chunk, filled in as the chunks are parsed
+{
+  // TODO use types like uint32_t, etc
+  struct RiffChunk
+  {
+    bool valid = false; // Have we gotten this info yet?
+    int size; // NB: Of the rest of the file
+    char format[4]; // Raw 4-byte form type (checked against "WAVE"); not NUL-terminated
+  } riffChunk;
+
+  struct FmtChunk
+  {
+    bool valid = false;
+    int size;
+    // PCM: 1
+    // IEEE: 3
+    // A-law: 6
+    // mu-law: 7
+    // Extensible: 65534
+    unsigned short audioFormat;
+    short numChannels;
+    int sampleRate;
+    int byteRate;
+    short blockAlign;
+    short bitsPerSample;
+    struct Extensible // Extra fields read only when audioFormat is Extensible
+    {
+      uint16_t validBitsPerSample;
+      uint32_t channelMask; // Filled from a 32-bit read; a 16-bit field would drop the high half
+      uint32_t subFormat; // PCM, IEEE
+    } extensible;
+  } fmtChunk;
+
+  struct FactChunk
+  {
+    bool valid = false;
+    int size;
+    int numSamples;
+  } factChunk;
+
+  struct DataChunk
+  {
+    bool valid = false;
+    char id[4];
+    int size;
+  } dataChunk;
+};
+
+const int AUDIO_FORMAT_PCM = 1;
+const int AUDIO_FORMAT_IEEE = 3;
+const int AUDIO_FORMAT_ALAW = 6;
+const int AUDIO_FORMAT_MULAW = 7;
+const int AUDIO_FORMAT_EXTENSIBLE = 65534;
+
 bool idIsNotJunk(char* id)
 {
   return strncmp(id, "RIFF", 4) == 0 || strncmp(id, "WAVE", 4) == 0 || strncmp(id, "fmt ", 4) == 0
          || strncmp(id, "data", 4) == 0;
 }
 
-bool ReadChunkAndSkipJunk(std::ifstream& file, char* chunkID)
+int ReadInt(std::ifstream& file) // Reads 4 raw bytes from the stream into an int (host byte order)
 {
-  file.read(chunkID, 4);
-  while (!idIsNotJunk(chunkID) && file.good())
-  {
-    int junkSize;
-    file.read(reinterpret_cast<char*>(&junkSize), 4);
-    file.ignore(junkSize);
-    // Unused byte if junkSize is odd
-    if ((junkSize % 2) == 1)
-      file.ignore(1);
-    // And now we should be ready for data...
-    file.read(chunkID, 4);
-  }
-  return file.good();
+  int value = 0; // Zero-initialized so a short or failed read never returns an indeterminate value
+  file.read(reinterpret_cast<char*>(&value), 4);
+  return value;
+}
+
+short ReadShort(std::ifstream& file) // Reads 2 raw bytes from the stream into a short (host byte order)
+{
+  short value = 0; // Zero-initialized so a short or failed read never returns an indeterminate value
+  file.read(reinterpret_cast<char*>(&value), 2);
+  return value;
+}
+
+unsigned short ReadUnsignedShort(std::ifstream& file) // Reads 2 raw bytes into an unsigned short (host byte order)
+{
+  unsigned short value = 0; // Zero-initialized so a short or failed read never returns an indeterminate value
+  file.read(reinterpret_cast<char*>(&value), 2);
+  return value;
+}
+
+dsp::wav::LoadReturnCode ReadJunk(std::ifstream& file) // Skips one unrecognized chunk: size field, then payload
+{
+  const std::streamsize chunkSize = static_cast<uint32_t>(ReadInt(file)); // Treat the size field as unsigned 32-bit
+  file.ignore(chunkSize + (chunkSize % 2)); // Pad to 2 bytes at a time
+  return file.good() ? dsp::wav::LoadReturnCode::SUCCESS : dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
 }
 
 std::string dsp::wav::GetMsgForLoadReturnCode(LoadReturnCode retCode)
@@ -67,175 +132,274 @@ std::string dsp::wav::GetMsgForLoadReturnCode(LoadReturnCode retCode)
   return message.str();
 }
 
-dsp::wav::LoadReturnCode dsp::wav::Load(const char* fileName, std::vector<float>& audio, double& sampleRate)
+dsp::wav::LoadReturnCode ReadRiffChunk(std::ifstream& wavFile, WaveFileData::RiffChunk& chunk)
 {
-  // FYI: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
-  // Open the WAV file for reading
-  std::ifstream wavFile(fileName, std::ios::binary);
-
-  // Check if the file was opened successfully
-  if (!wavFile.is_open())
-  {
-    std::cerr << "Error opening WAV file" << std::endl;
-    return dsp::wav::LoadReturnCode::ERROR_OPENING;
-  }
-
-  // WAV file has 3 "chunks": RIFF ("RIFF"), format ("fmt ") and data ("data").
-  // Read the WAV file header
-  char chunkId[4];
-  if (!ReadChunkAndSkipJunk(wavFile, chunkId))
+  if (chunk.valid) // A second RIFF header is reported as an invalid file
   {
-    std::cerr << "Error while reading for next chunk." << std::endl;
+    std::cerr << "Error: RIFF chunk already read." << std::endl;
     return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
   }
-
-  if (strncmp(chunkId, "RIFF", 4) != 0)
+  chunk.size = ReadInt(wavFile);
+  wavFile.read(chunk.format, 4); // 4 raw bytes; the buffer has no room for a NUL terminator
+  if (strncmp(chunk.format, "WAVE", 4) != 0)
   {
-    std::cerr << "Error: File does not start with expected RIFF chunk. Got" << chunkId << " instead." << std::endl;
-    return dsp::wav::LoadReturnCode::ERROR_NOT_RIFF;
-  }
-
-  int chunkSize;
-  wavFile.read(reinterpret_cast<char*>(&chunkSize), 4);
-
-  char format[4];
-  wavFile.read(format, 4);
-  if (strncmp(format, "WAVE", 4) != 0)
-  {
-    std::cerr << "Error: Files' second chunk (format) is not expected WAV. Got" << format << " instead." << std::endl;
+    std::cerr << "Error: File format is not expected 'WAVE'. Got '" << std::string(chunk.format, 4) << "' instead." << std::endl;
     return dsp::wav::LoadReturnCode::ERROR_NOT_WAVE;
   }
+  chunk.valid = true;
+  return dsp::wav::LoadReturnCode::SUCCESS;
+}
 
-  // Read the format chunk
-  char subchunk1Id[4];
-  if (!ReadChunkAndSkipJunk(wavFile, subchunk1Id))
+dsp::wav::LoadReturnCode ReadFmtChunk(std::ifstream& wavFile, WaveFileData& wfd, double& sampleRate)
+{
+  if (wfd.fmtChunk.valid)
   {
-    std::cerr << "Error while reading for next chunk." << std::endl;
+    std::cerr << "Error: Format chunk already read." << std::endl;
     return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
   }
-  if (strncmp(subchunk1Id, "fmt ", 4) != 0)
+  if (!wfd.riffChunk.valid)
   {
-    std::cerr << "Error: Invalid WAV file missing expected fmt section; got " << subchunk1Id << " instead."
-              << std::endl;
-    return dsp::wav::LoadReturnCode::ERROR_MISSING_FMT;
+    std::cerr << "Error: Missing RIFF chunk." << std::endl;
+    return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
   }
 
-  int subchunk1Size;
-  wavFile.read(reinterpret_cast<char*>(&subchunk1Size), 4);
-  if (subchunk1Size < 16)
+  wfd.fmtChunk.size = ReadInt(wavFile);
+  if (wfd.fmtChunk.size < 16)
   {
-    std::cerr << "WAV chunk 1 size is " << subchunk1Size
+    std::cerr << "WAV chunk 1 size is " << wfd.fmtChunk.size
               << ", which is smaller than the requried 16 to fit the expected "
                  "information."
               << std::endl;
     return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
   }
 
-  unsigned short audioFormat;
-  wavFile.read(reinterpret_cast<char*>(&audioFormat), 2);
-  const short AUDIO_FORMAT_PCM = 1;
-  const short AUDIO_FORMAT_IEEE = 3;
-  std::unordered_set<short> supportedFormats{AUDIO_FORMAT_PCM, AUDIO_FORMAT_IEEE};
-  if (supportedFormats.find(audioFormat) == supportedFormats.end())
+  wfd.fmtChunk.audioFormat = ReadUnsignedShort(wavFile);
+  std::unordered_set<short> supportedFormats{AUDIO_FORMAT_PCM, AUDIO_FORMAT_IEEE}; // AUDIO_FORMAT_EXTENSIBLE
+  if (supportedFormats.find(wfd.fmtChunk.audioFormat) == supportedFormats.end())
   {
     std::cerr << "Error: Unsupported WAV format detected. ";
-    switch (audioFormat)
+    switch (wfd.fmtChunk.audioFormat)
     {
-      case 6: std::cerr << "(Got: A-law)" << std::endl; return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_ALAW;
-      case 7:
+      case AUDIO_FORMAT_ALAW:
+        std::cerr << "(Got: A-law)" << std::endl;
+        return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_ALAW;
+      case AUDIO_FORMAT_MULAW:
         std::cerr << "(Got: mu-law)" << std::endl;
         return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_MULAW;
-      case 65534:
+      case AUDIO_FORMAT_EXTENSIBLE: // TODO remove
         std::cerr << "(Got: Extensible)" << std::endl;
         return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_EXTENSIBLE;
       default:
-        std::cerr << "(Got unknown format " << audioFormat << ")" << std::endl;
+        std::cerr << "(Got unknown format " << wfd.fmtChunk.audioFormat << ")" << std::endl;
         return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
     }
   }
 
-  short numChannels;
-  wavFile.read(reinterpret_cast<char*>(&numChannels), 2);
+  wfd.fmtChunk.numChannels = ReadShort(wavFile);
   // HACK
-  if (numChannels != 1)
+  // Note for future: for multi-channel files, samples are laid out with channel in the inner loop.
+  if (wfd.fmtChunk.numChannels != 1)
   {
     std::cerr << "Require mono (using for IR loading)" << std::endl;
     return dsp::wav::LoadReturnCode::ERROR_NOT_MONO;
   }
 
-  int iSampleRate;
-  wavFile.read(reinterpret_cast<char*>(&iSampleRate), 4);
-  // Store in format we assume (SR is double)
-  sampleRate = (double)iSampleRate;
-
-  int byteRate;
-  wavFile.read(reinterpret_cast<char*>(&byteRate), 4);
-
-  short blockAlign;
-  wavFile.read(reinterpret_cast<char*>(&blockAlign), 2);
+  wfd.fmtChunk.sampleRate = ReadInt(wavFile);
+  wfd.fmtChunk.byteRate = ReadInt(wavFile);
+  wfd.fmtChunk.blockAlign = ReadShort(wavFile);
+  wfd.fmtChunk.bitsPerSample = ReadShort(wavFile);
 
-  short bitsPerSample;
-  wavFile.read(reinterpret_cast<char*>(&bitsPerSample), 2);
+  if (wfd.fmtChunk.audioFormat == AUDIO_FORMAT_EXTENSIBLE)
+  {
+    // Do we need to assert or modify the data loading below if this doesn't match bitsPerSample?
+    wfd.fmtChunk.extensible.validBitsPerSample = ReadUnsignedShort(wavFile);
+    auto read_u32 = [&]() -> uint32_t {
+      uint8_t b[4];
+      wavFile.read((char*)b, 4);
+      return b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24);
+    };
+    wfd.fmtChunk.extensible.channelMask = read_u32();
+    uint8_t guid[16];
+    wavFile.read((char*)guid, 16);
+    wfd.fmtChunk.extensible.subFormat = guid[1] << 8 | guid[0];
+  }
 
   // The default is for there to be 16 bytes in the fmt chunk, but sometimes
   // it's different.
-  if (subchunk1Size > 16)
+  else if (wfd.fmtChunk.size > 16)
   {
-    const int extraBytes = subchunk1Size - 16;
+    const int extraBytes = wfd.fmtChunk.size - 16;
     const int skipChars = extraBytes / 4 * 4; // truncate to dword size
     wavFile.ignore(skipChars);
     const int remainder = extraBytes % 4;
-    wavFile.read(reinterpret_cast<char*>(&byteRate), remainder);
+    // Is this right? Don't we already have the byteRate?
+    // This must be here because of some weird WAVE file I've seen, but I don't know which.
+    wavFile.read(reinterpret_cast<char*>(&wfd.fmtChunk.byteRate), remainder);
   }
 
-  // Read the data chunk
-  char subchunk2Id[4];
-  if (!ReadChunkAndSkipJunk(wavFile, subchunk2Id))
+  // Skip any extra bytes in the fmt chunk
+  if (wfd.fmtChunk.size > 16)
   {
-    std::cerr << "Error while reading for next chunk." << std::endl;
+    wavFile.ignore(wfd.fmtChunk.size - 16);
+  }
+
+  // Store SR for final return
+  sampleRate = (double)wfd.fmtChunk.sampleRate;
+
+  wfd.fmtChunk.valid = true;
+  return dsp::wav::LoadReturnCode::SUCCESS;
+}
+
+dsp::wav::LoadReturnCode ReadFactChunk(std::ifstream& wavFile, WaveFileData& wfd)
+{
+  if (wfd.factChunk.valid)
+  {
+    std::cerr << "Error: Duplicate fact chunk." << std::endl;
     return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
   }
-  if (strncmp(subchunk2Id, "data", 4) != 0)
+  if (!wfd.riffChunk.valid)
   {
-    std::cerr << "Error: Invalid WAV file" << std::endl;
+    std::cerr << "Error: Missing RIFF chunk." << std::endl;
+    return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
+  }
+  // We could assert that the fmt chunk was also read first but I'm not sure that's necessary for the file to be valid.
+
+  wfd.factChunk.size = ReadInt(wavFile);
+  if (wfd.factChunk.size != 4)
+  {
+    std::cerr << "Error: Invalid fact chunk size. Only 4 is supported; got " << wfd.factChunk.size << " instead."
+              << std::endl;
+    return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
+  }
+  wfd.factChunk.numSamples = ReadInt(wavFile);
+
+  return dsp::wav::LoadReturnCode::SUCCESS;
+}
+
+int GetAudioFormat(WaveFileData& wfd)
+{
+  const auto& fmt = wfd.fmtChunk; // For Extensible files the sub-format is returned instead of the format tag
+  return fmt.audioFormat == AUDIO_FORMAT_EXTENSIBLE ? fmt.extensible.subFormat : fmt.audioFormat;
+}
+
+dsp::wav::LoadReturnCode ReadDataChunk(std::ifstream& wavFile, WaveFileData& wfd, std::vector<float>& audio)
+{
+  if (wfd.dataChunk.valid)
+  {
+    std::cerr << "Error: Already read data chunk." << std::endl;
+    return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
+  }
+  if (!wfd.riffChunk.valid)
+  {
+    std::cerr << "Error: Missing RIFF chunk." << std::endl;
+    return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
+  }
+  if (!wfd.fmtChunk.valid) // fmt chunk must come before data chunk
+  {
+    std::cerr << "Error: Tried to read data chunk before fmt chunk." << std::endl;
+    return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
+  }
+  if (wfd.fmtChunk.audioFormat == AUDIO_FORMAT_EXTENSIBLE
+      && !wfd.factChunk.valid) // fact chunk must come before data chunk
+  {
+    std::cerr << "Error: Tried to read data chunk before fact chunk for extensible format WAVE file." << std::endl;
     return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
   }
 
   // Size of the data chunk, in bits.
-  int subchunk2Size;
-  wavFile.read(reinterpret_cast<char*>(&subchunk2Size), 4);
+  wfd.dataChunk.size = ReadInt(wavFile);
 
+  const int audioFormat = GetAudioFormat(wfd);
   if (audioFormat == AUDIO_FORMAT_IEEE)
   {
-    if (bitsPerSample == 32)
-      dsp::wav::_LoadSamples32(wavFile, subchunk2Size, audio);
+    if (wfd.fmtChunk.bitsPerSample == 32)
+      dsp::wav::_LoadSamples32(wavFile, wfd.dataChunk.size, audio);
     else
     {
-      std::cerr << "Error: Unsupported bits per sample for IEEE files: " << bitsPerSample << std::endl;
+      std::cerr << "Error: Unsupported bits per sample for IEEE files: " << wfd.fmtChunk.bitsPerSample << std::endl;
       return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_BITS_PER_SAMPLE;
     }
   }
   else if (audioFormat == AUDIO_FORMAT_PCM)
   {
-    if (bitsPerSample == 16)
-      dsp::wav::_LoadSamples16(wavFile, subchunk2Size, audio);
-    else if (bitsPerSample == 24)
-      dsp::wav::_LoadSamples24(wavFile, subchunk2Size, audio);
-    else if (bitsPerSample == 32)
-      dsp::wav::_LoadSamples32(wavFile, subchunk2Size, audio);
+    if (wfd.fmtChunk.bitsPerSample == 16)
+      dsp::wav::_LoadSamples16(wavFile, wfd.dataChunk.size, audio);
+    else if (wfd.fmtChunk.bitsPerSample == 24)
+      dsp::wav::_LoadSamples24(wavFile, wfd.dataChunk.size, audio);
+    else if (wfd.fmtChunk.bitsPerSample == 32)
+      dsp::wav::_LoadSamples32(wavFile, wfd.dataChunk.size, audio);
     else
     {
-      std::cerr << "Error: Unsupported bits per sample for PCM files: " << bitsPerSample << std::endl;
+      std::cerr << "Error: Unsupported bits per sample for PCM files: " << wfd.fmtChunk.bitsPerSample << std::endl;
       return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_BITS_PER_SAMPLE;
     }
   }
+  wfd.dataChunk.valid = true;
+  return dsp::wav::LoadReturnCode::SUCCESS;
+}
 
-  // Close the WAV file
-  wavFile.close();
+dsp::wav::LoadReturnCode dsp::wav::Load(const char* fileName, std::vector<float>& audio, double& sampleRate)
+{
+  // FYI: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
+  // Open the WAV file for reading
+  std::ifstream wavFile(fileName, std::ios::binary);
 
-  // Print the number of samples
-  // std::cout << "Number of samples: " << samples.size() << std::endl;
+  // Check if the file was opened successfully
+  if (!wavFile.is_open())
+  {
+    std::cerr << "Error opening WAV file" << std::endl;
+    return dsp::wav::LoadReturnCode::ERROR_OPENING;
+  }
 
+  char chunkId[5] = {}; // 4-byte chunk ID plus a NUL, so streaming it into the error message stays in bounds
+  auto ReadChunkID = [&]() { wavFile.read(chunkId, 4); };
+
+  WaveFileData wfd;
+  dsp::wav::LoadReturnCode returnCode;
+  while (!wfd.dataChunk.valid && !wavFile.eof()) // Dispatch on chunk ID until the data chunk has been read
+  {
+    ReadChunkID();
+    if (!wfd.riffChunk.valid && strncmp(chunkId, "RIFF", 4) != 0)
+    {
+      {
+        std::cerr << "Error: File does not start with expected RIFF chunk. Got" << chunkId << " instead." << std::endl;
+        wavFile.close();
+        return dsp::wav::LoadReturnCode::ERROR_NOT_RIFF;
+      }
+    }
+    // Read the various chunks
+    if (strncmp(chunkId, "RIFF", 4) == 0)
+    {
+      returnCode = ReadRiffChunk(wavFile, wfd.riffChunk);
+    }
+    else if (strncmp(chunkId, "fmt ", 4) == 0)
+    {
+      returnCode = ReadFmtChunk(wavFile, wfd, sampleRate);
+    }
+    else if (strncmp(chunkId, "fact", 4) == 0)
+    {
+      returnCode = ReadFactChunk(wavFile, wfd);
+    }
+    else if (strncmp(chunkId, "data", 4) == 0)
+    {
+      returnCode = ReadDataChunk(wavFile, wfd, audio);
+    }
+    else
+    { // There might be junk chunks; just ignore them.
+      returnCode = ReadJunk(wavFile);
+    }
+    if (returnCode != dsp::wav::LoadReturnCode::SUCCESS) // Any chunk-level failure aborts the load
+    {
+      wavFile.close();
+      return returnCode;
+    }
+  }
+  wavFile.close();
+  if (!wfd.dataChunk.valid)
+  { // This implicitly asserts that the fmt chunk was read and gave us the sample rate
+    std::cerr << "Error: File does not contain expected data chunk." << std::endl;
+    return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
+  }
   return dsp::wav::LoadReturnCode::SUCCESS;
 }
 

From e688e287c1c2b2be4ca70526ee4486709bf0a39a Mon Sep 17 00:00:00 2001
From: Steven Atkinson <steven@atkinson.mn>
Date: Fri, 9 May 2025 19:55:32 -0700
Subject: [PATCH 3/5] Implement support for extensible-format WAVE files that
 are PCM or IEEE

---
 dsp/wav.cpp | 61 +++++++++++++++++++++++++++++------------------------
 dsp/wav.h   |  2 +-
 2 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/dsp/wav.cpp b/dsp/wav.cpp
index 1ebfdc0..3a6bc27 100644
--- a/dsp/wav.cpp
+++ b/dsp/wav.cpp
@@ -5,6 +5,7 @@
 //  Created by Steven Atkinson on 12/31/22.
 //
 
+#include <bitset> // For debugging
 #include <cstring> // strncmp
 #include <cmath> // pow
 #include <fstream>
@@ -64,11 +65,11 @@ struct WaveFileData
   } dataChunk;
 };
 
-const int AUDIO_FORMAT_PCM = 1;
-const int AUDIO_FORMAT_IEEE = 3;
-const int AUDIO_FORMAT_ALAW = 6;
-const int AUDIO_FORMAT_MULAW = 7;
-const int AUDIO_FORMAT_EXTENSIBLE = 65534;
+const unsigned short AUDIO_FORMAT_PCM = 1;
+const unsigned short AUDIO_FORMAT_IEEE = 3;
+const unsigned short AUDIO_FORMAT_ALAW = 6;
+const unsigned short AUDIO_FORMAT_MULAW = 7;
+const unsigned short AUDIO_FORMAT_EXTENSIBLE = 65534;
 
 bool idIsNotJunk(char* id)
 {
@@ -120,9 +121,7 @@ std::string dsp::wav::GetMsgForLoadReturnCode(LoadReturnCode retCode)
     case (LoadReturnCode::ERROR_INVALID_FILE): message << "WAV file contents are invalid."; break;
     case (LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_ALAW): message << "Unsupported file format \"A-law\""; break;
     case (LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_MULAW): message << "Unsupported file format \"mu-law\""; break;
-    case (LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_EXTENSIBLE):
-      message << "Unsupported file format \"extensible\"";
-      break;
+    case (LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_OTHER): message << "Unsupported file format."; break;
     case (LoadReturnCode::ERROR_NOT_MONO): message << "File is not mono."; break;
     case (LoadReturnCode::ERROR_UNSUPPORTED_BITS_PER_SAMPLE): message << "Unsupported bits per sample"; break;
     case (dsp::wav::LoadReturnCode::ERROR_OTHER): message << "???"; break;
@@ -174,7 +173,7 @@ dsp::wav::LoadReturnCode ReadFmtChunk(std::ifstream& wavFile, WaveFileData& wfd,
   }
 
   wfd.fmtChunk.audioFormat = ReadUnsignedShort(wavFile);
-  std::unordered_set<short> supportedFormats{AUDIO_FORMAT_PCM, AUDIO_FORMAT_IEEE}; // AUDIO_FORMAT_EXTENSIBLE
+  std::unordered_set<unsigned short> supportedFormats{AUDIO_FORMAT_PCM, AUDIO_FORMAT_IEEE, AUDIO_FORMAT_EXTENSIBLE};
   if (supportedFormats.find(wfd.fmtChunk.audioFormat) == supportedFormats.end())
   {
     std::cerr << "Error: Unsupported WAV format detected. ";
@@ -186,9 +185,6 @@ dsp::wav::LoadReturnCode ReadFmtChunk(std::ifstream& wavFile, WaveFileData& wfd,
       case AUDIO_FORMAT_MULAW:
         std::cerr << "(Got: mu-law)" << std::endl;
         return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_MULAW;
-      case AUDIO_FORMAT_EXTENSIBLE: // TODO remove
-        std::cerr << "(Got: Extensible)" << std::endl;
-        return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_EXTENSIBLE;
       default:
         std::cerr << "(Got unknown format " << wfd.fmtChunk.audioFormat << ")" << std::endl;
         return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
@@ -208,9 +204,11 @@ dsp::wav::LoadReturnCode ReadFmtChunk(std::ifstream& wavFile, WaveFileData& wfd,
   wfd.fmtChunk.byteRate = ReadInt(wavFile);
   wfd.fmtChunk.blockAlign = ReadShort(wavFile);
   wfd.fmtChunk.bitsPerSample = ReadShort(wavFile);
+  int bytesRead = 16;
 
   if (wfd.fmtChunk.audioFormat == AUDIO_FORMAT_EXTENSIBLE)
   {
+    unsigned short cbSize = ReadUnsignedShort(wavFile);
     // Do we need to assert or modify the data loading below if this doesn't match bitsPerSample?
     wfd.fmtChunk.extensible.validBitsPerSample = ReadUnsignedShort(wavFile);
     auto read_u32 = [&]() -> uint32_t {
@@ -221,26 +219,29 @@ dsp::wav::LoadReturnCode ReadFmtChunk(std::ifstream& wavFile, WaveFileData& wfd,
     wfd.fmtChunk.extensible.channelMask = read_u32();
     uint8_t guid[16];
     wavFile.read((char*)guid, 16);
+    std::bitset<8> bits(guid[0]);
+    std::cout << "GUID ";
+    for (auto g : guid)
+    {
+      bits = g;
+      std::cout << bits << " ";
+    }
+    std::cout << std::endl;
     wfd.fmtChunk.extensible.subFormat = guid[1] << 8 | guid[0];
-  }
-
-  // The default is for there to be 16 bytes in the fmt chunk, but sometimes
-  // it's different.
-  else if (wfd.fmtChunk.size > 16)
-  {
-    const int extraBytes = wfd.fmtChunk.size - 16;
-    const int skipChars = extraBytes / 4 * 4; // truncate to dword size
-    wavFile.ignore(skipChars);
-    const int remainder = extraBytes % 4;
-    // Is this right? Don't we already have the byteRate?
-    // This must be here because of some weird WAVE file I've seen, but I don't know which.
-    wavFile.read(reinterpret_cast<char*>(&wfd.fmtChunk.byteRate), remainder);
+    bytesRead += cbSize + 2; // Don't forget the 2 for the cbSize itself!
   }
 
   // Skip any extra bytes in the fmt chunk
-  if (wfd.fmtChunk.size > 16)
+  // This should probably be a remainder of a dword so that we're mod-4
+  if (wfd.fmtChunk.size > bytesRead)
   {
-    wavFile.ignore(wfd.fmtChunk.size - 16);
+    const int extraBytes = wfd.fmtChunk.size - bytesRead;
+    if (extraBytes >= 4)
+    {
+      std::cerr << "More than 4 extra bytes in fmt chunk." << std::endl;
+      return dsp::wav::LoadReturnCode::ERROR_INVALID_FILE;
+    }
+    wavFile.ignore(extraBytes);
   }
 
   // Store SR for final return
@@ -273,6 +274,7 @@ dsp::wav::LoadReturnCode ReadFactChunk(std::ifstream& wavFile, WaveFileData& wfd
   }
   wfd.factChunk.numSamples = ReadInt(wavFile);
 
+  wfd.factChunk.valid = true;
   return dsp::wav::LoadReturnCode::SUCCESS;
 }
 
@@ -334,6 +336,11 @@ dsp::wav::LoadReturnCode ReadDataChunk(std::ifstream& wavFile, WaveFileData& wfd
       return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_BITS_PER_SAMPLE;
     }
   }
+  else
+  {
+    std::cerr << "Error: Unsupported audio format: " << audioFormat << std::endl;
+    return dsp::wav::LoadReturnCode::ERROR_UNSUPPORTED_FORMAT_OTHER;
+  }
   wfd.dataChunk.valid = true;
   return dsp::wav::LoadReturnCode::SUCCESS;
 }
diff --git a/dsp/wav.h b/dsp/wav.h
index 336eddf..772565e 100644
--- a/dsp/wav.h
+++ b/dsp/wav.h
@@ -25,7 +25,7 @@ enum class LoadReturnCode
   ERROR_INVALID_FILE,
   ERROR_UNSUPPORTED_FORMAT_ALAW,
   ERROR_UNSUPPORTED_FORMAT_MULAW,
-  ERROR_UNSUPPORTED_FORMAT_EXTENSIBLE,
+  ERROR_UNSUPPORTED_FORMAT_OTHER,
   ERROR_UNSUPPORTED_BITS_PER_SAMPLE,
   ERROR_NOT_MONO,
   ERROR_OTHER

From 5cdf059ceacef906da80f4f1cce7644af19d4311 Mon Sep 17 00:00:00 2001
From: Steven Atkinson <steven@atkinson.mn>
Date: Fri, 9 May 2025 19:56:53 -0700
Subject: [PATCH 4/5] Remove debugging lines

---
 dsp/wav.cpp | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/dsp/wav.cpp b/dsp/wav.cpp
index 3a6bc27..c72fe81 100644
--- a/dsp/wav.cpp
+++ b/dsp/wav.cpp
@@ -5,7 +5,6 @@
 //  Created by Steven Atkinson on 12/31/22.
 //
 
-#include <bitset> // For debugging
 #include <cstring> // strncmp
 #include <cmath> // pow
 #include <fstream>
@@ -219,14 +218,6 @@ dsp::wav::LoadReturnCode ReadFmtChunk(std::ifstream& wavFile, WaveFileData& wfd,
     wfd.fmtChunk.extensible.channelMask = read_u32();
     uint8_t guid[16];
     wavFile.read((char*)guid, 16);
-    std::bitset<8> bits(guid[0]);
-    std::cout << "GUID ";
-    for (auto g : guid)
-    {
-      bits = g;
-      std::cout << bits << " ";
-    }
-    std::cout << std::endl;
     wfd.fmtChunk.extensible.subFormat = guid[1] << 8 | guid[0];
     bytesRead += cbSize + 2; // Don't forget the 2 for the cbSize itself!
   }

From d07016066627a1fc307d621f53db157871498684 Mon Sep 17 00:00:00 2001
From: Steven Atkinson <steven@atkinson.mn>
Date: Fri, 9 May 2025 20:03:19 -0700
Subject: [PATCH 5/5] Bump minor version for breaking change

---
 dsp/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dsp/version.h b/dsp/version.h
index ab41709..6ad7ced 100644
--- a/dsp/version.h
+++ b/dsp/version.h
@@ -2,7 +2,7 @@
 #define version_h
 
 #define AUDIO_DSP_TOOLS_VERSION_MAJOR 0
-#define AUDIO_DSP_TOOLS_VERSION_MINOR 0
+#define AUDIO_DSP_TOOLS_VERSION_MINOR 1
 #define AUDIO_DSP_TOOLS_VERSION_PATCH 0
 
 #endif