From 5bba607934b47bcb015f560a1baffd46a8e29234 Mon Sep 17 00:00:00 2001
From: wmaycisco
Date: Mon, 29 Jan 2007 22:20:51 +0000
Subject: [PATCH] Change some files for Peter; add mpeg1 layer 2 audio encoding

---
 FEATURES.html | 2 +-
 VERSION | 2 +-
 configure.in | 29 +-
 doc/MAINREADME.html | 1 +
 doc/MP4LIVE_README.html | 8 +-
 doc/ietf/draft-ietf-avt-rtp-h264-10.txt | 4349 -----------------------
 include/mpeg4ip_version.h | 4 +-
 lib/mpeg2ps/mpeg2ps.c | 28 +-
 lib/mpeg2ps/mpeg2ps_private.h | 3 +-
 lib/mpeg2ps/ps_info.cpp | 2 +-
 lib/mpeg2t/mpeg2_transport.c | 11 +-
 lib/mpeg2t/mpeg2t_private.h | 5 +-
 lib/mpeg2t/mpeg2t_video.c | 172 +
 lib/rtp/Makefile.am | 6 +-
 lib/rtp/rijndael-alg-fst.c | 441 ---
 lib/rtp/rijndael-alg-fst.h | 40 -
 lib/rtp/rijndael-api-fst.c | 498 ---
 lib/rtp/rijndael-api-fst.h | 100 -
 mp4live_encoder_check.sh | 7 +
 player/plugin/video/ffmpeg/ffmpeg.cpp | 3 +
 server/mp4live/Makefile.am | 7 +-
 server/mp4live/audio_encoder_base.cpp | 51 +
 server/mp4live/audio_encoder_tables.cpp | 4 +
 server/mp4live/audio_twolame.cpp | 377 ++
 server/mp4live/audio_twolame.h | 96 +
 server/mp4live/profile_audio.h | 1 +
 server/mp4live/video_ffmpeg.cpp | 12 +-
 27 files changed, 807 insertions(+), 5452 deletions(-)
 delete mode 100755 doc/ietf/draft-ietf-avt-rtp-h264-10.txt
 delete mode 100644 lib/rtp/rijndael-alg-fst.c
 delete mode 100644 lib/rtp/rijndael-alg-fst.h
 delete mode 100644 lib/rtp/rijndael-api-fst.c
 delete mode 100644 lib/rtp/rijndael-api-fst.h
 create mode 100644 server/mp4live/audio_twolame.cpp
 create mode 100644 server/mp4live/audio_twolame.h

diff --git a/FEATURES.html b/FEATURES.html
index ebb50397..fea7e137 100644
--- a/FEATURES.html
+++ b/FEATURES.html
@@ -66,7 +66,7 @@

Mp4live Features

Audio Encoders:
AAC (faac, with required download), MP3 (lame, with required download)
Mpeg2, layer 2 (with ffmpeg), AMR NB and WB (through ffmpeg), G.711 alaw
-and ulaw
+and ulaw, Mpeg1, layer 2 (with twolame)

Video Filters:
de-interlace (Y only), decimate
diff --git a/VERSION b/VERSION
index 8df6b88a..f6ed4357 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.5.14
+1.5.15
diff --git a/configure.in b/configure.in
index fcf16e96..0047cf4d 100644
--- a/configure.in
+++ b/configure.in
@@ -1,5 +1,5 @@
 dnl Process this file with autoconf to produce a configure script.
-AC_INIT(mpeg4ip, 1.5.14)
+AC_INIT(mpeg4ip, 1.5.15)
 AM_CONFIG_HEADER(mpeg4ip_config.h)
 dnl Prevent user from running just ./configure
 AC_CHECK_FILE(./bootstrapped, [did_bootstrap=yes], [did_bootstrap=no])
@@ -524,6 +524,7 @@
 AM_CONDITIONAL(HAVE_FFMPEG, [test x$have_ffmpeg = xtrue])
 LAME_LIB=
 FAAC_LIB=
 X264_LIB=
+TWOLAME_LIB=
 if test x$have_mp4live = xtrue; then
@@ -554,6 +555,31 @@
 else
   LAME_LIB=
 fi
+AC_ARG_ENABLE(twolame,
+[AC_HELP_STRING([--disable-twolame],[disable use of installed twolame library])],
+[ case "${enableval}" in
+  no) disable_it=yes ;;
+  yes) disable_it=no ;;
+  *) AC_MSG_ERROR([bad value ${enableval} for --disable-twolame]) ;;
+esac],
+ [disable_it=no])
+
+if test x$disable_it = xyes; then
+  have_twolame=false
+  AC_MSG_NOTICE([disabling twolame library])
+else
+  saved_LIBS="$LIBS"
+  LIBS="$LIBS -lm -lz"
+  AC_CHECK_LIB(twolame, twolame_init, [have_twolame=true], [have_twolame=false])
+  LIBS=$saved_LIBS
+fi
+if test x$have_twolame = xtrue; then
+  AC_DEFINE(HAVE_TWOLAME, 1, [enable twolame for mp4live])
+  TWOLAME_LIB=-ltwolame
+else
+  TWOLAME_LIB=
+fi
+
 AC_ARG_ENABLE(faac,
 [AC_HELP_STRING([--disable-faac],[disable use of installed faac library])],
 [ case "${enableval}" in
@@ -612,6 +638,7 @@
 fi
 AC_SUBST(LAME_LIB)
 AC_SUBST(FAAC_LIB)
 AC_SUBST(X264_LIB)
+AC_SUBST(TWOLAME_LIB)
 AM_CONDITIONAL(HAVE_X264, [test x$have_x264 = xtrue])
 AC_ARG_ENABLE(ipv6,
diff --git a/doc/MAINREADME.html b/doc/MAINREADME.html
index bd60d093..fbdbf489 100644
--- a/doc/MAINREADME.html
+++ b/doc/MAINREADME.html
@@ -204,6 +204,7 @@

Prerequisites for Building

  • faac, version 1.20.1 or later
    If you are using a released or CVS version after version 1.24, use the --with=mp4v2=no option
  • lame (mp3 encoding)
+  • twolame (mpeg1, layer 2 encoding)
  • ffmpeg (H.263, mpeg-4, mpeg-2 video content, AMR, mpeg-2 audio)
  • xvid (mpeg-4 video encoding)
diff --git a/doc/MP4LIVE_README.html b/doc/MP4LIVE_README.html
index 492b8b45..1b416c57 100644
--- a/doc/MP4LIVE_README.html
+++ b/doc/MP4LIVE_README.html
@@ -174,8 +174,8 @@

    WARNINGS!

We no longer include a mpeg-4 encoder native with the package. You
will have to download ffmpeg or xvid if you want to encode with mpeg-4. You
-will also want to include a good audio encoder, like faac or lame. See
-the main readme for more information.
+will also want to include a good audio encoder, like faac, lame or twolame.
+See the main readme for more information.
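
    The mpeg1 layer 2 support comes from the new audio_twolame.cpp and
    audio_twolame.h files, which build on libtwolame; configure.in now probes
    for the library via its twolame_init entry point. As a rough illustration
    only, not the code the patch actually adds, a standalone libtwolame encode
    loop might look like this (bitrate, buffer size and file output are
    assumptions):

    #include <stdio.h>
    #include <twolame.h>

    /* Rough sketch of an MPEG-1 layer 2 encode loop with libtwolame.
     * This is NOT the audio_twolame.cpp code added by this patch; the
     * bitrate, buffer size and FILE output are illustrative assumptions. */
    int encode_mp2(const short *pcm, int frames, int channels, int samplerate,
                   FILE *out)
    {
        unsigned char mp2buf[16384];
        int n;
        twolame_options *opts = twolame_init();   /* same symbol configure checks for */

        if (opts == NULL)
            return -1;
        twolame_set_num_channels(opts, channels);
        twolame_set_in_samplerate(opts, samplerate);
        twolame_set_out_samplerate(opts, samplerate);
        twolame_set_bitrate(opts, 192);            /* kbit/s, illustrative */
        if (twolame_init_params(opts) != 0) {
            twolame_close(&opts);
            return -1;
        }

        /* 'frames' counts interleaved PCM frames (one sample per channel). */
        n = twolame_encode_buffer_interleaved(opts, pcm, frames,
                                              mp2buf, sizeof(mp2buf));
        if (n > 0)
            fwrite(mp2buf, 1, (size_t)n, out);

        n = twolame_encode_flush(opts, mp2buf, sizeof(mp2buf));
        if (n > 0)
            fwrite(mp2buf, 1, (size_t)n, out);

        twolame_close(&opts);
        return 0;
    }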

By far the easiest route is to use a Linux distribution that already has a 2.6 kernel and the bttv driver, and the associated i2c module built into it.
@@ -715,6 +715,10 @@

    Links

    LAME http://www.sourceforge.net/projects/lame + + TWOLAME + http://www.twolame.org + FAAC http://www.audiocoding.com diff --git a/doc/ietf/draft-ietf-avt-rtp-h264-10.txt b/doc/ietf/draft-ietf-avt-rtp-h264-10.txt deleted file mode 100755 index cdfc6f86..00000000 --- a/doc/ietf/draft-ietf-avt-rtp-h264-10.txt +++ /dev/null @@ -1,4349 +0,0 @@ - -Network Working Group S. Wenger -Internet Draft M.M. Hannuksela -Document: draft-ietf-avt-rtp-h264-10.txt T. Stockhammer -Expires: January 2005 M. Westerlund - D. Singer - July 2004 - - - - - - RTP payload Format for H.264 Video - - - -Status of this Memo - - By submitting this Internet-Draft, I (we) certify that any - applicable patent or other IPR claims of which I am (we are) aware - have been disclosed, and any of which I (we) become aware will be - disclosed, in accordance with RFC 3668 (BCP 79). - - By submitting this Internet-Draft, I (we) accept the provisions of - Section 3 of RFC 3667 (BCP 78). - - Internet-Drafts are working documents of the Internet Engineering - Task Force (IETF), its areas, and its working groups. Note that - other groups may also distribute working documents as Internet- - Drafts. - - Internet-Drafts are draft documents valid for a maximum of six - months and may be updated, replaced, or obsoleted by other documents - at any time. It is inappropriate to use Internet-Drafts as - reference material or to cite them other than as "work in progress." - - The list of current Internet-Drafts can be accessed at - http://www.ietf.org/1id-abstracts.txt - - The list of Internet-Draft Shadow Directories can be accessed at - http://www.ietf.org/shadow.html - - This document is a submission of the IETF AVT WG. Comments should - be directed to the AVT WG mailing list, avt@ietf.org. - -Abstract - - This memo describes an RTP Payload format for the ITU-T - Recommendation H.264 video codec and the technically identical - ISO/IEC International Standard 14496-10 video codec. The RTP - payload format allows for packetization of one or more Network - Abstraction Layer Units (NALUs), produced by an H.264 video encoder, - in each RTP payload. The payload format has wide applicability - - -Wenger et. al. Expires January 2005 [Page 1] - -Internet Draft July, 2004 - - supporting from simple low-bit rate conversational usage to Internet - video streaming with interleaved transmission, all the way to high - bit-rate video-on-demand applications. - -Table of Contents - -1. Introduction.......................................................4 - 1.1. The H.264 codec................................................4 - 1.2. Parameter Set Concept..........................................5 - 1.3. Network Abstraction Layer Unit Types...........................6 -2. Conventions........................................................7 -3. Scope..............................................................7 -4. Definitions and Abbreviations......................................7 - 4.1. Definitions....................................................7 - 4.2. Abbreviations..................................................9 -5. RTP Payload Format.................................................9 - 5.1. RTP Header Usage...............................................9 - 5.2. Common structure of the RTP payload format....................12 - 5.3. NAL Unit Octet Usage..........................................13 - 5.4. Packetization Modes...........................................15 - 5.5. Decoding Order Number (DON)...................................16 - 5.6. 
Single NAL Unit Packet........................................18 - 5.7. Aggregation Packets...........................................19 - 5.8. Fragmentation Units (FUs).....................................27 -6. Packetization Rules...............................................30 - 6.1. Common Packetization Rules....................................31 - 6.2. Single NAL Unit Mode..........................................31 - 6.3. Non-Interleaved Mode..........................................32 - 6.4. Interleaved Mode..............................................32 -7. De-Packetization Process (Informative)............................32 - 7.1. Single NAL Unit and Non-Interleaved Mode......................32 - 7.2. Interleaved Mode..............................................33 - 7.3. Additional De-Packetization Guidelines........................35 -8. Payload Format Parameters.........................................36 - 8.1. MIME Registration.............................................36 - 8.2. SDP Parameters................................................49 - 8.3. Examples......................................................55 - 8.4. Parameter Set Considerations..................................57 -9. Security Considerations...........................................59 -10. Congestion Control...............................................60 -11. IANA Consideration...............................................61 -12. Informative Appendix: Application Examples.......................61 - 12.1. Video Telephony according to ITU-T Recommendation H.241 Annex - A..................................................................61 - 12.2. Video Telephony, No Slice Data Partitioning, No NAL Unit - Aggregation........................................................61 - 12.3. Video Telephony, Interleaved Packetization Using NAL Unit - Aggregation........................................................62 - 12.4. Video Telephony, with Data Partitioning......................62 - 12.5. Video Telephony or Streaming, with FUs and Forward Error - Correction.........................................................63 - -Wenger et. al. Expires December 2004 [Page 2] - -Internet Draft July, 2004 - - 12.6. Low-Bit-Rate Streaming.......................................65 - 12.7. Robust Packet Scheduling in Video Streaming..................66 -13. Informative Appendix: Rationale for Decoding Order Number........66 - 13.1. Introduction.................................................66 - 13.2. Example of Multi-Picture Slice Interleaving..................67 - 13.3. Example of Robust Packet Scheduling..........................68 - 13.4. Robust Transmission Scheduling of Redundant Coded Slices.....72 - 13.5. Remarks on Other Design Possibilities........................72 -14. Acknowledgements.................................................73 -15. Full Copyright Statement.........................................73 -16. Intellectual Property Notice.....................................73 -17. References.......................................................74 - 17.1. Normative References.........................................74 - 17.2. Informative References.......................................74 -18. RFC Editor Considerations........................................76 -Annex A: Changes relative to draft-ietf-avt-rtp-h264-07.txt..........77 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Wenger et. al. Expires December 2004 [Page 3] - -Internet Draft July, 2004 - - -1. 
Introduction - -1.1. The H.264 codec - - This memo specifies an RTP payload specification for the video - coding standard known as ITU-T Recommendation H.264 [1] and ISO/IEC - International Standard 14496 Part 10 (both also known as Advanced - Video Coding, AVC) [2]. Recommendation H.264 was approved by ITU-T - on May 2003, and the approved draft specification is available for - public review [9]. In this memo the H.264 acronym is used for the - codec and the standard, but the memo is equally applicable to the - ISO/IEC counterpart of the coding standard. - - The H.264 video codec has a very broad application range that covers - all forms of digital compressed video from low bit rate Internet - streaming applications to HDTV broadcast and Digital Cinema - applications with near loss-less coding. The overall performance of - H.264 is as such that bit rate savings of 50% or more, compared to - the current state of technology, are reported. Digital Satellite TV - quality, for example, was reported to be achievable at 1.5 Mbit/s, - compared to the current operation point of MPEG 2 video at around - 3.5 Mbit/s [10]. - - The codec specification [1] itself distinguishes conceptually - between a video coding layer (VCL), and a network abstraction layer - (NAL). The VCL contains the signal processing functionality of the - codec, mechanisms such as transform, quantization, motion - compensated prediction, and a loop filter. It follows the general - concept of most of today's video codecs, a macroblock-based coder - that utilizes inter picture prediction with motion compensation, and - transform coding of the residual signal. The VCL encoder outputs - slices: a bit string that contains the macroblock data of an integer - number of macroblocks, and the information of the slice header - (containing the spatial address of the first macroblock in the - slice, the initial quantization parameter, and similar). - Macroblocks in slices are ordered in scan order unless a different - macroblock allocation is specified, using the so-called Flexible - Macroblock Ordering syntax. In-picture prediction is used only - within a slice. More information is provided in [9]. - - The Network Abstraction Layer (NAL) encoder encapsulates the slice - output of the VCL encoder into Network Abstraction Layer Units (NAL - units), which are suitable for the transmission over packet networks - or the use in packet oriented multiplex environments. Annex B of - H.264 defines an encapsulation process to transmit such NAL units - over byte-stream oriented networks. In the scope of this memo Annex - B is not relevant. - - Internally, the NAL uses NAL units. A NAL unit consists of a one- - byte header and the payload byte string. The header indicates the - -Wenger et. al. Expires December 2004 [Page 4] - -Internet Draft July, 2004 - - type of the NAL unit, the (potential) presence of bit errors or - syntax violations in the NAL unit payload, and information regarding - the relative importance of the NAL unit for the decoding process. - This RTP payload specification is designed to be unaware of the bit - string in the NAL unit payload. - - One of the main properties of H.264 is the complete decoupling of - the transmission time, the decoding time, and the sampling or - presentation time of slices and pictures. 
The decoding process - specified in H.264 is unaware of time, and the H.264 syntax does not - carry information such as the number of skipped frames (as common in - the form of the Temporal Reference in earlier video compression - standards). Also, there are NAL units that affect many pictures and - are, hence, inherently time-less. For this reason, the handling of - the RTP timestamp requires some special considerations for those NAL - units for which the sampling or presentation time is not defined, - or, at transmission time, unknown. - - -1.2. Parameter Set Concept - - One very fundamental design concept of H.264 is to generate self- - contained packets, to make mechanisms such as the header duplication - of RFC 2429 [12] or MPEG-4's Header Extension Code (HEC) [13] - unnecessary. The way that this was achieved is to decouple - information that is relevant to more than one slice from the media - stream. This higher layer meta information should be sent reliably, - asynchronously and in advance from the RTP packet stream that - contains the slice packets. (Provisions for sending this - information in-band are also available for such applications that do - not have an out-of-band transport channel appropriate for the - purpose.) The combination of the higher-level parameters is called - a parameter set. The H.264 specification includes two types of - parameter sets: sequence parameter set and picture parameter set. - An active sequence parameter set remains unchanged throughout a - coded video sequence, and an active picture parameter set remains - unchanged within a coded picture. The sequence and picture - parameter set structures contain information such as picture size, - optional coding modes employed, and macroblock to slice group map. - - In order to be able to change picture parameters (such as the - picture size), without having the need to transmit parameter set - updates synchronously to the slice packet stream, the encoder and - decoder can maintain a list of more than one sequence and picture - parameter set. Each slice header contains a codeword that indicates - the sequence and picture parameter set to be used. - - This mechanism allows the decoupling of the transmission of - parameter sets from the packet stream, and the transmission of them - by external means, e.g. as a side effect of the capability exchange, - or through a (reliable or unreliable) control protocol. It may even - -Wenger et. al. Expires December 2004 [Page 5] - -Internet Draft July, 2004 - - be possible that they get never transmitted but are fixed by an - application design specification. - - -1.3. Network Abstraction Layer Unit Types - - Tutorial information on the NAL design can be found in [14], - [15] and [16]. - - All NAL units consist of a single NAL unit type octet, which also - co-serves as the payload header of this RTP payload format. The - payload of a NAL unit follows immediately. - - The syntax and semantics of the NAL unit type octet are specified in - [1], but the essential properties of the NAL unit type octet are - summarized below. The NAL unit type octet has the following format: - - +---------------+ - |0|1|2|3|4|5|6|7| - +-+-+-+-+-+-+-+-+ - |F|NRI| Type | - +---------------+ - - The semantics of the components of the NAL unit type octet, as - specified in the H.264 specification, are described briefly below. - - F: 1 bit - forbidden_zero_bit. The H.264 specification declares a value of - 1 as a syntax violation. - - NRI: 2 bits - nal_ref_idc. 
A value of 00 indicates that the content of the - NAL unit is not used to reconstruct reference pictures for inter - picture prediction. Such NAL units can be discarded without - risking the integrity of the reference pictures. Values greater - than 00 indicate that the decoding of the NAL unit is required - to maintain the integrity of the reference pictures. - - Type: 5 bits - nal_unit_type. This component specifies the NAL unit payload - type as defined in table 7-1 of [1], and later within this memo. - For a reference of all currently defined NAL unit types and - their semantics please refer to section 7.4.1 in [1]. - - This memo introduces new NAL unit types, which are presented in - section 5.2. The NAL unit types defined in this memo are marked as - unspecified in [1]. Moreover, this specification extends the - semantics of F and NRI as described in section 5.3. - - - - -Wenger et. al. Expires December 2004 [Page 6] - -Internet Draft July, 2004 - -2. Conventions - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in RFC 2119 [3]. - - This specification uses the notion of setting and clearing a bit - when handling bit fields. Setting a bit is the same as assigning - that bit the value of 1 (On). Clearing a bit is the same as - assigning that bit the value of 0 (Off). - - -3. Scope - - This payload specification can only be used to carry the "naked" - H.264 NAL unit stream over RTP, and not the bitstream format - discussed in Annex B of H.264. Likely, the first applications of - this specification will be in the conversational multimedia field, - video telephony or video conferencing, but the payload format also - covers other applications such as Internet streaming and TV over IP. - - -4. Definitions and Abbreviations - -4.1. Definitions - - This document uses the definitions of [1]. The following terms - defined in [1] are summed up below for convenience: - - access unit: A set of NAL units always containing a primary - coded picture. In addition to the primary coded picture, an - access unit may also contain one or more redundant coded - pictures or other NAL units not containing slices or slice data - partitions of a coded picture. The decoding of an access unit - always results in a decoded picture. - - coded video sequence: A sequence of access units that consists, - in decoding order, of an instantaneous decoding refresh (IDR) - access unit followed zero or more non-IDR access units including - all subsequent access units up to but not including any - subsequent IDR access unit. - - IDR access unit: An access unit in which the primary coded - picture is an IDR picture. - - IDR picture: A coded picture containing only slices with I or SI - slice types that causes a "reset" in the decoding process. - After the decoding of an IDR picture all following coded - pictures in decoding order can be decoded without inter - prediction from any picture decoded prior to the IDR picture. - - -Wenger et. al. Expires December 2004 [Page 7] - -Internet Draft July, 2004 - - primary coded picture: The coded representation of a picture to - be used by the decoding process for a bitstream conforming to - H.264. The primary coded picture contains all macroblocks of - the picture. - - redundant coded picture: A coded representation of a picture or - a part of a picture. 
The content of a redundant coded picture - shall not be used by the decoding process for a bitstream - conforming to H.264. The content of a redundant coded picture - may be used by the decoding process for a bitstream that - contains errors or losses. - - VCL NAL unit: A collective term used to refer to coded slice and - coded data partition NAL units. - - In addition, the following definitions apply: - - decoding order number (DON): A field in the payload structure or - a derived variable indicating NAL unit decoding order. Values - of DON are in the range of 0 to 65535, inclusive. After - reaching the maximum value, the value of DON wraps around to 0. - - NAL unit decoding order: A NAL unit order that conforms to the - constraints on NAL unit order given in section 7.4.1.2 in [1]. - - transmission order: The order of packets in ascending RTP - sequence number order (in modulo arithmetic). Within an - aggregation packet, the NAL unit transmission order is the same - as the order of appearance of NAL units in the packet. - - Media aware network element (MANE): A network element, such as a - middlebox or (application layer) gateway that is capable of - parsing certain aspects of the RTP payload headers or the RTP - payload, and reacting on the contents. - - Informative note: The concept of a MANE goes beyond normal - routers or gateways in that a MANE has to be aware of the - signalling (e.g. to learn about the payload type mappings of - the media streams) and that it has to be trusted when working - with SRTP. The advantage of using MANEs is that they allow to - drop packets according to the needs of the media coding. For - example, if a MANE needs to drop packets due to congestion on - a certain link, it can identify those packets whose dropping - has the smallest negative impact on the user experience, and - remove those in order to remove the congestion and/or keep - the delay low. - - - - - - - -Wenger et. al. Expires December 2004 [Page 8] - -Internet Draft July, 2004 - - Abbreviations - - DON: Decoding Order Number - DONB: Decoding Order Number Base - DOND: Decoding Order Number Difference - FEC: Forward Error Correction - FU: Fragmentation Unit - IDR: Instantaneous Decoding Refresh - IEC: International Electrotechnical Commission - ISO: International Organization for Standardization - ITU-T: International Telecommunication Union, Telecommunication - Standardization Sector - MANE: Media Aware Network Element - MTAP: Multi-Time Aggregation Packet - MTAP16: MTAP with 16-bit timestamp offset - MTAP24: MTAP with 24-bit timestamp offset - NAL: Network Abstraction Layer - NALU: NAL Unit - SEI: Supplemental Enhancement Information - STAP: Single-Time Aggregation Packet - STAP-A: STAP type A - STAP-B: STAP type B - TS: Timestamp - VCL: Video Coding Layer - - -5. RTP Payload Format - -5.1. RTP Header Usage - - The format of the RTP header is specified in RFC 3550 [4] and - reprinted in Figure 1 for convenience. This payload format uses the - fields of the header in a manner consistent with that specification. - - When encapsulating one NAL unit per RTP packet, the RECOMMENDED RTP - payload format is specified in section 5.6. The RTP payload (and - the settings for some RTP header bits) for aggregation packets and - fragmentation units are specified in sections 5.7 and 5.8, - respectively. - - - - - - - - - - - - - - - - -Wenger et. al. 
Expires December 2004 [Page 9] - -Internet Draft July, 2004 - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |V=2|P|X| CC |M| PT | sequence number | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | timestamp | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | synchronization source (SSRC) identifier | - +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ - | contributing source (CSRC) identifiers | - | .... | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 1: RTP header according RFC 3550. - - - The RTP header information to be set according to this RTP payload - format is set as follows: - - Marker bit (M): 1 bit - Set for the very last packet of the access unit indicated by the - RTP timestamp, in line with the normal use of the M bit in video - formats and to allow an efficient playout buffer handling. For - aggregation packets (STAP and MTAP) the marker bit in the RTP - header MUST be set to the value that the marker bit of the last - NAL unit of the aggregation packet would have if it were - transported in its own RTP packet. Decoders MAY use this bit as - an early indication of the last packet of an access unit, but - MUST NOT rely on this property. - - Informative note: Only one M bit is associated with an - aggregation packet carrying multiple NAL units, and thus if a - gateway has re-packetized an aggregation packet into several - packets, it cannot reliably set the M bit of those packets. - - Payload type (PT): 7 bits - The assignment of an RTP payload type for this new packet format - is outside the scope of this document, and will not be specified - here. The assignment of a payload type needs to be performed - either through the profile used or in a dynamic way. - - Sequence number (SN): 16 bits - Set and used in accordance with RFC 3550. For the single NALU - and non-interleaved packetization mode, the sequence number is - used to determine decoding order for the NALU. - - Timestamp: 32 bits - The RTP timestamp is set to the sampling timestamp of the - content. A 90 kHz clock rate MUST be used. - - -Wenger et. al. Expires December 2004 [Page 10] - -Internet Draft July, 2004 - - If the NAL unit has no timing properties of its own (e.g. - parameter set and SEI NAL units), the RTP timestamp is set to - the RTP timestamp of the primary coded picture of the access - unit in which the NAL unit is included according to section - 7.4.1.2 of [1]. - - The setting of the RTP Timestamp for MTAPs is defined in section - 5.7.2. - - Receivers SHOULD ignore any picture timing SEI messages included - in access units that have only one display timestamp. Instead, - receivers SHOULD use the RTP timestamp for synchronizing the - display process. - - RTP senders SHOULD NOT transmit picture timing SEI messages for - pictures that are not supposed to be displayed as multiple - fields. - - In case that one access unit has more than one display timestamp - carried in a picture timing SEI message, then the information in - the SEI message SHOULD be treated as relative to the RTP - timestamp, with the earliest event occurring at the time given - by the RTP timestamp, and subsequent events later, as given by - the difference in SEI message picture timing values. 
Let tSEI1, - tSEI2, ..., tSEIn be the display timestamps carried in the SEI - message of an access unit, where tSEI1 is the earliest of all - such timestamps. Let tmadjst() be a function that adjusts the - SEI messages time scale to a 90-kHz time scale. Let TS be the - RTP timestamp. Then, the display time for the event associated - with tSEI1 is TS. The display time for the event with tSEIx, - where x is [2..n] is TS + tmadjst (tSEIx - tSEI1). - - Informative note: Displaying coded frames as fields is needed - commonly in an operation known as 3:2 pulldown where film - content that consists of coded frames is displayed on an - display using interlaced scanning. The picture timing SEI - message enables carriage of multiple timestamps for the same - coded picture, and therefore the 3:2 pulldown process is - perfectly controlled. The picture timing SEI message - mechanism is necessary, because only one timestamp per coded - frame can be conveyed in the RTP timestamp. - - Informative note: Due to the fact that H.264 allows the - decoding order to be different from the display order, values - of RTP timestamps may not be monotonically non-decreasing as - a function of RTP sequence numbers. Furthermore, the value - for interarrival jitter reported in the RTCP reports may not - be a trustworthy indication of the network performance, as - the calculation rules for interarrival jitter (section 6.4.1 - of RFC 3550) assume that the RTP timestamp of a packet is - directly proportional to its transmission time. - -Wenger et. al. Expires December 2004 [Page 11] - -Internet Draft July, 2004 - - - -5.2. Common structure of the RTP payload format - - The payload format defines three different basic payload structures. - A receiver can identify the payload structure by the first byte of - the RTP payload, which co-serves as the RTP payload header and in - some cases as the first byte of the payload. This byte is always - structured as a NAL unit header. The NAL unit type field indicates - which structure is present. The possible structures are: - - Single NAL Unit Packet: Contains only a single NAL unit in the - payload. The NAL header type field will be equal to the original - NAL unit type, i.e., in the range of 1 to 23, inclusive. Specified - in section 5.6. - - Aggregation packet: Packet type used to aggregate multiple NAL units - into a single RTP payload. This packet exists in four versions, the - Single-Time Aggregation Packet type A (STAP-A), the Single-Time - Aggregation Packet type B (STAP-B), Multi-Time Aggregation Packet - (MTAP) with 16 bit offset (MTAP16), and Multi-Time Aggregation - Packet (MTAP) with 24 bit offset (MTAP24). The NAL unit type - numbers assigned for STAP-A, STAP-B, MTAP16, and MTAP24 are 24, 25, - 26, and 27, respectively. Specified in section 5.7. - - Fragmentation unit: Used to fragment a single NAL unit over multiple - RTP packets. Exists with two versions, FU-A and FU-B, identified - with the NAL unit type numbers 28 and 29, respectively. Specified - in section 5.8. - - Table 1. Summary of NAL unit types and their payload structures. 
- - Type Packet Type name Section - --------------------------------------------------------- - 0 undefined - - 1-23 NAL unit Single NAL unit packet per H.264 5.6 - 24 STAP-A Single-time aggregation packet 5.7.1 - 25 STAP-B Single-time aggregation packet 5.7.1 - 26 MTAP16 Multi-time aggregation packet 5.7.2 - 27 MTAP24 Multi-time aggregation packet 5.7.2 - 28 FU-A Fragmentation unit 5.8 - 29 FU-B Fragmentation unit 5.8 - 30-31 undefined - - - - Informative note: This specification does not limit the size of - NAL units encapsulated in single NAL unit packets and - fragmentation units. The maximum size of a NAL unit - encapsulated in any aggregation packet is 65535 bytes. - - - -Wenger et. al. Expires December 2004 [Page 12] - -Internet Draft July, 2004 - -5.3. NAL Unit Octet Usage - - The structure and semantics of the NAL unit octet were introduced in - section 1.3. For convenience, the format of the NAL unit type octet - is reprinted below: - - +---------------+ - |0|1|2|3|4|5|6|7| - +-+-+-+-+-+-+-+-+ - |F|NRI| Type | - +---------------+ - - This section specifies the semantics of F and NRI according to this - specification. - - F: 1 bit - forbidden_zero_bit. A value of 0 indicates that the NAL unit - type octet and payload should not contain bit errors or other - syntax violations. A value of 1 indicates that the NAL unit - type octet and payload may contain bit errors or other syntax - violations. - - MANEs SHOULD set the F bit to indicate detected bit errors in - the NAL unit. The H.264 specification requires that the F bit - is equal to 0. When the F bit is set, the decoder is advised - that bit errors or any other syntax violation may be present in - the payload or in the NAL unit type octet. The simplest decoder - reaction to respond to a NAL unit in which the F bit is equal to - 1 is to discard such a NAL unit and to conceal the lost data in - the discarded NAL unit. - - NRI: 2 bits - nal_ref_idc. The semantics of value 00 and a non-zero value - remain unchanged compared to the H.264 specification. In other - words, a value of 00 indicates that the content of the NAL unit - is not used to reconstruct reference pictures for inter picture - prediction. Such NAL units can be discarded without risking the - integrity of the reference pictures. Values greater than 00 - indicate that the decoding of the NAL unit is required to - maintain the integrity of the reference pictures. - - In addition to the specification above, according to this RTP - payload specification, values of NRI greater than 00 indicate - the relative transport priority, as determined by the encoder. - MANEs can use this information to protect more important NAL - units better than less important NAL units. 11 is the highest - transport priority, followed by 10, then by 01 and, finally, 00 - is the lowest. - - Informative note: Any non-zero value of NRI is handled - identically in H.264 decoders. Therefore, receivers need not - -Wenger et. al. Expires December 2004 [Page 13] - -Internet Draft July, 2004 - - manipulate the value of NRI when passing NAL units to the - decoder. - - An H.264 encoder MUST set the value of NRI according to the - H.264 specification (subclause 7.4.1), when the value of - nal_unit_type is in the range of 1 to 12, inclusive. In - particular, the H.264 specification requires that the value of - NRI SHALL be equal to 0 for all NAL units having nal_unit_type - equal to 6, 9, 10, 11, or 12. 
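
   Informative note: the F, NRI and Type fields described above can be
   extracted from the first payload octet with simple shift-and-mask
   operations, and the mapping of Type values to packet classes follows
   Table 1. A minimal, non-normative C sketch (names are illustrative):

       #include <stdint.h>

       /* Packet classes per Table 1. */
       typedef enum {
           H264_PKT_SINGLE_NALU,   /* Type 1..23: single NAL unit packet */
           H264_PKT_STAP_A,        /* 24 */
           H264_PKT_STAP_B,        /* 25 */
           H264_PKT_MTAP16,        /* 26 */
           H264_PKT_MTAP24,        /* 27 */
           H264_PKT_FU_A,          /* 28 */
           H264_PKT_FU_B,          /* 29 */
           H264_PKT_UNDEFINED      /* 0, 30, 31: undefined */
       } h264_pkt_class;

       /* Decode the one-octet payload header: F (1 bit), NRI (2 bits),
        * Type (5 bits). */
       static h264_pkt_class classify_payload_header(uint8_t octet,
                                                     unsigned *f,
                                                     unsigned *nri,
                                                     unsigned *type)
       {
           *f    = (octet >> 7) & 0x1;
           *nri  = (octet >> 5) & 0x3;
           *type =  octet       & 0x1f;

           if (*type >= 1 && *type <= 23) return H264_PKT_SINGLE_NALU;
           switch (*type) {
           case 24: return H264_PKT_STAP_A;
           case 25: return H264_PKT_STAP_B;
           case 26: return H264_PKT_MTAP16;
           case 27: return H264_PKT_MTAP24;
           case 28: return H264_PKT_FU_A;
           case 29: return H264_PKT_FU_B;
           default: return H264_PKT_UNDEFINED;
           }
       }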
- - An H.264 encoder SHOULD set the value of NRI for NAL units - having nal_unit_type equal to 7 or 8 (indicating a sequence - parameter set or a picture parameter set respectively) to 11 (in - binary format). An H.264 encoder SHOULD set the value of NRI for - coded slice NAL units of a primary coded picture having - nal_unit_type equal to 5 (indicating a coded slice belonging to - an IDR picture) to 11 (in binary format). - - The following example for a mapping of the remaining - nal_unit_types to NRI values MAY be used and has been shown as - efficient in a certain environment [15]. Other mappings MAY also - be desirable, depending on the application and the H.264/AVC - Annex A profile in use. - Informative Note: Data Partitioning is not available in - certain profiles, e.g. in the Main or Baseline profiles. - Consequently, the nal unit types 2, 3, and 4 can occur only - if the video bit stream conforms to a profile in which data - partitioning is allowed, and not in streams that conform to - the Main or Baseline profiles. - - Table 2: Example of NRI values for coded slices and coded slice - data partitions of primary coded reference pictures - - - NAL Unit Type Content of NAL unit NRI - (binary) - ---------------------------------------------------------------- - 1 non-IDR coded slice 10 - 2 Coded slice data partition A 10 - 3 Coded slice data partition B 01 - 4 Coded slice data partition C 01 - - Informative note: As mentioned before, the NRI value of non- - reference pictures is 00 as mandated by H.264/AVC. - - An H.264 encoder SHOULD set the value of NRI for coded slice and - coded slice data partition NAL units of redundant coded - reference pictures equal to 01 (in binary format). - - Definitions of the values for NRI for NAL unit types 24 to 29, - inclusive, are given in sections 5.7 and 5.8 of this memo. - -Wenger et. al. Expires December 2004 [Page 14] - -Internet Draft July, 2004 - - - No recommendation for the value of NRI is given for NAL units - having nal_unit_type in the range of 13 to 23, inclusive, - because these values are reserved for ITU-T and ISO/IEC. No - recommendation for the value of NRI is given for NAL units - having nal_unit_type equal to 0 or in the range of 30 to 31, - inclusive, because the semantics of these values are not - specified in this memo. - - -5.4. Packetization Modes - - This memo specifies three cases of packetization modes: - o Single NAL unit mode - o Non-interleaved mode - o Interleaved mode - - The single NAL unit mode is targeted for conversational systems that - comply with ITU-T Recommendation H.241 [17] (see section 12.1). The - non-interleaved mode is targeted for conversational systems that may - not comply with ITU-T Recommendation H.241. In the non-interleaved - mode NAL units are transmitted in NAL unit decoding order. The - interleaved mode is targeted for systems that do not require very - low end-to-end latency. The interleaved mode allows transmission of - NAL units out of NAL unit decoding order. - - The packetization mode in use MAY be signaled by the value of the - OPTIONAL packetization-mode MIME parameter or by external means. - The used packetization mode governs which NAL unit types are allowed - in RTP payloads. Table 3 summarizes the allowed NAL unit types for - each packetization mode. Some NAL unit type values (indicated as - undefined in Table 3) are reserved for future extensions. NAL units - of those types SHOULD NOT be sent by a sender, and MUST be ignored - by a receiver. 
For example, the Types 1-23, with the associated - packet type "NAL unit", are allowed in "Single NAL Unit Mode" and in - "Non-Interleaved Mode", but disallowed in "Interleaved Mode". - Packetization modes are explained in more detail in section 6. - - - - - - - - - - - - - - - - - - -Wenger et. al. Expires December 2004 [Page 15] - -Internet Draft July, 2004 - - Table 3. Summary of allowed NAL unit types for each packetization - mode (yes = allowed, no = disallowed, ig = ignore). - - Type Packet Single NAL Non-Interleaved Interleaved - Unit Mode Mode Mode - ------------------------------------------------------------- - - 0 undefined ig ig ig - 1-23 NAL unit yes yes no - 24 STAP-A no yes no - 25 STAP-B no no yes - 26 MTAP16 no no yes - 27 MTAP24 no no yes - 28 FU-A no yes yes - 29 FU-B no no yes - 30-31 undefined ig ig ig - -5.5. Decoding Order Number (DON) - - In the interleaved packetization mode, the transmission order of NAL - units is allowed to differ from the decoding order of the NAL units. - Decoding order number (DON) is a field in the payload structure or a - derived variable that indicates the NAL unit decoding order. - Rationale and example use cases for transmission out of decoding - order and for the use of DON are given in section 13. - - The coupling of transmission and decoding order is controlled by the - OPTIONAL sprop-interleaving-depth MIME parameter as follows. When - the value of the OPTIONAL sprop-interleaving-depth MIME parameter is - equal to 0 (explicitly or per default) or transmission of NAL units - out of their decoding order is disallowed by external means, the - transmission order of NAL units MUST conform to the NAL unit - decoding order. When the value of the OPTIONAL sprop-interleaving- - depth MIME parameter is greater than 0 or transmission of NAL units - out of their decoding order is allowed by external means, - o the order of NAL units in an MTAP16 and an MTAP24 is NOT REQUIRED - to be the NAL unit decoding order, and - o the order of NAL units generated by decapsulating STAP-Bs, MTAPs, - and FUs in two consecutive packets is NOT REQUIRED to be the NAL - unit decoding order. - - The RTP payload structures for a single NAL unit packet, an STAP-A, - and an FU-A do not include DON. STAP-B and FU-B structures include - DON, and the structure of MTAPs enables derivation of DON as - specified in section 5.7.2. - - Informative note: When an FU-A occurs in interleaved mode, it - always follows an FU-B which sets its DON. - - - - - -Wenger et. al. Expires December 2004 [Page 16] - -Internet Draft July, 2004 - - Informative note: If a transmitter wants to encapsulate a single - NAL unit per packet and transmit packets out of their decoding - order, STAP-B packet type can be used. - - In the single NAL unit packetization mode, the transmission order of - NAL units, determined by the RTP sequence number, MUST be the same - as their NAL unit decoding order. In the non-interleaved - packetization mode, the transmission order of NAL units in single - NAL unit packets and STAP-As, and FU-As MUST be the same as their - NAL unit decoding order. The NAL units within an STAP MUST appear - in the NAL unit decoding order. Thus the decoding order is first - provided through the implicit order within a STAP, and second - provided through the RTP sequence number for the order between - STAPs, FUs, and single NAL unit packets. 
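
   Informative note: the don_diff(m,n) comparison specified in the
   following paragraphs, together with the 16-bit wrap-around of DON, can
   be sketched in C as follows (illustrative only):

       #include <stdint.h>

       /* don_diff(m,n): positive if the NAL unit with DON value don_n
        * follows the one with don_m in decoding order, negative if it
        * precedes it, and 0 if either order is allowed.  DON values wrap
        * around modulo 65536. */
       static int don_diff(uint16_t don_m, uint16_t don_n)
       {
           if (don_m == don_n)
               return 0;
           if (don_m < don_n && (don_n - don_m) < 32768)
               return don_n - don_m;
           if (don_m > don_n && (don_m - don_n) >= 32768)
               return 65536 - don_m + don_n;
           if (don_m < don_n)                 /* and don_n - don_m >= 32768 */
               return -(int)(don_m + 65536 - don_n);
           return -(int)(don_m - don_n);      /* don_m > don_n, diff < 32768 */
       }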
- - Signaling of the value of DON for NAL units carried in STAP-B, MTAP, - and a series of fragmentation units starting with an FU-B is - specified in sections 5.7.1, 5.7.2, and 5.8 respectively. The DON - value of the first NAL unit in transmission order MAY be set to any - value. Values of DON are in the range of 0 to 65535, inclusive. - After reaching the maximum value, the value of DON wraps around to - 0. - - The decoding order of two NAL units contained in any STAP-B, MTAP, - or a series of fragmentation units starting with an FU-B is - determined as follows. Let DON(i) be the decoding order number of - the NAL unit having index i in the transmission order. Function - don_diff(m,n) is specified as follows: - - If DON(m) == DON(n), don_diff(m,n) = 0 - - If (DON(m) < DON(n) and DON(n) - DON(m) < 32768), - don_diff(m,n) = DON(n) - DON(m) - - If (DON(m) > DON(n) and DON(m) - DON(n) >= 32768), - don_diff(m,n) = 65536 - DON(m) + DON(n) - - If (DON(m) < DON(n) and DON(n) - DON(m) >= 32768), - don_diff(m,n) = - (DON(m) + 65536 - DON(n)) - - If (DON(m) > DON(n) and DON(m) - DON(n) < 32768), - don_diff(m,n) = - (DON(m) - DON(n)) - - A positive value of don_diff(m,n) indicates that the NAL unit having - transmission order index n follows, in decoding order, the NAL unit - having transmission order index m. When don_diff(m,n) is equal to - 0, then the NAL unit decoding order of the two NAL units can be in - either order. A negative value of don_diff(m,n) indicates that the - NAL unit having transmission order index n precedes, in decoding - order, the NAL unit having transmission order index m. - - -Wenger et. al. Expires December 2004 [Page 17] - -Internet Draft July, 2004 - - Values of DON related fields (DON, DONB, and DOND, see section 5.7) - MUST be such that the decoding order determined by the values of DON - as specified above conforms to the NAL unit decoding order. If the - order of two NAL units in NAL unit decoding order is switched and - the new order does not conform to the NAL unit decoding order, the - NAL units MUST NOT have the same value of DON. If the order of two - consecutive NAL units in the NAL unit stream is switched and the new - order still conforms to the NAL unit decoding order, the NAL units - MAY have the same value of DON. For example, when arbitrary slice - order is allowed by the video coding profile in use, all the coded - slice NAL units of a coded picture are allowed to have the same - value of DON. Consequently, NAL units having the same value of DON - can be decoded in any order, and two NAL units having a different - value of DON should be passed to the decoder in the order specified - above. When two consecutive NAL units in the NAL unit decoding - order have a different value of DON, the value of DON for the second - NAL unit in decoding order SHOULD be the value of DON for the first - NAL unit in decoding order incremented by one. - - An example decapsulation process to recover the NAL unit decoding - order is given in section 7. - - Informative note: Receivers should not expect that the absolute - difference of values of DON for two consecutive NAL units in the - NAL unit decoding order is equal to one even in case of error- - free transmission. An increment by one is not required, because - at the time of associating values of DON to NAL units, it may - not be known, whether all NAL units are delivered to the - receiver. 
For example, a gateway may not forward coded slice - NAL units of non-reference pictures or SEI NAL units, when there - is a shortage of bitrate in the network to which the packets are - forwarded. In another example a live broadcast is interrupted - by pre-encoded content such as commercials from time to time. - The first intra picture of a pre-encoded clip is transmitted in - advance to ensure that it is readily available in the receiver. - At the time of transmitting the first intra picture, the - originator does not exactly know how many NAL units are going to - be encoded before the first intra picture of the pre-encoded - clip follows in decoding order. Thus, the values of DON for the - NAL units of the first intra picture of the pre-encoded clip - have to be estimated at the time of transmitting them and gaps - in values of DON may occur. - - -5.6. Single NAL Unit Packet - - The single NAL unit packet defined here MUST contain one and only - one NAL unit of the types defined in [1]. This means that neither - an aggregation packet nor a fragmentation unit can be used within a - single NAL unit packet. A NAL unit stream composed by decapsulating - single NAL unit packets in RTP sequence number order MUST conform to - -Wenger et. al. Expires December 2004 [Page 18] - -Internet Draft July, 2004 - - the NAL unit decoding order. The structure of the single NAL unit - packet is shown in Figure 2. - - Informative note: The first byte of a NAL unit co-serves as the - RTP payload header. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |F|NRI| type | | - +-+-+-+-+-+-+-+-+ | - | | - | Bytes 2..n of a Single NAL unit | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | :...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 2. RTP payload format for single NAL unit packet. - - -5.7. Aggregation Packets - - Aggregation packets are the NAL unit aggregation scheme of this - payload specification. The scheme is introduced to reflect the - dramatically different MTU sizes of two key target networks -- - wireline IP networks (with an MTU size that is often limited by the - Ethernet MTU size -- roughly 1500 bytes), and IP or non-IP (e.g. - ITU-T H.324/M) based wireless communication systems with preferred - transmission unit sizes of 254 bytes or less. In order to prevent - media transcoding between the two worlds, and to avoid undesirable - packetization overhead, a NAL unit aggregation scheme is introduced. - - Two types of aggregation packets are defined by this specification: - o Single-time aggregation packet (STAP) aggregates NAL units with - identical NALU-time. Two types of STAPs are defined, one without - DON (STAP-A) and another one including DON (STAP-B). - o Multi-time aggregation packet (MTAP) aggregates NAL units with - potentially differing NALU-time. Two different MTAPs are defined - that differ in the length of the NAL unit timestamp offset. - - The term NALU-time is defined as the value that the RTP timestamp - would have if that NAL unit would be transported in its own RTP - packet. - - Each NAL unit to be carried in an aggregation packet is encapsulated - in an aggregation unit. Please see below for the three different - aggregation units and their characteristics. - - The structure of the RTP payload format for aggregation packets is - presented in Figure 3. - -Wenger et. al. 
Expires December 2004 [Page 19] - -Internet Draft July, 2004 - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |F|NRI| type | | - +-+-+-+-+-+-+-+-+ | - | | - | one or more aggregation units | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | :...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 3. RTP payload format for aggregation packets. - - MTAPs and STAPs share the following packetization rules: The RTP - timestamp MUST be set to the earliest of the NALU times of all the - NAL units to be aggregated. The type field of the NAL unit type - octet MUST be set to the appropriate value as indicated in Table 4. - The F bit MUST be cleared if all F bits of the aggregated NAL units - are zero, otherwise it MUST be set. The value of NRI MUST be the - maximum of all the NAL units carried in the aggregation packet. - - - Table 4. Type field for STAPs and MTAPs - - Type Packet Timestamp offset DON related fields - field length (DON, DONB, DOND) - (in bits) present - -------------------------------------------------------- - 24 STAP-A 0 no - 25 STAP-B 0 yes - 26 MTAP16 16 yes - 27 MTAP24 24 yes - - The marker bit in the RTP header is set to the value the marker bit - of the last NAL unit of the aggregated packet would have if it were - transported in its own RTP packet. - - The payload of an aggregation packet consists of one or more - aggregation units. See section 5.7.1 and 5.7.2 for the four - different types of aggregation units. An aggregation packet can - carry as many aggregation units as necessary, however the total - amount of data in an aggregation packet obviously MUST fit into an - IP packet, and the size SHOULD be chosen such that the resulting IP - packet is smaller than the MTU size. An aggregation packet MUST NOT - contain fragmentation units specified in section 5.8. Aggregation - packets MUST NOT be nested, i.e., an aggregation packet MUST NOT - contain another aggregation packet. - - - -Wenger et. al. Expires December 2004 [Page 20] - -Internet Draft July, 2004 - -5.7.1. Single-Time Aggregation Packet - - Single-time aggregation packet (STAP) SHOULD be used whenever - aggregating NAL units that all share the same NALU-time. The - payload of an STAP-A does not include DON and consists of at least - one single-time aggregation unit as presented in Figure 4. The - payload of an STAP-B consists of a 16-bit unsigned decoding order - number (DON) (in network byte order) followed by at least one - single-time aggregation unit as presented in Figure 5. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - : | - +-+-+-+-+-+-+-+-+ | - | | - | single-time aggregation units | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 4. Payload format for STAP-A. - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - : decoding order number (DON) | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | - | | - | single-time aggregation units | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 5. Payload format for STAP-B. - - The DON field specifies the value of DON for the first NAL unit in - an STAP-B in transmission order. 
The value of DON for each - successive NAL unit in appearance order in an STAP-B is equal to - (the value of DON of the previous NAL unit in the STAP-B + 1) % - 65536, in which '%' stands for the modulo operation. - - A single-time aggregation unit consists of 16-bit unsigned size - information (in network byte order) that indicates the size of the - following NAL unit in bytes (excluding these two octets, but - including the NAL unit type octet of the NAL unit), followed by the - NAL unit itself including its NAL unit type byte. A single-time - aggregation unit is byte-aligned within the RTP payload but it may - -Wenger et. al. Expires December 2004 [Page 21] - -Internet Draft July, 2004 - - not be aligned on a 32-bit word boundary. Figure 6 presents the - structure of the single-time aggregation unit. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - : NAL unit size | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | - | | - | NAL unit | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 6. Structure for single-time aggregation unit. - - - Figure 7 presents an example of an RTP packet that contains an STAP- - A. The STAP contains two single-time aggregation units, labeled as - 1 and 2 in the figure. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | RTP Header | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |STAP-A NAL HDR | NALU 1 Size | NALU 1 HDR | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 1 Data | - : | - + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | NALU 2 Size | NALU 2 HDR | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 2 Data | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | :...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 7. An example of an RTP packet including a STAP-A and two - single-time aggregation units. - - - Figure 8 presents an example of an RTP packet that contains an STAP- - B. The STAP contains two single-time aggregation units, labeled as - 1 and 2 in the figure. - - - - - - -Wenger et. al. Expires December 2004 [Page 22] - -Internet Draft July, 2004 - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | RTP Header | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |STAP-B NAL HDR | DON | NALU 1 Size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 1 Size | NALU 1 HDR | NALU 1 Data | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + - : | - + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | NALU 2 Size | NALU 2 HDR | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 2 Data | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | :...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 8. An example of an RTP packet including an STAP-B and two - single-time aggregation units. - - -5.7.2. Multi-Time Aggregation Packets (MTAPs) - - The NAL unit payload of MTAPs consists of a 16-bit unsigned decoding - order number base (DONB) (in network byte order) and one or more - multi-time aggregation units as presented in Figure 9. 
DONB MUST - contain the value of DON for the first NAL unit in the NAL unit - decoding order among the NAL units of the MTAP. - - Informative note: The first NAL unit in the NAL unit decoding - order is not necessarily the first NAL unit in the order the NAL - units are encapsulated in an MTAP. - - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - : decoding order number base | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | - | | - | multi-time aggregation units | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 9. NAL unit payload format for MTAPs. - - -Wenger et. al. Expires December 2004 [Page 23] - -Internet Draft July, 2004 - - Two different multi-time aggregation units are defined in this - specification. Both of them consist of 16 bits unsigned size - information of the following NAL unit (in network byte order), an 8- - bit unsigned decoding order number difference (DOND), and n bits (in - network byte order) of timestamp offset (TS offset) for this NAL - unit, whereby n can be 16 or 24. The choice between the different - MTAP types (MTAP16 and MTAP24) is application dependent -- the - larger the timestamp offset is, the higher is the flexibility of the - MTAP, but the higher is also the overhead. - - The structure of the multi-time aggregation units for MTAP16 and - MTAP24 are presented in Figure 10 and Figure 11 respectively. The - starting or ending position of an aggregation unit within a packet - is NOT REQUIRED to be on a 32-bit word boundary. DON of the - following NAL unit is equal to (DONB + DOND) % 65536, in which % - denotes the modulo operation. This memo does not specify how the - NAL units within an MTAP are ordered, but, in most cases, NAL unit - decoding order SHOULD be used. - - The timestamp offset field MUST be set to a value equal to the value - of the following formula: If the NALU-time is larger than or equal - to the RTP timestamp of the packet, then the timestamp offset equals - (the NALU-time of the NAL unit - the RTP timestamp of the packet). - If the NALU-time is smaller than the RTP timestamp of the packet, - then the timestamp offset is equal to the NALU-time + (2^32 - the - RTP timestamp of the packet). - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - : NAL unit size | DOND | TS offset | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | TS offset | | - +-+-+-+-+-+-+-+-+ NAL unit | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 10. Multi-time aggregation unit for MTAP16 - - - - - - - - - - - - - - -Wenger et. al. Expires December 2004 [Page 24] - -Internet Draft July, 2004 - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - : NALU unit size | DOND | TS offset | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | TS offset | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | - | NAL unit | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 11. Multi-time aggregation unit for MTAP24 - - For the "earliest" multi-time aggregation unit in an MTAP the - timestamp offset MUST be zero. Hence, the RTP timestamp of the MTAP - itself is identical to the earliest NALU-time. 
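
   Informative note: since RTP timestamps are 32-bit modulo-2^32 values,
   both branches of the timestamp offset rule above reduce to a single
   unsigned 32-bit subtraction; an illustrative, non-normative C helper:

       #include <stdint.h>

       /* Timestamp offset of a multi-time aggregation unit, per the rule
        * above: NALU-time minus the RTP timestamp of the packet, computed
        * modulo 2^32.  The result must additionally fit the 16-bit (MTAP16)
        * or 24-bit (MTAP24) TS offset field, and is zero for the "earliest"
        * aggregation unit. */
       static uint32_t mtap_ts_offset(uint32_t nalu_time, uint32_t rtp_timestamp)
       {
           return nalu_time - rtp_timestamp;  /* unsigned wrap covers both cases */
       }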
- - Informative note: The "earliest" multi-time aggregation unit is - the one that has the smallest extended RTP timestamp among all - the aggregation units of an MTAP if the aggregation units were - encapsulated in single NAL unit packets. An extended timestamp - is a timestamp that has more than 32 bits and is capable of - counting the wrap around of the timestamp field, thus enabling - one to actually determine the smallest value if the timestamp - wraps. Such an "earliest" aggregation unit may not be the first - one in the order the aggregation units are encapsulated in an - MTAP. The "earliest" NAL unit need not be the same as the first - NAL unit in the NAL unit decoding order either. - - Figure 12 presents an example of an RTP packet that contains a - multi-time aggregation packet of type MTAP16 that contains two - multi-time aggregation units, labeled as 1 and 2 in the figure. - - - - - - - - - - - - - - - - - - - - - - -Wenger et. al. Expires December 2004 [Page 25] - -Internet Draft July, 2004 - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | RTP Header | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |MTAP16 NAL HDR | decoding order number base | NALU 1 Size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 1 Size | NALU 1 DOND | NALU 1 TS offset | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 1 HDR | NALU 1 DATA | - +-+-+-+-+-+-+-+-+ + - : | - + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | NALU 2 SIZE | NALU 2 DOND | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 2 TS offset | NALU 2 HDR | NALU 2 DATA | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | :...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 12. An example of an RTP packet including a multi-time - aggregation packet of type MTAP16 and two multi-time aggregation - units. - - - Figure 13 presents an example of an RTP packet that contains a - multi-time aggregation packet of type MTAP24 that contains two - multi-time aggregation units, labeled as 1 and 2 in the figure. - - - - - - - - - - - - - - - - - - - - - - - - - -Wenger et. al. Expires December 2004 [Page 26] - -Internet Draft July, 2004 - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | RTP Header | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |MTAP16 NAL HDR | decoding order number base | NALU 1 Size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 1 Size | NALU 1 DOND | NALU 1 TS offs | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |NALU 1 TS offs | NALU 1 HDR | NALU 1 DATA | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + - : | - + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | NALU 2 SIZE | NALU 2 DOND | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 2 TS offset | NALU 2 HDR | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | NALU 2 DATA | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | :...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 13. 
An example of an RTP packet including a multi-time - aggregation packet of type MTAP16 and two multi-time aggregation - units. - - -5.8. Fragmentation Units (FUs) - - This payload type allows fragmenting a NAL unit into several RTP - packets. Doing so on the application layer instead of relying on - lower layer fragmentation (e.g. by IP) has the following advantages: - - o The payload format is capable of transporting NAL units bigger - than 64 kbytes over an IPv4 network that may be present in pre- - recorded video, particularly in High Definition formats (there is - a limit of the number of slices per picture, which results in a - limit of NAL units per picture, which may result in big NAL units) - - o The fragmentation mechanism allows fragmenting a single picture - and applying generic forward error correction as described in - section 12.5. - - Fragmentation is defined only for a single NAL unit, and not for any - aggregation packets. A fragment of a NAL unit consists of an - integer number of consecutive octets of that NAL unit. Each octet - of the NAL unit MUST be part of exactly one fragment of that NAL - unit. Fragments of the same NAL unit MUST be sent in consecutive - order with ascending RTP sequence numbers (with no other RTP packets - within the same RTP packet stream being sent between the first and - - -Wenger et. al. Expires December 2004 [Page 27] - -Internet Draft July, 2004 - - last fragment). Similarly, a NAL unit MUST be reassembled in RTP - sequence number order. - - When a NAL unit is fragmented and conveyed within fragmentation - units (FUs), it is referred to as fragmented NAL unit. STAPs and - MTAPs MUST NOT be fragmented. FUs MUST NOT be nested, i.e., an FU - MUST NOT contain another FU. - - The RTP timestamp of an RTP packet carrying an FU is set to the NALU - time of the fragmented NAL unit. - - Figure 14 presents the RTP payload format for FU-As. An FU-A - consists of a fragmentation unit indicator of one octet, a - fragmentation unit header of one octet, and a fragmentation unit - payload. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | FU indicator | FU header | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | - | | - | FU payload | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | :...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 14. RTP payload format for FU-A. - - Figure 15 presents the RTP payload format for FU-Bs. An FU-B - consists of a fragmentation unit indicator of one octet, a - fragmentation unit header of one octet, a decoding order number - (DON) (in network byte order), and a fragmentation unit payload. In - other words, the structure of FU-B is the same as the structure of - FU-A except for the additional DON field. - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | FU indicator | FU header | DON | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-| - | | - | FU payload | - | | - | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | :...OPTIONAL RTP padding | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Figure 15. RTP payload format for FU-B. - - -Wenger et. al. Expires December 2004 [Page 28] - -Internet Draft July, 2004 - - NAL unit type FU-B MUST be used in the interleaved packetization - mode for the first fragmentation unit of a fragmented NAL unit. 
NAL - unit type FU-B MUST NOT be used in any other case. In other words, - in the interleaved packetization mode, each NALU that is fragmented - has an FU-B as the first fragment, followed by one or more FU-A - fragments. - - The FU indicator octet has the following format: - - +---------------+ - |0|1|2|3|4|5|6|7| - +-+-+-+-+-+-+-+-+ - |F|NRI| Type | - +---------------+ - - Values equal to 28 and 29 in the Type field of the FU indicator - octet identify an FU-A and an FU-B, respectively. The use of the F - bit is described in section 5.3. The value of the NRI field MUST be - set according to the value of the NRI field in the fragmented NAL - unit. - - The FU header has the following format: - - +---------------+ - |0|1|2|3|4|5|6|7| - +-+-+-+-+-+-+-+-+ - |S|E|R| Type | - +---------------+ - - S: 1 bit - The Start bit, when one, indicates the start of a fragmented NAL - unit. Otherwise, when the following FU payload is not the start - of a fragmented NAL unit payload, the Start bit is set to zero. - - E: 1 bit - The End bit, when one, indicates the end of a fragmented NAL - unit, i.e., the last byte of the payload is also the last byte - of the fragmented NAL unit. Otherwise, when the following FU - payload is not the last fragment of a fragmented NAL unit, the - End bit is set to zero. - - R: 1 bit - The Reserved bit MUST be equal to 0 and MUST be ignored by the - receiver. - - Type: 5 bits - The NAL unit payload type as defined in table 7-1 of [1]. - - The value of DON in FU-Bs is selected as described in section 5.5. - - - -Wenger et. al. Expires December 2004 [Page 29] - -Internet Draft July, 2004 - - Informative note: The DON field in FU-Bs allows gateways to - fragment NAL units to FU-Bs without organizing the incoming NAL - units to the NAL unit decoding order. - - A fragmented NAL unit MUST NOT be transmitted in one FU, i.e., Start - bit and End bit MUST NOT both be set to one in the same FU header. - - The FU payload consists of fragments of the payload of the - fragmented NAL unit such that if the fragmentation unit payloads of - consecutive FUs are sequentially concatenated, the payload of the - fragmented NAL unit is reconstructed. The NAL unit type octet of - the fragmented NAL unit is not included as such in the fragmentation - unit payload, but rather the information of the NAL unit type octet - of the fragmented NAL unit is conveyed in F and NRI fields of the FU - indicator octet of the fragmentation unit and in the type field of - the FU header. A FU payload MAY have any number of octets and MAY - be empty. - - Informative note: Empty FUs are allowed to reduce the latency of - a certain class of senders in near loss-less environments. - Those senders can be characterized in that they packetize NALU - fragments before the NALU is completely generated and hence, - before the NALU size if known. If zero-length NALU fragments - were not allowed, the sender would have to generate at least one - bit of data of the following fragment before the current - fragment could be sent. Due to the characteristics of H.264, - where sometimes several macroblocks occupy zero bits, this is - undesirable and can add delay. However, the (potential) use of - zero-length NALUs should be carefully weighted against the - increase of the risk of the loss of the NALU, because of the - additional packets that are employed for its transmission. 
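   As an informal illustration (not part of the draft or of the mpeg4ip
   sources), the FU indicator and FU header octets described above can
   be unpacked, and the NAL unit type octet of the fragmented NAL unit
   rebuilt, roughly as in the following C sketch; all names are
   hypothetical:

      #include <stdint.h>

      typedef struct {
          uint8_t f;        /* forbidden_zero_bit from the FU indicator  */
          uint8_t nri;      /* NRI, copied from the fragmented NAL unit  */
          uint8_t type;     /* FU indicator type: 28 = FU-A, 29 = FU-B   */
          uint8_t start;    /* S bit: first fragment of the NAL unit     */
          uint8_t end;      /* E bit: last fragment of the NAL unit      */
          uint8_t nal_type; /* Type field of the FU header               */
      } fu_fields_t;

      static void fu_unpack(uint8_t fu_indicator, uint8_t fu_header,
                            fu_fields_t *out)
      {
          out->f        = (fu_indicator >> 7) & 0x1;
          out->nri      = (fu_indicator >> 5) & 0x3;
          out->type     = fu_indicator & 0x1F;
          out->start    = (fu_header >> 7) & 0x1;
          out->end      = (fu_header >> 6) & 0x1;
          out->nal_type = fu_header & 0x1F;
      }

      /* NAL unit type octet of the reassembled NAL unit: F and NRI come
       * from the FU indicator, the type comes from the FU header. */
      static uint8_t fu_nal_octet(uint8_t fu_indicator, uint8_t fu_header)
      {
          return (uint8_t)((fu_indicator & 0xE0) | (fu_header & 0x1F));
      }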
- - If a fragmentation unit is lost, the receiver SHOULD discard all - following fragmentation units in transmission order corresponding to - the same fragmented NAL unit. - - A receiver in an endpoint or in a MANE MAY aggregate the first n-1 - fragments of a NAL unit to an (incomplete) NAL unit even if fragment - n of that NAL unit is not received. In this case the - forbidden_zero_bit of the NAL unit MUST be set to one to indicate a - syntax violation. - - -6. Packetization Rules - - The packetization modes are introduced in section 5.2. The - packetization rules that are common to more than one of the - packetization modes are specified in section 6.1. The packetization - rules for the single NAL unit mode, the non-interleaved mode, and - the interleaved mode are specified in sections 6.2, 6.3, and 6.4 - respectively. - -Wenger et. al. Expires December 2004 [Page 30] - -Internet Draft July, 2004 - - - -6.1. Common Packetization Rules - - All senders MUST enforce the following packetization rules - regardless of the packetization mode in use: - - o Coded slice NAL units or coded slice data partition NAL units - belonging to the same coded picture (and hence sharing the same - RTP timestamp value) MAY be sent in any order permitted by the - applicable profile defined in [1], although, for delay-critical - systems, they SHOULD be sent in their original coding order to - minimize the delay. Note that the coding order is not necessarily - the scan order, but the order the NAL packets become available to - the RTP stack. - - o Parameter sets are handled in accordance with the rules and - recommendations given in section 8.4. - - o MANEs MUST NOT duplicate any NAL unit except for sequence or - picture parameter set NAL units, because neither this memo nor the - H.264 specification provides means to identify duplicated NAL - units. Sequence and picture parameter set NAL units MAY be - duplicated to make their correct reception more probable, but any - such duplication MUST NOT affect the contents of any active - sequence or picture parameter set. Duplication SHOULD be - performed on the application layer, and not by duplicating RTP - packets (with identical sequence numbers). - - Senders according to the non-interleaved mode and the interleaved - mode MUST enforce the following packetization rule: - - o MANEs MAY convert single NAL unit packets into one aggregation - packet, convert an aggregation packet into several single NAL unit - packets, or mix both concepts, in an RTP translator. The RTP - translator SHOULD take into account at least the following - parameters: path MTU size, unequal protection mechanisms (e.g. - through packet-based FEC according to RFC 2733 [21], especially - for sequence and picture parameter set NAL units and coded slice - data partition A NAL units), bearable latency of the system, and - buffering capabilities of the receiver. - - Informative note: An RTP translator is required to handle RTCP - as per RFC 3550. - - -6.2. Single NAL Unit Mode - - This mode is in use when the value of the OPTIONAL packetization- - mode MIME parameter is equal to 0 or packetization-mode is not - present or no other packetization mode is signaled by external - -Wenger et. al. Expires December 2004 [Page 31] - -Internet Draft July, 2004 - - means. All receivers MUST support this mode. It is primarily - intended for low-delay applications that are compatible with systems - using ITU-T Recommendation H.241 [17] (see section 12.1). Only - single NAL unit packets MAY be used in this mode. 
STAPs, MTAPs, and - FUs MUST NOT be used. The transmission order of single NAL unit - packets MUST comply with the NAL unit decoding order. - - -6.3. Non-Interleaved Mode - - This mode is in use when the value of the OPTIONAL packetization- - mode MIME parameter is equal to 1 or the mode is turned on by - external means. This mode SHOULD be supported. It is primarily - intended for low-delay applications. Only single NAL unit packets, - STAP-As and FU-As MAY be used in this mode. STAP-Bs, MTAPs, and FU- - Bs MUST NOT be used. The transmission order of NAL units MUST - comply with the NAL unit decoding order. - - -6.4. Interleaved Mode - - This mode is in use when the value of the OPTIONAL packetization- - mode MIME parameter is equal to 2 or the mode is turned on by - external means. Some receivers MAY support this mode. STAP-Bs, - MTAPs, FU-As, and FU-Bs MAY be used. STAP-As and single NAL unit - packets MUST NOT be used. The transmission order of packets and NAL - units is constrained as specified in section 5.5. - - -7. De-Packetization Process (Informative) - - The de-packetization process is implementation dependent. Hence, - the following description should be seen as an example of a suitable - implementation. Other schemes may be used as well. Optimizations - relative to the described algorithms are likely possible. Section - 7.1 presents the de-packetization process for the single NAL unit - and non-interleaved packetization modes, whereas section 7.2 - describes the process for the interleaved mode. Section 7.3 - includes additional decapsulation guidelines for intelligent - receivers. - - All normal RTP mechanisms related to buffer management apply. In - particular, duplicated or outdated RTP packets (as indicated by the - RTP sequences number and the RTP timestamp) are removed. To - determine the exact time for decoding, factors such as a possible - intentional delay to allow for proper inter-stream synchronization - must be factored in. - - -7.1. Single NAL Unit and Non-Interleaved Mode - - -Wenger et. al. Expires December 2004 [Page 32] - -Internet Draft July, 2004 - - The receiver includes a receiver buffer to compensate transmission - delay jitter. The receiver stores incoming packets in reception - order into the receiver buffer. Packets are decapsulated in RTP - sequence number order. If a decapsulated packet is a single NAL - unit packet, the NAL unit contained in the packet is passed directly - to the decoder. If a decapsulated packet is an STAP-A, the NAL - units contained in the packet are passed to the decoder in the order - they are encapsulated in the packet. If a decapsulated packet is an - FU-A, all the fragments of the fragmented NAL unit are concatenated - and passed to the decoder. - - Informative note: If the decoder supports Arbitrary Slice Order, - coded slices of a picture can be passed to the decoder in any - order regardless of their reception and transmission order. - - - -7.2. Interleaved Mode - - The general concept behind these de-packetization rules is to - reorder NAL units from transmission order to the NAL unit decoding - order. - - The receiver includes a receiver buffer, which is used to compensate - for transmission delay jitter and to reorder packets from - transmission order to the NAL unit decoding order. In this section, - the receiver operation is described assuming that there is no - transmission delay jitter. 
To make a difference between a practical - receiver buffer that is also used for compensation of transmission - delay jitter, the receiver buffer is hereinafter called the - deinterleaving buffer in this section. Receivers SHOULD also - prepare for transmission delay jitter, i.e., either reserve separate - buffers for transmission delay jitter buffering and deinterleaving - buffering or use a receiver buffer for both transmission delay - jitter and deinterleaving. Moreover, receivers SHOULD take - transmission delay jitter into account in the buffering operation, - e.g., by additional initial buffering before starting of decoding - and playback. - - This section is organized as follows: Subsection 7.2.1 presents how - to calculate the size of the deinterleaving buffer. Subsection - 7.2.2 specifies the receiver process how to organize received NAL - units to the NAL unit decoding order. - - -7.2.1. Size of the Deinterleaving Buffer - - When SDP Offer/Answer model or any other capability exchange - procedure is used in session setup, the properties of the received - stream SHOULD be such that the receiver capabilities are not - exceeded. In the SDP Offer/Answer model, the receiver can indicate - -Wenger et. al. Expires December 2004 [Page 33] - -Internet Draft July, 2004 - - its capabilities to allocate a deinterleaving buffer with the deint- - buf-cap MIME parameter. The sender indicates the requirement for - the deinterleaving buffer size with the sprop-deint-buf-req MIME - parameter. It is therefore RECOMMENDED to set the deinterleaving - buffer size, in terms of number of bytes, equal to or greater than - the value of sprop-deint-buf-req MIME parameter. See section 8.1 - for further information on deint-buf-cap and sprop-deint-buf-req - MIME parameters and section 8.2.2 for further information on their - use in SDP Offer/Answer model. - - When a declarative session description is used in session setup, the - sprop-deint-buf-req MIME parameter signals the requirement for the - deinterleaving buffer size. It is therefore RECOMMENDED to set the - deinterleaving buffer size, in terms of number of bytes, equal to or - greater than the value of sprop-deint-buf-req MIME parameter. - - -7.2.2. Deinterleaving Process - - There are two buffering states in the receiver: initial buffering - and buffering while playing. Initial buffering occurs when the RTP - session is initialized. After initial buffering, decoding and - playback is started and the buffering-while-playing mode is used. - - Regardless of the buffering state the receiver stores incoming NAL - units in reception order into the deinterleaving buffer as follows. - NAL units of aggregation packets are stored into the deinterleaving - buffer individually. The value of DON is calculated and stored for - all NAL units. - - The receiver operation is described below with the help of the - following functions and constants: - o Function AbsDON is specified in section 8.1. - o Function don_diff is specified in section 5.5. - o Constant N is the value of the OPTIONAL sprop-interleaving-depth - MIME type parameter (see section 8.1) incremented by 1. - - Initial buffering lasts until one of the following conditions is - fulfilled: - o There are N VCL NAL units in the deinterleaving buffer. 
- o If sprop-max-don-diff is present, don_diff(m,n) is greater than - the value of sprop-max-don-diff, in which n corresponds to the NAL - unit having the greatest value of AbsDON among the received NAL - units and m corresponds to the NAL unit having the smallest value - of AbsDON among the received NAL units. - o Initial buffering has lasted for the duration equal to or greater - than the value of the OPTIONAL sprop-init-buf-time MIME parameter. - - The NAL units to be removed from the deinterleaving buffer are - determined as follows: - - -Wenger et. al. Expires December 2004 [Page 34] - -Internet Draft July, 2004 - - o If the deinterleaving buffer contains at least N VCL NAL units, - NAL units are removed from the deinterleaving buffer and passed to - the decoder in the order specified below until the buffer contains - N-1 VCL NAL units. - o If sprop-max-don-diff is present, all NAL units m for which - don_diff(m,n) is greater than sprop-max-don-diff are removed from - the deinterleaving buffer and passed to the decoder in the order - specified below. Herein, n corresponds to the NAL unit having the - greatest value of AbsDON among the received NAL units. - o Variable ts is set to the value of system timer that was - initialized to 0 when the first packet of the NAL unit stream was - received. If the deinterleaving buffer contains a NAL unit whose - reception time tr fulfills the condition that ts - tr > sprop- - init-buf-time, NAL units are passed to the decoder (and removed - from the deinterleaving buffer) in the order specified below until - the deinterleaving buffer contains no NAL unit whose reception - time tr fulfills the specified condition. Note that transmission - delay jitter should be taken into account in the calculations with - timestamps. - - The order that NAL units are passed to the decoder is specified as - follows: - o Let PDON be a variable that is initialized to 0 at the beginning - of the an RTP session. - o For each NAL unit associated with a value of DON, a DON distance - is calculated as follows. If the value of DON of the NAL unit is - larger than the value of PDON, the DON distance is equal to DON - - PDON. Otherwise, the DON distance is equal to 65535 - PDON + DON - + 1. - o NAL units are delivered to the decoder in ascending order of DON - distance. If several NAL units share the same value of DON - distance, they can be passed to the decoder in any order. - o When a desired number of NAL units have been passed to the - decoder, the value of PDON is set to the value of DON for the last - NAL unit passed to the decoder. - - -7.3. Additional De-Packetization Guidelines - - The following additional de-packetization rules may be used to - implement an operational H.264 de-packetizer: - - o Intelligent RTP receivers (e.g. in gateways) may identify lost - coded slice data partitions A (DPAs). If a lost DPA is found, a - gateway may decide not to send the corresponding coded slice data - partitions B and C, as their information is meaningless for H.264 - decoders. In this way a MANE can reduce network load by - discarding useless packets, without parsing a complex bitstream. - - o Intelligent RTP receivers (e.g. in gateways) may identify lost - FUs. If a lost FU is found, a gateway may decide not to send the - -Wenger et. al. Expires December 2004 [Page 35] - -Internet Draft July, 2004 - - following FUs of the same fragmented NAL unit, as their - information is meaningless for H.264 decoders. 
In this way a MANE - can reduce network load by discarding useless packets, without - parsing a complex bitstream. - - o Intelligent receivers having to discard packets or NALUs should - first discard all packets/NALUs in which the value of the NRI - field of the NAL unit type octet is equal to 0. This will - minimize the impact on user experience and keep the reference - pictures intact. If more packets need to be discarded, then - packets with a numerically lower NRI value should be discarded - before packets with a numerically higher NRI value. However, - discarding any packets with an NRI bigger than 0 very likely leads - to decoder drift and SHOULD be avoided. - - -8. Payload Format Parameters - - This section specifies the parameters that MAY be used to select - optional features of the payload format and certain features of the - bit stream. The parameters are specified here as part of the MIME - subtype registration for the ITU-T H.264 | ISO/IEC 14496-10 codec. - A mapping of the parameters into the Session Description Protocol - (SDP) [5] is also provided for those applications that use SDP. - Equivalent parameters could be defined elsewhere for use with - control protocols that do not use MIME or SDP. - - Some parameters provide a receiver with the properties of the stream - that is going to be sent. The name of all these parameters starts - with "sprop" for stream properties. Some of these "sprop" - parameters are limited by other payload or codec configuration - parameters. For example, the sprop-parameter-sets parameter is - constrained by the profile-level-id parameter. The media sender - selects all "sprop" parameters rather than the receiver. This - uncommon characteristic of the "sprop" parameters may not be - compatible with some signaling protocol concepts, in which case the - use of these parameters SHOULD be avoided. - - -8.1. MIME Registration - - The MIME subtype for the ITU-T H.264 | ISO/IEC 14496-10 codec is - allocated from the IETF tree. - - The receiver MUST ignore any unspecified parameter. - - Media Type name: video - - Media subtype name: H264 - - Required parameters: none - -Wenger et. al. Expires December 2004 [Page 36] - -Internet Draft July, 2004 - - - OPTIONAL parameters: - profile-level-id: A base16 [6] (hexadecimal) representation of - the following three bytes in the sequence - parameter set NAL unit specified in [1]: 1) - profile_idc, 2) a byte herein referred to as - profile-iop, composed of the values of - constraint_set0_flag, constraint_set1_flag, - constraint_set2_flag, and reserved_zero_5bits - in bit-significance order starting from the - most significant bit, and 3) level_idc. Note - that reserved_zero_5bits is required to be - equal to 0 in [1], but other values for it may - be specified in the future by ITU-T or ISO/IEC. - - If the profile-level-id parameter is used for - indicating properties of a NAL unit stream, it - indicates the profile and level that a decoder - has to support in order to comply with [1] when - decoding the stream. The profile-iop byte - indicates whether the NAL unit stream also - obeys all constraints of the indicated profiles - as follows. If bit 7 (the most significant - bit), bit 6, or bit 5 of profile-iop is equal - to 1, all constraints of the Baseline profile, - the Main profile, or the Extended profile, - respectively, are obeyed in the NAL unit - stream. 
- - If the profile-level-id parameter is used for - capability exchange or session setup procedure, - it indicates the profile that the codec - supports and the highest level that is - supported for the signaled profile. The - profile-iop byte indicates whether the codec - has such additional limitations that only the - common subset of the algorithmic features and - limitations of the profiles signaled with the - profile-iop byte and the profile indicated by - profile_idc is supported by the codec. For - example, if a codec supports only the common - subset of the coding tools of the Baseline - profile and the Main profile at level 2.1 and - below, the profile-level-id becomes 42E015, in - which 42 stands for the Baseline profile, E0 - indicates that only the common subset for all - profiles is supported, and 15 indicates level - 2.1. - - Informative note: Capability exchange and - session setup procedures should provide - -Wenger et. al. Expires December 2004 [Page 37] - -Internet Draft July, 2004 - - means to list the capabilities for each - supported codec profile separately. For - example, the one-of-N codec selection - procedure of the SDP offer/answer model can - be used (section 10.2 of [8]). - - If no profile-level-id is present, the Baseline - Profile without additional constraints at Level - 1 MUST be implied. - - max-mbps, max-fs, max-cpb, max-dpb, and max-br: - These parameters MAY be used to signal the - capabilities of a receiver implementation. - These parameters MUST NOT be used for any other - purpose. The profile-level-id parameter MUST - be present in the same receiver capability - description that contains any of these - parameters. The level conveyed in the value of - the profile-level-id parameter MUST be such - that the receiver is fully capable of - supporting. max-mbps, max-fs, max-cpb, max- - dpb, and max-br MAY be used to indicate such - capabilities of the receiver that extend the - required capabilities of the signaled level as - specified below. - - When more than one parameter from the set - (max_mbps, max-fs, max-cpb, max_dpb, max-br) is - present, the receiver MUST support all signaled - capabilities simultaneously. For example, if - both max-mbps and max-br are present, the - signaled level with the extension of both the - frame rate and bit rate is supported. That is, - the receiver is able to decode such NAL unit - streams in which the macroblock processing rate - is up to max-mbps (inclusive), the bit rate is - up to max-br (inclusive), the coded picture - buffer size is derived as specified in the - semantics of the max-br parameter below, and - other properties comply with the level - specified in the value of the profile-level-id - parameter. - - A receiver MUST NOT signal such values of max- - mbps, max-fs, max-cpb, max-dpb, and max-br that - meet the requirements of a higher level, - referred to as level A herein, compared to the - level specified in the value of the profile- - level-id parameter, if the receiver can support - all the properties of level A. - - -Wenger et. al. Expires December 2004 [Page 38] - -Internet Draft July, 2004 - - Informative note: When the OPTIONAL MIME - type parameters are used to signal the - properties of a NAL unit stream, max-mbps, - max-fs, max-cpb, max-dpb, and max-br are - not present, and the value of profile- - level-id must always be such that the NAL - unit stream complies fully with the - specified profile and level. 
- - max-mbps: The value of max-mbps is an integer indicating - the maximum macroblock processing rate in units - of macroblocks per second. The max-mbps - parameter signals that the receiver is capable - of decoding video at a higher rate than - required by the signaled level conveyed in the - value of the profile-level-id parameter. When - max-mbps is signaled, the receiver MUST be able - to decode NAL unit streams that conform to the - signaled level with the exception that the - MaxMBPS value in Table A-1 of [1] for the - signaled level is replaced with the value of - max-mbps. The value of max-mbps MUST be - greater than or equal to the value of MaxMBPS - for the level given in Table A-1 of [1]. - Senders MAY use this knowledge to send pictures - of a given size at a higher picture rate than - indicated in the signaled level. - - max-fs: The value of max-fs is an integer indicating - the maximum frame size in units of macroblocks. - The max-fs parameter signals that the receiver - is capable of decoding larger picture sizes - than required by the signaled level conveyed in - the value of the profile-level-id parameter. - When max-fs is signaled, the receiver MUST be - able to decode NAL unit streams that conform to - the signaled level with the exception that the - MaxFS value in Table A-1 of [1] for the - signaled level is replaced with the value of - max-fs. The value of max-fs MUST be greater - than or equal to the value of MaxFS for the - level given in Table A-1 of [1]. Senders MAY - use this knowledge to send larger pictures at a - proportionally lower frame rate than indicated - in the signaled level. - - max-cpb The value of max-cpb is an integer indicating - the maximum coded picture buffer size in units - of 1000 bits for the VCL HRD parameters (see - A.3.1 item i of [1]) and in units of 1200 bits - for the NAL HRD parameters (see A.3.1 item j of - -Wenger et. al. Expires December 2004 [Page 39] - -Internet Draft July, 2004 - - [1]). The max-cpb parameter signals that the - receiver has more memory than the minimum - amount of coded picture buffer memory required - by the signaled level conveyed in the value of - the profile-level-id parameter. When max-cpb - is signaled, the receiver MUST be able to - decode NAL unit streams that conform to the - signaled level with the exception that the - MaxCPB value in Table A-1 of [1] for the - signaled level is replaced with the value of - max-cpb. The value of max-cpb MUST be greater - than or equal to the value of MaxCPB for the - level given in Table A-1 of [1]. Senders MAY - use this knowledge to construct coded video - streams with greater variation of bitrate - compared to which can be achieved with the - MaxCPB value in Table A-1 of [1]. - - Informative note: The coded picture buffer - is used in the hypothetical reference - decoder (Annex C) of H.264. The use - hypothetical reference decoder is - recommended in H.264 encoders to verify - that the produced bitstream conforms to the - standard and to control the output bitrate. - Thus, the coded picture buffer is - conceptually independent from any other - potential buffers in the receiver, - including de-interleaving and de-jitter - buffers. The coded picture buffer need not - be implemented in decoders as specified in - Annex C of H.264, but rather standard- - compliant decoders can have any buffering - arrangements provided that they can decode - standard-compliant bitstreams. 
Thus, in - practice, the input buffer for video - decoder can be integrated with de- - interleaving and de-jitter buffers of the - receiver. - - max-dpb: The value of max-dpb is an integer indicating - the maximum decoded picture buffer size in - units of 1024 bytes. The max-dpb parameter - signals that the receiver has more memory than - the minimum amount of decoded picture buffer - memory required by the signaled level conveyed - in the value of the profile-level-id parameter. - When max-dpb is signaled, the receiver MUST be - able to decode NAL unit streams that conform to - the signaled level with the exception that the - MaxDPB value in Table A-1 of [1] for the - -Wenger et. al. Expires December 2004 [Page 40] - -Internet Draft July, 2004 - - signaled level is replaced with the value of - max-dpb. Consequently, a receiver that signals - max-dpb MUST be capable of storing the - following number of decoded frames, - complementary field pairs, and non-paired - fields in its decoded picture buffer: - - Min(1024 * max-dpb / ( PicWidthInMbs * - FrameHeightInMbs * 256 * ChromaFormatFactor ), - 16) - - PicWidthInMbs, FrameHeightInMbs, and - ChromaFormatFactor are defined in [1]. - - The value of max-dpb MUST be greater than or - equal to the value of MaxDPB for the level - given in Table A-1 of [1]. Senders MAY use - this knowledge to construct coded video streams - with improved compression. - - Informative note: This parameter was added - primarily to complement a similar codepoint - in the ITU-T Recommendation H.245, so as to - facilitate signaling gateway designs. The - decoded picture buffer stores reconstructed - samples, and is a property of the video - decoder only. There is no relationship - between the size of the decoded picture - buffer and the buffers used in RTP, - especially de-interleaving and de-jitter - buffers. - - max-br: The value of max-br is an integer indicating - the maximum video bit rate in units of 1000 - bits per second for the VCL HRD parameters (see - A.3.1 item i of [1]) and in units of 1200 bits - per second for the NAL HRD parameters (see - A.3.1 item j of [1]). - - The max-br parameter signals that the video - decoder of the receiver is capable of decoding - video at a higher bit rate than required by the - signaled level conveyed in the value of the - profile-level-id parameter. The value of max- - br MUST be greater than or equal to the value - of MaxBR for the level given in Table A-1 of - [1]. - - When max-br is signaled, the video codec of the - receiver MUST be able to decode NAL unit - streams that conform to the signaled level, - -Wenger et. al. Expires December 2004 [Page 41] - -Internet Draft July, 2004 - - conveyed in the profile-level-id parameter, - with the following exceptions in the limits - specified by the level: - o The value of max-br replaces the MaxBR value - of the signaled level (in Table A-1 of [1]). - o When the max-cpb parameter is not present, - the result of the following formula replaces - the value of MaxCPB in Table A-1 of [1]: - (MaxCPB of the signaled level) * max_br / - (MaxBR of the signaled level). - - For example, if a receiver signals capability - for Level 1.2 with max-br equal to 1550, this - indicates a maximum video bitrate of 1550 - kbits/sec for VCL HRD parameters, a maximum - video bitrate of 1860 kbits/sec for NAL HRD - parameters, and a CPB size of 4,036,458 bits - (1550000 / 384000 * 1000 * 1000). 
- - The value of max-br MUST be grater than or - equal to the value MaxBR for the signaled level - given in Table A-1 of [1]. - - Senders MAY use this knowledge to send higher - bitrate video as allowed in the level - definition of Annex A of H.264, to achieve - improved video quality. - - Informative note: This parameter was added - primarily to complement a similar codepoint - in the ITU-T Recommendation H.245, so as to - facilitate signaling gateway designs. No - assumption can be made from the value of - this parameter that the network is capable - of handling such bit rates at any given - time. In particular, no conclusion can be - drawn that the signaled bit rate is - possible under congestion control - constraints. - - redundant-pic-cap: This parameter signals the capabilities of a - receiver implementation. When equal to 0, the - parameter indicates the receiver makes no - attempt to use redundant coded pictures to - correct incorrectly decoded primary coded - pictures. When equal to 0, the receiver is not - capable of using redundant slices, hence a - sender SHOULD avoid sending redundant slices to - save bandwidth. When equal to 1, the receiver - is capable of decoding any such redundant slice - that covers a corrupted area in a primary - -Wenger et. al. Expires December 2004 [Page 42] - -Internet Draft July, 2004 - - decoded picture (at least partly), and hence a - sender MAY send redundant slices. When the - parameter is not present, then a value of 0 - MUST be used for redundant-pic-cap. When - present, the value of redundant-pic-cap MUST be - either 0 or 1. - - When the profile-level-id parameter is present - in the same capability signaling as the - redundant-pic-cap parameter and the profile - indicated in profile-level-id is such that it - disallows the use of redundant coded pictures - (e.g., Main Profile), the value of redundant- - pic-cap MUST be equal to 0. When a receiver - indicates redundant-pic-cap equal to 0, the - received stream SHOULD NOT contain redundant - coded pictures. - - Informative note: Even if redundant-pic-cap - is equal to 0, the decoder is able to - ignore redundant codec pictures provided - that the decoder supports such profile - (Baseline, Extended) in which redundant - coded pictures are allowed. - - Informative note: Even if redundant-pic-cap - is equal to 1, the receiver may also choose - other error concealment strategies to - replace or complement decoding of redundant - slices. - - sprop-parameter-sets: This parameter MAY be used to convey - such sequence and picture parameter set NAL - units, herein referred to as the initial - parameter set NAL units, that MUST precede any - other NAL units in decoding order. The - parameter MUST NOT be used to indicate codec - capability in any capability exchange - procedure. The value of the parameter is the - base64 [6] representation of the initial - parameter set NAL units as specified in - sections 7.3.2.1 and 7.3.2.2 of [1]. The - parameter sets are conveyed in decoding order - and no framing of the parameter set NAL units - takes place. A comma is used to separate any - pair of parameter sets in the list. Note that - the number of bytes in a parameter set NAL unit - is typically less than 10 bytes, but a picture - parameter set NAL unit can contain several - hundreds of bytes. - - -Wenger et. al. 
Expires December 2004 [Page 43] - -Internet Draft July, 2004 - - Informative Note: When several payload - types are offered in the SDP Offer/Answer - model, each with its own sprop-parameter- - sets parameter, then the receiver cannot - assume that those parameter sets do not use - conflicting storage locations (i.e., - identical values of parameter set - identifiers). Hence, a receiver should - double-buffer all sprop-parameter-sets and - make them available to the decoder instance - that decodes a certain payload type. - - parameter-add: This parameter MAY be used to signal whether - the receiver of this parameter is allowed to - add parameter sets in its signaling response - using the sprop-parameter-sets MIME parameter. - The value of this parameter is either 0 or 1. - 0 is equal to false, i.e., it is not allowed to - add parameter sets. 1 is equal to true, i.e. - it is allowed to add parameter sets. If the - parameter is not present, its value MUST be 1. - - packetization-mode: This parameter signals the properties of a - RTP payload type or the capabilities of a - receiver implementation. Only a single - configuration point can be indicated, thus for - when declaring capabilities to support more - than one packetization-mode, multiple - configuration points (RTP payload types) must - be used. - - When the value of packetization-mode is equal - to 0 or packetization-mode is not present, the - single NAL mode as defined in section 6.2 of - RFC XXXX MUST be used. This mode is in use in - standards using ITU-T Recommendation H.241 [17] - (see section 12.1). When the value of - packetization-mode is equal to 1, the non- - interleaved mode as defined in section 6.3 of - RFC XXXX MUST be used. When the value of - packetization-mode is equal to 2, the - interleaved mode as defined in section 6.4 of - RFC XXXX MUST be used. The value of - packetization mode MUST be an integer in the - range of 0 to 2, inclusive. - - sprop-interleaving-depth: This parameter MUST NOT be present - when packetization-mode is not present or the - value of packetization-mode is equal to 0 or 1. - This parameter MUST be present when the value - of packetization-mode is equal to 2. - -Wenger et. al. Expires December 2004 [Page 44] - -Internet Draft July, 2004 - - - This parameter signals the properties of a NAL - unit stream. It specifies the maximum number - of VCL NAL units that precede any VCL NAL unit - in the NAL unit stream in transmission order - and follow the VCL NAL unit in decoding order. - Consequently, it is guaranteed that receivers - can reconstruct NAL unit decoding order, when - the buffer size for NAL unit decoding order - recovery is at least the value of sprop- - interleaving-depth + 1 in terms of VCL NAL - units. - - The value of sprop-interleaving-depth MUST be - an integer in the range of 0 to 32767, - inclusive. - - sprop-deint-buf-req: This parameter MUST NOT be present when - packetization-mode is not present or the value - of packetization-mode is equal to 0 or 1. It - MUST be present when the value of - packetization-mode is equal to 2. - - sprop-deint-buf-req signals the required size - of the deinterleaving buffer for the NAL unit - stream. The value of the parameter MUST be - greater than or equal to the maximum buffer - occupancy (in units of bytes) required in such - a deinterleaving buffer that is specified in - section 7.2 of RFC XXXX. 
It is guaranteed that - receivers can perform the deinterleaving of - interleaved NAL units into NAL unit decoding - order, when the deinterleaving buffer size is - at least the value of sprop-deint-buf-req in - terms of bytes. - - The value of sprop-deint-buf-req MUST be an - integer in the range of 0 to 4 294 967 295, - inclusive. - - Informative note: deint_buf_req indicates - the required size of the deinterleaving - buffer only. When network jitter can - occur, additionally an appropriately sized - jitter buffer has to be provisioned for. - - deint-buf-cap: This parameter signals the capabilities of a - receiver implementation, and indicates the - amount of deinterleaving buffer space in units - of bytes that the receiver has available for - reconstructing the NAL unit decoding order. A - -Wenger et. al. Expires December 2004 [Page 45] - -Internet Draft July, 2004 - - receiver is able to handle any stream for which - the value of the sprop-deint-buf-req parameter - is smaller than or equal to this parameter. - - If the parameter is not present, then a value - of 0 MUST be used for deint-buf-cap. The value - of deint-buf-cap MUST be an integer in the - range of 0 to 4 294 967 295, inclusive. - - Informative note: deint_buf_cap indicates - the maximum possible size of the - deinterleaving buffer of the receiver only. - When network jitter can occur, additionally - an appropriately sized jitter buffer has to - be provisioned for. - - - sprop-init-buf-time: This parameter MAY be used to signal the - properties of a NAL unit stream. The parameter - MUST NOT be present, if the value of - packetization-mode is equal to 0 or 1. - - The parameter signals the initial buffering - time that a receiver MUST buffer before - starting decoding to recover the NAL unit - decoding order from the transmission order. - The parameter is the maximum value of - (transmission time of a NAL unit - decoding - time of the NAL unit) assuming reliable and - instantaneous transmission, the same timeline - for transmission and decoding, and starting of - decoding when the first packet arrives. - - An example of specifying the value of sprop- - init-buf-time follows: A NAL unit stream is - sent in the following interleaved order, in - which the value corresponds to the decoding - time and the transmission order is from left to - right: - - 0 2 1 3 5 4 6 8 7 ... - - Assuming a steady transmission rate of NAL - units, the transmission times are: - 0 1 2 3 4 5 6 7 8 ... - - Subtracting the decoding time from the - transmission time column-wise results into the - following series: - 0 -1 1 0 -1 1 0 -1 1 ... - - -Wenger et. al. Expires December 2004 [Page 46] - -Internet Draft July, 2004 - - Thus, the value of sprop-init-buf-time in this - example is 1 in terms of intervals of NAL unit - transmission times. - - The parameter is coded as a decimal - representation in clock ticks of a 90-kHz - clock. If the parameter is not present, then a - value of 0 MUST be used for sprop-init-buf- - time. The value of sprop-init-buf-time MUST be - an integer in the range of 0 to 4 294 967 295, - inclusive. - - In addition to the signaled init_buf_time, - receivers SHOULD take into account the - transmission delay jitter buffering, including - buffering for the delay jitter caused by - mixers, translators, gateways, proxies, - traffic-shapers and other network elements. - - sprop-max-don-diff: This parameter MAY be used to signal the - properties of a NAL unit stream. 
It MUST NOT - be used to signal transmitter or receiver or - codec capabilities. The parameter MUST NOT be - present, if the value of packetization-mode is - equal to 0 or 1. sprop-max-don-diff is an - integer in the range of 0 to 32767, inclusive. - If sprop-max-don-diff is not present, the value - of the parameter is unspecified. sprop-max- - don-diff is calculated as follows: - - sprop-max-don-diff = max{AbsDON(i) - - AbsDON(j)}, - for any i and any j>i, - - where i and j indicate the index of the NAL - unit in the transmission order and AbsDON - denotes such decoding order number of the NAL - unit that does not wrap around to 0 after - 65535. In other words, AbsDON is calculated as - follows: Let m and n be consecutive NAL units - in transmission order. For the very first NAL - unit in transmission order (whose index is 0), - AbsDON(0) = DON(0). For other NAL units, - AbsDON is calculated as follows: - - If DON(m) == DON(n), AbsDON(n) = AbsDON(m) - - If (DON(m) < DON(n) and DON(n) - DON(m) < - 32768), - AbsDON(n) = AbsDON(m) + DON(n) - DON(m) - - -Wenger et. al. Expires December 2004 [Page 47] - -Internet Draft July, 2004 - - If (DON(m) > DON(n) and DON(m) - DON(n) >= - 32768), - AbsDON(n) = AbsDON(m) + 65536 - DON(m) + DON(n) - - If (DON(m) < DON(n) and DON(n) - DON(m) >= - 32768), - AbsDON(n) = AbsDON(m) - (DON(m) + 65536 - - DON(n)) - - If (DON(m) > DON(n) and DON(m) - DON(n) < - 32768), - AbsDON(n) = AbsDON(m) - (DON(m) - DON(n)) - - where DON(i) is the decoding order number of - the NAL unit having index i in the transmission - order. The decoding order number is specified - in section 5.5 of RFC XXXX. - - Informative note: Receivers may use sprop- - max-don-diff to trigger which NAL units in - the receiver buffer can be passed to the - decoder. - - max-rcmd-nalu-size: This parameter MAY be used to signal the - capabilities of a receiver. The parameter MUST - NOT be used for any other purposes. The value - of the parameter indicates the largest NALU - size in bytes that the receiver can handle - efficiently. The parameter value is a - recommendation, not a strict upper boundary. - The sender MAY create larger NALUs but must be - aware that the handling of these may come at - higher cost than NALUs following the - limitation. - - The value of max-rcmd-nalu-size MUST be an - integer in the range of 0 to 4 294 967 295, - inclusive. If this parameter is not specified, - no known limitation to the NALU size exists. - Senders still need to consider the MTU size - available between the sender and the receiver - and SHOULD run MTU discovery for this purpose. - - This parameter is motivated by, for example, an - IP to H.223 video telephony gateway, where - NALUs smaller than the H.223 transport data - unit will be more efficient. A gateway may - terminate IP, thus MTU discovery will normally - not work beyond the gateway. - - - -Wenger et. al. Expires December 2004 [Page 48] - -Internet Draft July, 2004 - - Informative note: Setting this parameter to - a lower than necessary value may have a - negative impact. - - Encoding considerations: - This type is only defined for transfer via RTP - (RFC 3550). - - A file format of H.264/AVC video is defined in - [32]. This definition is utilized by other - file formats such as the 3GPP multimedia file - format (MIME type video/3gpp) [33] or the MP4 - file format (MIME type video/mp4). - - Security considerations: - See section 9 of RFC XXXX. - - Public specification: - Please refer to RFC XXXX and its section 17. 
- - Additional information: - None - - File extensions: none - Macintosh file type code: none - Object identifier or OID: none - - Person & email address to contact for further information: - stewe@stewe.org - - Intended usage: COMMON. - - Author/Change controller: - stewe@stewe.org - IETF Audio/Video transport working group - - -8.2. SDP Parameters - -8.2.1. Mapping of MIME Parameters to SDP - - The MIME media type video/H264 string is mapped to fields in the - Session Description Protocol (SDP) [5] as follows: - - o The media name in the "m=" line of SDP MUST be video. - - o The encoding name in the "a=rtpmap" line of SDP MUST be H264 (the - MIME subtype). - - o The clock rate in the "a=rtpmap" line MUST be 90000. - - -Wenger et. al. Expires December 2004 [Page 49] - -Internet Draft July, 2004 - - o The OPTIONAL parameters "profile-level-id", "max-mbps", "max-fs", - "max-cpb", "max-dpb", "max-br", "redundant-pic-cap", "sprop- - parameter-sets", "parameter-add", "packetization-mode", "sprop- - interleaving-depth", "deint-buf-cap", "sprop-deint-buf-req", - "sprop-init-buf-time", "sprop-max-don-diff", and "max-rcmd-nalu- - size", when present, MUST be included in the "a=fmtp" line of SDP. - These parameters are expressed as a MIME media type string, in the - form of a semicolon separated list of parameter=value pairs. - - An example of media representation in SDP is as follows (Baseline - Profile, Level 3.0, some of the constraints of the Main profile may - not be obeyed): - - m=video 49170 RTP/AVP 98 - a=rtpmap:98 H264/90000 - a=fmtp:98 profile-level-id=42A01E; sprop-parameter- - sets=Z0IACpZTBYmI,aMljiA== - - -8.2.2. Usage with the SDP Offer/Answer Model - - When offering H.264 over RTP using SDP in an Offer/Answer model [8] - for negotiation for unicast usage, the following limitations and - rules apply: - - o The parameters identifying a media format configuration for H.264 - are "profile-level-id", "packetization-mode", and, if required by - "packetization-mode", "sprop-deint-buf-req". These three - parameters MUST be used symmetrically, i.e. the answerer MUST - either maintain all configuration parameters or remove the media - format (payload type) completely, if one or more of the parameter - values are not supported. - - Informative note: The requirement for symmetric use applies - only for the above three parameters, and not for the other - stream properties and capability parameters. - - To simplify handling and matching of these configurations, the - same RTP payload type number used in the offer SHOULD also be used - in the answer, as specified in [8]. An answer MUST NOT contain a - payload type number used in the offer unless the configuration - ("profile-level-id", "packetization-mode", and if present "sprop- - deint-buf-req") is the same as in the offer. - - Informative note: An offerer, when receiving the answer, needs - to compare payload types not declared in the offer based on - media type (i.e. video/h264) and the above three parameters - with any payload types it has already declared, in order to - determine whether the configuration in question is new or - equivalent to a configuration already offered. - - -Wenger et. al. Expires December 2004 [Page 50] - -Internet Draft July, 2004 - - o The parameters "sprop-parameter-sets", "sprop-deint-buf-req", - "sprop-interleaving-depth", "sprop-max-don-diff", and "sprop-init- - buf-time" describe the properties of the NAL unit stream that the - offerer or answerer is sending for this media format - configuration. 
This differs from the normal usage of the - offer/answer parameters: normally such parameters declare the - properties of the stream the offerer or the answerer is able to - receive. When dealing with H.264, the offerer assumes that the - answerer will be able to receive media encoded using the - configuration being offered. - Informative note: The above parameters apply for any stream - sent by the declaring entity with the same configuration, i.e. - they are dependent on their source. As they apply for the - configuration, rather then being bound to the payload type, - the values may need to be applied to another payload type when - sending. - - o The capability parameters ("max-mbps", "max-fs", "max-cpb", "max- - dpb", "max-br", ,"redundant-pic-cap", "max-rcmd-nalu-size") MAY be - used to declare further capabilities. Their interpretation - depends on the direction attribute. When the direction attribute - is sendonly, then the parameters describe the limits of the RTP - packets and the NAL unit stream that the sender is capable of - producing. When the direction attribute is sendrecv or recvonly, - then the parameters describe the limitations of what the receiver - accepts. - - o As specified above, an offerer needs to include the size of the - deinterleaving buffer in the offer for an interleaved H.264 - stream. To enable the offerer and answerer to inform each other - about their capabilities for deinterleaving buffering, both - parties are RECOMMENDED to include "deint-buf-cap". This - information MAY be utilized when selecting the value for "sprop- - deint-buf-req" in a second round of offer and answer. For - interleaved streams, it is also RECOMMENDED to consider offering - multiple payload types with different buffering requirements when - the capabilities of the receiver are unknown. - - o The "sprop-parameter-sets" parameter is used as described above. - In addition, an answerer MUST maintain all parameter sets received - in the offer in its answer. Depending on the value of the - "parameter-add" parameter different rules apply: If "parameter- - add" is false (0), the answer MUST NOT add any additional - parameter sets. If "parameter-add" is true (1), the answerer, in - its answer, MAY add additional parameter sets to the "sprop- - parameter-sets" parameter. The answerer MUST also, independent of - the value of "parameter-add", accept to receive a video stream - using the sprop-parameter-sets it declared in the answer. - - - - - -Wenger et. al. Expires December 2004 [Page 51] - -Internet Draft July, 2004 - - Informative note: care must be taken when adding parameter - sets not to cause overwriting of already transmitted parameter - sets by using conflicting parameter set identifiers. - - For streams being delivered over multicast, the following rules - apply in addition. - - o The stream properties parameters ("sprop-parameter-sets", "sprop- - deint-buf-req", "sprop-interleaving-depth", "sprop-max-don-diff", - and "sprop-init-buf-time") MUST NOT be changed by the answerer. - Hence, a payload type can either be accepted unaltered, or - removed. - - o The receiver capability parameters "max-mbps", "max-fs", "max- - cpb", "max-dpb", "max-br", and "max-rcmd-nalu-size" MUST be - supported by the answerer for all streams declared as sendrecv or - recvonly, otherwise one of the following actions MUST be - performed: the media format is removed, or the session rejected. 
- - o The receiver capability parameter redundant-pic-cap SHOULD be - supported by the answerer for all streams declared as sendrecv or - recvonly as follows: The answerer SHOULD NOT include redundant - coded pictures in the transmitted stream, if the offerer indicated - redundant-pic-cap equal to 0. Otherwise (when redundant_pic_cap - is equal to 1), it is beyond the scope of this memo to recommend - how the answerer should use redundant coded pictures. - - Below are the complete lists of how the different parameters shall - be interpreted in the different combinations of offer or answer and - direction attribute. - - o In offers and answers when "a=sendrecv", or no direction attribute - is used, or in offers and answers where "a=recvonly" is used, the - following interpretation of the parameters MUST be used. - - Declaring actual configuration or properties for receiving: - - profile-level-id - - packetization-mode - - Declaring actual properties of the stream to be sent (applicable - only when "a=sendrecv" or no direction attribute is used): - - sprop-deint-buf-req - - sprop-interleaving-depth - - sprop-parameter-sets - - sprop-max-don-diff - - sprop-init-buf-time - - Declaring receiver implementation capabilities: - - max-mbps - - max-fs - - max-cpb - - -Wenger et. al. Expires December 2004 [Page 52] - -Internet Draft July, 2004 - - - max-dpb - - max-br - - redundant-pic-cap - - deint-buf-cap - - max-rcmd-nalu-size - - Declaring how Offer/Answer negotiation shall be performed: - - parameter-add - - o In an Offer or Answer where the direction attribute "a=sendonly" - is included for the media stream, the following interpretation of - the parameters MUST be used: - - Declaring actual configuration and properties of stream proposed - to be sent: - - profile-level-id - - packetization-mode - - sprop-deint-buf-req - - sprop-max-don-diff - - sprop-init-buf-time - - sprop-parameter-sets - - sprop-interleaving-depth - - Declaring the capabilities of the sender when it receives a - stream: - - max-mbps - - max-fs - - max-cpb - - max-dpb - - max-br - - redundant-pic-cap - - deint-buf-cap - - max-rcmd-nalu-size - - Declaring how Offer/Answer negotiation shall be performed: - - parameter-add - - Further the following considerations are necessary: - - o Parameters used for declaring receiver capabilities are in general - downgradable, i.e. they express the upper limit for a sender's - possible behavior. Thus a sender MAY select to set its encoder - using only lower/lesser or equal values of these parameters. - "sprop-parameter-sets" MUST NOT be used in a senders declaration - of its capabilities, as the limits of the values that are carried - inside the parameter sets are implicit with the profile and level - used. - - o Parameters declaring a configuration point are not downgradable, - with the exception of the level part of the "profile-level-id" - - - -Wenger et. al. Expires December 2004 [Page 53] - -Internet Draft July, 2004 - - parameter. They express values a receiver expects to be used, and - must be used verbatim on the sender side. - - o When declaring sender's capabilities, and non-downgradable - parameters are used in this declaration, then these parameters - express a configuration that is acceptable. In order to achieve - high interoperability levels, it is often advisable to offer - multiple alternative configurations, e.g. for the packetization - mode. It is impossible to offer multiple configurations in a - single payload type. 
Hence, when multiple configuration offers - are made, each offer requires its own RTP payload type associated - with the offer. - - o A receiver SHOULD understand all MIME parameters even if it only - supports a subset of the payload formats functionality. This - ensures that a receiver is capable of understanding when an offer - to receive media can be downgraded to what is supported by the - receiver of the offer. - - o An answerer MAY extend the offer with additional media format - configurations. However, to enable the usage of these, a second - offer from the offerer is required in most cases to provide the - stream properties parameters that the media sender will use. This - also has the effect that the offerer needs to be able to receive - this media format configuration, not only send it. - - o If an offerer wishes to have non-symmetric capabilities between - sending and receiving, the offerer has to offer different RTP - sessions, i.e. different media lines declared as "recvonly" and - "sendonly" respectively. This may have further implications on - the system. - -8.2.3. Usage in Declarative Session Descriptions - - When offering H.264 over RTP using SDP in a declarative style as - used in RTSP [30] or SAP [31], the following considerations are - necessary. - - - - - - - - - - - - - - - - - - -Wenger et. al. Expires December 2004 [Page 54] - -Internet Draft July, 2004 - - o All parameters that are capable of indicating both the - properties of a NAL unit stream and the capabilities of a - receiver are used to indicate the properties of a NAL unit - stream. For example, in this case, the parameter "profile- - level-id" declares the values used by the stream, instead of - capabilities of the sender. This results in that the following - interpretation of the parameters MUST be used: - Declaring actual configuration or properties: - - profile-level-id - - sprop-parameter-sets - - packetization-mode - - sprop-interleaving-depth - - sprop-deint-buf-req - - sprop-max-don-diff - - sprop-init-buf-time - - Not usable: - - max-mbps - - max-fs - - max-cpb - - max-dpb - - max-br - - redundant-pic-cap - - max-rcmd-nalu-size - - parameter-add - - deint-buf-cap - - o A receiver of the SDP is required to support all parameters and - all values of the parameters provided, or the receiver MUST reject - (RTSP) or not participate in (SAP) the session. It falls on the - creator of the session to use values that are expected to be - supported by the receiving application. - - -8.3. Examples - - A SIP Offer/Answer exchange where both parties are expected to both - send and receive could look like the following. Only the media - codec specific parts of the SDP are shown. Some lines are wrapped - due to text constraints. - - Offerer -> Answer SDP message: - - m=video 49170 RTP/AVP 100 99 98 - a=rtpmap:98 H264/90000 - a=fmtp:98 profile-level-id=42A01E; packetization-mode=0; - sprop-parameter-sets=Z0IACpZTBYmI,aMljiA== - a=rtpmap:99 H264/90000 - a=fmtp:99 profile-level-id=42A01E; packetization-mode=1; - sprop-parameter-sets=Z0IACpZTBYmI,aMljiA== - a=rtpmap:100 H264/90000 - - - -Wenger et. al. Expires December 2004 [Page 55] - -Internet Draft July, 2004 - - a=fmtp:100 profile-level-id=42A01E; packetization-mode=2; - sprop-parameter-sets=Z0IACpZTBYmI,aMljiA==; - sprop-interleaving-depth=45; sprop-deint-buf-req=64000; - sprop-init-buf-time=102478; deint-buf-cap=128000 - - The above offer offers the same codec configuration in three - different packetization formats. 
PT 98 represents single NALU mode, - 99 non-interleaved mode, and 100 indicates the interleaved mode. In - the interleaved mode case, the interleaving parameters that the - offerer would use if the answer indicates support for PT 100 are - also included. In all three cases the parameter "sprop-parameter- - sets" conveys the initial parameter sets that are required for the - answerer when receiving a stream from the offerer when this - configuration (profile-level-id and packetization mode) is accepted. - Note that the value for "sprop-parameter-sets", although identical - in the example above, could be different for each payload type. - - Answerer -> Offerer SDP message: - - m=video 49170 RTP/AVP 100 99 97 - a=rtpmap:97 H264/90000 - a=fmtp:97 profile-level-id=42A01E; packetization-mode=0; - sprop-parameter-sets=Z0IACpZTBYmI,aMljiA==,As0DEWlsIOp==, - KyzFGleR - a=rtpmap:99 H264/90000 - a=fmtp:99 profile-level-id=42A01E; packetization-mode=1; - sprop-parameter-sets=Z0IACpZTBYmI,aMljiA==,As0DEWlsIOp==, - KyzFGleR; max-rcmd-nalu-size=3980 - a=rtpmap:100 H264/90000 - a=fmtp:100 profile-level-id=42A01E; packetization-mode=2; - sprop-parameter-sets=Z0IACpZTBYmI,aMljiA==,As0DEWlsIOp==, - KyzFGleR; sprop-interleaving-depth=60; - sprop-deint-buf-req=86000; sprop-init-buf-time=156320; - deint-buf-cap=128000; max-rcmd-nalu-size=3980 - - As the offer/answer negotiation covers both sending and receiving - streams, an offer indicates the exact parameters for what the - offerer is willing to receive, while the answer indicates the same - for what the answerer accepts to receive. In this case the offerer - declared that it is willing to receive payload type 98. The - answerer accepts this by declaring a equivalent payload type 97, - i.e. it has identical values for the three parameters "profile- - level-id", packetization-mode, and "sprop-deint-buf-req". This has - the following implications for both the offerer and the answerer - concerning the parameters that declare properties. The offerer - initially declared a certain value of the "sprop-parameter-sets" in - the payload definition for PT=98. However, as the answerer accepted - this as PT=97, the values of "sprop-parameter-sets" in PT=98 must - now be used instead when the offerer sends PT=97. Similarly, when - the answerer sends PT=98 to the offerer, it has to use the - properties parameters it declared in PT=97. - -Wenger et. al. Expires December 2004 [Page 56] - -Internet Draft July, 2004 - - - The answerer also accepts the reception of the two configurations - that payload types 99 and 100 represents. It provides the initial - parameter sets for the answerer-to-offerer direction, and buffering - related parameters that it will use to send the payload types. It - also provides the offerer with its memory limit for deinterleaving - operations by providing a "deint-buf-cap" parameter. This is only - useful if the offerer decides on making a second offer, where it can - take the new value into account. The "max-rcmd-nalu-size" indicates - that the answerer can efficiently process NALUs up to the size of - 3980 bytes. However, there is no guarantee that the network - supports this size. - Please note that the parameter sets in the above example are not - representing a legal operation point of an H.264 codec -- the base64 - strings are only used for illustration. - - -8.4. Parameter Set Considerations - - The H.264 parameter sets are a fundamental part of the video codec - and vital to its operation, see section 1.2. 
Due to their - characteristics and their importance for the decoding process, lost - or erroneously transmitted parameter sets can hardly be concealed - locally at the receiver. A reference to a corrupt parameter set has - normally fatal results to the decoding process. Corruption could - occur, for example, due to the erroneous transmission or loss of a - parameter set data structure, but also due to the untimely - transmission of a parameter set update. Hence, the following - recommendations are provided as a guideline for the implementer of - the RTP sender. - - Parameter set NALUs can be transported using three different - principles: - - A. Using a session control protocol (out-of-band) prior to the - actual RTP session. - B. Using a session control protocol (out-of-band) during an ongoing - RTP session. - C. Within the RTP stream in the payload (in-band) during an ongoing - RTP session. - - It is necessary to implement principles A and B within a session - control protocol. SIP and SDP can be used as described in the SDP - Offer/Answer model and in the previous sections of this memo. This - section contains guidelines how principles A and B must be - implemented within session control protocols, and is independent of - the particular protocol used. Principle C is supported by the RTP - payload format defined in this specification. - - Picture and sequence parameter set NALUs SHOULD NOT be transmitted - in the RTP payload unless reliable transport is provided for RTP, as - - -Wenger et. al. Expires December 2004 [Page 57] - -Internet Draft July, 2004 - - a loss of a parameter set of either type likely prevents decoding of - a considerable portion of the corresponding RTP stream. Thus, the - transmission of parameter sets using a reliable session control - protocol, i.e. usage of principle A or B above, is RECOMMENDED. - - In the rest of the section it is assumed that out-of-band signaling - provides reliable transport of parameter set NALUs, while in-band - transport does not. If in-band signaling of parameter sets is used, - the sender SHOULD take the error characteristics into account and - use mechanisms to provide a high probability for delivering the - parameter sets correctly. Mechanisms that increase the probability - for a correct reception include packet repetition, FEC, and - retransmission. The use of an unreliable, out-of-band control - protocol has similar disadvantages as the in-band signaling - (possible loss) and, in addition, may also lead to difficulties in - the synchronization (see below) and is NOT RECOMMENDED. - - Parameter sets MAY be added or updated during the lifetime of a - session using principles B and C. It is required that parameter - sets are present at the decoder prior to the NAL units that refer to - them. Updating or adding of parameter sets can result in further - problems, and therefore the following recommendations should be - considered. - - - When adding or updating parameter sets, principle C is vulnerable - to transmission errors as described above, and therefore principle - B is RECOMMENDED. - - - When adding or updating parameter sets, care SHOULD be taken to - ensure that any parameter set is delivered prior to its usage. It - is common that no synchronization is present between out-of-band - signaling and in-band traffic. If out-of-band signaling is used, - it is RECOMMEDED that a sender does not start sending NALUs - requiring the updated parameter sets prior to acknowledgement of - delivery from the signaling protocol. 
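   Informative note: when principle C cannot be avoided, the simplest of
   the mechanisms listed above is packet repetition of the parameter set
   NAL units.  The C sketch below is purely illustrative (the send
   routine and the sample buffers are invented for the example); it
   re-sends the active sequence and picture parameter sets in-band ahead
   of every IDR picture:

      #include <stdint.h>
      #include <stddef.h>
      #include <stdio.h>

      /* NAL unit type values as defined by the H.264 specification. */
      #define NAL_TYPE_IDR 5
      #define NAL_TYPE_SPS 7
      #define NAL_TYPE_PPS 8

      /* Invented for the example: hand one NAL unit to the packetizer. */
      static void send_nal(const uint8_t *nal, size_t len)
      {
          printf("sending NAL type %u, %zu bytes\n", nal[0] & 0x1F, len);
      }

      /* Repeat the active parameter sets immediately before every IDR
         picture, so that a receiver that lost an earlier copy can still
         start decoding at this random access point.  Repetition raises
         the delivery probability; it does not guarantee delivery. */
      static void send_access_unit(const uint8_t *sps, size_t sps_len,
                                   const uint8_t *pps, size_t pps_len,
                                   const uint8_t *slice, size_t slice_len)
      {
          if ((slice[0] & 0x1F) == NAL_TYPE_IDR) {
              send_nal(sps, sps_len);
              send_nal(pps, pps_len);
          }
          send_nal(slice, slice_len);
      }

      int main(void)
      {
          uint8_t sps[]   = { 0x67, 0x42, 0xA0, 0x1E };  /* truncated */
          uint8_t pps[]   = { 0x68, 0xCE, 0x38, 0x80 };  /* truncated */
          uint8_t slice[] = { 0x65, 0x88, 0x84, 0x00 };  /* IDR slice */
          send_access_unit(sps, sizeof sps, pps, sizeof pps,
                           slice, sizeof slice);
          return 0;
      }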
- - - When updating parameter sets, the following synchronization issue - should be taken into account. When overwriting a parameter set at - the receiver, the sender needs ensure that the parameter set in - question is not needed by any NALU present in the network or - receiver buffers. Otherwise decoding using a wrong parameter set - may occur. To lessen this problem, it is RECOMMENDED to either - overwrite only those parameter sets that have not been used for a - sufficiently long time (to ensure that all related NALUs have been - consumed), or to add a new parameter set instead (which may have - negative consequences for the efficiency of the video coding). - - - When adding new parameter sets, previously unused parameter set - identifiers are used. This avoids the problem identified in the - previous paragraph. However, in a multiparty session and unless a - synchronized control protocol is used, there is a risk that - -Wenger et. al. Expires December 2004 [Page 58] - -Internet Draft July, 2004 - - multiple entities try to add different parameter sets for the same - identifier, which needs to be avoided. - - - Adding or modifying parameter sets by using both principles B and - C in the same RTP session may lead to inconsistencies of the - parameter sets because of the lack of synchronization between the - control and the RTP channel. Therefore principle B and C MUST NOT - both be used in the same session, unless sufficient - synchronization can be provided. - - In some scenarios, e.g. when only the subset of this payload format - specification corresponding to H.241 is used, it is not possible to - employ out-of-band parameter set transmission. In this case, - parameter sets need to be transmitted in-band. Here, the - synchronization with the non-parameter-set-data in the bitstream is - implicit, but the possibility of a loss needs to be taken into - account and the loss probability should be reduced using the - mechanisms discussed above. - - - When parameter sets are both provided initially using principle A - and then later added or updated in-band (principle C), then there - is a risk associated with updating the parameter sets delivered - out-of-band. If receivers miss some in-band updates, because of a - loss or a late tune-in, for example, those receivers attempt to - decode the bitstream using out-dated parameters. It is - RECOMMENDED that parameter set IDs are partitioned between the - out-of-band and in-band parameter sets. - - To allow for maximum flexibility and best performance from the H.264 - coder, it is recommended if possible to allow any sender to add its - own parameter sets to be used in a session. Setting the "parameter- - add" parameter to false should only be done in cases where the - session topology prevents a participant to add its own parameter - sets. - - -9. Security Considerations - - RTP packets using the payload format defined in this specification - are subject to the security considerations discussed in the RTP - specification [4], and any appropriate RTP profile (for example - [18]). This implies that confidentiality of the media streams is - achieved by encryption, for example through the application of SRTP - [29]. Because the data compression used with this payload format is - applied end-to-end, encryption may be performed after compression so - there is no conflict between the two operations. - - A potential denial-of-service threat exists for data encodings using - compression techniques that have non-uniform receiver-end - computational load. 
The attacker can inject such pathological - datagrams into the stream that are complex to decode and cause the - - -Wenger et. al. Expires December 2004 [Page 59] - -Internet Draft July, 2004 - - receiver to be overloaded. H.264 is particularly vulnerable to such - attacks because it is extremely simple to generate datagrams - containing NAL units that affect the decoding process of many future - NAL units. Therefore the usage of authentication of at least the - RTP packet is RECOMMENDED, for example with SRTP [29]. - - Note that the appropriate mechanism to ensure confidentiality and - integrity of RTP packets and their payloads are very dependent on - the application and the transport and signaling protocols employed. - Hence, although SRTP is given as example above, other possible - choices exist. - - As with any IP-based protocol, in some circumstances a receiver may - be overloaded simply by the receipt of too many packets, either - desired or undesired. Network-layer authentication may be used to - discard packets from undesired sources, but the processing cost of - the authentication itself may be too high. In a multicast - environment, pruning of specific sources may be implemented in - future versions of IGMP [19] and in multicast routing protocols to - allow a receiver to select which sources are allowed to reach it. - - Decoders MUST exercise caution with respect to the handling of user - data SEI messages, particularly if they contain active elements, and - MUST restrict their domain of applicability to the presentation - containing the stream. - - -10. Congestion Control - - Congestion control for RTP SHALL be used in accordance with RFC 3550 - [4], and any applicable RTP profile, e.g. RFC 3551 [18]. This means - that congestion control is required for any transmission over - unmanaged best-effort networks. - - The bit rate adaptation necessary for obeying the congestion control - principle is easily achievable when real-time encoding is used. - However, when pre-encoded content is being transmitted, bandwidth - adaptation requires the availability of more than one coded - representation of the same content, at different bit rates, or the - existence of non-reference pictures or sub-sequences [25] in the - bitstream. The switching between the different representations can - normally be performed in the same RTP session, e.g. by employing a - concept known as SI/SP slices of the Extended Profile, or by - switching streams at IDR picture boundaries. Only if non- - downgradable parameters, such as the profile part of the - profile/level ID change, it becomes necessary to terminate and re- - start the media stream, possibly using a different RTP payload type. - - MANEs MAY follow the suggestions outlined in section 7.3 and remove - certain not usable packets from the packet stream when that stream - - -Wenger et. al. Expires December 2004 [Page 60] - -Internet Draft July, 2004 - - was damaged due to previous packet losses. This can help reducing - the network load in certain special cases. - -11. IANA Consideration - - IANA is kindly requested to register one new MIME type, see section - 8.1. - -12. Informative Appendix: Application Examples - - This payload specification is very flexible in its use, to cover the - extremely wide application space that is anticipated for H.264. - However, such a great flexibility also makes it difficult for an - implementer to decide on a reasonable packetization scheme. 
Some - information on how to apply this specification to real-world - scenarios is likely to appear in the form of academic publications - and a test model software and description in the near future. - However, some preliminary usage scenarios are described here as - well. - - -12.1. Video Telephony according to ITU-T Recommendation H.241 - Annex A - - H.323-based video telephony systems that use H.264 as an optional - video compression scheme are required to support H.241 Annex A [17] - as a packetization scheme. The packetization mechanism defined in - this Annex is technically identical with a small subset of this - specification. - - When operating according to H.241 Annex A, parameter sets NAL units - are sent in-band. Only Single NAL unit packets are used. Many such - systems are not sending IDR pictures regularly, but only when - required by user interaction or by control protocol means, e.g. when - switching between video channels in a Multipoint Control Unit or for - error recovery requested by feedback. - - -12.2. Video Telephony, No Slice Data Partitioning, No NAL Unit - Aggregation - - The RTP part of this scheme is implemented and tested (though not - the control-protocol part, see below). - - In most real-world video telephony applications, the picture - parameters such as picture size or optional modes never change - during the lifetime of a connection. Hence, all necessary parameter - sets (usually only one) are sent as a side effect of the capability - exchange/announcement process e.g. according to the SDP syntax - specified in section 8.2 of this document. Since all necessary - parameter set information is established before the RTP session - - -Wenger et. al. Expires December 2004 [Page 61] - -Internet Draft July, 2004 - - starts, there is no need for sending any parameter set NAL units. - Slice data partitioning is not used either. Hence, the RTP packet - stream consists basically of NAL units that carry single coded - slices. - - The encoder chooses the size of coded slice NAL units such that they - offer the best performance. Often, this is done by adapting the - coded slice size to the MTU size of the IP network. For small - picture sizes this may result in a one-picture-per-one-packet - strategy. Intra refresh algorithms clean up the loss of packets and - the resulting drift-related artifacts. - - -12.3. Video Telephony, Interleaved Packetization Using NAL Unit - Aggregation - - This scheme allows better error concealment and is used in H.263 - based designed using RFC 2429 packetization [12]. It is also - implemented and good results were reported [14]. - - The VCL encoder codes the source picture such that all macroblocks - (MBs) of one MB line are assigned to one slice. All slices with - even MB row addresses are combined into one STAP, and all slices - with odd MB row addresses into another STAP. Those STAPs are - transmitted as RTP packets. The establishment of the parameter sets - is performed as discussed above. - - Note that the use of STAPs is essential here, because the high - number of individual slices (18 for a CIF picture) would lead to - unacceptably high IP/UDP/RTP header overhead (unless the source - coding tool FMO is used, which is not assumed in this scenario). - Furthermore, some wireless video transmission systems, such as - H.324M and the IP-based video telephony specified in 3GPP, are - likely to use relatively small transport packet size. For example, - a typical MTU size of H.223 AL3 SDU is around 100 bytes [20]. 
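   Informative note: the aggregation described above is straightforward
   to implement on the sender side.  The following C sketch is purely
   illustrative (the helper names are invented for the example); it
   builds a STAP-A payload, the non-interleaved aggregation packet, by
   appending 16-bit size-prefixed NAL units after the one-octet
   aggregation-packet NAL unit header and stops before the MTU would be
   exceeded:

      #include <stdint.h>
      #include <stddef.h>
      #include <string.h>

      #define STAP_A_TYPE 24

      /* Begin a STAP-A payload: one NAL unit header octet whose F and
         NRI bits should reflect the aggregated NAL units; both are left
         at zero in this sketch. */
      static size_t stap_a_init(uint8_t *payload)
      {
          payload[0] = STAP_A_TYPE;
          return 1;
      }

      /* Append one NAL unit preceded by its 16-bit size in network byte
         order.  Returns the new payload length, or 0 if the packet
         would exceed the MTU and has to be sent first. */
      static size_t stap_a_add(uint8_t *payload, size_t used, size_t mtu,
                               const uint8_t *nal, size_t nal_len)
      {
          if (nal_len > 0xFFFF || used + 2 + nal_len > mtu)
              return 0;
          payload[used]     = (uint8_t)(nal_len >> 8);
          payload[used + 1] = (uint8_t)(nal_len & 0xFF);
          memcpy(payload + used + 2, nal, nal_len);
          return used + 2 + nal_len;
      }

      int main(void)
      {
          uint8_t payload[1400];        /* typical wired MTU budget */
          uint8_t row0[80] = { 0x61 };  /* two dummy slices with even  */
          uint8_t row2[80] = { 0x61 };  /* MB row addresses            */
          size_t used = stap_a_init(payload);
          used = stap_a_add(payload, used, sizeof payload,
                            row0, sizeof row0);
          used = stap_a_add(payload, used, sizeof payload,
                            row2, sizeof row2);
          return used == 0;             /* 0 on success */
      }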
- Coding individual slices according to this packetization scheme - provides a further advantage in communication between wired and - wireless networks, as individual slices are likely to be smaller - than the preferred maximum packet size of wireless systems. - Consequently, a gateway can convert the STAPs used in a wired - network to several RTP packets with only one NAL unit that are - preferred in a wireless network and vice versa. - - -12.4. Video Telephony, with Data Partitioning - - This scheme is implemented and was shown to offer good performance - especially at higher packet loss rates [14]. - - Data Partitioning is known to be useful only when some form of - unequal error protection is available. Normally, in single-session - -Wenger et. al. Expires December 2004 [Page 62] - -Internet Draft July, 2004 - - RTP environments, even error characteristics are assumed, i.e., the - packet loss probability of all packets of the session is the same - statistically. However, there are means to reduce the packet loss - probability of individual packets in an RTP session. A FEC packet - according to RFC 2733 [21], for example, specifies which media - packets are associated with the FEC packet. - - In all cases, the incurred overhead is substantial, but in the same - order of magnitude as the number of bits that have otherwise be - spent for intra information. However, this mechanism is not adding - any delay to the system. - - Again, the complete parameter set establishment is performed through - control protocol means. - - -12.5. Video Telephony or Streaming, with FUs and Forward Error - Correction - - This scheme is implemented and was shown to provide good performance - especially at higher packet loss rates [22]. - - The most efficient means to combat packet-losses for scenarios where - retransmissions are not applicable is forward error correction - (FEC). Although the application layer, end-to-end use of FEC is - often less efficient when compared to a FEC-based protection of - individual links (especially when links of different characteristics - are in the transmission path), application layer, end-to-end FEC is - unavoidable in some scenarios. RFC 2733 [21] provides means to use - generic, application layer, end-to-end FEC in packet-loss - environments. A binary forward error correcting code is generated - by applying the XOR operation to the bits at the same bit position - in different packets. The binary code can be specified by the - parameters (n,k) in which k is the number of information packets - used in the connection and n is the total number of packets - generated for k information packets, i.e., n-k parity packets are - generated for k information packets. - - When using a code with parameters (n,k) within the RFC 2733 - framework, the following properties are well-known: - a) If applied over one RTP packet, RFC 2733 provides only packet - repetition. - b) RFC 2733 is most bit-rate efficient if XOR-connected packets have - equal length. - c) At the same packet loss probability p and for a fixed k, the - greater the value of n is, the smaller the residual error - probability becomes. For example, for packet loss probability - 10%, k=1, and n=2, the residual error probability is about 1%, - whereas for n=3, the residual error probability is about 0.1%. - d) At the same packet loss probability p and for a fixed code rate - k/n, the greater the value of n is, the smaller the residual - -Wenger et. al. 
Expires December 2004 [Page 63] - -Internet Draft July, 2004 - - error probability becomes. For example, at a packet loss - probability of p=10%, k=1 and n=2, the residual error rate is - about 1%, whereas for an extended Golay code with k=12 and n=24, - the residual error rate is about 0.01%. - - For applying RFC 2733 in combination with H.264 baseline coded video - without using FUs several options might be considered: - 1) The video encoder produces NAL units where each video frame is - coded in a single slice. Applying FEC, one could use a simple - code, e.g. (n=2, k=1), i.e., each NAL unit would basically just - be repeated. The disadvantage is obviously the bad code - performance according to (d) and the low flexibility as only (n, - k=1) codes can be used. - 2) The video encoder produces NAL units where each video frame is - encoded in one or more consecutive slices. Applying FEC, one - could use a better code, e.g. (n=24, k=12), over a sequence of - NAL units. Depending on the number of RTP packets per frame, a - loss may introduce a significant delay, which is reduced the more - RTP packets per frame are used. Packets of completely different - length might also be connected, which decreases bit-rate - efficiency according to (b). However with some care and for - slices of 1kb or larger, similar length (100-200 bytes - difference) may be produced, which will not lower the bit- - efficiency catastrophically. - 3) The video encoder produces NAL units, where a certain frame - contains k slices of possibly almost equal length. Then, - applying FEC, a better code, e.g. (n=24, k=12), over the sequence - of NAL units for each frame can be used. The delay compared to - (2) may be reduced, but several disadvantages are obvious. - Firstly, the coding efficiency of the encoded video is lowered - significantly as slice-structured coding reduces intra-frame - prediction and additional slice overhead is necessary. Secondly, - pre-encoded content or, when operating over a gateway, the video - is usually not appropriately coded with k slices such that FEC - can be applied. Finally, the encoding of video producing k - slices of equal length is not straightforward and might require - more than one encoding pass. - - Many of the mentioned disadvantages can be avoided by applying FUs - in combination with FEC. Each NAL unit can be split into any number - of FUs of basically equal length, and therefore FEC with a - reasonable k and n can be applied even if the encoder made no effort - of producing slices of equal length. For example, a coded slice NAL - unit containing an entire frame can be split to k FUs and a parity - check code (n=k+1, k) can be applied. However this has the - disadvantage that unless all created fragments can be recovered the - whole slice will be lost. Thus a larger section is lost, than would - be the case if the frame had been split into several slices. - - The presented technique makes it possible to achieve good - transmission error tolerance even if no additional source coding - -Wenger et. al. Expires December 2004 [Page 64] - -Internet Draft July, 2004 - - layer redundancy, such as periodic intra frames, is present. - Consequently, the same coded video sequence can be used for - achieving the maximum compression efficiency and quality over error- - free transmission and for transmission over error-prone networks. - Furthermore, the technique allows the application of FEC to pre- - encoded sequences without adding delay. 
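   Informative note: the parity packet of the (n=k+1, k) code discussed
   above is nothing more than the bit-wise XOR of the k protected
   packets.  The C sketch below is purely illustrative; it omits the
   RFC 2733 FEC header and assumes the FUs have already been made equal
   in length:

      #include <stdint.h>
      #include <stddef.h>

      /* XOR the bits at the same position of k equal-length FU payloads
         into one parity payload; recovering a single lost FU is then a
         matter of XORing the parity with the k-1 surviving FUs. */
      static void xor_parity(const uint8_t **fu_payloads, size_t k,
                             size_t len, uint8_t *parity)
      {
          size_t i, j;
          for (i = 0; i < len; i++) {
              uint8_t x = 0;
              for (j = 0; j < k; j++)
                  x ^= fu_payloads[j][i];
              parity[i] = x;
          }
      }

      int main(void)
      {
          /* Two FU-A payloads: FU indicator, FU header, fragment data. */
          uint8_t fu0[4] = { 0x7C, 0x85, 0x11, 0x22 };
          uint8_t fu1[4] = { 0x7C, 0x05, 0x33, 0x44 };
          const uint8_t *fus[2] = { fu0, fu1 };
          uint8_t parity[4];
          xor_parity(fus, 2, sizeof parity, parity);
          return 0;
      }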
In addition, in this case - pre-encoded sequences that are not encoded for error-prone networks - can still be transmitted almost reliably without adding extensive - delays. In addition, FUs of equal length result in a bit-rate - efficient use of RFC 2733. - - In case that the error probability depends on the length of the - transmitted packet, e.g. in case of mobile transmission [16], the - benefits of applying FUs with FEC are even more obvious. Basically, - the flexibility of the size of FUs allows applying appropriate FEC - for each NAL unit and even unequal error protection of NAL units. - - The incurred overhead when using FUs and FEC is substantial, but in - the same order of magnitude as the number of bits that have to be - spent for intra coded macroblocks if no FEC is applied. In [22] it - was shown that the overall performance at the same error rate and - the same overall bit-rate including the overhead, the FEC-based - approach can enhance the quality. - - -12.6. Low-Bit-Rate Streaming - - This scheme has been implemented with H.263 and non-standard RTP - packetization and gave good results [23]. There is no technical - reason why similarly good results could not be achievable with - H.264. - - In today's Internet streaming, some of the offered bit-rates are - relatively low in order to allow terminals with dial-up modems to - access the content. In wired IP networks, relatively large packets, - say 500 - 1500 bytes, are preferred to smaller and more frequently - occurring packets in order to reduce network congestion. Moreover, - use of large packets decreases the amount of RTP/UDP/IP header - overhead. For low-bit-rate video, the use of large packets means - that sometimes up to few pictures should be encapsulated in one - packet. - - However, loss of a packet including many coded pictures would have - drastic consequences in visual quality, as there is practically no - other way to conceal a loss of an entire picture than to repeat the - previous one. One way to construct relatively large packets and - maintain possibilities for successful loss concealment is to - construct MTAPs that contain slices from several pictures in an - interleaved manner. An MTAP should not contain spatially adjacent - slices from the same picture or spatially overlapping slices from - any picture. If a packet is lost, it is likely that a lost slice is - -Wenger et. al. Expires December 2004 [Page 65] - -Internet Draft July, 2004 - - surrounded by spatially adjacent slices of the same picture and - spatially corresponding slices of the temporally previous and - succeeding pictures. Consequently, concealment of the lost slice is - likely to succeed relatively well. - - -12.7. Robust Packet Scheduling in Video Streaming - - Robust packet scheduling has been implemented with MPEG-4 Part 2 and - simulated in a wireless streaming environment [24]. There is no - technical reason why similar or better results could not be - achievable with H.264. - - Streaming clients typically have a receiver buffer that is capable - of storing a relatively large amount of data. Initially, when a - streaming session is established, a client does not start playing - the stream back immediately, but rather it typically buffers the - incoming data for a few seconds. This buffering helps to maintain - continuous playback, because, in case of occasional increased - transmission delays or network throughput drops, the client can - decode and play buffered data. 
Otherwise, without initial - buffering, the client has to freeze the display, stop decoding, and - wait for incoming data. The buffering is also necessary for either - automatic or selective retransmission in any protocol level. If any - part of a picture is lost, a retransmission mechanism may be used to - resend the lost data. If the retransmitted data is received before - its scheduled decoding or playback time, the loss is perfectly - recovered. Coded pictures can be ranked according to their - importance in the subjective quality of the decoded sequence. For - example, non-reference pictures, such as conventional B pictures, - are subjectively least important, because their absence does not - affect decoding of any other pictures. In addition to non-reference - pictures, the ITU-T H.264 | ISO/IEC 14496-10 standard includes a - temporal scalability method called sub-sequences [25]. Subjective - ranking can also be made on coded slice data partition or slice - group basis. Coded slices and coded slice data partitions that are - subjectively the most important can be sent earlier than their - decoding order indicates, whereas coded slices and coded slice data - partitions that are subjectively the least important can be sent - later than their natural coding order indicates. Consequently, any - retransmitted parts of the most important slices and coded slice - data partitions are more likely to be received before their - scheduled decoding or playback time compared to the least important - slices and slice data partitions. - - -13. Informative Appendix: Rationale for Decoding Order Number - -13.1. Introduction - - - -Wenger et. al. Expires December 2004 [Page 66] - -Internet Draft July, 2004 - - The Decoding Order Number (DON) concept was introduced mainly to - enable efficient multi-picture slice interleaving (see section 12.6) - and robust packet scheduling (see section 12.7). In both of these - applications NAL units are transmitted out of decoding order. DON - indicates the decoding order of NAL units and should be used in the - receiver to recover the decoding order. Example use cases for - efficient multi-picture slice interleaving and for robust packet - scheduling are given in sections 13.2 and 13.3 respectively. - Section 13.4 describes the benefits of the DON concept in error - resiliency achieved by redundant coded pictures. Section 13.5 - summarizes considered alternatives to DON and justifies why DON was - chosen to this RTP payload specification. - - -13.2. Example of Multi-Picture Slice Interleaving - - An example of multi-picture slice interleaving follows. A subset of - a coded video sequence is depicted below in output order. R denotes - a reference picture, N denotes a non-reference picture, and the - number indicates a relative output time. - - ... R1 N2 R3 N4 R5 ... - - The decoding order of these pictures is from left to right as - follows: - ... R1 R3 N2 R5 N4 ... - - The NAL units of pictures R1, R3, N2, R5, and N4 are marked with a - DON equal to 1, 2, 3, 4, and 5, respectively. 
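   Informative note: on the receiving side, recovering the decoding
   order amounts to ordering the de-packetized NAL units by their DON.
   The C sketch below is purely illustrative; it ignores the modulo
   65536 wrap-around of DON that an actual de-packetizer has to take
   into account:

      #include <stdint.h>
      #include <stdlib.h>
      #include <stdio.h>

      /* One de-packetized NAL unit together with its decoding order
         number; the label stands in for the actual NAL unit data. */
      typedef struct {
          uint16_t don;
          const char *label;
      } rx_nal_t;

      static int by_don(const void *a, const void *b)
      {
          const rx_nal_t *x = a, *y = b;
          return (int)x->don - (int)y->don;
      }

      int main(void)
      {
          /* A possible reception order when the NAL units above are
             transmitted out of decoding order. */
          rx_nal_t rx[] = {
              { 1, "R1" }, { 2, "R3" }, { 4, "R5" },
              { 3, "N2" }, { 5, "N4" },
          };
          size_t n = sizeof rx / sizeof rx[0];
          size_t i;
          qsort(rx, n, sizeof rx[0], by_don);  /* decoding order restored */
          for (i = 0; i < n; i++)
              printf("DON %u: %s\n", (unsigned)rx[i].don, rx[i].label);
          return 0;
      }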
- - Each reference picture consists of three slice groups that are - scattered as follows (a number denotes the slice group number for - each macroblock in a QCIF frame): - - 0 1 2 0 1 2 0 1 2 0 1 - 2 0 1 2 0 1 2 0 1 2 0 - 1 2 0 1 2 0 1 2 0 1 2 - 0 1 2 0 1 2 0 1 2 0 1 - 2 0 1 2 0 1 2 0 1 2 0 - 1 2 0 1 2 0 1 2 0 1 2 - 0 1 2 0 1 2 0 1 2 0 1 - 2 0 1 2 0 1 2 0 1 2 0 - 1 2 0 1 2 0 1 2 0 1 2 - - For the sake of simplicity, we assume that all the macroblocks of a - slice group are included in one slice. Three MTAPs are constructed - from three consecutive reference pictures so that each MTAP contains - three aggregation units, each of which contains all the macroblocks - from one slice group. The first MTAP contains slice group 0 of - picture R1, slice group 1 of picture R3, and slice group 2 of - picture R5. The second MTAP contains slice group 1 of picture R1, - -Wenger et. al. Expires December 2004 [Page 67] - -Internet Draft July, 2004 - - slice group 2 of picture R3, and slice group 0 of picture R5. The - third MTAP contains slice group 2 of picture R1, slice group 0 of - picture R3, and slice group 1 of picture R5. Each non-reference - picture is encapsulated into an STAP-B. - - Consequently, the transmission order of NAL units is the following: - R1, slice group 0, DON 1, carried in MTAP, RTP SN: N - R3, slice group 1, DON 2, carried in MTAP, RTP SN: N - R5, slice group 2, DON 4, carried in MTAP, RTP SN: N - R1, slice group 1, DON 1, carried in MTAP, RTP SN: N+1 - R3, slice group 2, DON 2, carried in MTAP, RTP SN: N+1 - R5, slice group 0, DON 4, carried in MTAP, RTP SN: N+1 - R1, slice group 2, DON 1, carried in MTAP, RTP SN: N+2 - R3, slice group 1, DON 2, carried in MTAP, RTP SN: N+2 - R5, slice group 0, DON 4, carried in MTAP, RTP SN: N+2 - N2, DON 3, carried in STAP-B, RTP SN: N+3 - N4, DON 5, carried in STAP-B, RTP SN: N+4 - - The receiver is able to organize the NAL units back in decoding - order based on the value of DON associated with each NAL unit. - - If one of the MTAPs is lost, the spatially adjacent and temporally - co-located macroblocks are received and can be used to conceal the - loss efficiently. If one of the STAPs is lost, the effect of the - loss does not propagate temporally. - - -13.3. Example of Robust Packet Scheduling - - An example of robust packet scheduling follows. The communication - system used in the example consists of the following components in - the order that the video is processed from source to sink: - o camera and capturing - o pre-encoding buffer - o encoder - o encoded picture buffer - o transmitter - o transmission channel - o receiver - o receiver buffer - o decoder - o decoded picture buffer - o display - - The video communication system used in the example operates as - follows. Note that processing of the video stream happens gradually - and at the same time in all components of the system. The source - video sequence is shot and captured to a pre-encoding buffer. The - pre-encoding buffer can be used to order pictures from sampling - order to encoding order or to analyze multiple uncompressed frames - for bitrate rate control purposes, for example. In some cases the - -Wenger et. al. Expires December 2004 [Page 68] - -Internet Draft July, 2004 - - pre-encoding buffer may not exist, but rather the sampled pictures - are encoded right away. The encoder encodes pictures from the pre- - encoding buffer and stores the output, i.e., coded pictures, to the - encoded picture buffer. 
The transmitter encapsulates the coded - pictures from the encoded picture buffer to transmission packets and - sends them to a receiver through a transmission channel. The - receiver stores the received packets to the receiver buffer. The - receiver buffering process typically includes buffering for - transmission delay jitter. The receiver buffer can also be used to - recover correct decoding order of coded data. The decoder reads - coded data from the receiver buffer and produces decoded pictures as - output into the decoded picture buffer. The decoded picture buffer - is used to recover the output (or display) order of pictures. - Finally, pictures are displayed. - - In the following example figures, I denotes an IDR picture, R - denotes a reference picture, N denotes a non-reference picture, and - the number after I, R, or N indicates the sampling time relative to - the previous IDR picture in decoding order. Values below the - sequence of pictures indicate scaled system clock timestamps. The - system clock is initialized arbitrarily in this example, and time - runs from left to right. Each I, R, and N picture is mapped into - the same timeline compared to the previous processing step, if any, - assuming that encoding, transmission, and decoding take no time. - Thus, events happening at the same time are located in the same - column throughout all example figures. - - A subset of a sequence of coded pictures is depicted below in - sampling order. - - ... N58 N59 I00 N01 N02 R03 N04 N05 R06 ... N58 N59 I00 N01 ... - ... --|---|---|---|---|---|---|---|---|- ... -|---|---|---|- ... - ... 58 59 60 61 62 63 64 65 66 ... 128 129 130 131 ... - - Figure 16. Sequence of pictures in sampling order - - - The sampled pictures are buffered in the pre-encoding buffer to - arrange them in encoding order. In this example, we assume that the - non-reference pictures are predicted from both the previous and the - next reference picture in output order except for the non-reference - pictures immediately preceding an IDR picture, which are predicted - only from the previous reference picture in output order. Thus, the - pre-encoding buffer has to contain at least two pictures and the - buffering causes a delay of two picture intervals. The output of - the pre-encoding buffering process and the encoding (and decoding) - order of the pictures are as follows: - - - - - - -Wenger et. al. Expires December 2004 [Page 69] - -Internet Draft July, 2004 - - ... N58 N59 I00 R03 N01 N02 R06 N04 N05 ... - ... -|---|---|---|---|---|---|---|---|- ... - ... 60 61 62 63 64 65 66 67 68 ... - - Figure 17. Re-ordered pictures in the pre-encoding buffer - - The encoder or the transmitter can set the value of DON for each - picture to a value of DON for the previous picture in decoding order - plus one. - - For the sake of simplicity, let us assume that: - o the frame rate of the sequence is constant, - o each picture consists of only one slice, - o each slice is encapsulated in a single NAL unit packet, - o there is no transmission delay, and - o pictures are transmitted at constant intervals (that is equal to 1 - / frame rate). - - When pictures are transmitted in decoding order, they are received - as follows: - - ... N58 N59 I00 R03 N01 N02 R06 N04 N05 ... - ... -|---|---|---|---|---|---|---|---|- ... - ... 60 61 62 63 64 65 66 67 68 ... - - Figure 18. 
Received pictures in decoding order - - The OPTIONAL sprop-interleaving-depth MIME type parameter is set to - 0, because the transmission (or reception) order is identical to the - decoding order. - - The decoder has to buffer for one picture interval initially in its - decoded picture buffer to organize pictures from decoding order to - output order as depicted below: - - ... N58 N59 I00 N01 N02 R03 N04 N05 R06 ... - ... -|---|---|---|---|---|---|---|---|- ... - ... 61 62 63 64 65 66 67 68 69 ... - - Figure 19. Output order - - The amount of required initial buffering in the decoded picture - buffer can be signaled in the buffering period SEI message or with - the num_reorder_frames syntax element of H.264 video usability - information. num_reorder_frames indicates the maximum number of - frames, complementary field pairs, or non-paired fields that precede - any frame, complementary field pair, or non-paired field in the - sequence in decoding order and follow it in output order. For the - sake of simplicity, we assume that num_reorder_frames is used to - indicate the initial buffer in the decoded picture buffer. In this - example, num_reorder_frames is equal to 1. - -Wenger et. al. Expires December 2004 [Page 70] - -Internet Draft July, 2004 - - - It can be observed that if the IDR picture I00 is lost during - transmission and a retransmission request is issued when the value - of the system clock is 62, there is one picture interval of time - (until the system clock reaches timestamp 63) to receive the - retransmitted IDR picture I00. - - Let us then assume that IDR pictures are transmitted two frame - intervals earlier than their decoding position, i.e., the pictures - are transmitted as follows: - - ... I00 N58 N59 R03 N01 N02 R06 N04 N05 ... - ... --|---|---|---|---|---|---|---|---|- ... - ... 62 63 64 65 66 67 68 69 70 ... - - Figure 20. Interleaving: early IDR pictures in sending order - - The OPTIONAL sprop-interleaving-depth MIME type parameter is set - equal to 1 according to its definition. (The value of sprop- - interleaving-depth in this example can be derived as follows: - Picture I00 is the only picture preceding picture N58 or N59 in - transmission order and following it in decoding order. Except for - pictures I00, N58, and N59, the transmission order is the same as - the decoding order of pictures. Since a coded picture is - encapsulated into exactly one NAL unit, the value of sprop- - interleaving-depth is equal to the maximum number of pictures - preceding any picture in transmission order and following the - picture in decoding order.) - - The receiver buffering process contains two pictures at a time - according to the value of the sprop-interleaving-depth parameter and - orders pictures from the reception order to the correct decoding - order based on the value of DON associated with each picture. The - output of the receiver buffering process is the following: - - ... N58 N59 I00 R03 N01 N02 R06 N04 N05 ... - ... -|---|---|---|---|---|---|---|---|- ... - ... 63 64 65 66 67 68 69 70 71 ... - - Figure 21. Interleaving: Receiver Buffer - - Again, an initial buffering delay of one picture interval is needed - to organize pictures from decoding order to output order as depicted - below: - - ... N58 N59 I00 N01 N02 R03 N04 N05 ... - ... -|---|---|---|---|---|---|---|- ... - ... 64 65 66 67 68 69 70 71 ... - - Figure 22. Interleaving: Receiver buffer after reordering - - -Wenger et. al. 
Expires December 2004 [Page 71] - -Internet Draft July, 2004 - - It can be observed that the maximum delay that IDR pictures can - undergo during transmission, including possible application, - transport, or link layer retransmission, is equal to three picture - intervals. Thus, the loss resiliency of IDR pictures is improved in - systems supporting retransmission compared to the case in which - pictures were transmitted in their decoding order. - - -13.4. Robust Transmission Scheduling of Redundant Coded Slices - - A redundant coded picture is a coded representation of a picture or - a part of a picture that is not used in the decoding process if the - corresponding primary coded picture is correctly decoded. There - should be no noticeable difference between any area of the decoded - primary picture and a corresponding area that would result from - application of the H.264 decoding process for any redundant picture - in the same access unit. A redundant coded slice is a coded slice - that is a part of a redundant coded picture. - - Redundant coded pictures can be used to provide unequal error - protection in error-prone video transmission. If a primary coded - representation of a picture is decoded incorrectly, a corresponding - redundant coded picture can be decoded. Examples of applications - and coding techniques utilizing the redundant codec picture feature - include the video redundancy coding [26] and protection of "key - pictures" in multicast streaming [27]. - - One property of many error-prone video communications systems is - that transmission errors are often bursty and therefore they may - affect more than one consecutive transmission packets in - transmission order. In low bitrate video communication it is - relatively common that an entire coded picture can be encapsulated - into one transmission packet. Consequently, a primary coded picture - and the corresponding redundant coded pictures may be transmitted in - consecutive packets in transmission order. In order to make the - transmission scheme more tolerant of bursty transmission errors, it - is beneficial to transmit a primary coded picture further apart from - the corresponding redundant coded pictures. The DON concept enables - this. - - -13.5. Remarks on Other Design Possibilities - - The slice header syntax structure of the H.264 coding standard - contains the frame_num syntax element that can indicate the decoding - order of coded frames. However, the usage of the frame_num syntax - element is not feasible or desirable to recover the decoding order - due to the following reasons: - o The receiver is required to parse at least one slice header per - coded picture (before passing the coded data to the decoder). - - -Wenger et. al. Expires December 2004 [Page 72] - -Internet Draft July, 2004 - - o Coded slices from multiple coded video sequences cannot be - interleaved, because the frame number syntax element is reset to 0 - in each IDR picture. - o The coded fields of a complementary field pair share the same - value of the frame_num syntax element. Thus, the decoding order - of the coded fields of a complementary field pair cannot be - recovered based on the frame_num syntax element or any other - syntax element of the H.264 coding syntax. - - The RTP payload format for transport of MPEG-4 elementary streams - [28] enables interleaving of access units and transmission of - multiple access units in the same RTP packet. 
An access unit is - specified in the H.264 coding standard to consist of all NAL units - that are associated with a primary coded picture according to - subclause 7.4.1.2 of [1]. Consequently, slices of different - pictures cannot be interleaved and the multi-picture slice - interleaving technique (see section 12.6) for improved error - resilience cannot be used. - - -14. Acknowledgements - - The authors thank Roni Even, Dave Lindbergh, Philippe Gentric, - Gonzalo Camarillo, Joerg Ott, and Colin Perkins for careful review. - - -15. Full Copyright Statement - - Copyright (C) The Internet Society (2004). This document is subject - to the rights, licenses and restrictions contained in BCP 78, and - except as set forth therein, the authors retain all their rights. - - This document and the information contained herein are provided on - an "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE - REPRESENTS OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE - INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF - THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED - WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. - - -16. Intellectual Property Notice - - The IETF takes no position regarding the validity or scope of any - Intellectual Property Rights or other rights that might be claimed - to pertain to the implementation or use of the technology described - in this document or the extent to which any license under such - rights might or might not be available; nor does it represent that - it has made any independent effort to identify any such rights. - Information on the procedures with respect to rights in RFC - documents can be found in BCP 78 and BCP 79. - -Wenger et. al. Expires December 2004 [Page 73] - -Internet Draft July, 2004 - - - Copies of IPR disclosures made to the IETF Secretariat and any - assurances of licenses to be made available, or the result of an - attempt made to obtain a general license or permission for the use - of such proprietary rights by implementers or users of this - specification can be obtained from the IETF on-line IPR repository - at http://www.ietf.org/ipr. - - The IETF invites any interested party to bring to its attention any - copyrights, patents or patent applications, or other proprietary - rights that may cover technology that may be required to implement - this standard. Please address the information to the IETF at ietf- - ipr@ietf.org. - - -17. References - -17.1. Normative References - - [1] ITU-T Recommendation H.264, "Advanced video coding for generic - audiovisual services", May 2003. - [2] ISO/IEC International Standard 14496-10:2003. - [3] S. Bradner, "Key words for use in RFCs to Indicate Requirement - Levels", BCP 14, RFC 2119, March 1997. - [4] H. Schulzrinne, S. Casner, R. Frederick, and V. Jacobson, - "RTP: A Transport Protocol for Real-Time Applications", STD - 64, RFC 3550, July 2003. - [5] M. Handley and V. Jacobson, "SDP: Session Description - Protocol", RFC 2327, April 1998. - [6] S. Josefsson, "The Base16, Base32, and Base64 Data Encodings", - RFC 3548, July 2003. - [7] ITU-T Recommendation T.35, "Procedure for the allocation of - ITU-T defined codes for non-standard facilities", February - 2000. - [8] J. Rosenberg, and H. Schulzrinne, "An Offer/Answer Model with - the Session Description Protocol (SDP)", RFC 3264, June 2002. - - -17.2. 
Informative References - - [9] "Draft ITU-T Recommendation and Final Draft International - Standard of Joint Video Specification (ITU-T Rec. H.264 | - ISO/IEC 14496-10 AVC)", available from ftp://ftp.imtc- - files.org/jvt-experts/2003_03_Pattaya/JVT-G050r1.zip, May - 2003. - [10] A. Luthra, G.J. Sullivan, and T. Wiegand (eds.), Special Issue - on H.264/AVC. IEEE Transactions on Circuits and Systems on - Video Technology, July 2003. - [11] P. Borgwardt, "Handling Interlaced Video in H.26L", VCEG- - N57r2, available from http://ftp3.itu.int/av-arch/video- - site/0109_San/VCEG-N57r2.doc, September 2001. - -Wenger et. al. Expires December 2004 [Page 74] - -Internet Draft July, 2004 - - [12] C. Borman et. Al., "RTP Payload Format for the 1998 Version of - ITU-T Rec. H.263 Video (H.263+)", RFC 2429, October 1998. - [13] ISO/IEC IS 14496-2. - [14] S. Wenger, "H.26L over IP", IEEE Transaction on Circuits and - Systems for Video technology, July 2003. - [15] S. Wenger, "H.26L over IP: The IP Network Adaptation Layer", - Proceedings Packet Video Workshop 02, April 2002 - [16] T. Stockhammer, M.M. Hannuksela, and S. Wenger, "H.26L/JVT - Coding Network Abstraction Layer and IP-based Transport" in - Proc. ICIP 2002, Rochester, NY, September 2002. - [17] ITU-T Recommendation H.241, "Extended video procedures and - control signals for H.300 series terminals", 2004. - [18] H. Schulzrinne and S. Casner, "RTP Profile for Audio and Video - Conferences with Minimal Control", STD 65, RFC 3551, July - 2003. - [19] B. Cain, S. Deering, I. Kouvelas, B. Fenner, and A. - Thyagarajan, "Internet Group Management Protocol, Version 3", - RFC 3376, October 2002. - [20] ITU-T Recommendation H.223, "Multiplexing protocol for low bit - rate multimedia communication", July 2001. - [21] J. Rosenberg, H. Schulzrinne, "An RTP Payload Format for - Generic Forward Error Correction", RFC 2733, December 1999. - [22] T. Stockhammer, T. Wiegand, T. Oelbaum, and F. Obermeier, - "Video Coding and Transport Layer Techniques for H.264/AVC- - Based Transmission over Packet-Lossy Networks", IEEE - International Conference on Image Processing (ICIP 2003), - Barcelona, Spain, September 2003. - [23] V. Varsa, M. Karczewicz, "Slice interleaving in compressed - video packetization", Packet Video Workshop 2000. - [24] S.H. Kang and A. Zakhor, "Packet scheduling algorithm for - wireless video streaming," International Packet Video Workshop - 2002, available http://www.pv2002.org. - [25] M.M. Hannuksela, "Enhanced concept of GOP", JVT-B042, - available http://ftp3.itu.int/av-arch/video-site/0201_Gen/JVT- - B042.doc , January 2002. - [26] S. Wenger, "Video Redundancy Coding in H.263+", 1997 - International Workshop on Audio-Visual Services over Packet - Networks, September 1997. - [27] Y.-K. Wang, M.M. Hannuksela, and M. Gabbouj, "Error Resilient - Video Coding Using Unequally Protected Key Pictures", in Proc. - International Workshop VLBV03, September 2003. - [28] J. van der Meer, D. Mackie, V. Swaminathan, D. Singer, and P. - Gentric, "RTP Payload Format for Transport of MPEG-4 - Elementary Streams", RFC 3640, November 2003. - [29] Baugher, McGrew, Carrara, Naslund, and Norrman, "The Secure - Real-time Transport Protocol," RFC 3711, Internet Engineering - Task Force, March 2004. - [30] H. Schulzrinne, A. Rao, R. Lanphier, "Real Time Streaming - Protocol (RTSP)", RFC 2326, Internet Engineering Task Force, - April 1998. - - -Wenger et. al. Expires December 2004 [Page 75] - -Internet Draft July, 2004 - - [31] M. Handley, C. Perkins, E. 
Whelan, "Session Announcement - Protocol", RFC 2974, Internet Engineering Task Force, June - 2001. - [32] ISO/IEC 14496-15: "Information technology - Coding of audio- - visual objects - Part 15: Advanced Video Coding (AVC) file - format". - [33] D. Singer, and R. Castagno, "MIME Type Registrations for 3GPP - Multimedia files", Internet Draft, - draft-singer-avt-3gpp-mime-01, Sep 2003. - - - Author's Addresses - - Stephan Wenger Phone: +49-172-300-0813 - TU Berlin / Teles AG Email: stewe@stewe.org - Franklinstr. 28-29 - D-10587 Berlin - Germany - - Miska M. Hannuksela Phone: +358-7180-73151 - Nokia Corporation Email: miska.hannuksela@nokia.com - P.O. Box 100 - 33721 Tampere - Finland - - Thomas Stockhammer Phone: +49-89-28923474 - Institute for Communications Eng. Email: stockhammer@ei.tum.de - Munich University of Technology - D-80290 Munich - Germany - - Magnus Westerlund Phone: +46-8-4048287 - Multimedia Technologies Email: - Ericsson Research EAB/TVA/A magnus.westerlund@ericsson.com - Ericsson AB - Torshamsgatan 23 - SE-164 80 Stockholm - Sweden - - David Singer Phone +1 408 974-3162 - QuickTime Engineering Email: singer@apple.com - Apple - 1 Infinite Loop MS 302-3MT - Cupertino - CA 95014 - USA - - -18. RFC Editor Considerations - - - -Wenger et. al. Expires December 2004 [Page 76] - -Internet Draft July, 2004 - - The RFC editor is requested to remove this section and Annex A - before publications as a RFC. The RFC editor is also requested to - replace all occurrences of XXXX with the RFC number this document - receive. - - If available at the time of publication please do update reference - 33 with the assigned RFC number. - - -Annex A: Changes relative to draft-ietf-avt-rtp-h264-08.txt - - [This section will be removed in a future version of this draft.] - - This memo contains the following technical changes relative to the - previous I-D: - - o Editorial fixes as requested by the I-D review - o Fixed table and figure numbering - o Clarified the term network element and introduced MANE - abbreviation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Wenger et. al. Expires December 2004 [Page 77] - - - diff --git a/include/mpeg4ip_version.h b/include/mpeg4ip_version.h index 691d444b..e7524919 100644 --- a/include/mpeg4ip_version.h +++ b/include/mpeg4ip_version.h @@ -1,6 +1,6 @@ #define MPEG4IP_PACKAGE "mpeg4ip" -#define MPEG4IP_VERSION "1.5.14" +#define MPEG4IP_VERSION "1.5.15" #define MPEG4IP_MAJOR_VERSION 0x1 #define MPEG4IP_MINOR_VERSION 0x5 -#define MPEG4IP_CVS_VERSION 0x14 +#define MPEG4IP_CVS_VERSION 0x15 #define MPEG4IP_HEX_VERSION ((MPEG4IP_MAJOR_VERSION << 16) | (MPEG4IP_MINOR_VERSION << 8) | MPEG4IP_CVS_VERSION) diff --git a/lib/mpeg2ps/mpeg2ps.c b/lib/mpeg2ps/mpeg2ps.c index e45ff334..08972bd0 100644 --- a/lib/mpeg2ps/mpeg2ps.c +++ b/lib/mpeg2ps/mpeg2ps.c @@ -1063,6 +1063,7 @@ static void get_info_from_frame (mpeg2ps_stream_t *sptr, sptr->samples_per_frame = MP4AV_Mp3GetHdrSamplingWindow(hdr); sptr->bitrate = MP4AV_Mp3GetBitRate(hdr) * 1000; // give bps, not kbps sptr->layer = MP4AV_Mp3GetHdrLayer(hdr); + sptr->version = MP4AV_Mp3GetHdrVersion(hdr); } else if (sptr->m_stream_id == 0xbd) { if (sptr->m_substream_id >= 0xa0) { // PCM - ??? 
@@ -1653,10 +1654,29 @@ const char *mpeg2ps_get_audio_stream_name (mpeg2ps_t *ps, return "none"; } if (ps->audio_streams[streamno]->m_stream_id >= 0xc0) { - switch (ps->audio_streams[streamno]->layer) { - case 0: return "MP1"; - case 1: return "MP2"; - case 2: return "MP3"; + switch (ps->audio_streams[streamno]->version) { + case 3: + switch (ps->audio_streams[streamno]->layer) { + case 3: return "MP1 layer 1"; + case 2: return "MP1 layer 2"; + case 1: return "MP1 layer 3"; + } + break; + case 2: + switch (ps->audio_streams[streamno]->layer) { + case 3: return "MP2 layer 1"; + case 2: return "MP2 layer 2"; + case 1: return "MP2 layer 3"; + } + break; + case 0: + switch (ps->audio_streams[streamno]->layer) { + case 3: return "MP2.5 layer 1"; + case 2: return "MP2.5 layer 2"; + case 1: return "MP2.5 layer 3"; + } + break; + break; } return "unknown mpeg layer"; } diff --git a/lib/mpeg2ps/mpeg2ps_private.h b/lib/mpeg2ps/mpeg2ps_private.h index 237d9ed2..0dadc2af 100644 --- a/lib/mpeg2ps/mpeg2ps_private.h +++ b/lib/mpeg2ps/mpeg2ps_private.h @@ -100,7 +100,8 @@ typedef struct mpeg2ps_stream_t uint32_t channels; uint32_t bitrate; uint32_t samples_per_frame; - uint32_t layer; + uint8_t layer; + uint8_t version; // video stuff uint32_t h, w; double frame_rate; diff --git a/lib/mpeg2ps/ps_info.cpp b/lib/mpeg2ps/ps_info.cpp index 1cbf95a6..a069bc01 100644 --- a/lib/mpeg2ps/ps_info.cpp +++ b/lib/mpeg2ps/ps_info.cpp @@ -123,7 +123,7 @@ int main(int argc, char** argv) printf(" No streams\n"); } else { for (ix = 0; ix < mpeg2ps_get_audio_stream_count(ps); ix++) { - printf(" stream %d: %s %u channels %u sample rate %u bitrate\n", + printf(" stream %d: %s, %u channels %u sample rate %u bitrate\n", ix, mpeg2ps_get_audio_stream_name(ps, ix), mpeg2ps_get_audio_stream_channels(ps, ix), diff --git a/lib/mpeg2t/mpeg2_transport.c b/lib/mpeg2t/mpeg2_transport.c index cb3f9c6c..6cdd3dc0 100644 --- a/lib/mpeg2t/mpeg2_transport.c +++ b/lib/mpeg2t/mpeg2_transport.c @@ -332,6 +332,9 @@ static void create_es (mpeg2t_t *ptr, case 2: es->is_video = 1; break; + case 0x10: + es->is_video = 3; // mpeg4 + break; case 0x1b: es->is_video = 2; break; @@ -634,7 +637,7 @@ void mpeg2t_finished_es_work (mpeg2t_es_t *es_pid, { mpeg2t_frame_t *p; #if 1 - mpeg2t_message(LOG_ERR, "pid %x pts %d "U64" listing %d", + mpeg2t_message(LOG_WARNING, "pid %x pts %d "U64" listing %d", es_pid->pid.pid, es_pid->work->have_ps_ts, es_pid->work->ps_ts, es_pid->save_frames); #endif @@ -880,6 +883,9 @@ static int mpeg2t_process_es (mpeg2t_t *ptr, // mpeg1/mpeg2 audio (mp3 codec) ret = process_mpeg2t_mpeg_audio(es_pid, esptr, buflen); break; + case 0x10: + ret = process_mpeg2t_mpeg4_video(es_pid, esptr, buflen); + break; case 129: ret = process_mpeg2t_ac3_audio(es_pid, esptr, buflen); break; @@ -1164,6 +1170,9 @@ int mpeg2t_write_stream_info (mpeg2t_es_t *es_pid, // mpeg1/mpeg2 audio (mp3 codec) ret = mpeg2t_mpeg_audio_info(es_pid, buffer, buflen); break; + case 0x10: + ret = mpeg2t_mpeg4_video_info(es_pid, buffer, buflen); + break; case 129: ret = mpeg2t_ac3_audio_info(es_pid, buffer, buflen); break; diff --git a/lib/mpeg2t/mpeg2t_private.h b/lib/mpeg2t/mpeg2t_private.h index 2ada3506..d5f0c88d 100644 --- a/lib/mpeg2t/mpeg2t_private.h +++ b/lib/mpeg2t/mpeg2t_private.h @@ -41,7 +41,10 @@ int process_mpeg2t_h264_video(mpeg2t_es_t *es_pid, const uint8_t *esptr, uint32_t buflen); int mpeg2t_h264_video_info(mpeg2t_es_t *es_pid, char *buffer, size_t buflen); - +int process_mpeg2t_mpeg4_video(mpeg2t_es_t *es_pid, + const uint8_t *esptr, + uint32_t buflen); 
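
(For reference while reading the mpeg2_transport.c changes above: the new case 0x10 is the ISO/IEC 13818-1 stream_type assigned to MPEG-4 Part 2 visual, alongside 0x01/0x02 for MPEG-1/2 video, 0x1b for H.264/AVC, and 0x81, the private assignment commonly used for AC-3. The helper below is illustrative only and not part of the patch; it just restates the mapping that create_es() and mpeg2t_process_es() rely on after this change.)

    /* Illustrative only: transport stream_type values handled here. */
    #include <stdint.h>

    static const char *ts_stream_type_name(uint8_t stream_type) {
        switch (stream_type) {
        case 0x01: return "MPEG-1 video";
        case 0x02: return "MPEG-2 video";
        case 0x10: return "MPEG-4 Part 2 video";      /* added by this patch */
        case 0x1b: return "H.264/AVC video";
        case 0x81: return "AC-3 audio (private assignment)";
        default:   return "unhandled";
        }
    }
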
+int mpeg2t_mpeg4_video_info(mpeg2t_es_t *es_pid, char *buffer, size_t buflen); void mpeg2t_malloc_es_work(mpeg2t_es_t *es_pid, uint32_t frame_len); diff --git a/lib/mpeg2t/mpeg2t_video.c b/lib/mpeg2t/mpeg2t_video.c index 57e63685..d66571cf 100644 --- a/lib/mpeg2t/mpeg2t_video.c +++ b/lib/mpeg2t/mpeg2t_video.c @@ -422,3 +422,175 @@ int process_mpeg2t_h264_video (mpeg2t_es_t *es_pid, return framesfinished; } + +int mpeg2t_mpeg4_video_info (mpeg2t_es_t *es_pid, char *buffer, size_t len) +{ + int offset; + if (es_pid->info_loaded == 0) return -1; + offset = snprintf(buffer, len, "Mpeg-4 Video, %d x %d", + es_pid->w, es_pid->h); + return 0; +} + +int process_mpeg2t_mpeg4_video (mpeg2t_es_t *es_pid, + const uint8_t *esptr, + uint32_t buflen) +{ + + bool have_header = false; + uint8_t header_value = 0; + int framesfinished = 0; +#if 0 + mpeg2t_message(LOG_DEBUG, "enter mpeg4 process"); + if (es_pid->peshdr_loaded != 0 && ((es_pid->stream_id & 0xf0) != 0xe0)) { + mpeg2t_message(LOG_ERR, "Video stream PID %x with bad stream_id %x", + es_pid->pid.pid, + es_pid->stream_id); + return 0; + } +#endif + // note - one thing that we're not handling correctly is the + // extra 0 byte before the access header. That's okay for now, but + // may run into problems later. + while (buflen > 0) { + es_pid->header <<= 8; + es_pid->header |= *esptr; + have_header = false; + if ((es_pid->header & 0xffffff00) == 0x00000100) { + have_header = true; + header_value = es_pid->header; + mpeg2t_message(LOG_DEBUG, "header %x %x %d", es_pid->header, + header_value, + es_pid->work_state); + } + + switch (es_pid->work_state) { + case 0: + if (have_header == false) break; + /* + * Work state 0 - looking for any header + */ + // have a header. + if (es_pid->work_max_size < 4096) es_pid->work_max_size = 4096; + + // always do this in state 0 to get the psts at the start + mpeg2t_malloc_es_work(es_pid, es_pid->work_max_size); + if (es_pid->work == NULL) return framesfinished; + + // Store header + es_pid->work->flags = 0; + es_pid->work->frame[0] = 0; + es_pid->work->frame[1] = 0; + es_pid->work->frame[2] = 1; + es_pid->work_loaded = 3; + es_pid->work_state = 1; // looking for VOP +#if 1 + mpeg2t_message(LOG_DEBUG, "video - state 0 header %x state %d", es_pid->header, + es_pid->work_state); +#endif + // fall into: + case 1: + case 2: + /* + * Work state 1 - looking for VOP + */ + if (es_pid->work_loaded >= es_pid->work_max_size - 5) { + uint8_t *frameptr; + es_pid->work_max_size += 1024; + frameptr = + (uint8_t *)realloc(es_pid->work, + sizeof(mpeg2t_frame_t) + + es_pid->work_max_size); + + if (frameptr == NULL) { + es_pid->work = NULL; + es_pid->work_state = 0; + es_pid->header = 0; + buflen--; + esptr++; + break; + } else { + es_pid->work = (mpeg2t_frame_t *)frameptr; + frameptr += sizeof(mpeg2t_frame_t); + es_pid->work->frame = frameptr; + } + } + + es_pid->work->frame[es_pid->work_loaded] = *esptr; + es_pid->work_loaded++; + + if (have_header) { + if (es_pid->work_state == 2) { + // we're finished with this frame + es_pid->work->frame_type = 1; + //printf("hi %d\n", es_pid->work->frame_type); + + mpeg2t_message(LOG_DEBUG, "finished work %d", es_pid->work_loaded); + // -4 might have to be -5 in the case of zero byte + mpeg2t_finished_es_work(es_pid, es_pid->work_loaded - 4); + + es_pid->have_seq_header = 0; + mpeg2t_malloc_es_work(es_pid, es_pid->work_max_size); + if (es_pid->work != NULL) { + // Put the header we just found at the start of the frame, + // then set the work state accordingly. 
+ es_pid->work->frame[0] = 0; + es_pid->work->frame[1] = 0; + es_pid->work->frame[2] = 1; + es_pid->work->frame[3] = *esptr; + es_pid->work_loaded = 4; + es_pid->work->flags = 0; + es_pid->work_state = 1; + } else { + es_pid->work_state = 0; + es_pid->header = 0; + return framesfinished; + } + } + // now, figure out what state to do based on header + if (header_value == MP4AV_MPEG4_VOP_START){ + // now, we're reading until the next header + es_pid->work_state = 2; + if (es_pid->info_loaded == 0) { + // read the VOL + if ((es_pid->work->flags & HAVE_SEQ_HEADER) == HAVE_SEQ_HEADER) { + u_int8_t TimeBits; + u_int16_t TimeTicks; + u_int16_t FrameDuration; + u_int16_t FrameWidth; + u_int16_t FrameHeight; + + if (MP4AV_Mpeg4ParseVol(es_pid->work->frame + es_pid->work->seq_header_offset, + es_pid->work_loaded - es_pid->work->seq_header_offset, + &TimeBits, + &TimeTicks, + &FrameDuration, + &FrameWidth, + &FrameHeight, + NULL, + NULL, + NULL)) { + es_pid->info_loaded = 1; + es_pid->h = FrameHeight; + es_pid->w = FrameWidth; + } + } + } + } else { + // we only really care about VOL headers + if (header_value >= MP4AV_MPEG4_VOL_START && + header_value < MP4AV_MPEG4_VOL_START + 0xf) { + // have VOL + es_pid->work->seq_header_offset = es_pid->work_loaded - 4; + es_pid->work->flags |= HAVE_SEQ_HEADER; + } + } + } + break; + } + esptr++; + buflen--; + } + return framesfinished; + +} diff --git a/lib/rtp/Makefile.am b/lib/rtp/Makefile.am index 5e000adc..aae81474 100644 --- a/lib/rtp/Makefile.am +++ b/lib/rtp/Makefile.am @@ -23,9 +23,7 @@ libuclmmbase_la_SOURCES= \ # mbus_parser.c # crypt_random.c \ # md5.c \ -# qfDES.c \ -# rijndael-alg-fst.c \ -# rijndael-api-fst.c +# qfDES.c EXTRA_DIST = \ acconfig.h \ @@ -60,8 +58,6 @@ EXTRA_DIST = \ net_udp.h \ ntp.h \ qfDES.h \ - rijndael-alg-fst.h \ - rijndael-api-fst.h \ rtp.h \ sockstorage.h \ util.h \ diff --git a/lib/rtp/rijndael-alg-fst.c b/lib/rtp/rijndael-alg-fst.c deleted file mode 100644 index a1691cb6..00000000 --- a/lib/rtp/rijndael-alg-fst.c +++ /dev/null @@ -1,441 +0,0 @@ -/* - * rijndael-alg-fst.c v2.4 April '2000 - * - * Optimised ANSI C code - * - * authors: v1.0: Antoon Bosselaers - * v2.0: Vincent Rijmen - * v2.3: Paulo Barreto - * v2.4: Vincent Rijmen - * - * This code is placed in the public domain. 
- */ - -#include -#include - -#include "rijndael-alg-fst.h" - -#include "boxes-fst.dat" - -int rijndaelKeySched(word8 k[MAXKC][4], word8 W[MAXROUNDS+1][4][4], int ROUNDS) { - /* Calculate the necessary round keys - * The number of calculations depends on keyBits and blockBits - */ - int j, r, t, rconpointer = 0; - word8 tk[MAXKC][4]; - int KC = ROUNDS - 6; - - for (j = KC-1; j >= 0; j--) { - *((word32*)tk[j]) = *((word32*)k[j]); - } - r = 0; - t = 0; - /* copy values into round key array */ - for (j = 0; (j < KC) && (r < ROUNDS + 1); ) { - for (; (j < KC) && (t < 4); j++, t++) { - *((word32*)W[r][t]) = *((word32*)tk[j]); - } - if (t == 4) { - r++; - t = 0; - } - } - - while (r < ROUNDS + 1) { /* while not enough round key material calculated */ - /* calculate new values */ - tk[0][0] ^= S[tk[KC-1][1]]; - tk[0][1] ^= S[tk[KC-1][2]]; - tk[0][2] ^= S[tk[KC-1][3]]; - tk[0][3] ^= S[tk[KC-1][0]]; - tk[0][0] ^= rcon[rconpointer++]; - - if (KC != 8) { - for (j = 1; j < KC; j++) { - *((word32*)tk[j]) ^= *((word32*)tk[j-1]); - } - } else { - for (j = 1; j < KC/2; j++) { - *((word32*)tk[j]) ^= *((word32*)tk[j-1]); - } - tk[KC/2][0] ^= S[tk[KC/2 - 1][0]]; - tk[KC/2][1] ^= S[tk[KC/2 - 1][1]]; - tk[KC/2][2] ^= S[tk[KC/2 - 1][2]]; - tk[KC/2][3] ^= S[tk[KC/2 - 1][3]]; - for (j = KC/2 + 1; j < KC; j++) { - *((word32*)tk[j]) ^= *((word32*)tk[j-1]); - } - } - /* copy values into round key array */ - for (j = 0; (j < KC) && (r < ROUNDS + 1); ) { - for (; (j < KC) && (t < 4); j++, t++) { - *((word32*)W[r][t]) = *((word32*)tk[j]); - } - if (t == 4) { - r++; - t = 0; - } - } - } - return 0; -} - -int rijndaelKeyEncToDec(word8 W[MAXROUNDS+1][4][4], int ROUNDS) { - int r; - word8 *w; - - for (r = 1; r < ROUNDS; r++) { - w = W[r][0]; - *((word32*)w) = - *((word32*)U1[w[0]]) - ^ *((word32*)U2[w[1]]) - ^ *((word32*)U3[w[2]]) - ^ *((word32*)U4[w[3]]); - - w = W[r][1]; - *((word32*)w) = - *((word32*)U1[w[0]]) - ^ *((word32*)U2[w[1]]) - ^ *((word32*)U3[w[2]]) - ^ *((word32*)U4[w[3]]); - - w = W[r][2]; - *((word32*)w) = - *((word32*)U1[w[0]]) - ^ *((word32*)U2[w[1]]) - ^ *((word32*)U3[w[2]]) - ^ *((word32*)U4[w[3]]); - - w = W[r][3]; - *((word32*)w) = - *((word32*)U1[w[0]]) - ^ *((word32*)U2[w[1]]) - ^ *((word32*)U3[w[2]]) - ^ *((word32*)U4[w[3]]); - } - return 0; -} - -/** - * Encrypt a single block. 
- */ -int rijndaelEncrypt(word8 a[16], word8 b[16], word8 rk[MAXROUNDS+1][4][4], int ROUNDS) { - int r; - word8 temp[4][4]; - - *((word32*)temp[0]) = *((word32*)(a )) ^ *((word32*)rk[0][0]); - *((word32*)temp[1]) = *((word32*)(a+ 4)) ^ *((word32*)rk[0][1]); - *((word32*)temp[2]) = *((word32*)(a+ 8)) ^ *((word32*)rk[0][2]); - *((word32*)temp[3]) = *((word32*)(a+12)) ^ *((word32*)rk[0][3]); - *((word32*)(b )) = *((word32*)T1[temp[0][0]]) - ^ *((word32*)T2[temp[1][1]]) - ^ *((word32*)T3[temp[2][2]]) - ^ *((word32*)T4[temp[3][3]]); - *((word32*)(b + 4)) = *((word32*)T1[temp[1][0]]) - ^ *((word32*)T2[temp[2][1]]) - ^ *((word32*)T3[temp[3][2]]) - ^ *((word32*)T4[temp[0][3]]); - *((word32*)(b + 8)) = *((word32*)T1[temp[2][0]]) - ^ *((word32*)T2[temp[3][1]]) - ^ *((word32*)T3[temp[0][2]]) - ^ *((word32*)T4[temp[1][3]]); - *((word32*)(b +12)) = *((word32*)T1[temp[3][0]]) - ^ *((word32*)T2[temp[0][1]]) - ^ *((word32*)T3[temp[1][2]]) - ^ *((word32*)T4[temp[2][3]]); - for (r = 1; r < ROUNDS-1; r++) { - *((word32*)temp[0]) = *((word32*)(b )) ^ *((word32*)rk[r][0]); - *((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[r][1]); - *((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[r][2]); - *((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[r][3]); - - *((word32*)(b )) = *((word32*)T1[temp[0][0]]) - ^ *((word32*)T2[temp[1][1]]) - ^ *((word32*)T3[temp[2][2]]) - ^ *((word32*)T4[temp[3][3]]); - *((word32*)(b + 4)) = *((word32*)T1[temp[1][0]]) - ^ *((word32*)T2[temp[2][1]]) - ^ *((word32*)T3[temp[3][2]]) - ^ *((word32*)T4[temp[0][3]]); - *((word32*)(b + 8)) = *((word32*)T1[temp[2][0]]) - ^ *((word32*)T2[temp[3][1]]) - ^ *((word32*)T3[temp[0][2]]) - ^ *((word32*)T4[temp[1][3]]); - *((word32*)(b +12)) = *((word32*)T1[temp[3][0]]) - ^ *((word32*)T2[temp[0][1]]) - ^ *((word32*)T3[temp[1][2]]) - ^ *((word32*)T4[temp[2][3]]); - } - /* last round is special */ - *((word32*)temp[0]) = *((word32*)(b )) ^ *((word32*)rk[ROUNDS-1][0]); - *((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[ROUNDS-1][1]); - *((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[ROUNDS-1][2]); - *((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[ROUNDS-1][3]); - b[ 0] = T1[temp[0][0]][1]; - b[ 1] = T1[temp[1][1]][1]; - b[ 2] = T1[temp[2][2]][1]; - b[ 3] = T1[temp[3][3]][1]; - b[ 4] = T1[temp[1][0]][1]; - b[ 5] = T1[temp[2][1]][1]; - b[ 6] = T1[temp[3][2]][1]; - b[ 7] = T1[temp[0][3]][1]; - b[ 8] = T1[temp[2][0]][1]; - b[ 9] = T1[temp[3][1]][1]; - b[10] = T1[temp[0][2]][1]; - b[11] = T1[temp[1][3]][1]; - b[12] = T1[temp[3][0]][1]; - b[13] = T1[temp[0][1]][1]; - b[14] = T1[temp[1][2]][1]; - b[15] = T1[temp[2][3]][1]; - *((word32*)(b )) ^= *((word32*)rk[ROUNDS][0]); - *((word32*)(b+ 4)) ^= *((word32*)rk[ROUNDS][1]); - *((word32*)(b+ 8)) ^= *((word32*)rk[ROUNDS][2]); - *((word32*)(b+12)) ^= *((word32*)rk[ROUNDS][3]); - - return 0; -} - -#ifdef INTERMEDIATE_VALUE_KAT -/** - * Encrypt only a certain number of rounds. - * Only used in the Intermediate Value Known Answer Test. 
- */ -int rijndaelEncryptRound(word8 a[4][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS, int rounds) { - int r; - word8 temp[4][4]; - - /* make number of rounds sane */ - if (rounds > ROUNDS) { - rounds = ROUNDS; - } - - *((word32*)a[0]) = *((word32*)a[0]) ^ *((word32*)rk[0][0]); - *((word32*)a[1]) = *((word32*)a[1]) ^ *((word32*)rk[0][1]); - *((word32*)a[2]) = *((word32*)a[2]) ^ *((word32*)rk[0][2]); - *((word32*)a[3]) = *((word32*)a[3]) ^ *((word32*)rk[0][3]); - - for (r = 1; (r <= rounds) && (r < ROUNDS); r++) { - *((word32*)temp[0]) = *((word32*)T1[a[0][0]]) - ^ *((word32*)T2[a[1][1]]) - ^ *((word32*)T3[a[2][2]]) - ^ *((word32*)T4[a[3][3]]); - *((word32*)temp[1]) = *((word32*)T1[a[1][0]]) - ^ *((word32*)T2[a[2][1]]) - ^ *((word32*)T3[a[3][2]]) - ^ *((word32*)T4[a[0][3]]); - *((word32*)temp[2]) = *((word32*)T1[a[2][0]]) - ^ *((word32*)T2[a[3][1]]) - ^ *((word32*)T3[a[0][2]]) - ^ *((word32*)T4[a[1][3]]); - *((word32*)temp[3]) = *((word32*)T1[a[3][0]]) - ^ *((word32*)T2[a[0][1]]) - ^ *((word32*)T3[a[1][2]]) - ^ *((word32*)T4[a[2][3]]); - *((word32*)a[0]) = *((word32*)temp[0]) ^ *((word32*)rk[r][0]); - *((word32*)a[1]) = *((word32*)temp[1]) ^ *((word32*)rk[r][1]); - *((word32*)a[2]) = *((word32*)temp[2]) ^ *((word32*)rk[r][2]); - *((word32*)a[3]) = *((word32*)temp[3]) ^ *((word32*)rk[r][3]); - } - if (rounds == ROUNDS) { - /* last round is special */ - temp[0][0] = T1[a[0][0]][1]; - temp[0][1] = T1[a[1][1]][1]; - temp[0][2] = T1[a[2][2]][1]; - temp[0][3] = T1[a[3][3]][1]; - temp[1][0] = T1[a[1][0]][1]; - temp[1][1] = T1[a[2][1]][1]; - temp[1][2] = T1[a[3][2]][1]; - temp[1][3] = T1[a[0][3]][1]; - temp[2][0] = T1[a[2][0]][1]; - temp[2][1] = T1[a[3][1]][1]; - temp[2][2] = T1[a[0][2]][1]; - temp[2][3] = T1[a[1][3]][1]; - temp[3][0] = T1[a[3][0]][1]; - temp[3][1] = T1[a[0][1]][1]; - temp[3][2] = T1[a[1][2]][1]; - temp[3][3] = T1[a[2][3]][1]; - *((word32*)a[0]) = *((word32*)temp[0]) ^ *((word32*)rk[ROUNDS][0]); - *((word32*)a[1]) = *((word32*)temp[1]) ^ *((word32*)rk[ROUNDS][1]); - *((word32*)a[2]) = *((word32*)temp[2]) ^ *((word32*)rk[ROUNDS][2]); - *((word32*)a[3]) = *((word32*)temp[3]) ^ *((word32*)rk[ROUNDS][3]); - } - - return 0; -} -#endif /* INTERMEDIATE_VALUE_KAT */ - -/** - * Decrypt a single block. 
- */ -int rijndaelDecrypt(word8 a[16], word8 b[16], word8 rk[MAXROUNDS+1][4][4], int ROUNDS) { - int r; - word8 temp[4][4]; - - *((word32*)temp[0]) = *((word32*)(a )) ^ *((word32*)rk[ROUNDS][0]); - *((word32*)temp[1]) = *((word32*)(a+ 4)) ^ *((word32*)rk[ROUNDS][1]); - *((word32*)temp[2]) = *((word32*)(a+ 8)) ^ *((word32*)rk[ROUNDS][2]); - *((word32*)temp[3]) = *((word32*)(a+12)) ^ *((word32*)rk[ROUNDS][3]); - - *((word32*)(b )) = *((word32*)T5[temp[0][0]]) - ^ *((word32*)T6[temp[3][1]]) - ^ *((word32*)T7[temp[2][2]]) - ^ *((word32*)T8[temp[1][3]]); - *((word32*)(b+ 4)) = *((word32*)T5[temp[1][0]]) - ^ *((word32*)T6[temp[0][1]]) - ^ *((word32*)T7[temp[3][2]]) - ^ *((word32*)T8[temp[2][3]]); - *((word32*)(b+ 8)) = *((word32*)T5[temp[2][0]]) - ^ *((word32*)T6[temp[1][1]]) - ^ *((word32*)T7[temp[0][2]]) - ^ *((word32*)T8[temp[3][3]]); - *((word32*)(b+12)) = *((word32*)T5[temp[3][0]]) - ^ *((word32*)T6[temp[2][1]]) - ^ *((word32*)T7[temp[1][2]]) - ^ *((word32*)T8[temp[0][3]]); - for (r = ROUNDS-1; r > 1; r--) { - *((word32*)temp[0]) = *((word32*)(b )) ^ *((word32*)rk[r][0]); - *((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[r][1]); - *((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[r][2]); - *((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[r][3]); - *((word32*)(b )) = *((word32*)T5[temp[0][0]]) - ^ *((word32*)T6[temp[3][1]]) - ^ *((word32*)T7[temp[2][2]]) - ^ *((word32*)T8[temp[1][3]]); - *((word32*)(b+ 4)) = *((word32*)T5[temp[1][0]]) - ^ *((word32*)T6[temp[0][1]]) - ^ *((word32*)T7[temp[3][2]]) - ^ *((word32*)T8[temp[2][3]]); - *((word32*)(b+ 8)) = *((word32*)T5[temp[2][0]]) - ^ *((word32*)T6[temp[1][1]]) - ^ *((word32*)T7[temp[0][2]]) - ^ *((word32*)T8[temp[3][3]]); - *((word32*)(b+12)) = *((word32*)T5[temp[3][0]]) - ^ *((word32*)T6[temp[2][1]]) - ^ *((word32*)T7[temp[1][2]]) - ^ *((word32*)T8[temp[0][3]]); - } - /* last round is special */ - *((word32*)temp[0]) = *((word32*)(b )) ^ *((word32*)rk[1][0]); - *((word32*)temp[1]) = *((word32*)(b+ 4)) ^ *((word32*)rk[1][1]); - *((word32*)temp[2]) = *((word32*)(b+ 8)) ^ *((word32*)rk[1][2]); - *((word32*)temp[3]) = *((word32*)(b+12)) ^ *((word32*)rk[1][3]); - b[ 0] = S5[temp[0][0]]; - b[ 1] = S5[temp[3][1]]; - b[ 2] = S5[temp[2][2]]; - b[ 3] = S5[temp[1][3]]; - b[ 4] = S5[temp[1][0]]; - b[ 5] = S5[temp[0][1]]; - b[ 6] = S5[temp[3][2]]; - b[ 7] = S5[temp[2][3]]; - b[ 8] = S5[temp[2][0]]; - b[ 9] = S5[temp[1][1]]; - b[10] = S5[temp[0][2]]; - b[11] = S5[temp[3][3]]; - b[12] = S5[temp[3][0]]; - b[13] = S5[temp[2][1]]; - b[14] = S5[temp[1][2]]; - b[15] = S5[temp[0][3]]; - *((word32*)(b )) ^= *((word32*)rk[0][0]); - *((word32*)(b+ 4)) ^= *((word32*)rk[0][1]); - *((word32*)(b+ 8)) ^= *((word32*)rk[0][2]); - *((word32*)(b+12)) ^= *((word32*)rk[0][3]); - - return 0; -} - -#ifdef INTERMEDIATE_VALUE_KAT -/** - * Decrypt only a certain number of rounds. - * Only used in the Intermediate Value Known Answer Test. - * Operations rearranged such that the intermediate values - * of decryption correspond with the intermediate values - * of encryption. 
- */ -int rijndaelDecryptRound(word8 a[4][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS, int rounds) { - int r, i; - word8 temp[4], shift; - - /* make number of rounds sane */ - if (rounds > ROUNDS) { - rounds = ROUNDS; - } - /* first round is special: */ - *(word32 *)a[0] ^= *(word32 *)rk[ROUNDS][0]; - *(word32 *)a[1] ^= *(word32 *)rk[ROUNDS][1]; - *(word32 *)a[2] ^= *(word32 *)rk[ROUNDS][2]; - *(word32 *)a[3] ^= *(word32 *)rk[ROUNDS][3]; - for (i = 0; i < 4; i++) { - a[i][0] = Si[a[i][0]]; - a[i][1] = Si[a[i][1]]; - a[i][2] = Si[a[i][2]]; - a[i][3] = Si[a[i][3]]; - } - for (i = 1; i < 4; i++) { - shift = (4 - i) & 3; - temp[0] = a[(0 + shift) & 3][i]; - temp[1] = a[(1 + shift) & 3][i]; - temp[2] = a[(2 + shift) & 3][i]; - temp[3] = a[(3 + shift) & 3][i]; - a[0][i] = temp[0]; - a[1][i] = temp[1]; - a[2][i] = temp[2]; - a[3][i] = temp[3]; - } - /* ROUNDS-1 ordinary rounds */ - for (r = ROUNDS-1; r > rounds; r--) { - *(word32 *)a[0] ^= *(word32 *)rk[r][0]; - *(word32 *)a[1] ^= *(word32 *)rk[r][1]; - *(word32 *)a[2] ^= *(word32 *)rk[r][2]; - *(word32 *)a[3] ^= *(word32 *)rk[r][3]; - - *((word32*)a[0]) = - *((word32*)U1[a[0][0]]) - ^ *((word32*)U2[a[0][1]]) - ^ *((word32*)U3[a[0][2]]) - ^ *((word32*)U4[a[0][3]]); - - *((word32*)a[1]) = - *((word32*)U1[a[1][0]]) - ^ *((word32*)U2[a[1][1]]) - ^ *((word32*)U3[a[1][2]]) - ^ *((word32*)U4[a[1][3]]); - - *((word32*)a[2]) = - *((word32*)U1[a[2][0]]) - ^ *((word32*)U2[a[2][1]]) - ^ *((word32*)U3[a[2][2]]) - ^ *((word32*)U4[a[2][3]]); - - *((word32*)a[3]) = - *((word32*)U1[a[3][0]]) - ^ *((word32*)U2[a[3][1]]) - ^ *((word32*)U3[a[3][2]]) - ^ *((word32*)U4[a[3][3]]); - for (i = 0; i < 4; i++) { - a[i][0] = Si[a[i][0]]; - a[i][1] = Si[a[i][1]]; - a[i][2] = Si[a[i][2]]; - a[i][3] = Si[a[i][3]]; - } - for (i = 1; i < 4; i++) { - shift = (4 - i) & 3; - temp[0] = a[(0 + shift) & 3][i]; - temp[1] = a[(1 + shift) & 3][i]; - temp[2] = a[(2 + shift) & 3][i]; - temp[3] = a[(3 + shift) & 3][i]; - a[0][i] = temp[0]; - a[1][i] = temp[1]; - a[2][i] = temp[2]; - a[3][i] = temp[3]; - } - } - if (rounds == 0) { - /* End with the extra key addition */ - *(word32 *)a[0] ^= *(word32 *)rk[0][0]; - *(word32 *)a[1] ^= *(word32 *)rk[0][1]; - *(word32 *)a[2] ^= *(word32 *)rk[0][2]; - *(word32 *)a[3] ^= *(word32 *)rk[0][3]; - } - return 0; -} -#endif /* INTERMEDIATE_VALUE_KAT */ diff --git a/lib/rtp/rijndael-alg-fst.h b/lib/rtp/rijndael-alg-fst.h deleted file mode 100644 index 45ec6925..00000000 --- a/lib/rtp/rijndael-alg-fst.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * rijndael-alg-fst.h v2.4 April '2000 - * - * Optimised ANSI C code - * - * #define INTERMEDIATE_VALUE_KAT to generate the Intermediate Value Known Answer Test. 
- */ - -#ifndef __RIJNDAEL_ALG_FST_H -#define __RIJNDAEL_ALG_FST_H - -#define BINARY_KEY_MATERIAL -#define MAXKC (256/32) -#define MAXROUNDS 14 - -#ifndef USUAL_TYPES -#define USUAL_TYPES -typedef unsigned char byte; -typedef unsigned char word8; -typedef unsigned short word16; -typedef unsigned int word32; -#endif /* USUAL_TYPES */ - -int rijndaelKeySched(word8 k[MAXKC][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS); - -int rijndaelKeyEncToDec(word8 W[MAXROUNDS+1][4][4], int ROUNDS); - -int rijndaelEncrypt(word8 a[16], word8 b[16], word8 rk[MAXROUNDS+1][4][4], int ROUNDS); - -#ifdef INTERMEDIATE_VALUE_KAT -int rijndaelEncryptRound(word8 a[4][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS, int rounds); -#endif /* INTERMEDIATE_VALUE_KAT */ - -int rijndaelDecrypt(word8 a[16], word8 b[16], word8 rk[MAXROUNDS+1][4][4], int ROUNDS); - -#ifdef INTERMEDIATE_VALUE_KAT -int rijndaelDecryptRound(word8 a[4][4], word8 rk[MAXROUNDS+1][4][4], int ROUNDS, int rounds); -#endif /* INTERMEDIATE_VALUE_KAT */ - -#endif /* __RIJNDAEL_ALG_FST_H */ diff --git a/lib/rtp/rijndael-api-fst.c b/lib/rtp/rijndael-api-fst.c deleted file mode 100644 index 4f2507a4..00000000 --- a/lib/rtp/rijndael-api-fst.c +++ /dev/null @@ -1,498 +0,0 @@ -/* - * rijndael-api-fst.c v2.4 April '2000 - * - * Optimised ANSI C code - * - * authors: v1.0: Antoon Bosselaers - * v2.0: Vincent Rijmen - * v2.1: Vincent Rijmen - * v2.2: Vincent Rijmen - * v2.3: Paulo Barreto - * v2.4: Vincent Rijmen - * - * This code is placed in the public domain. - */ - -#include -#include -#include - -#include "rijndael-alg-fst.h" -#include "rijndael-api-fst.h" - -int makeKey(keyInstance *key, BYTE direction, int keyLen, char *keyMaterial) { - word8 k[MAXKC][4]; - int i; - char *keyMat; - - if (key == NULL) { - return BAD_KEY_INSTANCE; - } - - if ((direction == DIR_ENCRYPT) || (direction == DIR_DECRYPT)) { - key->direction = direction; - } else { - return BAD_KEY_DIR; - } - - if ((keyLen == 128) || (keyLen == 192) || (keyLen == 256)) { - key->keyLen = keyLen; - } else { - return BAD_KEY_MAT; - } - - if (keyMaterial != NULL) { - strncpy(key->keyMaterial, keyMaterial, keyLen/4); - } - - key->ROUNDS = keyLen/32 + 6; - - /* initialize key schedule: */ - keyMat = key->keyMaterial; -#ifndef BINARY_KEY_MATERIAL - for (i = 0; i < key->keyLen/8; i++) { - int t, j; - - t = *keyMat++; - if ((t >= '0') && (t <= '9')) j = (t - '0') << 4; - else if ((t >= 'a') && (t <= 'f')) j = (t - 'a' + 10) << 4; - else if ((t >= 'A') && (t <= 'F')) j = (t - 'A' + 10) << 4; - else return BAD_KEY_MAT; - - t = *keyMat++; - if ((t >= '0') && (t <= '9')) j ^= (t - '0'); - else if ((t >= 'a') && (t <= 'f')) j ^= (t - 'a' + 10); - else if ((t >= 'A') && (t <= 'F')) j ^= (t - 'A' + 10); - else return BAD_KEY_MAT; - - k[i >> 2][i & 3] = (word8)j; - } -#else - for (i = 0; i < key->keyLen/8; i++) { - k[i >> 2][i & 3] = (word8)keyMat[i]; - } -#endif /* ?BINARY_KEY_MATERIAL */ - rijndaelKeySched(k, key->keySched, key->ROUNDS); - if (direction == DIR_DECRYPT) { - rijndaelKeyEncToDec(key->keySched, key->ROUNDS); - } - - return TRUE; -} - -int cipherInit(cipherInstance *cipher, BYTE mode, char *IV) { - if ((mode == MODE_ECB) || (mode == MODE_CBC) || (mode == MODE_CFB1)) { - cipher->mode = mode; - } else { - return BAD_CIPHER_MODE; - } - if (IV != NULL) { -#ifndef BINARY_KEY_MATERIAL - int i; - for (i = 0; i < MAX_IV_SIZE; i++) { - int t, j; - - t = IV[2*i]; - if ((t >= '0') && (t <= '9')) j = (t - '0') << 4; - else if ((t >= 'a') && (t <= 'f')) j = (t - 'a' + 10) << 4; - else if ((t >= 'A') && (t <= 'F')) j = 
(t - 'A' + 10) << 4; - else return BAD_CIPHER_INSTANCE; - - t = IV[2*i+1]; - if ((t >= '0') && (t <= '9')) j ^= (t - '0'); - else if ((t >= 'a') && (t <= 'f')) j ^= (t - 'a' + 10); - else if ((t >= 'A') && (t <= 'F')) j ^= (t - 'A' + 10); - else return BAD_CIPHER_INSTANCE; - - cipher->IV[i] = (word8)j; - } -#else - memcpy(cipher->IV, IV, MAX_IV_SIZE); -#endif /* ?BINARY_KEY_MATERIAL */ - } else { - memset(cipher->IV, 0, MAX_IV_SIZE); - } - return TRUE; -} - -int blockEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer) { - int i, k, numBlocks; - word8 block[16], iv[4][4]; - - if (cipher == NULL || - key == NULL || - key->direction == DIR_DECRYPT) { - return BAD_CIPHER_STATE; - } - if (input == NULL || inputLen <= 0) { - return 0; /* nothing to do */ - } - - numBlocks = inputLen/128; - - switch (cipher->mode) { - case MODE_ECB: - for (i = numBlocks; i > 0; i--) { - rijndaelEncrypt(input, outBuffer, key->keySched, key->ROUNDS); - input += 16; - outBuffer += 16; - } - break; - - case MODE_CBC: - ((word32*)block)[0] = ((word32*)cipher->IV)[0] ^ ((word32*)input)[0]; - ((word32*)block)[1] = ((word32*)cipher->IV)[1] ^ ((word32*)input)[1]; - ((word32*)block)[2] = ((word32*)cipher->IV)[2] ^ ((word32*)input)[2]; - ((word32*)block)[3] = ((word32*)cipher->IV)[3] ^ ((word32*)input)[3]; - rijndaelEncrypt(block, outBuffer, key->keySched, key->ROUNDS); - input += 16; - for (i = numBlocks - 1; i > 0; i--) { - ((word32*)block)[0] = ((word32*)outBuffer)[0] ^ ((word32*)input)[0]; - ((word32*)block)[1] = ((word32*)outBuffer)[1] ^ ((word32*)input)[1]; - ((word32*)block)[2] = ((word32*)outBuffer)[2] ^ ((word32*)input)[2]; - ((word32*)block)[3] = ((word32*)outBuffer)[3] ^ ((word32*)input)[3]; - outBuffer += 16; - rijndaelEncrypt(block, outBuffer, key->keySched, key->ROUNDS); - input += 16; - } - break; - - case MODE_CFB1: -#if STRICT_ALIGN - memcpy(iv, cipher->IV, 16); -#else /* !STRICT_ALIGN */ - *((word32*)iv[0]) = *((word32*)(cipher->IV )); - *((word32*)iv[1]) = *((word32*)(cipher->IV+ 4)); - *((word32*)iv[2]) = *((word32*)(cipher->IV+ 8)); - *((word32*)iv[3]) = *((word32*)(cipher->IV+12)); -#endif /* ?STRICT_ALIGN */ - for (i = numBlocks; i > 0; i--) { - for (k = 0; k < 128; k++) { - *((word32*) block ) = *((word32*)iv[0]); - *((word32*)(block+ 4)) = *((word32*)iv[1]); - *((word32*)(block+ 8)) = *((word32*)iv[2]); - *((word32*)(block+12)) = *((word32*)iv[3]); - rijndaelEncrypt(block, block, key->keySched, key->ROUNDS); - outBuffer[k/8] ^= (block[0] & 0x80) >> (k & 7); - iv[0][0] = (iv[0][0] << 1) | (iv[0][1] >> 7); - iv[0][1] = (iv[0][1] << 1) | (iv[0][2] >> 7); - iv[0][2] = (iv[0][2] << 1) | (iv[0][3] >> 7); - iv[0][3] = (iv[0][3] << 1) | (iv[1][0] >> 7); - iv[1][0] = (iv[1][0] << 1) | (iv[1][1] >> 7); - iv[1][1] = (iv[1][1] << 1) | (iv[1][2] >> 7); - iv[1][2] = (iv[1][2] << 1) | (iv[1][3] >> 7); - iv[1][3] = (iv[1][3] << 1) | (iv[2][0] >> 7); - iv[2][0] = (iv[2][0] << 1) | (iv[2][1] >> 7); - iv[2][1] = (iv[2][1] << 1) | (iv[2][2] >> 7); - iv[2][2] = (iv[2][2] << 1) | (iv[2][3] >> 7); - iv[2][3] = (iv[2][3] << 1) | (iv[3][0] >> 7); - iv[3][0] = (iv[3][0] << 1) | (iv[3][1] >> 7); - iv[3][1] = (iv[3][1] << 1) | (iv[3][2] >> 7); - iv[3][2] = (iv[3][2] << 1) | (iv[3][3] >> 7); - iv[3][3] = (iv[3][3] << 1) | ((outBuffer[k/8] >> (7-(k&7))) & 1); - } - } - break; - - default: - return BAD_CIPHER_STATE; - } - - return 128*numBlocks; -} - -/** - * Encrypt data partitioned in octets, using RFC 2040-like padding. 
- * - * @param input data to be encrypted (octet sequence) - * @param inputOctets input length in octets (not bits) - * @param outBuffer encrypted output data - * - * @return length in octets (not bits) of the encrypted output buffer. - */ -int padEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer) { - int i, numBlocks, padLen; - word8 block[16], *iv; - - if (cipher == NULL || - key == NULL || - key->direction == DIR_DECRYPT) { - return BAD_CIPHER_STATE; - } - if (input == NULL || inputOctets <= 0) { - return 0; /* nothing to do */ - } - - numBlocks = inputOctets/16; - - switch (cipher->mode) { - case MODE_ECB: - for (i = numBlocks; i > 0; i--) { - rijndaelEncrypt(input, outBuffer, key->keySched, key->ROUNDS); - input += 16; - outBuffer += 16; - } - padLen = 16 - (inputOctets - 16*numBlocks); - assert(padLen > 0 && padLen <= 16); - memcpy(block, input, 16 - padLen); - memset(block + 16 - padLen, padLen, padLen); - rijndaelEncrypt(block, outBuffer, key->keySched, key->ROUNDS); - break; - - case MODE_CBC: - iv = cipher->IV; - for (i = numBlocks; i > 0; i--) { - ((word32*)block)[0] = ((word32*)input)[0] ^ ((word32*)iv)[0]; - ((word32*)block)[1] = ((word32*)input)[1] ^ ((word32*)iv)[1]; - ((word32*)block)[2] = ((word32*)input)[2] ^ ((word32*)iv)[2]; - ((word32*)block)[3] = ((word32*)input)[3] ^ ((word32*)iv)[3]; - rijndaelEncrypt(block, outBuffer, key->keySched, key->ROUNDS); - iv = outBuffer; - input += 16; - outBuffer += 16; - } - padLen = 16 - (inputOctets - 16*numBlocks); - assert(padLen > 0 && padLen <= 16); - for (i = 0; i < 16 - padLen; i++) { - block[i] = input[i] ^ iv[i]; - } - for (i = 16 - padLen; i < 16; i++) { - block[i] = (BYTE)padLen ^ iv[i]; - } - rijndaelEncrypt(block, outBuffer, key->keySched, key->ROUNDS); - break; - - default: - return BAD_CIPHER_STATE; - } - - return 16*(numBlocks + 1); -} - -int blockDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer) { - int i, k, numBlocks; - word8 block[16], iv[4][4]; - - if (cipher == NULL || - key == NULL || - (cipher->mode != MODE_CFB1 && key->direction == DIR_ENCRYPT)) { - return BAD_CIPHER_STATE; - } - if (input == NULL || inputLen <= 0) { - return 0; /* nothing to do */ - } - - numBlocks = inputLen/128; - - switch (cipher->mode) { - case MODE_ECB: - for (i = numBlocks; i > 0; i--) { - rijndaelDecrypt(input, outBuffer, key->keySched, key->ROUNDS); - input += 16; - outBuffer += 16; - } - break; - - case MODE_CBC: -#if STRICT_ALIGN - memcpy(iv, cipher->IV, 16); -#else - *((word32*)iv[0]) = *((word32*)(cipher->IV )); - *((word32*)iv[1]) = *((word32*)(cipher->IV+ 4)); - *((word32*)iv[2]) = *((word32*)(cipher->IV+ 8)); - *((word32*)iv[3]) = *((word32*)(cipher->IV+12)); -#endif - for (i = numBlocks; i > 0; i--) { - rijndaelDecrypt(input, block, key->keySched, key->ROUNDS); - ((word32*)block)[0] ^= *((word32*)iv[0]); - ((word32*)block)[1] ^= *((word32*)iv[1]); - ((word32*)block)[2] ^= *((word32*)iv[2]); - ((word32*)block)[3] ^= *((word32*)iv[3]); -#if STRICT_ALIGN - memcpy(iv, input, 16); - memcpy(outBuf, block, 16); -#else - *((word32*)iv[0]) = ((word32*)input)[0]; ((word32*)outBuffer)[0] = ((word32*)block)[0]; - *((word32*)iv[1]) = ((word32*)input)[1]; ((word32*)outBuffer)[1] = ((word32*)block)[1]; - *((word32*)iv[2]) = ((word32*)input)[2]; ((word32*)outBuffer)[2] = ((word32*)block)[2]; - *((word32*)iv[3]) = ((word32*)input)[3]; ((word32*)outBuffer)[3] = ((word32*)block)[3]; -#endif - input += 16; - outBuffer += 16; - } - break; - - case 
MODE_CFB1: -#if STRICT_ALIGN - memcpy(iv, cipher->IV, 16); -#else - *((word32*)iv[0]) = *((word32*)(cipher->IV)); - *((word32*)iv[1]) = *((word32*)(cipher->IV+ 4)); - *((word32*)iv[2]) = *((word32*)(cipher->IV+ 8)); - *((word32*)iv[3]) = *((word32*)(cipher->IV+12)); -#endif - for (i = numBlocks; i > 0; i--) { - for (k = 0; k < 128; k++) { - *((word32*) block ) = *((word32*)iv[0]); - *((word32*)(block+ 4)) = *((word32*)iv[1]); - *((word32*)(block+ 8)) = *((word32*)iv[2]); - *((word32*)(block+12)) = *((word32*)iv[3]); - rijndaelEncrypt(block, block, key->keySched, key->ROUNDS); - iv[0][0] = (iv[0][0] << 1) | (iv[0][1] >> 7); - iv[0][1] = (iv[0][1] << 1) | (iv[0][2] >> 7); - iv[0][2] = (iv[0][2] << 1) | (iv[0][3] >> 7); - iv[0][3] = (iv[0][3] << 1) | (iv[1][0] >> 7); - iv[1][0] = (iv[1][0] << 1) | (iv[1][1] >> 7); - iv[1][1] = (iv[1][1] << 1) | (iv[1][2] >> 7); - iv[1][2] = (iv[1][2] << 1) | (iv[1][3] >> 7); - iv[1][3] = (iv[1][3] << 1) | (iv[2][0] >> 7); - iv[2][0] = (iv[2][0] << 1) | (iv[2][1] >> 7); - iv[2][1] = (iv[2][1] << 1) | (iv[2][2] >> 7); - iv[2][2] = (iv[2][2] << 1) | (iv[2][3] >> 7); - iv[2][3] = (iv[2][3] << 1) | (iv[3][0] >> 7); - iv[3][0] = (iv[3][0] << 1) | (iv[3][1] >> 7); - iv[3][1] = (iv[3][1] << 1) | (iv[3][2] >> 7); - iv[3][2] = (iv[3][2] << 1) | (iv[3][3] >> 7); - iv[3][3] = (iv[3][3] << 1) | ((input[k/8] >> (7-(k&7))) & 1); - outBuffer[k/8] ^= (block[0] & 0x80) >> (k & 7); - } - } - break; - - default: - return BAD_CIPHER_STATE; - } - - return 128*numBlocks; -} - -int padDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer) { - int i, numBlocks, padLen; - word8 block[16]; - word32 iv[4]; - - if (cipher == NULL || - key == NULL || - key->direction == DIR_ENCRYPT) { - return BAD_CIPHER_STATE; - } - if (input == NULL || inputOctets <= 0) { - return 0; /* nothing to do */ - } - if (inputOctets % 16 != 0) { - return BAD_DATA; - } - - numBlocks = inputOctets/16; - - switch (cipher->mode) { - case MODE_ECB: - /* all blocks but last */ - for (i = numBlocks - 1; i > 0; i--) { - rijndaelDecrypt(input, outBuffer, key->keySched, key->ROUNDS); - input += 16; - outBuffer += 16; - } - /* last block */ - rijndaelDecrypt(input, block, key->keySched, key->ROUNDS); - padLen = block[15]; - if (padLen >= 16) { - return BAD_DATA; - } - for (i = 16 - padLen; i < 16; i++) { - if (block[i] != padLen) { - return BAD_DATA; - } - } - memcpy(outBuffer, block, 16 - padLen); - break; - - case MODE_CBC: - memcpy(iv, cipher->IV, 16); - /* all blocks but last */ - for (i = numBlocks - 1; i > 0; i--) { - rijndaelDecrypt(input, block, key->keySched, key->ROUNDS); - ((word32*)block)[0] ^= iv[0]; - ((word32*)block)[1] ^= iv[1]; - ((word32*)block)[2] ^= iv[2]; - ((word32*)block)[3] ^= iv[3]; - memcpy(iv, input, 16); - memcpy(outBuffer, block, 16); - input += 16; - outBuffer += 16; - } - /* last block */ - rijndaelDecrypt(input, block, key->keySched, key->ROUNDS); - ((word32*)block)[0] ^= iv[0]; - ((word32*)block)[1] ^= iv[1]; - ((word32*)block)[2] ^= iv[2]; - ((word32*)block)[3] ^= iv[3]; - padLen = block[15]; - if (padLen <= 0 || padLen > 16) { - return BAD_DATA; - } - for (i = 16 - padLen; i < 16; i++) { - if (block[i] != padLen) { - return BAD_DATA; - } - } - memcpy(outBuffer, block, 16 - padLen); - break; - - default: - return BAD_CIPHER_STATE; - } - - return 16*numBlocks - padLen; -} - -#ifdef INTERMEDIATE_VALUE_KAT -/** - * cipherUpdateRounds: - * - * Encrypts/Decrypts exactly one full block a specified number of rounds. 
- * Only used in the Intermediate Value Known Answer Test. - * - * Returns: - * TRUE - on success - * BAD_CIPHER_STATE - cipher in bad state (e.g., not initialized) - */ -int cipherUpdateRounds(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer, int rounds) { - int j; - word8 block[4][4]; - - if (cipher == NULL || key == NULL) { - return BAD_CIPHER_STATE; - } - - for (j = 3; j >= 0; j--) { - /* parse input stream into rectangular array */ - *((word32*)block[j]) = *((word32*)(input+4*j)); - } - - switch (key->direction) { - case DIR_ENCRYPT: - rijndaelEncryptRound(block, key->keySched, key->ROUNDS, rounds); - break; - - case DIR_DECRYPT: - rijndaelDecryptRound(block, key->keySched, key->ROUNDS, rounds); - break; - - default: - return BAD_KEY_DIR; - } - - for (j = 3; j >= 0; j--) { - /* parse rectangular array into output ciphertext bytes */ - *((word32*)(outBuffer+4*j)) = *((word32*)block[j]); - } - - return TRUE; -} -#endif /* INTERMEDIATE_VALUE_KAT */ diff --git a/lib/rtp/rijndael-api-fst.h b/lib/rtp/rijndael-api-fst.h deleted file mode 100644 index 4aca46c0..00000000 --- a/lib/rtp/rijndael-api-fst.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * rijndael-api-fst.h v2.4 April '2000 - * - * Optimised ANSI C code - * - * #define INTERMEDIATE_VALUE_KAT to generate the Intermediate Value Known Answer Test. - */ - -#ifndef __RIJNDAEL_API_FST_H -#define __RIJNDAEL_API_FST_H - -#include -#include "rijndael-alg-fst.h" - -/* Defines: - Add any additional defines you need -*/ - -#define DIR_ENCRYPT 0 /* Are we encrpyting? */ -#define DIR_DECRYPT 1 /* Are we decrpyting? */ -#define MODE_ECB 1 /* Are we ciphering in ECB mode? */ -#define MODE_CBC 2 /* Are we ciphering in CBC mode? */ -#define MODE_CFB1 3 /* Are we ciphering in 1-bit CFB mode? */ -#define TRUE 1 -#define FALSE 0 -#define BITSPERBLOCK 128 /* Default number of bits in a cipher block */ - -/* Error Codes - CHANGE POSSIBLE: inclusion of additional error codes */ -#define BAD_KEY_DIR -1 /* Key direction is invalid, e.g., unknown value */ -#define BAD_KEY_MAT -2 /* Key material not of correct length */ -#define BAD_KEY_INSTANCE -3 /* Key passed is not valid */ -#define BAD_CIPHER_MODE -4 /* Params struct passed to cipherInit invalid */ -#define BAD_CIPHER_STATE -5 /* Cipher in wrong state (e.g., not initialized) */ -#define BAD_BLOCK_LENGTH -6 -#define BAD_CIPHER_INSTANCE -7 -#define BAD_DATA -8 /* Data contents are invalid, e.g., invalid padding */ -#define BAD_OTHER -9 /* Unknown error */ - -/* CHANGE POSSIBLE: inclusion of algorithm specific defines */ -#define MAX_KEY_SIZE 64 /* # of ASCII char's needed to represent a key */ -#define MAX_IV_SIZE 16 /* # bytes needed to represent an IV */ - -/* Typedefs: - - Typedef'ed data storage elements. Add any algorithm specific -parameters at the bottom of the structs as appropriate. -*/ - -typedef unsigned char BYTE; - -/* The structure for key information */ -typedef struct { - BYTE direction; /* Key used for encrypting or decrypting? 
*/ - int keyLen; /* Length of the key */ - char keyMaterial[MAX_KEY_SIZE+1]; /* Raw key data in ASCII, e.g., user input or KAT values */ - /* The following parameters are algorithm dependent, replace or add as necessary */ - int ROUNDS; /* key-length-dependent number of rounds */ - int blockLen; /* block length */ - word8 keySched[MAXROUNDS+1][4][4]; /* key schedule */ -} keyInstance; - -/* The structure for cipher information */ -typedef struct { /* changed order of the components */ - BYTE mode; /* MODE_ECB, MODE_CBC, or MODE_CFB1 */ - BYTE IV[MAX_IV_SIZE]; /* A possible Initialization Vector for ciphering */ - /* Add any algorithm specific parameters needed here */ - int blockLen; /* Sample: Handles non-128 bit block sizes (if available) */ -} cipherInstance; - -/* Function prototypes */ -/* CHANGED: nothing - TODO: implement the following extensions to setup 192-bit and 256-bit block lengths: - makeKeyEx(): parameter blockLen added - -- this parameter is absolutely necessary if you want to - setup the round keys in a variable block length setting - cipherInitEx(): parameter blockLen added (for obvious reasons) - */ - -int makeKey(keyInstance *key, BYTE direction, int keyLen, char *keyMaterial); - -int cipherInit(cipherInstance *cipher, BYTE mode, char *IV); - -int blockEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer); - -int padEncrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer); - -int blockDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer); - -int padDecrypt(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputOctets, BYTE *outBuffer); - -#ifdef INTERMEDIATE_VALUE_KAT -int cipherUpdateRounds(cipherInstance *cipher, keyInstance *key, - BYTE *input, int inputLen, BYTE *outBuffer, int Rounds); -#endif /* INTERMEDIATE_VALUE_KAT */ - -#endif /* __RIJNDAEL_API_FST_H */ diff --git a/mp4live_encoder_check.sh b/mp4live_encoder_check.sh index a40e7743..d3ee8779 100644 --- a/mp4live_encoder_check.sh +++ b/mp4live_encoder_check.sh @@ -19,6 +19,7 @@ have_ffmpeg=no have_xvid=no have_x264=no have_lame=no +have_twolame=no have_faac=no have_one=no if grep HAVE_FFMPEG mpeg4ip_config.h | grep define > /dev/null; then @@ -51,6 +52,12 @@ if grep HAVE_FAAC mpeg4ip_config.h | grep define > /dev/null; then else echo "*** faac encoder is not installed" fi +if grep HAVE_TWOLAME mpeg4ip_config.h | grep define > /dev/null; then + echo " twolame encoder is installed" + have_one=yes +else + echo "*** twolame encoder is not installed" +fi if test have_one = "no"; then echo diff --git a/player/plugin/video/ffmpeg/ffmpeg.cpp b/player/plugin/video/ffmpeg/ffmpeg.cpp index b9850f7c..34b34898 100644 --- a/player/plugin/video/ffmpeg/ffmpeg.cpp +++ b/player/plugin/video/ffmpeg/ffmpeg.cpp @@ -90,6 +90,9 @@ static enum CodecID ffmpeg_find_codec (const char *stream_type, if (type == MPEG2T_ST_H264_VIDEO) { return CODEC_ID_H264; } + if (type == MPEG2T_ST_MPEG4_VIDEO) { + return CODEC_ID_MPEG4; + } return CODEC_ID_NONE; } diff --git a/server/mp4live/Makefile.am b/server/mp4live/Makefile.am index b8462bff..95a77772 100644 --- a/server/mp4live/Makefile.am +++ b/server/mp4live/Makefile.am @@ -39,6 +39,8 @@ endif libmp4live_la_SOURCES = \ + audio_alsa_source.cpp \ + audio_alsa_source.h \ audio_encoder_base.cpp \ audio_encoder_class.cpp \ audio_encoder.h \ @@ -50,10 +52,10 @@ libmp4live_la_SOURCES = \ audio_lame.h \ audio_l16.cpp \ audio_l16.h \ - audio_alsa_source.cpp \ - 
audio_alsa_source.h \ audio_oss_source.cpp \ audio_oss_source.h \ + audio_twolame.cpp \ + audio_twolame.h \ config_list.cpp \ config_list.h \ encoder_gui_options.h \ @@ -157,6 +159,7 @@ mp4live_LDADD = \ @GTK_LIBS@ @GLIB_LIBS@ \ @FAAC_LIB@ \ @LAME_LIB@ \ + @TWOLAME_LIB@ \ ./h261/libmp4live_h261.la \ $(GUIADD) \ libmp4live.la \ diff --git a/server/mp4live/audio_encoder_base.cpp b/server/mp4live/audio_encoder_base.cpp index 1d7e37fd..76fb6914 100644 --- a/server/mp4live/audio_encoder_base.cpp +++ b/server/mp4live/audio_encoder_base.cpp @@ -29,6 +29,9 @@ #ifdef HAVE_LAME #include "audio_lame.h" #endif +#ifdef HAVE_TWOLAME +#include "audio_twolame.h" +#endif #include "audio_faac.h" #ifdef HAVE_FFMPEG #include "audio_ffmpeg.h" @@ -54,6 +57,11 @@ void AudioProfileCheck (CAudioProfile *ap) #ifdef HAVE_FFMPEG return; #else +#endif + } else if (!strcasecmp(encoderName, AUDIO_ENCODER_TWOLAME)) { +#ifdef HAVE_TWOLAME + return; +#else #endif } else if (strcasecmp(encoderName, AUDIO_ENCODER_G711) == 0) { return; @@ -89,6 +97,12 @@ CAudioEncoder* AudioEncoderBaseCreate(CAudioProfile *ap, return new CLameAudioEncoder(ap, next, srcChannels, srcSampleRate, mtu, realTime); #else error_message("lame encoder not available in this build"); +#endif + } else if (!strcasecmp(encoderName, AUDIO_ENCODER_TWOLAME)) { +#ifdef HAVE_TWOLAME + return new CTwoLameAudioEncoder(ap, next, srcChannels, srcSampleRate, mtu, realTime); +#else + error_message("twolame encoder not available in this build"); #endif } else if (strcasecmp(encoderName, VIDEO_ENCODER_FFMPEG) == 0) { #ifdef HAVE_FFMPEG @@ -139,6 +153,18 @@ MediaType get_base_audio_mp4_fileinfo (CAudioProfile *pConfig, return UNDEFINEDFRAME; #endif + } else if (!strcasecmp(encoderName, AUDIO_ENCODER_TWOLAME)) { +#ifdef HAVE_TWOLAME + return twolame_mp4_fileinfo(pConfig, mpeg4, + isma_compliant, + audioProfile, + audioConfig, + audioConfigLen, + mp4_audio_type); +#else + return UNDEFINEDFRAME; +#endif + } else if (!strcasecmp(encoderName, VIDEO_ENCODER_FFMPEG)) { #ifdef HAVE_FFMPEG return ffmpeg_mp4_fileinfo(pConfig, mpeg4, @@ -201,6 +227,16 @@ media_desc_t *create_base_audio_sdp (CAudioProfile *pConfig, #else return NULL; #endif + } else if (!strcasecmp(encoderName, AUDIO_ENCODER_TWOLAME)) { +#ifdef HAVE_TWOLAME + return twolame_create_audio_sdp(pConfig, mpeg4, + isma_compliant, + audioProfile, + audioConfig, + audioConfigLen); +#else + return NULL; +#endif } else if (!strcasecmp(encoderName, VIDEO_ENCODER_FFMPEG)) { #ifdef HAVE_FFMPEG @@ -305,6 +341,21 @@ bool get_base_audio_rtp_info (CAudioProfile *pConfig, ud); #else return false; +#endif + } else if (!strcasecmp(encoderName, AUDIO_ENCODER_TWOLAME)) { +#ifdef HAVE_TWOLAME + return twolame_get_audio_rtp_info(pConfig, + audioFrameType, + audioTimeScale, + audioPayloadNumber, + audioPayloadBytesPerPacket, + audioPayloadBytesPerFrame, + audioQueueMaxCount, + audio_set_header, + audio_set_jumbo, + ud); +#else + return false; #endif } else if (!strcasecmp(encoderName, VIDEO_ENCODER_FFMPEG)) { #ifdef HAVE_FFMPEG diff --git a/server/mp4live/audio_encoder_tables.cpp b/server/mp4live/audio_encoder_tables.cpp index 5d2f51c7..bccd3897 100644 --- a/server/mp4live/audio_encoder_tables.cpp +++ b/server/mp4live/audio_encoder_tables.cpp @@ -22,6 +22,7 @@ #include "mp4live.h" #include "audio_encoder.h" #include "audio_lame.h" +#include "audio_twolame.h" #include "audio_faac.h" #include "audio_g711.h" #include "audio_l16.h" @@ -43,6 +44,9 @@ void InitAudioEncoders (void) #ifdef HAVE_LAME AddAudioEncoderTable(&lame_audio_encoder_table); 
#endif +#ifdef HAVE_TWOLAME + AddAudioEncoderTable(&twolame_audio_encoder_table); +#endif #ifdef HAVE_FAAC AddAudioEncoderTable(&faac_audio_encoder_table); #endif diff --git a/server/mp4live/audio_twolame.cpp b/server/mp4live/audio_twolame.cpp new file mode 100644 index 00000000..bd61f50f --- /dev/null +++ b/server/mp4live/audio_twolame.cpp @@ -0,0 +1,377 @@ +/* + * The contents of this file are subject to the Mozilla Public + * License Version 1.1 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * The Original Code is MPEG4IP. + * + * The Initial Developer of the Original Code is Cisco Systems Inc. + * Portions created by Cisco Systems Inc. are + * Copyright (C) Cisco Systems Inc. 2000, 2001. All Rights Reserved. + * + * Contributor(s): + * Dave Mackie dmackie@cisco.com + */ + +#include "mp4live.h" +#ifdef HAVE_TWOLAME +#include "audio_twolame.h" +#include + +GUI_BOOL(gui_mp3use14, CFG_RTP_USE_MP3_PAYLOAD_14, "Transmit MP3 using RFC-2250"); +DECLARE_TABLE(twolame_gui_options) = { + TABLE_GUI(gui_mp3use14), +}; +DECLARE_TABLE_FUNC(twolame_gui_options); + +static const uint32_t twolame_sample_rates[] = { + 44100, 48000, 32000 +}; + +static const uint twolame_bitrate_table[] = + { 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384 }; + +#define NUM_BITRATES (NUM_ELEMENTS_IN_ARRAY(twolame_bitrate_table)) + +static uint32_t *twolame_bitrate_for_samplerate (uint32_t samplerate, + uint8_t chans, + uint32_t *ret_size) +{ + uint iy; + + uint32_t *ret = (uint32_t *)malloc(NUM_BITRATES * sizeof(uint32_t)); + *ret_size = 0; + twolame_options *twolameParams; + + for (iy = 0; iy < NUM_BITRATES; iy++) { + twolameParams = twolame_init(); + twolame_set_num_channels(twolameParams, chans); + twolame_set_in_samplerate(twolameParams, samplerate); + twolame_set_mode(twolameParams, + (chans == 1 ? 
TWOLAME_MONO : TWOLAME_STEREO)); + twolame_set_VBR(twolameParams, 0); + twolame_set_brate(twolameParams, twolame_bitrate_table[iy]); + + if (twolame_init_params(twolameParams) != -1) { + if (twolame_get_in_samplerate(twolameParams) == twolame_get_out_samplerate(twolameParams)) { + ret[*ret_size] = twolame_bitrate_table[iy] * 1000; + *ret_size = *ret_size + 1; + } + } + twolame_close(&twolameParams); + } + return ret; +} + +audio_encoder_table_t twolame_audio_encoder_table = { + "MP1 Layer 2 - twolame", + AUDIO_ENCODER_TWOLAME, + AUDIO_ENCODING_MP3, + twolame_sample_rates, + NUM_ELEMENTS_IN_ARRAY(twolame_sample_rates), + twolame_bitrate_for_samplerate, + 2, + twolame_gui_options_f, +}; + +MediaType twolame_mp4_fileinfo (CAudioProfile *pConfig, + bool *mpeg4, + bool *isma_compliant, + uint8_t *audioProfile, + uint8_t **audioConfig, + uint32_t *audioConfigLen, + uint8_t *mp4AudioType) +{ + *mpeg4 = false; // legal in an mp4 - create an iod + *isma_compliant = false; + *audioProfile = 0xfe; + *audioConfig = NULL; + *audioConfigLen = 0; + if (mp4AudioType != NULL) { + *mp4AudioType = MP4_MPEG1_AUDIO_TYPE; + } + return MP3AUDIOFRAME; +} + +media_desc_t *twolame_create_audio_sdp (CAudioProfile *pConfig, + bool *mpeg4, + bool *isma_compliant, + uint8_t *audioProfile, + uint8_t **audioConfig, + uint32_t *audioConfigLen) +{ + media_desc_t *sdpMediaAudio; + format_list_t *sdpMediaAudioFormat; + + twolame_mp4_fileinfo(pConfig, mpeg4, isma_compliant, audioProfile, + audioConfig, audioConfigLen, NULL); + + sdpMediaAudio = MALLOC_STRUCTURE(media_desc_t); + memset(sdpMediaAudio, 0, sizeof(*sdpMediaAudio)); + + sdpMediaAudioFormat = MALLOC_STRUCTURE(format_list_t); + memset(sdpMediaAudioFormat, 0, sizeof(*sdpMediaAudioFormat)); + sdpMediaAudio->fmt_list = sdpMediaAudioFormat; + sdpMediaAudioFormat->media = sdpMediaAudio; + + if (pConfig->GetBoolValue(CFG_RTP_USE_MP3_PAYLOAD_14)) { + sdpMediaAudioFormat->fmt = strdup("14"); + sdpMediaAudioFormat->rtpmap_clock_rate = 90000; + } else { + sdpMediaAudioFormat->rtpmap_clock_rate = + pConfig->GetIntegerValue(CFG_AUDIO_SAMPLE_RATE); + sdpMediaAudioFormat->fmt = strdup("97"); + } + sdpMediaAudioFormat->rtpmap_name = strdup("MPA"); + + return sdpMediaAudio; + +} + +static bool twolame_set_rtp_header (struct iovec *iov, + int queue_cnt, + void *ud, + bool *mbit) +{ + *mbit = 1; + *(uint32_t *)ud = 0; + iov[0].iov_base = ud; + iov[0].iov_len = 4; + return true; +} + +static bool twolame_set_rtp_jumbo (struct iovec *iov, + uint32_t dataOffset, + uint32_t bufferLen, + uint32_t rtpPacketMax, + bool &mbit, + void *ud) +{ + uint8_t *payloadHeader = (uint8_t *)ud; + uint32_t send; + + payloadHeader[0] = 0; + payloadHeader[1] = 0; + payloadHeader[2] = (dataOffset >> 8); + payloadHeader[3] = (dataOffset & 0xff); + + send = MIN(bufferLen - dataOffset, rtpPacketMax - 4); + + iov[0].iov_base = payloadHeader; + iov[0].iov_len = 4; + iov[1].iov_len = send; + + mbit = (dataOffset == 0); + return true; +} + +bool twolame_get_audio_rtp_info (CAudioProfile *pConfig, + MediaType *audioFrameType, + uint32_t *audioTimeScale, + uint8_t *audioPayloadNumber, + uint8_t *audioPayloadBytesPerPacket, + uint8_t *audioPayloadBytesPerFrame, + uint8_t *audioQueueMaxCount, + audio_set_rtp_header_f *audio_set_header, + audio_set_rtp_jumbo_frame_f *audio_set_jumbo, + void **ud) +{ + *audioFrameType = MP3AUDIOFRAME; + if (pConfig->GetBoolValue(CFG_RTP_USE_MP3_PAYLOAD_14)) { + *audioPayloadNumber = 14; + *audioTimeScale = 90000; + } else { + *audioPayloadNumber = 97; + *audioTimeScale = 
pConfig->GetIntegerValue(CFG_AUDIO_SAMPLE_RATE); + } + *audioPayloadBytesPerPacket = 4; + *audioPayloadBytesPerFrame = 0; + *audioQueueMaxCount = 8; + *audio_set_header = twolame_set_rtp_header; + *audio_set_jumbo = twolame_set_rtp_jumbo; + *ud = malloc(4); + memset(*ud, 0, 4); + return true; +} + + +CTwoLameAudioEncoder::CTwoLameAudioEncoder(CAudioProfile *ap, + CAudioEncoder *next, + u_int8_t srcChannels, + u_int32_t srcSampleRate, + uint16_t mtu, + bool realTime) : + CAudioEncoder(ap, next, srcChannels, srcSampleRate, mtu, realTime) +{ + m_mp3FrameBuffer = NULL; +} + +bool CTwoLameAudioEncoder::Init (void) +{ + if ((m_twolameParams = twolame_init()) == NULL) { + error_message("error: failed to get twolame_global_flags"); + return false; + } + twolame_set_num_channels(m_twolameParams, + Profile()->GetIntegerValue(CFG_AUDIO_CHANNELS)); + twolame_set_in_samplerate(m_twolameParams, + Profile()->GetIntegerValue(CFG_AUDIO_SAMPLE_RATE)); + twolame_set_brate(m_twolameParams, + Profile()->GetIntegerValue(CFG_AUDIO_BIT_RATE) / 1000); + twolame_set_mode(m_twolameParams, + (Profile()->GetIntegerValue(CFG_AUDIO_CHANNELS) == 1 ? TWOLAME_MONO : TWOLAME_STEREO)); + twolame_set_VBR(m_twolameParams, FALSE); + // twolame_set_quality(m_twolameParams,2); + + // no match for silent flag + + // no match for gtkflag + + // THIS IS VERY IMPORTANT. MP4PLAYER DOES NOT SEEM TO LIKE VBR + //twolame_set_bWriteVbrTag(m_twolameParams,0); + + if (twolame_init_params(m_twolameParams) == -1) { + error_message("error: failed init twolame params"); + return false; + } + if (twolame_get_in_samplerate(m_twolameParams) != twolame_get_out_samplerate(m_twolameParams)) { + error_message("warning: twolame audio sample rate mismatch - wanted %d got %d", + twolame_get_in_samplerate(m_twolameParams), + twolame_get_out_samplerate(m_twolameParams)); + Profile()->SetIntegerValue(CFG_AUDIO_SAMPLE_RATE, + twolame_get_out_samplerate(m_twolameParams)); + } + + //error_message("twolame version is %d", twolame_get_version(m_twolameParams)); + m_samplesPerFrame = 1152; // always 1152 + + m_mp3FrameMaxSize = (u_int)(1.25 * m_samplesPerFrame) + 7200; + + m_mp3FrameBufferSize = 2 * m_mp3FrameMaxSize; + + m_mp3FrameBufferLength = 0; + + m_mp3FrameBuffer = (u_int8_t*)malloc(m_mp3FrameBufferSize); + + if (!m_mp3FrameBuffer) { + return false; + } + + Initialize(); + return true; +} + +u_int32_t CTwoLameAudioEncoder::GetSamplesPerFrame() +{ + return m_samplesPerFrame; +} + +bool CTwoLameAudioEncoder::EncodeSamples( + int16_t* pSamples, + u_int32_t numSamplesPerChannel, + u_int8_t numChannels) +{ + if (numChannels != 1 && numChannels != 2) { + return false; // invalid numChannels + } + + u_int32_t mp3DataLength = 0; + + if (pSamples != NULL) { + int16_t* pLeftBuffer = NULL; + int16_t* pRightBuffer = NULL; + + if (numChannels == 1) { + pLeftBuffer = pSamples; + + // both right and left need to be the same - can't + // pass NULL as pRightBuffer + pRightBuffer = pSamples; + mp3DataLength = twolame_encode_buffer( + m_twolameParams, + pLeftBuffer, + pRightBuffer, + m_samplesPerFrame, + (unsigned char*)&m_mp3FrameBuffer[m_mp3FrameBufferLength], + m_mp3FrameBufferSize - m_mp3FrameBufferLength); + + } else { // numChannels == 2 + // let twolame handle stereo to mono conversion + mp3DataLength = + twolame_encode_buffer_interleaved(m_twolameParams, + pSamples, + m_samplesPerFrame, + (unsigned char *)&m_mp3FrameBuffer[m_mp3FrameBufferLength], + m_mp3FrameBufferSize - m_mp3FrameBufferLength); + } + } else { // pSamples == NULL + // signal to stop encoding + 
+    mp3DataLength =
+      twolame_encode_flush( m_twolameParams,
+                           (unsigned char*)&m_mp3FrameBuffer[m_mp3FrameBufferLength],
+                           m_mp3FrameBufferSize - m_mp3FrameBufferLength);
+  }
+
+  m_mp3FrameBufferLength += mp3DataLength;
+  //debug_message("audio -return from twolame_encode_buffer is %d %d", mp3DataLength, m_mp3FrameBufferLength);
+
+  return (mp3DataLength >= 0);
+}
+
+bool CTwoLameAudioEncoder::GetEncodedFrame(
+                                           u_int8_t** ppBuffer,
+                                           u_int32_t* pBufferLength,
+                                           u_int32_t* pNumSamplesPerChannel)
+{
+  const u_int8_t* mp3Frame;
+  u_int32_t mp3FrameLength;
+
+  if (!MP4AV_Mp3GetNextFrame(m_mp3FrameBuffer, m_mp3FrameBufferLength,
+                             &mp3Frame, &mp3FrameLength)) {
+    //debug_message("Can't find frame header - len %d", m_mp3FrameBufferLength);
+    return false;
+  }
+
+  // check if we have all the bytes for the MP3 frame
+  if (mp3FrameLength > m_mp3FrameBufferLength) {
+    //debug_message("Not enough in buffer - %d %d", m_mp3FrameBufferLength, mp3FrameLength);
+    return false;
+  }
+
+  // need a buffer for this MP3 frame
+  *ppBuffer = (u_int8_t*)malloc(mp3FrameLength);
+  if (*ppBuffer == NULL) {
+    error_message("Cannot alloc memory");
+    return false;
+  }
+
+  // copy the MP3 frame
+  memcpy(*ppBuffer, mp3Frame, mp3FrameLength);
+  *pBufferLength = mp3FrameLength;
+
+  // shift what remains in the buffer down
+  memmove(m_mp3FrameBuffer,
+          mp3Frame + mp3FrameLength,
+          m_mp3FrameBufferLength - mp3FrameLength);
+  m_mp3FrameBufferLength -= mp3FrameLength;
+
+  *pNumSamplesPerChannel = m_samplesPerFrame;
+
+  return true;
+}
+
+void CTwoLameAudioEncoder::StopEncoder (void)
+{
+  free(m_mp3FrameBuffer);
+  m_mp3FrameBuffer = NULL;
+  twolame_close(&m_twolameParams);
+  m_twolameParams = NULL;
+}
+
+#endif // HAVE_TWOLAME
diff --git a/server/mp4live/audio_twolame.h b/server/mp4live/audio_twolame.h
new file mode 100644
index 00000000..08b7671e
--- /dev/null
+++ b/server/mp4live/audio_twolame.h
@@ -0,0 +1,96 @@
+/*
+ * The contents of this file are subject to the Mozilla Public
+ * License Version 1.1 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * rights and limitations under the License.
+ *
+ * The Original Code is MPEG4IP.
+ *
+ * The Initial Developer of the Original Code is Cisco Systems Inc.
+ * Portions created by Cisco Systems Inc. are
+ * Copyright (C) Cisco Systems Inc. 2000, 2001. All Rights Reserved.
+ *
+ * Contributor(s):
+ *              Dave Mackie             dmackie@cisco.com
+ */
+
+#ifndef __AUDIO_TWOLAME_H__
+#define __AUDIO_TWOLAME_H__
+
+#include "audio_encoder.h"
+#ifdef HAVE_TWOLAME
+#include <twolame.h>
+#include <mp4av.h>
+
+media_desc_t *twolame_create_audio_sdp(CAudioProfile *pConfig,
+                                       bool *mpeg4,
+                                       bool *isma_compliant,
+                                       uint8_t *audioProfile,
+                                       uint8_t **audioConfig,
+                                       uint32_t *audioConfigLen);
+MediaType twolame_mp4_fileinfo(CAudioProfile *pConfig,
+                               bool *mpeg4,
+                               bool *isma_compliant,
+                               uint8_t *audioProfile,
+                               uint8_t **audioConfig,
+                               uint32_t *audioConfigLen,
+                               uint8_t *mp4AudioType);
+
+bool twolame_get_audio_rtp_info (CAudioProfile *pConfig,
+                                 MediaType *audioFrameType,
+                                 uint32_t *audioTimeScale,
+                                 uint8_t *audioPayloadNumber,
+                                 uint8_t *audioPayloadBytesPerPacket,
+                                 uint8_t *audioPayloadBytesPerFrame,
+                                 uint8_t *audioQueueMaxCount,
+                                 audio_set_rtp_header_f *audio_set_header,
+                                 audio_set_rtp_jumbo_frame_f *audio_set_jumbo,
+                                 void **ud);
+
+class CTwoLameAudioEncoder : public CAudioEncoder {
+public:
+  CTwoLameAudioEncoder(CAudioProfile *ap,
+                       CAudioEncoder *next,
+                       u_int8_t srcChannels,
+                       u_int32_t srcSampleRate,
+                       uint16_t mtu,
+                       bool realTime = true);
+
+  bool Init(void);
+
+  MediaType GetFrameType(void) {
+    return MP3AUDIOFRAME;
+  }
+
+  u_int32_t GetSamplesPerFrame();
+
+  bool EncodeSamples(
+                     int16_t* pSamples,
+                     u_int32_t numSamplesPerChannel,
+                     u_int8_t numChannels);
+
+  bool GetEncodedFrame(
+                       u_int8_t** ppBuffer,
+                       u_int32_t* pBufferLength,
+                       u_int32_t* pNumSamplesPerChannel);
+
+
+protected:
+  void StopEncoder(void);
+  twolame_options *m_twolameParams;
+  u_int32_t m_samplesPerFrame;
+  u_int8_t* m_mp3FrameBuffer;
+  u_int32_t m_mp3FrameBufferLength;
+  u_int32_t m_mp3FrameBufferSize;
+  u_int32_t m_mp3FrameMaxSize;
+};
+
+extern audio_encoder_table_t twolame_audio_encoder_table;
+#endif
+#endif /* __AUDIO_TWOLAME_H__ */
+
diff --git a/server/mp4live/profile_audio.h b/server/mp4live/profile_audio.h
index 9021772e..821b6730 100644
--- a/server/mp4live/profile_audio.h
+++ b/server/mp4live/profile_audio.h
@@ -26,6 +26,7 @@
 #define AUDIO_ENCODER_LAME "lame"
 #define AUDIO_ENCODER_G711 "g711"
 #define AUDIO_ENCODER_L16 "l16"
+#define AUDIO_ENCODER_TWOLAME "twolame"
 
 #define AUDIO_ENCODING_NONE "None"
 #define AUDIO_ENCODING_PCM16 "PCM16"
diff --git a/server/mp4live/video_ffmpeg.cpp b/server/mp4live/video_ffmpeg.cpp
index 1738a317..4b60e355 100644
--- a/server/mp4live/video_ffmpeg.cpp
+++ b/server/mp4live/video_ffmpeg.cpp
@@ -89,6 +89,8 @@ bool CFfmpegVideoEncoder::Init (void)
     m_media_frame = MPEG4VIDEOFRAME;
 #ifdef OUTPUT_RAW
     m_outfile = fopen("raw.m4v", FOPEN_WRITE_BINARY);
+    fwrite(Profile()->m_videoMpeg4Config,
+           Profile()->m_videoMpeg4ConfigLength, 1, m_outfile);
 #endif
   } else if (strcasecmp(Profile()->GetStringValue(CFG_VIDEO_ENCODING),
                         VIDEO_ENCODING_H263) == 0) {
@@ -123,9 +125,8 @@ bool CFfmpegVideoEncoder::Init (void)
   m_avctx->frame_rate_base = 1;
 #else
   m_avctx->time_base = (AVRational){1, (int)(Profile()->GetFloatValue(CFG_VIDEO_FRAME_RATE) + .5)};
-  m_avctx->strict_std_compliance = -1;
   m_avctx->pix_fmt = PIX_FMT_YUV420P;
-  m_avctx->profile = Profile()->m_videoMpeg4ProfileId;
+  m_avctx->me_method = ME_EPZS;
 #endif
   if (Profile()->GetIntegerValue(CFG_VIDEO_MPEG4_PAR_WIDTH) > 0 &&
       Profile()->GetIntegerValue(CFG_VIDEO_MPEG4_PAR_HEIGHT) > 0) {
@@ -150,13 +151,20 @@ bool CFfmpegVideoEncoder::Init (void)
 #endif
   m_usingBFrames = false;
   m_BFrameCount = 0;
+
   if (m_media_frame == MPEG2VIDEOFRAME) {
     m_avctx->gop_size = 15;
     m_avctx->b_frame_strategy = 0;
     m_avctx->max_b_frames = 2;
     m_usingBFrames = true;
     m_BFrameCount = 2;
+#ifdef HAVE_AVCODECCONTEXT_TIME_BASE
+    m_avctx->strict_std_compliance = 0;
+#endif
   } else {
+#ifdef HAVE_AVCODECCONTEXT_TIME_BASE
+    m_avctx->strict_std_compliance = -1;
+#endif
     if (m_media_frame == H263VIDEOFRAME) {
       m_avctx->bit_rate =
         Profile()->GetIntegerValue(CFG_VIDEO_BIT_RATE) * 800;
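// ---------------------------------------------------------------------------
// A minimal sketch (not part of the patch) of the twolame call sequence that
// CTwoLameAudioEncoder::Init / EncodeSamples / StopEncoder wrap above. Only
// twolame calls that already appear in this patch are used; the channel
// count, sample rate, bitrate and output buffer size are illustrative values
// chosen for the example, not values read from the mp4live profile (Init()
// sizes its buffer as 1.25 * samples + 7200).
//
//   #include <twolame.h>
//
//   // Encode one chunk of interleaved stereo PCM to MPEG-1 layer 2 and then
//   // flush the encoder; returns bytes of layer-2 data written, or -1.
//   static int twolame_oneshot(const int16_t *pcm, int samplesPerChannel,
//                              unsigned char *out, int outMax)
//   {
//     twolame_options *opts = twolame_init();
//     if (opts == NULL) return -1;
//     twolame_set_num_channels(opts, 2);
//     twolame_set_in_samplerate(opts, 44100);
//     twolame_set_brate(opts, 192);            // kbps
//     twolame_set_mode(opts, TWOLAME_STEREO);
//     twolame_set_VBR(opts, 0);                // CBR, as in Init() above
//     if (twolame_init_params(opts) == -1) {
//       twolame_close(&opts);
//       return -1;
//     }
//     int n = twolame_encode_buffer_interleaved(opts, pcm, samplesPerChannel,
//                                               out, outMax);
//     if (n >= 0)                              // drain any buffered frames
//       n += twolame_encode_flush(opts, out + n, outMax - n);
//     twolame_close(&opts);
//     return n;
//   }
// ---------------------------------------------------------------------------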