diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..5eadd86
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,3 @@
+Version: 1.0.0-beta1
+
+Initial release (beta).
diff --git a/Example b/Example
new file mode 100644
index 0000000..c79ee67
--- /dev/null
+++ b/Example
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+
+from yarp import *
+
+# Specify the path to a primary registry hive file here (e.g. a SYSTEM hive).
+primary_path = '<...>/SYSTEM'
+
+# Discover transaction log files next to the primary file; they may be needed to recover it.
+transaction_logs = RegistryHelpers.DiscoverLogFiles(primary_path)
+
+# Open the primary file and each transaction log file discovered (binary, read-only).
+primary_file = open(primary_path, 'rb')
+
+if transaction_logs.log_path is not None:
+    log_file = open(transaction_logs.log_path, 'rb')
+else:
+    log_file = None
+
+if transaction_logs.log1_path is not None:
+    log1_file = open(transaction_logs.log1_path, 'rb')
+else:
+    log1_file = None
+
+if transaction_logs.log2_path is not None:
+    log2_file = open(transaction_logs.log2_path, 'rb')
+else:
+    log2_file = None
+
+# Parse the hive; recover_auto() applies the transaction logs only if recovery is required.
+hive = Registry.RegistryHive(primary_file)
+recovery_result = hive.recover_auto(log_file, log1_file, log2_file)
+if recovery_result.recovered:
+    print('The hive has been recovered')
+
+# Print basic information (header timestamps) about the hive.
+print('Last written timestamp: {}'.format(hive.last_written_timestamp()))
+print('Last reorganized timestamp: {}'.format(hive.last_reorganized_timestamp()))
+
+# Find an existing key by its path (relative to the hive root).
+key = hive.find_key('controlset001\\services')
+print('Found a key: {}'.format(key.path()))
+
+# Enumerate and print information about its subkeys.
+for sk in key.subkeys():
+    print(sk)
+
+# Pick one existing subkey by name.
+key = key.subkey('exfat')
+
+# Print its name and last written timestamp.
+print('Found a subkey: {}'.format(key.name()))
+print('Last written timestamp: {}'.format(key.last_written_timestamp()))
+
+# Enumerate and print information about the values stored in this key.
+for v in key.values():
+    print(v)
+
+# Pick one existing value by name.
+v = key.value('description')
+
+# Print more details about this value: its name, type, and data (decoded and raw).
+print('Some information about a specific value:')
+print('Value name is \'{}\''.format(v.name()))
+print('Value type is {} as a string (or {} as an integer)'.format(v.type_str(), v.type_raw()))
+print('Value data is:')
+print(v.data())
+print('The same data as raw bytes:')
+print(v.data_raw())
+
+# Drop the hive object first, then close every file handle that was opened.
+hive = None
+primary_file.close()
+if log_file is not None:
+    log_file.close()
+if log1_file is not None:
+    log1_file.close()
+if log2_file is not None:
+    log2_file.close()
diff --git a/License b/License
new file mode 100644
index 0000000..94a9ed0
--- /dev/null
+++ b/License
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/ReadMe b/ReadMe
new file mode 100644
index 0000000..a506dcc
--- /dev/null
+++ b/ReadMe
@@ -0,0 +1,33 @@
+yarp: yet another registry parser
+
+1. Project goals: the library and tools
+
+- Parse Windows registry files in a proper way (with forensics in mind).
+- Expose values of all fields of underlying registry structures.
+- Support for truncated registry files.
+- Support for recovering deleted keys and values.
+- Support for carving of registry hives.
+- Support for transaction log files.
+
+2. Hive version numbers supported
+
+- Full support: 1.1-1.6.
+- No support: 1.0.
+
+In general, full support is available for hive files from installations of
+Windows NT 3.1 and later versions of Windows NT (including Windows 10);
+hive files from installations of pre-release versions of Windows NT 3.1 are
+not supported.
+
+3. Documentation
+
+See the docstrings in the module.
+For a basic usage example, see the 'Example' file.
+
+4. License
+
+This project is made available under the terms of the GNU GPL, version 3.
+See the 'License' file.
+
+---
+(c) Maxim Suhanov
diff --git a/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive b/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive
new file mode 100644
index 0000000..40e4f34
Binary files /dev/null and b/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive differ
diff --git a/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive.LOG1 b/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive.LOG1
new file mode 100644
index 0000000..a68ef6f
Binary files /dev/null and b/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive.LOG1 differ
diff --git a/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive.LOG2 b/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive.LOG2
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/BadListHive b/hives_for_tests/BadListHive
new file mode 100644
index 0000000..f32bf61
Binary files /dev/null and b/hives_for_tests/BadListHive differ
diff --git a/hives_for_tests/BadLogHive1/BadLogHive b/hives_for_tests/BadLogHive1/BadLogHive
new file mode 100644
index 0000000..1c3745f
Binary files /dev/null and b/hives_for_tests/BadLogHive1/BadLogHive differ
diff --git a/hives_for_tests/BadLogHive1/BadLogHive.LOG1 b/hives_for_tests/BadLogHive1/BadLogHive.LOG1
new file mode 100644
index 0000000..a0c7e00
Binary files /dev/null and b/hives_for_tests/BadLogHive1/BadLogHive.LOG1 differ
diff --git a/hives_for_tests/BadLogHive1/BadLogHive.LOG2 b/hives_for_tests/BadLogHive1/BadLogHive.LOG2
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/BadLogHive2/BadLogHive b/hives_for_tests/BadLogHive2/BadLogHive
new file mode 100644
index 0000000..1c3745f
Binary files /dev/null and b/hives_for_tests/BadLogHive2/BadLogHive differ
diff --git a/hives_for_tests/BadLogHive2/BadLogHive.LOG1 b/hives_for_tests/BadLogHive2/BadLogHive.LOG1
new file mode 100644
index 0000000..00548c4
Binary files /dev/null and b/hives_for_tests/BadLogHive2/BadLogHive.LOG1 differ
diff --git a/hives_for_tests/BadLogHive2/BadLogHive.LOG2 b/hives_for_tests/BadLogHive2/BadLogHive.LOG2
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/BadLogHive3/BadLogHive b/hives_for_tests/BadLogHive3/BadLogHive
new file mode 100644
index 0000000..3c7c1cd
Binary files /dev/null and b/hives_for_tests/BadLogHive3/BadLogHive differ
diff --git a/hives_for_tests/BadLogHive3/BadLogHive.LOG1 b/hives_for_tests/BadLogHive3/BadLogHive.LOG1
new file mode 100644
index 0000000..dce0485
Binary files /dev/null and b/hives_for_tests/BadLogHive3/BadLogHive.LOG1 differ
diff --git a/hives_for_tests/BadLogHive3/BadLogHive.LOG2 b/hives_for_tests/BadLogHive3/BadLogHive.LOG2
new file mode 100644
index 0000000..d0dd640
Binary files /dev/null and b/hives_for_tests/BadLogHive3/BadLogHive.LOG2 differ
diff --git a/hives_for_tests/BadSubkeyHive b/hives_for_tests/BadSubkeyHive
new file mode 100644
index 0000000..aafc82a
Binary files /dev/null and b/hives_for_tests/BadSubkeyHive differ
diff --git a/hives_for_tests/BigDataHive b/hives_for_tests/BigDataHive
new file mode 100644
index 0000000..7b0df9d
Binary files /dev/null and b/hives_for_tests/BigDataHive differ
diff --git a/hives_for_tests/BogusKeyNamesHive b/hives_for_tests/BogusKeyNamesHive
new file mode 100644
index 0000000..8bbc0eb
Binary files /dev/null and b/hives_for_tests/BogusKeyNamesHive differ
diff --git a/hives_for_tests/Carving/0 b/hives_for_tests/Carving/0
new file mode 100644
index 0000000..a95c189
Binary files /dev/null and b/hives_for_tests/Carving/0 differ
diff --git a/hives_for_tests/Carving/512 b/hives_for_tests/Carving/512
new file mode 100644
index 0000000..a0a7ee5
Binary files /dev/null and b/hives_for_tests/Carving/512 differ
diff --git a/hives_for_tests/CompHive b/hives_for_tests/CompHive
new file mode 100644
index 0000000..768c846
Binary files /dev/null and b/hives_for_tests/CompHive differ
diff --git a/hives_for_tests/DeletedDataHive b/hives_for_tests/DeletedDataHive
new file mode 100644
index 0000000..1b2f486
Binary files /dev/null and b/hives_for_tests/DeletedDataHive differ
diff --git a/hives_for_tests/DeletedTreeHive b/hives_for_tests/DeletedTreeHive
new file mode 100644
index 0000000..49417d6
Binary files /dev/null and b/hives_for_tests/DeletedTreeHive differ
diff --git a/hives_for_tests/DeletedTreeNoRootFlagHive b/hives_for_tests/DeletedTreeNoRootFlagHive
new file mode 100644
index 0000000..a165f0a
Binary files /dev/null and b/hives_for_tests/DeletedTreeNoRootFlagHive differ
diff --git a/hives_for_tests/DeletedTreePartialPathHive b/hives_for_tests/DeletedTreePartialPathHive
new file mode 100644
index 0000000..1e36ae3
Binary files /dev/null and b/hives_for_tests/DeletedTreePartialPathHive differ
diff --git a/hives_for_tests/Discovery/1/aa b/hives_for_tests/Discovery/1/aa
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/1/aa.LOG b/hives_for_tests/Discovery/1/aa.LOG
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/1/aa.LOG1 b/hives_for_tests/Discovery/1/aa.LOG1
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/1/aa.LOG2 b/hives_for_tests/Discovery/1/aa.LOG2
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/2/AA b/hives_for_tests/Discovery/2/AA
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/2/aa.LOG1 b/hives_for_tests/Discovery/2/aa.LOG1
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/2/aa.LOG2 b/hives_for_tests/Discovery/2/aa.LOG2
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/3/AA b/hives_for_tests/Discovery/3/AA
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/3/aa.log b/hives_for_tests/Discovery/3/aa.log
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/3/aa.log1 b/hives_for_tests/Discovery/3/aa.log1
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/4/AA b/hives_for_tests/Discovery/4/AA
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/4/aa.LOG b/hives_for_tests/Discovery/4/aa.LOG
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/4/aa.log1 b/hives_for_tests/Discovery/4/aa.log1
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/4/aa.log2 b/hives_for_tests/Discovery/4/aa.log2
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/Discovery/5/aa b/hives_for_tests/Discovery/5/aa
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/DuplicateSubkeysHive b/hives_for_tests/DuplicateSubkeysHive
new file mode 100644
index 0000000..046194c
Binary files /dev/null and b/hives_for_tests/DuplicateSubkeysHive differ
diff --git a/hives_for_tests/EffectiveSizeHive b/hives_for_tests/EffectiveSizeHive
new file mode 100644
index 0000000..1f2f021
Binary files /dev/null and b/hives_for_tests/EffectiveSizeHive differ
diff --git a/hives_for_tests/EmptyHive b/hives_for_tests/EmptyHive
new file mode 100644
index 0000000..83cb26a
Binary files /dev/null and b/hives_for_tests/EmptyHive differ
diff --git a/hives_for_tests/ExtendedASCIIHive b/hives_for_tests/ExtendedASCIIHive
new file mode 100644
index 0000000..0da7d2f
Binary files /dev/null and b/hives_for_tests/ExtendedASCIIHive differ
diff --git a/hives_for_tests/GarbageHive b/hives_for_tests/GarbageHive
new file mode 100644
index 0000000..2b1a6fa
Binary files /dev/null and b/hives_for_tests/GarbageHive differ
diff --git a/hives_for_tests/HealedHive b/hives_for_tests/HealedHive
new file mode 100644
index 0000000..1dcde22
Binary files /dev/null and b/hives_for_tests/HealedHive differ
diff --git a/hives_for_tests/InvalidParentHive b/hives_for_tests/InvalidParentHive
new file mode 100644
index 0000000..e9618a6
Binary files /dev/null and b/hives_for_tests/InvalidParentHive differ
diff --git a/hives_for_tests/ManySubkeysHive b/hives_for_tests/ManySubkeysHive
new file mode 100644
index 0000000..321c21f
Binary files /dev/null and b/hives_for_tests/ManySubkeysHive differ
diff --git a/hives_for_tests/MultiSzHive b/hives_for_tests/MultiSzHive
new file mode 100644
index 0000000..d61cc41
Binary files /dev/null and b/hives_for_tests/MultiSzHive differ
diff --git a/hives_for_tests/NewDirtyHive1/NewDirtyHive b/hives_for_tests/NewDirtyHive1/NewDirtyHive
new file mode 100644
index 0000000..3c7c1cd
Binary files /dev/null and b/hives_for_tests/NewDirtyHive1/NewDirtyHive differ
diff --git a/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG1 b/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG1
new file mode 100644
index 0000000..af3b067
Binary files /dev/null and b/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG1 differ
diff --git a/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG2 b/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG2
new file mode 100644
index 0000000..15f0a49
Binary files /dev/null and b/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG2 differ
diff --git a/hives_for_tests/NewDirtyHive1/RecoveredHive_Windows10 b/hives_for_tests/NewDirtyHive1/RecoveredHive_Windows10
new file mode 100644
index 0000000..f352712
Binary files /dev/null and b/hives_for_tests/NewDirtyHive1/RecoveredHive_Windows10 differ
diff --git a/hives_for_tests/NewDirtyHive2/NewDirtyHive b/hives_for_tests/NewDirtyHive2/NewDirtyHive
new file mode 100644
index 0000000..233e7ee
Binary files /dev/null and b/hives_for_tests/NewDirtyHive2/NewDirtyHive differ
diff --git a/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG1 b/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG1
new file mode 100644
index 0000000..af3b067
Binary files /dev/null and b/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG1 differ
diff --git a/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG2 b/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG2
new file mode 100644
index 0000000..15f0a49
Binary files /dev/null and b/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG2 differ
diff --git a/hives_for_tests/NewFlagsHive b/hives_for_tests/NewFlagsHive
new file mode 100644
index 0000000..2d09bd9
Binary files /dev/null and b/hives_for_tests/NewFlagsHive differ
diff --git a/hives_for_tests/OldDirtyHive/OldDirtyHive b/hives_for_tests/OldDirtyHive/OldDirtyHive
new file mode 100644
index 0000000..1c3745f
Binary files /dev/null and b/hives_for_tests/OldDirtyHive/OldDirtyHive differ
diff --git a/hives_for_tests/OldDirtyHive/OldDirtyHive.LOG1 b/hives_for_tests/OldDirtyHive/OldDirtyHive.LOG1
new file mode 100644
index 0000000..a68ef6f
Binary files /dev/null and b/hives_for_tests/OldDirtyHive/OldDirtyHive.LOG1 differ
diff --git a/hives_for_tests/OldDirtyHive/OldDirtyHive.LOG2 b/hives_for_tests/OldDirtyHive/OldDirtyHive.LOG2
new file mode 100644
index 0000000..e69de29
diff --git a/hives_for_tests/OldDirtyHive/RecoveredHive_Windows7 b/hives_for_tests/OldDirtyHive/RecoveredHive_Windows7
new file mode 100644
index 0000000..fafd5aa
Binary files /dev/null and b/hives_for_tests/OldDirtyHive/RecoveredHive_Windows7 differ
diff --git a/hives_for_tests/RemnantsHive b/hives_for_tests/RemnantsHive
new file mode 100644
index 0000000..2c0a541
Binary files /dev/null and b/hives_for_tests/RemnantsHive differ
diff --git a/hives_for_tests/SlackHive b/hives_for_tests/SlackHive
new file mode 100644
index 0000000..9cf34e2
Binary files /dev/null and b/hives_for_tests/SlackHive differ
diff --git a/hives_for_tests/StringValuesHive b/hives_for_tests/StringValuesHive
new file mode 100644
index 0000000..19fe29f
Binary files /dev/null and b/hives_for_tests/StringValuesHive differ
diff --git a/hives_for_tests/TruncatedHive b/hives_for_tests/TruncatedHive
new file mode 100644
index 0000000..5c71d65
Binary files /dev/null and b/hives_for_tests/TruncatedHive differ
diff --git a/hives_for_tests/TruncatedNameHive b/hives_for_tests/TruncatedNameHive
new file mode 100644
index 0000000..bb2d158
Binary files /dev/null and b/hives_for_tests/TruncatedNameHive differ
diff --git a/hives_for_tests/UnicodeHive b/hives_for_tests/UnicodeHive
new file mode 100644
index 0000000..a95c189
Binary files /dev/null and b/hives_for_tests/UnicodeHive differ
diff --git a/hives_for_tests/WrongOrderHive b/hives_for_tests/WrongOrderHive
new file mode 100644
index 0000000..88463c3
Binary files /dev/null and b/hives_for_tests/WrongOrderHive differ
diff --git a/records_for_tests/dummy_db b/records_for_tests/dummy_db
new file mode 100644
index 0000000..980239e
Binary files /dev/null and b/records_for_tests/dummy_db differ
diff --git a/records_for_tests/dummy_lf b/records_for_tests/dummy_lf
new file mode 100644
index 0000000..17ff433
Binary files /dev/null and b/records_for_tests/dummy_lf differ
diff --git a/records_for_tests/dummy_lh b/records_for_tests/dummy_lh
new file mode 100644
index 0000000..26531f6
Binary files /dev/null and b/records_for_tests/dummy_lh differ
diff --git a/records_for_tests/dummy_li b/records_for_tests/dummy_li
new file mode 100644
index 0000000..7fb9ae7
Binary files /dev/null and b/records_for_tests/dummy_li differ
diff --git a/records_for_tests/dummy_list b/records_for_tests/dummy_list
new file mode 100644
index 0000000..6ec4164
Binary files /dev/null and b/records_for_tests/dummy_list differ
diff --git a/records_for_tests/dummy_nk b/records_for_tests/dummy_nk
new file mode 100644
index 0000000..fa137e8
Binary files /dev/null and b/records_for_tests/dummy_nk differ
diff --git a/records_for_tests/dummy_ri b/records_for_tests/dummy_ri
new file mode 100644
index 0000000..a90d6d6
Binary files /dev/null and b/records_for_tests/dummy_ri differ
diff --git a/records_for_tests/dummy_sk b/records_for_tests/dummy_sk
new file mode 100644
index 0000000..1126354
Binary files /dev/null and b/records_for_tests/dummy_sk differ
diff --git a/records_for_tests/dummy_vk b/records_for_tests/dummy_vk
new file mode 100644
index 0000000..3702200
Binary files /dev/null and b/records_for_tests/dummy_vk differ
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..d018eb2
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,19 @@
+from distutils.core import setup
+from yarp import __version__
+
+setup(
+ name = 'yarp',
+ version = __version__,
+ license = 'GPLv3',
+ packages = [ 'yarp' ],
+ scripts = [ 'yarp-carver', 'yarp-print', 'yarp-timeline' ],
+ description = 'Yet another registry parser',
+ author = 'Maxim Suhanov',
+ author_email = 'no.spam.c@mail.ru',
+ classifiers = [
+ 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
+ 'Operating System :: OS Independent',
+ 'Programming Language :: Python :: 3',
+ 'Development Status :: 4 - Beta'
+ ]
+)
diff --git a/test_cases.py b/test_cases.py
new file mode 100644
index 0000000..45e95ca
--- /dev/null
+++ b/test_cases.py
@@ -0,0 +1,1021 @@
+# coding: utf-8
+
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
+import pytest
+from io import BytesIO
+from os import path, remove
+from hashlib import md5
+from yarp import Registry, RegistryFile, RegistryRecords, RegistryRecover, RegistryCarve, RegistryHelpers
+
+HIVES_DIR = 'hives_for_tests'
+RECORDS_DIR = 'records_for_tests'
+
+hive_empty = path.join(HIVES_DIR, 'EmptyHive')
+hive_bigdata = path.join(HIVES_DIR, 'BigDataHive')
+hive_many_subkeys = path.join(HIVES_DIR, 'ManySubkeysHive')
+hive_garbage = path.join(HIVES_DIR, 'GarbageHive')
+hive_duplicate_subkeys = path.join(HIVES_DIR, 'DuplicateSubkeysHive')
+
+hive_dirty_new1 = path.join(HIVES_DIR, 'NewDirtyHive1', 'NewDirtyHive')
+hive_dirty_new1_log1 = path.join(HIVES_DIR, 'NewDirtyHive1', 'NewDirtyHive.LOG1')
+hive_dirty_new1_log2 = path.join(HIVES_DIR, 'NewDirtyHive1', 'NewDirtyHive.LOG2')
+hive_dirty_new1_recovered = path.join(HIVES_DIR, 'NewDirtyHive1', 'RecoveredHive_Windows10')
+
+hive_dirty_new2 = path.join(HIVES_DIR, 'NewDirtyHive2', 'NewDirtyHive')
+hive_dirty_new2_log1 = path.join(HIVES_DIR, 'NewDirtyHive2', 'NewDirtyHive.LOG1')
+hive_dirty_new2_log2 = path.join(HIVES_DIR, 'NewDirtyHive2', 'NewDirtyHive.LOG2')
+
+hive_dirty_old = path.join(HIVES_DIR, 'OldDirtyHive', 'OldDirtyHive')
+hive_dirty_old_log = path.join(HIVES_DIR, 'OldDirtyHive', 'OldDirtyHive.LOG1')
+hive_dirty_old_recovered = path.join(HIVES_DIR, 'OldDirtyHive', 'RecoveredHive_Windows7')
+
+hive_unicode = path.join(HIVES_DIR, 'UnicodeHive')
+hive_extended_ascii = path.join(HIVES_DIR, 'ExtendedASCIIHive')
+hive_invalid_parent = path.join(HIVES_DIR, 'InvalidParentHive')
+hive_bad_list = path.join(HIVES_DIR, 'BadListHive')
+hive_bad_subkey = path.join(HIVES_DIR, 'BadSubkeyHive')
+
+hive_bad_baseblock = path.join(HIVES_DIR, 'BadBaseBlockHive', 'BadBaseBlockHive')
+hive_bad_baseblock_log1 = path.join(HIVES_DIR, 'BadBaseBlockHive', 'BadBaseBlockHive.LOG1')
+hive_bad_baseblock_log2 = path.join(HIVES_DIR, 'BadBaseBlockHive', 'BadBaseBlockHive.LOG2')
+
+hive_bad_log1 = path.join(HIVES_DIR, 'BadLogHive1', 'BadLogHive')
+hive_bad_log1_log1 = path.join(HIVES_DIR, 'BadLogHive1', 'BadLogHive.LOG1')
+hive_bad_log1_log2 = path.join(HIVES_DIR, 'BadLogHive1', 'BadLogHive.LOG2')
+
+hive_bad_log2 = path.join(HIVES_DIR, 'BadLogHive2', 'BadLogHive')
+hive_bad_log2_log1 = path.join(HIVES_DIR, 'BadLogHive2', 'BadLogHive.LOG1')
+hive_bad_log2_log2 = path.join(HIVES_DIR, 'BadLogHive2', 'BadLogHive.LOG2')
+
+hive_bad_log3 = path.join(HIVES_DIR, 'BadLogHive3', 'BadLogHive')
+hive_bad_log3_log1 = path.join(HIVES_DIR, 'BadLogHive3', 'BadLogHive.LOG1')
+hive_bad_log3_log2 = path.join(HIVES_DIR, 'BadLogHive3', 'BadLogHive.LOG2')
+
+hive_bogus_keynames = path.join(HIVES_DIR, 'BogusKeyNamesHive')
+hive_new_flags = path.join(HIVES_DIR, 'NewFlagsHive')
+hive_multisz = path.join(HIVES_DIR, 'MultiSzHive')
+hive_strings = path.join(HIVES_DIR, 'StringValuesHive')
+hive_wrong_order = path.join(HIVES_DIR, 'WrongOrderHive')
+hive_truncated_name = path.join(HIVES_DIR, 'TruncatedNameHive')
+hive_healed = path.join(HIVES_DIR, 'HealedHive')
+hive_deleted_data = path.join(HIVES_DIR, 'DeletedDataHive')
+hive_deleted_tree = path.join(HIVES_DIR, 'DeletedTreeHive')
+hive_comp = path.join(HIVES_DIR, 'CompHive')
+hive_remnants = path.join(HIVES_DIR, 'RemnantsHive')
+hive_truncated = path.join(HIVES_DIR, 'TruncatedHive')
+hive_effective_size = path.join(HIVES_DIR, 'EffectiveSizeHive')
+hive_deleted_tree_no_root_flag = path.join(HIVES_DIR, 'DeletedTreeNoRootFlagHive')
+hive_deleted_tree_partial_path = path.join(HIVES_DIR, 'DeletedTreePartialPathHive')
+hive_slack = path.join(HIVES_DIR, 'SlackHive')
+
+hive_carving0 = path.join(HIVES_DIR, 'Carving', '0')
+hive_carving512 = path.join(HIVES_DIR, 'Carving', '512')
+
+log_discovery = [
+ path.join(HIVES_DIR, 'Discovery', '1', 'aa'),
+ path.join(HIVES_DIR, 'Discovery', '2', 'AA'),
+ path.join(HIVES_DIR, 'Discovery', '3', 'AA'),
+ path.join(HIVES_DIR, 'Discovery', '4', 'AA'),
+ path.join(HIVES_DIR, 'Discovery', '5', 'aa')
+]
+
+record_nk = path.join(RECORDS_DIR, 'dummy_nk')
+record_vk = path.join(RECORDS_DIR, 'dummy_vk')
+record_sk = path.join(RECORDS_DIR, 'dummy_sk')
+record_li = path.join(RECORDS_DIR, 'dummy_li')
+record_lf = path.join(RECORDS_DIR, 'dummy_lf')
+record_lh = path.join(RECORDS_DIR, 'dummy_lh')
+record_ri = path.join(RECORDS_DIR, 'dummy_ri')
+record_list = path.join(RECORDS_DIR, 'dummy_list')
+record_db = path.join(RECORDS_DIR, 'dummy_db')
+
+def test_empty():
+ with open(hive_empty, 'rb') as f:
+ hive = Registry.RegistryHive(f)
+
+ assert hive.root_key().subkeys_count() == 0
+ for key in hive.root_key().subkeys():
+ assert False
+
+ assert hive.root_key().path() == ''
+ assert hive.root_key().path(True) != ''
+
+ timestamp = hive.last_written_timestamp()
+ assert timestamp.year == 2017
+ assert timestamp.month == 3
+ assert timestamp.day == 4
+ assert timestamp.hour == 16
+ assert timestamp.minute == 37
+ assert timestamp.second == 31
+
+ timestamp = hive.last_reorganized_timestamp()
+ assert timestamp is None
+
+def test_bigdata():
+ with open(hive_bigdata, 'rb') as f:
+ hive = Registry.RegistryHive(f)
+
+ key = hive.root_key().subkey('key_with_bigdata')
+ assert key.values_count() == 2
+
+ value = key.value()
+ assert hive.registry_file.get_cell(value.key_value.get_data_offset())[ : 2] == b'db'
+
+ data = value.data()
+ assert len(data) == 16345
+ for c in data.decode('windows-1252'):
+ assert c == '1'
+
+ value = key.value('V')
+ assert hive.registry_file.get_cell(value.key_value.get_data_offset())[ : 2] == b'db'
+
+ data = value.data()
+ assert len(data) == 81725
+ for c in data.decode('windows-1252'):
+ assert c == '2'
+
+ assert key.value('dont_exist') is None
+
+def test_many_subkeys():
+ with open(hive_many_subkeys, 'rb') as f:
+ hive = Registry.RegistryHive(f)
+
+ key = hive.find_key('key_with_many_subkeys')
+ assert key.subkeys_count() == 5000
+
+ assert hive.registry_file.get_cell(key.key_node.get_subkeys_list_offset())[ : 2] == b'ri'
+
+ allowed_range = range(1, 5000 + 1)
+ for subkey in key.subkeys():
+ assert int(subkey.name()) in allowed_range
+
+ key = hive.find_key('key_with_MAny_subkeys\\2119\\find_me')
+ assert key.path() == 'key_with_many_subkeys\\2119\\find_me'
+ assert key.path_partial() == key.path()
+
+ key = hive.find_key('\\key_with_maNY_sUBkeys\\2119\\Find_me')
+ assert key.path() == 'key_with_many_subkeys\\2119\\find_me'
+ assert key.path_partial() == key.path()
+
+ key = hive.find_key('key_with_many_subkeys\\2119\\find_me')
+ assert key.path() == 'key_with_many_subkeys\\2119\\find_me'
+ assert key.path_partial() == key.path()
+
+ key = hive.find_key('key_with_many_subkeys\\3000')
+ assert key is not None
+
+ key = hive.find_key('key_with_many_subkeys\\3000\\dont_exist')
+ assert key is None
+
+ key = hive.find_key('key_with_many_subkeys\\dont_exist\\dont_exist')
+ assert key is None
+
+def test_garbage():
+ with open(hive_garbage, 'rb') as f:
+ hive = Registry.RegistryHive(f)
+
+ assert hive.registry_file.baseblock.get_hbins_data_size() == hive.registry_file.baseblock.effective_hbins_data_size == 4096
+
+ cnt = 0
+ for hive_bin in hive.registry_file.hive_bins():
+ cnt += 1
+
+ assert cnt == 1
+
+def test_duplicate_subkeys():
+ with open(hive_duplicate_subkeys, 'rb') as f:
+ hive = Registry.RegistryHive(f)
+
+ with pytest.raises(Registry.WalkException):
+ key = hive.root_key().subkey('key_with_many_subkeys')
+
+ assert key is not None
+
+ for subkey in key.subkeys():
+ pass
+
+@pytest.mark.parametrize('reverse', [False, True])
+def test_dirty_new1(reverse):
+
+ def log_entry_counter():
+ log_entry_counter.c += 1
+
+ with open(hive_dirty_new1, 'rb') as primary, open(hive_dirty_new1_log1, 'rb') as log1, open(hive_dirty_new1_log2, 'rb') as log2:
+ hive = Registry.RegistryHive(primary)
+
+ key_1 = hive.find_key('Key1')
+ key_21 = hive.find_key('Key2\\Key2_1')
+ key_22 = hive.find_key('Key2\\Key2_2')
+ assert key_1 is not None
+ assert key_21 is not None
+ assert key_22 is not None
+
+ key_bad = hive.find_key('Key2\\Key2_2\\dont_exist')
+ assert key_bad is None
+
+ value = key_1.value()
+ value_data = value.data()
+ assert len(value_data) == 6001
+ for c in value_data[ : -1]:
+ assert c == '1'
+
+ assert value_data[-1] == '\x00'
+
+ assert len(hive.find_key('KEY2').value('v').data()) == 9
+ assert hive.find_key('key2').value('V').data() == 'testTEST\x00'
+
+ assert hive.registry_file.log_apply_count == 0
+
+ hive.log_entry_callback = log_entry_counter
+ log_entry_counter.c = 0
+
+ if not reverse:
+ hive.recover_new(log1, log2)
+ else:
+ hive.recover_new(log2, log1)
+
+ assert log_entry_counter.c == 4
+
+ assert hive.registry_file.log_apply_count == 2
+
+ hive.registry_file.file_object.seek(4096)
+ recovered_data_1 = hive.registry_file.file_object.read()
+ md5_1 = md5(recovered_data_1).hexdigest()
+
+ with open(hive_dirty_new1_recovered, 'rb') as f:
+ f.seek(4096)
+ recovered_data_2 = f.read()
+ md5_2 = md5(recovered_data_2).hexdigest()
+
+ assert md5_1 == md5_2
+
+ key_1 = hive.find_key('Key1')
+ key_21 = hive.find_key('Key2\\Key2_1')
+ key_22 = hive.find_key('key2\\key2_2')
+ assert key_1 is None
+ assert key_21 is None
+ assert key_22 is None
+
+ key_3 = hive.find_key('Key3')
+ key_31 = hive.find_key('Key3\\Key3_1')
+ key_32 = hive.find_key('Key3\\Key3_2')
+ key_33 = hive.find_key('key3\\key3_3')
+ assert key_3 is not None
+ assert key_31 is not None
+ assert key_32 is not None
+ assert key_33 is not None
+
+ key_bad = hive.find_key('Key3\\Key3_2\\dont_exist')
+ assert key_bad is None
+
+ value = key_3.value()
+ value_data = value.data()
+ assert len(value_data) == 1441
+ for c in value_data[ : -1]:
+ assert c == '1'
+
+ assert value_data[-1] == '\x00'
+
+def test_dirty_new2():
+ with open(hive_dirty_new2, 'rb') as primary, open(hive_dirty_new2_log1, 'rb') as log1, open(hive_dirty_new2_log2, 'rb') as log2:
+ hive = Registry.RegistryHive(primary)
+
+ assert hive.registry_file.baseblock.validate_checksum()
+ assert hive.registry_file.log_apply_count == 0
+ hive.recover_new(log1, log2)
+ assert hive.registry_file.log_apply_count == 1
+ assert hive.registry_file.last_sequence_number == 5
+
def test_dirty_old():
    """Old-format log recovery: state before and after, checked against a reference hive."""

    with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log:
        hive = Registry.RegistryHive(primary)

        # Before recovery, the primary file still reflects the old state.
        assert hive.find_key('\\key_with_many_subkeys\\1') is not None
        assert hive.find_key('key_with_many_subkeys\\5000\\find_me_in_log') is None
        assert hive.find_key('key_with_many_subkeys\\4500').value('v') is None

        hive.recover_old(log)

        # After recovery, the changes from the log are visible.
        assert hive.find_key('\\key_with_many_subkeys\\1') is None

        key_5000_1 = hive.find_key('key_with_many_subkeys\\5000\\find_me_in_log')
        assert key_5000_1 is not None
        timestamp_1 = key_5000_1.last_written_timestamp()

        # Value lookups are case-insensitive ('V' here, 'v' below).
        value_4500 = hive.find_key('key_with_many_subkeys\\4500').value('V')
        assert value_4500 is not None
        assert value_4500.data() == [ 'a\x00', 'bb\x00', 'ccc\x00', '\x00' ]

        # The in-memory recovered state must match a reference recovered hive.
        with open(hive_dirty_old_recovered, 'rb') as recovered:
            hive_r = Registry.RegistryHive(recovered)

            key_5000_1_r = hive_r.find_key('key_with_many_subkeys\\5000\\find_me_in_log')
            timestamp_2 = key_5000_1_r.last_written_timestamp()
            value_4500_r = hive_r.find_key('key_with_many_subkeys\\4500').value('v')

            assert timestamp_1 == timestamp_2
            assert value_4500.data() == value_4500_r.data()
+
def test_dirty_old_rollback_changes():
    """rollback_changes() must restore the pre-recovery state of a hive."""

    with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log:
        hive = Registry.RegistryHive(primary)

        hive.recover_old(log)
        assert hive.registry_file.log_apply_count == 1

        hive.rollback_changes()
        assert hive.registry_file.log_apply_count == 0

        # The original (unrecovered) state is visible again.
        assert hive.find_key('\\key_with_many_subkeys\\1') is not None
        assert hive.find_key('key_with_many_subkeys\\5000\\find_me_in_log') is None
        assert hive.find_key('key_with_many_subkeys\\4500').value('v') is None
+
def test_recovery_not_required():
    """Recovering an already-clean hive must raise RecoveryException."""

    with open(hive_dirty_old_recovered, 'rb') as recovered:
        hive = Registry.RegistryHive(recovered)
        dummy = BytesIO()

        # Both recovery entry points refuse to run on a clean hive.
        for recover_method in (hive.recover_old, hive.recover_new):
            with pytest.raises(RegistryFile.RecoveryException):
                recover_method(dummy)
+
def test_unicode():
    """Key lookups are case-insensitive for non-ASCII (Cyrillic) names."""

    with open(hive_unicode, 'rb') as f:
        hive = Registry.RegistryHive(f)

        # Lookups in mixed case (with and without a leading backslash)
        # resolve to the same stored path.
        for lookup, stored_path in [
            (u'ПриВет\\КлюЧ', u'Привет\\Ключ'),
            (u'\\ПриВет\\КлюЧ', u'Привет\\Ключ'),
            (u'\\привеТ', u'Привет'),
        ]:
            key = hive.find_key(lookup)
            assert key is not None
            assert key.path() == stored_path
            assert key.path_partial() == key.path()

        key = hive.find_key(u'привет')
        assert key is not None
        assert key.path().lower() == u'привет'
        assert key.path_partial() == key.path()
+
def test_extended_ascii():
    """Extended-ASCII names stored in the compressed (8-bit) form decode correctly."""

    with open(hive_extended_ascii, 'rb') as f:
        hive = Registry.RegistryHive(f)

        key = hive.find_key(u'ëigenaardig')
        assert key is not None
        # The key name is stored compressed, not as UTF-16LE.
        assert key.key_node.get_flags() & RegistryRecords.KEY_COMP_NAME > 0
        assert key.path() == u'ëigenaardig'
        assert key.path_partial() == key.path()

        value = key.value(u'ëigenaardig')
        assert value.key_value.get_flags() & RegistryRecords.VALUE_COMP_NAME > 0
        assert value.data() == u'ëigenaardig\x00'
+
def test_autorecovery():
    """recover_auto() must pick the right log file(s) in many argument layouts."""

    def unpack(t):
        # Sanity-check the result tuple, then flatten it to
        # (is_new_log, log[, log]) for easy assertions.
        assert t.recovered
        file_objects = t.file_objects
        assert 0 < len(file_objects) < 3
        return (t.is_new_log,) + tuple(file_objects)

    dummy = BytesIO()

    # New-format hives: both logs get applied, in order, whether the
    # '.LOG' slot holds a dummy file or nothing at all.
    for hive_path, log1_path, log2_path, expected_applies in [
        (hive_dirty_new1, hive_dirty_new1_log1, hive_dirty_new1_log2, 2),
        (hive_dirty_new2, hive_dirty_new2_log1, hive_dirty_new2_log2, 1),
    ]:
        for first_slot in (dummy, None):
            with open(hive_path, 'rb') as primary, open(log1_path, 'rb') as log1, open(log2_path, 'rb') as log2:
                hive = Registry.RegistryHive(primary)
                t = unpack(hive.recover_auto(first_slot, log1, log2))
                assert hive.registry_file.log_apply_count == expected_applies
                assert len(t) == 3
                assert t[0]
                assert t[1] == log1 and t[2] == log2

    # Old-format hive: the single valid log must be found no matter which
    # slot(s) it occupies, with dummies or None filling the others.
    good_layouts = [
        lambda log: (log, dummy, dummy),
        lambda log: (dummy, log, dummy),
        lambda log: (dummy, dummy, log),
        lambda log: (dummy, log, log),
        lambda log: (log, log, log),
        lambda log: (None, dummy, log),
        lambda log: (log, None, None),
    ]
    for layout in good_layouts:
        with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log:
            hive = Registry.RegistryHive(primary)
            t = unpack(hive.recover_auto(*layout(log)))
            assert hive.registry_file.log_apply_count == 1
            assert len(t) == 2
            assert not t[0]
            assert t[1] == log

    # Layouts that should not allow automatic recovery: either no usable
    # log at all, or the valid log placed after an invalid/missing one.
    bad_layouts = [
        lambda log: (dummy, dummy, dummy),
        lambda log: (dummy, log, None),
        lambda log: (dummy, None, log),
        lambda log: (None, None, log),
        lambda log: (log, None, log),
    ]
    for layout in bad_layouts:
        with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log:
            hive = Registry.RegistryHive(primary)
            with pytest.raises(Registry.AutoRecoveryException):
                hive.recover_auto(*layout(log))
+
def test_invalid_parent():
    """Walking a hive with an invalid parent reference raises WalkException."""

    with open(hive_invalid_parent, 'rb') as primary:
        hive = Registry.RegistryHive(primary)
        with pytest.raises(Registry.WalkException):
            for top_key in hive.root_key().subkeys():
                for _ in top_key.subkeys():
                    pass
def test_bad_list():
    """Walking a hive with a corrupt subkeys list raises WalkException."""

    with open(hive_bad_list, 'rb') as primary:
        hive = Registry.RegistryHive(primary)
        with pytest.raises(Registry.WalkException):
            for top_key in hive.root_key().subkeys():
                for _ in top_key.subkeys():
                    pass
+
def test_bad_subkey():
    """Walking a hive with a corrupt subkey raises WalkException."""

    with open(hive_bad_subkey, 'rb') as primary:
        hive = Registry.RegistryHive(primary)
        with pytest.raises(Registry.WalkException):
            for top_key in hive.root_key().subkeys():
                for _ in top_key.subkeys():
                    pass
+
def test_access_bits():
    """Access bits recorded in a key node are exposed via access_bits()."""

    with open(hive_dirty_new1, 'rb') as primary:
        hive = Registry.RegistryHive(primary)
        assert hive.find_key('\\key2\\key2_2').access_bits() == 2
+
def test_bad_baseblock():
    """A hive with a corrupt base block is healed from a new-format log."""

    with open(hive_bad_baseblock, 'rb') as primary, open(hive_bad_baseblock_log1, 'rb') as log1, open(hive_bad_baseblock_log2, 'rb') as log2:
        hive = Registry.RegistryHive(primary)

        assert hive.registry_file.log_apply_count == 0
        assert hive.registry_file.baseblock.effective_version == 1

        # With a corrupt base block, cell offsets cannot be resolved yet.
        with pytest.raises(RegistryFile.CellOffsetException):
            hive.find_key('key_with_many_subkeys')

        t = hive.recover_auto(None, log1, log2)

        # One log (log1) was applied and the effective version was corrected.
        assert hive.registry_file.log_apply_count == 1
        assert hive.registry_file.baseblock.effective_version == 3
        assert not t.is_new_log
        assert t.file_objects == [log1]
        assert hive.find_key('key_with_many_subkeys') is not None
+
def test_bad_log1():
    """A corrupt transaction log (variant 1) must make automatic recovery fail."""

    with open(hive_bad_log1, 'rb') as primary, open(hive_bad_log1_log1, 'rb') as log1, open(hive_bad_log1_log2, 'rb') as log2:
        hive = Registry.RegistryHive(primary)
        with pytest.raises(Registry.AutoRecoveryException):
            hive.recover_auto(None, log1, log2)
def test_bad_log2():
    """A corrupt transaction log (variant 2) must make automatic recovery fail."""

    with open(hive_bad_log2, 'rb') as primary, open(hive_bad_log2_log1, 'rb') as log1, open(hive_bad_log2_log2, 'rb') as log2:
        hive = Registry.RegistryHive(primary)
        with pytest.raises(Registry.AutoRecoveryException):
            hive.recover_auto(None, log1, log2)
+
def test_bad_log3():
    """A corrupt transaction log (variant 3) must make automatic recovery fail."""

    with open(hive_bad_log3, 'rb') as primary, open(hive_bad_log3_log1, 'rb') as log1, open(hive_bad_log3_log2, 'rb') as log2:
        hive = Registry.RegistryHive(primary)
        with pytest.raises(Registry.AutoRecoveryException):
            hive.recover_auto(None, log1, log2)
def test_writable():
    """create/discard of the writable file object toggle 'writable'; extra discards are no-ops."""

    with open(hive_empty, 'rb') as primary:
        hive = Registry.RegistryHive(primary)
        registry_file = hive.registry_file

        assert not registry_file.writable
        registry_file.create_writable_file_object()
        assert registry_file.writable
        registry_file.discard_writable_file_object()
        assert not registry_file.writable

        # Creating twice in a row keeps the object writable.
        registry_file.create_writable_file_object()
        assert registry_file.writable
        registry_file.create_writable_file_object()
        assert registry_file.writable

        # A single discard is enough, and further discards are harmless.
        registry_file.discard_writable_file_object()
        assert not registry_file.writable
        for _ in range(4):
            registry_file.discard_writable_file_object()

        assert not registry_file.writable
+
def test_bogus_keynames():
    """Key names containing CR/LF and NUL bytes are preserved and searchable."""

    with open(hive_bogus_keynames, 'rb') as primary:
        hive = Registry.RegistryHive(primary)

        for k in hive.root_key().subkeys():
            assert k.name() in ('testnew\r\nne', 'testnu\x00l')

        assert hive.find_key('testnew\r\nne') is not None
        assert hive.find_key('testnu\x00l') is not None
+
def test_new_flags():
    """New-style user flags and virtualization control flags are parsed from key nodes."""

    with open(hive_new_flags, 'rb') as primary:
        hive = Registry.RegistryHive(primary)

        key_1 = hive.find_key('1')
        key_2 = hive.find_key('1\\2')
        assert key_1 is not None
        assert key_2 is not None

        assert key_1.key_node.get_virtualization_control_flags() == 0
        assert key_1.key_node.get_user_flags_new() == 0

        # Only the child key carries the 32-bit flag, in the new location.
        assert key_2.key_node.get_virtualization_control_flags() == 0
        assert key_2.key_node.get_user_flags_new() == RegistryRecords.KEY_FLAG_32BIT
        assert key_2.key_node.get_user_flags_old() == 0
+
def test_multisz():
    """Multi-string values decode to a list; empty data decodes to an empty list."""

    with open(hive_multisz, 'rb') as primary:
        hive = Registry.RegistryHive(primary)
        key = hive.find_key('key')

        # There is no default value on this key.
        assert key.value() is None

        assert key.value('1').data() == []

        strings = key.value('2').data()
        assert len(strings) == 3
        assert strings[0] == u'привет\x00'
        assert strings[1] == u'как дела?\x00'
        assert strings[2] == '\x00'
+
def test_strings():
    """Values decode either to text or stay raw bytes, as stored in the fixture."""

    with open(hive_strings, 'rb') as primary:
        key = Registry.RegistryHive(primary).find_key('key')

        assert key.value().data() == u'test тест\x00'
        assert key.value('1').data() == b'test'
        assert key.value('2').data() == u'test тест\x00'
        assert key.value('3').data() == u'test тест \x00'
+
def test_unicode_garbage():
    """DecodeUnicode stops at a terminator in strict mode, or decodes the whole buffer."""

    buf = b'a\x00b\x00\x00\x00c\x00d\x00'
    assert Registry.DecodeUnicode(buf, True) == u'ab\x00'
    assert Registry.DecodeUnicode(buf, False) == u'ab\x00cd'

    # A trailing odd byte only matters when decoding past the terminator.
    buf = b'a\x00b\x00\x00\x00c\x00d\x00e'
    assert Registry.DecodeUnicode(buf, True) == u'ab\x00'
    with pytest.raises(UnicodeDecodeError):
        Registry.DecodeUnicode(buf, False)

    buf = b'a\x00\x00\x00b\x00\x00\x00\x00\x00'
    assert Registry.DecodeUnicodeMulti(buf, True) == u'a\x00b\x00\x00'
+
def test_security():
    """The root key exposes a security descriptor of the expected length."""

    with open(hive_unicode, 'rb') as f:
        hive = Registry.RegistryHive(f)
        assert len(hive.root_key().security().descriptor()) == 144
+
def test_wrong_order():
    """Out-of-order subkeys raise WalkException mid-iteration; earlier items are still yielded."""

    with open(hive_wrong_order, 'rb') as f:
        hive = Registry.RegistryHive(f)

        yielded = 0
        with pytest.raises(Registry.WalkException):
            for _ in hive.find_key('1').subkeys():
                yielded += 1

        # One subkey came out before the order violation was detected.
        assert yielded == 1

        with pytest.raises(Registry.WalkException):
            for _ in hive.find_key('2').subkeys():
                yielded += 1

        # Three more subkeys came out this time (the counter is cumulative).
        assert yielded == 4
+
def test_truncated_name():
    """A key node with a truncated name raises ParseException while walking."""

    with open(hive_truncated_name, 'rb') as f:
        hive = Registry.RegistryHive(f)

        with pytest.raises(RegistryRecords.ParseException):
            for _ in hive.root_key().subkeys():
                pass
+
@pytest.mark.parametrize('walk_everywhere', [True, False])
def test_unreferenced(walk_everywhere):
    """Cell maps: allocated-but-unreferenced cells after a walk, or the free map without one."""

    for hive_path, unreferenced_count in [ (hive_healed, 5), (hive_bigdata, 0) ]:
        with open(hive_path, 'rb') as f:
            hive = Registry.RegistryHive(f)
            registry_file = hive.registry_file

            if walk_everywhere:
                hive.walk_everywhere()
                # Allocated cells never referenced during the walk.
                assert len(registry_file.cell_map_allocated - registry_file.cell_map_referenced) == unreferenced_count
            else:
                registry_file.build_map_free()
                # Without a walk nothing is referenced, and the free map
                # equals the unallocated map.
                assert len(registry_file.cell_map_referenced) == 0
                assert len(registry_file.cell_map_free) == len(registry_file.cell_map_unallocated)
+
def test_deleted():
    """The recovery scanner finds the expected deleted keys and values in three fixture hives."""

    with open(hive_deleted_data, 'rb') as f:
        hive = Registry.RegistryHive(f)
        hive.walk_everywhere()

        cnt_key_values = 0
        cnt_key_nodes = 0
        for item in RegistryRecover.Scanner(hive).scan():
            if type(item) is Registry.RegistryValue:
                cnt_key_values += 1
                assert item.type_raw() == RegistryRecords.REG_SZ

                if item.name() == 'v2':
                    assert item.data() == '456\x00'
                elif item.name() == 'v':
                    assert item.data() == '123456\x00'
                else:
                    assert False

            elif type(item) is Registry.RegistryKey:
                cnt_key_nodes += 1
                assert item.name() == '456'

                # The deleted key carries exactly one value.
                values = list(item.values())
                assert len(values) == 1
                assert values[0].name() == 'v'
                assert values[0].type_raw() == RegistryRecords.REG_SZ
                assert values[0].data() == '123456\x00'

        assert cnt_key_values == 2
        assert cnt_key_nodes == 1

    with open(hive_deleted_tree, 'rb') as f:
        hive = Registry.RegistryHive(f)
        hive.walk_everywhere()

        # Only keys from the deleted subtree are reported.
        for item in RegistryRecover.Scanner(hive).scan():
            assert type(item) is Registry.RegistryKey
            assert item.path() in [ '1\\2\\3', '1\\2\\3\\4', '1\\2\\3\\4\\5', '1\\2\\3\\4\\New Key #1' ]
            assert item.path_partial() == item.path()

    with open(hive_healed, 'rb') as f:
        hive = Registry.RegistryHive(f)
        hive.walk_everywhere()

        for item in RegistryRecover.Scanner(hive).scan():
            if type(item) is Registry.RegistryKey:
                assert item.name() == 'cccc'
                for v in item.values():
                    assert v.name() == '123'
                    assert v.type_raw() == RegistryRecords.REG_SZ
                    assert v.data() == 'test\x00'

            elif type(item) is Registry.RegistryValue:
                assert item.name() == '123'
                assert item.type_raw() == RegistryRecords.REG_SZ
                assert item.data() == 'test\x00'
+
def test_comp():
    """The 'comp' fixture hive walks fully without raising."""

    with open(hive_comp, 'rb') as f:
        Registry.RegistryHive(f).walk_everywhere()
+
def test_carving():
    """The carver locates the embedded hive at the expected offset in each image."""

    for image_path, expected_offset in [ (hive_carving0, 0), (hive_carving512, 512) ]:
        with open(image_path, 'rb') as f:
            for result in RegistryCarve.Carver(f).carve():
                assert result.offset == expected_offset
                assert result.size == 8192
                assert not result.truncated
                assert result.truncation_scenario == 0
+
def test_remnants():
    """Exactly one deleted value is recoverable from the remnants hive."""

    with open(hive_remnants, 'rb') as f:
        hive = Registry.RegistryHive(f)
        hive.walk_everywhere()

        found = 0
        for item in RegistryRecover.Scanner(hive).scan():
            assert type(item) is Registry.RegistryValue
            assert item.name() == ''
            assert item.type_raw() == RegistryRecords.REG_DWORD
            assert item.data() == 1
            found += 1

        assert found == 1
+
def test_truncated():
    """A truncated hive is still scannable and yields only known key names."""

    with open(hive_truncated, 'rb') as f:
        hive = Registry.RegistryHiveTruncated(f)

        for item in hive.scan():
            assert type(item) is Registry.RegistryKey
            # Key names are either the two known literals or positive numbers.
            assert item.name() in [ '{6214ff27-7b1b-41a3-9ae4-5fb851ffed63}', 'key_with_many_subkeys' ] or int(item.name()) > 0
+
def test_effective_hbins_data_size():
    """The effective hive bins data size can differ from the value recorded in the base block."""

    with open(hive_effective_size, 'rb') as f:
        baseblock = Registry.RegistryHive(f).registry_file.baseblock

        assert baseblock.effective_hbins_data_size == 487424
        assert baseblock.get_hbins_data_size() != baseblock.effective_hbins_data_size
+
def test_log_discovery():
    """DiscoverLogFiles must find the expected .LOG/.LOG1/.LOG2 companions.

    'log_discovery' holds primary file paths; the expected discovery result
    depends on each fixture's position in that list. Iterate with enumerate
    instead of range(len(...)).
    """

    for i, primary_path in enumerate(log_discovery):
        a = RegistryHelpers.DiscoverLogFiles(primary_path)

        assert a is not None

        if i == 0:
            # All three log files are present.
            assert path.normcase(path.basename(a.log_path)) == path.normcase('aa.LOG')
            assert path.normcase(path.basename(a.log1_path)) == path.normcase('aa.LOG1')
            assert path.normcase(path.basename(a.log2_path)) == path.normcase('aa.LOG2')
        elif i == 1:
            # No '.LOG' file, but '.LOG1' and '.LOG2' exist.
            assert a.log_path is None
            assert path.normcase(path.basename(a.log1_path)) == path.normcase('aa.LOG1')
            assert path.normcase(path.basename(a.log2_path)) == path.normcase('aa.LOG2')
        elif i == 2:
            # Lower-case log names, no '.LOG2'.
            assert path.normcase(path.basename(a.log_path)) == path.normcase('aa.log')
            assert path.normcase(path.basename(a.log1_path)) == path.normcase('aa.log1')
            assert a.log2_path is None
        elif i == 3:
            assert path.normcase(path.basename(a.log_path)) == path.normcase('aa.LOG')

            # These properties should be None if the file system is case-sensitive.
            assert a.log1_path is None or path.normcase(path.basename(a.log1_path)) == path.normcase('aa.log1')
            assert a.log2_path is None or path.normcase(path.basename(a.log2_path)) == path.normcase('aa.log2')
        elif i == 4:
            # No log files at all.
            assert a.log_path is None
            assert a.log1_path is None
            assert a.log2_path is None
        else:
            assert False
+
def test_deleted_tree_no_root_flag():
    """Deleted-tree recovery works even when the root key lacks KEY_HIVE_ENTRY."""

    with open(hive_deleted_tree_no_root_flag, 'rb') as f:
        hive = Registry.RegistryHive(f)

        assert hive.root_key().key_node.get_flags() & RegistryRecords.KEY_HIVE_ENTRY == 0
        hive.walk_everywhere()

        for item in RegistryRecover.Scanner(hive).scan():
            assert type(item) is Registry.RegistryKey
            assert item.path() in [ '1\\2\\3', '1\\2\\3\\4', '1\\2\\3\\4\\5', '1\\2\\3\\4\\New Key #1' ]
            assert item.path_partial() == item.path()
+
def test_deleted_tree_partial_path():
    """When the full path cannot be rebuilt, partial paths are still reported."""

    with open(hive_deleted_tree_partial_path, 'rb') as f:
        hive = Registry.RegistryHive(f)
        hive.walk_everywhere()

        for item in RegistryRecover.Scanner(hive).scan():
            assert type(item) is Registry.RegistryKey
            assert item.path_partial() in [ '3', '3\\4', '3\\4\\5', '3\\4\\New Key #1' ]
+
def test_flags_converter():
    """LogEntryFlagsToBaseBlockFlags combines log-entry and base-block flags as expected."""

    # (log_entry_flags, baseblock_flags, expected_result)
    cases = [
        (0, 0, 0),
        (1, 0, 1),
        (1, 1, 1),
        (0, 1, 0),
        (0, 3, 2),
        (1, 3, 3),
        (1, 2, 3),
    ]

    for log_entry_flags, baseblock_flags, expected in cases:
        assert RegistryFile.LogEntryFlagsToBaseBlockFlags(log_entry_flags, baseblock_flags) == expected
+
def test_hive_save():
    """save_recovered_hive() writes a clean hive after each recovery method."""

    def check_saved_hive(filepath):
        # The saved file must parse as a clean (non-dirty) hive and walk fully.
        with open(filepath, 'rb') as recovered:
            hive_recovered = Registry.RegistryHive(recovered)
            assert not hive_recovered.registry_file.baseblock.is_file_dirty
            hive_recovered.walk_everywhere()

    tmp_file = path.join(HIVES_DIR, 'temphive_delete_me')

    # Saving a dirty hive before recovery is not supported.
    with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log:
        hive = Registry.RegistryHive(primary)
        with pytest.raises(RegistryFile.NotSupportedException):
            hive.registry_file.save_recovered_hive(tmp_file)

    # Old-format recovery, then save.
    with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log:
        hive = Registry.RegistryHive(primary)
        hive.recover_old(log)
        hive.registry_file.save_recovered_hive(tmp_file)
        check_saved_hive(tmp_file)

    # New-format recovery for both fixtures, then save.
    for hive_path, log1_path, log2_path in [
        (hive_dirty_new1, hive_dirty_new1_log1, hive_dirty_new1_log2),
        (hive_dirty_new2, hive_dirty_new2_log1, hive_dirty_new2_log2),
    ]:
        with open(hive_path, 'rb') as primary, open(log1_path, 'rb') as log1, open(log2_path, 'rb') as log2:
            hive = Registry.RegistryHive(primary)
            hive.recover_new(log1, log2)
            hive.registry_file.save_recovered_hive(tmp_file)
            check_saved_hive(tmp_file)

    # Automatic recovery of a hive with a bad base block, then save.
    with open(hive_bad_baseblock, 'rb') as primary, open(hive_bad_baseblock_log1, 'rb') as log1, open(hive_bad_baseblock_log2, 'rb') as log2:
        hive = Registry.RegistryHive(primary)
        hive.recover_auto(None, log1, log2)
        hive.registry_file.save_recovered_hive(tmp_file)
        check_saved_hive(tmp_file)

    remove(tmp_file)
+
def test_slack():
    """Every record type exposes its slack space via get_slack()."""

    # Records constructed from a buffer alone.
    single_buffer_records = [
        (record_nk, RegistryRecords.KeyNode),
        (record_vk, RegistryRecords.KeyValue),
        (record_sk, RegistryRecords.KeySecurity),
        (record_li, RegistryRecords.IndexLeaf),
        (record_lh, RegistryRecords.HashLeaf),
        (record_lf, RegistryRecords.FastLeaf),
        (record_ri, RegistryRecords.IndexRoot),
        (record_db, RegistryRecords.BigData),
    ]

    for record_path, record_class in single_buffer_records:
        with open(record_path, 'rb') as f:
            assert record_class(f.read()).get_slack() == b'SLCK'

    # List records also take an element count; the same buffer parses
    # as both list flavors.
    with open(record_list, 'rb') as f:
        buf = f.read()
        assert RegistryRecords.KeyValuesList(buf, 3).get_slack() == b'SLCK'
        assert RegistryRecords.SegmentsList(buf, 3).get_slack() == b'SLCK'
+
def test_hive_slack():
    """Slack space is collected only while walking the hive."""

    with open(hive_slack, 'rb') as f:
        hive = Registry.RegistryHive(f)

        assert len(hive.effective_slack) == 0
        hive.walk_everywhere()
        assert len(hive.effective_slack) > 0
        assert b'SLCK' in hive.effective_slack
diff --git a/yarp-carver b/yarp-carver
new file mode 100755
index 0000000..e785cc3
--- /dev/null
+++ b/yarp-carver
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
+from yarp import RegistryCarve
+import argparse
+from collections import namedtuple
+import os
+import sys
+
+PROGRAM_NAME = 'yarp-carver'
+PROGRAM_VERSION = '1.0.0-beta1'
+
+Arguments = namedtuple('Arguments', [ 'source_file', 'output_dir' ])
+
def parse_args():
    """Parse command line arguments and return a named tuple (Arguments)."""

    parser = argparse.ArgumentParser(prog = PROGRAM_NAME, description = 'Carve Windows registry files from a disk image (or a similar source).', add_help = False, prefix_chars = '-')

    group_main = parser.add_argument_group('Main arguments')
    group_misc = parser.add_argument_group('Miscellaneous arguments')

    group_main.add_argument('file', help = 'a disk image')
    group_main.add_argument('outdir', help = 'an output directory')

    group_misc.add_argument('--help', action = 'help', help = 'show this help message and exit')
    group_misc.add_argument('--version', action = 'version', help = 'show the version number and exit', version = PROGRAM_VERSION)

    parsed_args = parser.parse_args()

    return Arguments(source_file = parsed_args.file, output_dir = parsed_args.outdir)
+
def make_sane_filename(filename):
    """Strip characters unsafe in local file names; never return an empty name."""

    # One C-level pass (str.translate) instead of chained replace() calls;
    # the removed set is: NUL, '/', '\\', ':'.
    filename = filename.translate({ ord(c): None for c in '\x00/\\:' })
    return filename if filename != '' else 'unknown'
+
args = parse_args()

# The output directory must already exist; it is not created implicitly.
if not os.path.isdir(args.output_dir):
    print('Output directory does not exist: {}'.format(args.output_dir), file = sys.stderr)
    sys.exit(255)

try:
    f = open(args.source_file, 'rb')
except (OSError, IOError):
    print('Source file cannot be opened: {}'.format(args.source_file), file = sys.stderr)
    sys.exit(255)

carver = RegistryCarve.Carver(f)
print('Offset\tSize\tTruncated\tFile name')
for carve_result in carver.carve():
    print('{}\t{}\t{}\t{}'.format(carve_result.offset, carve_result.size, carve_result.truncated, carve_result.filename))

    # The stored name may be a Windows path; keep only its last component,
    # then sanitize it for the local file system.
    regf_filename = make_sane_filename(carve_result.filename.split('\\')[-1])

    # Mark truncated hives in the output file name.
    suffix = '-truncated' if carve_result.truncated else ''
    output_file = os.path.join(args.output_dir, '{}_{}{}'.format(carve_result.offset, regf_filename, suffix))

    # Copy the carved region out of the source image.
    with open(output_file, 'wb') as out_f:
        f.seek(carve_result.offset)
        out_f.write(f.read(carve_result.size))

f.close()
diff --git a/yarp-print b/yarp-print
new file mode 100755
index 0000000..a5f6240
--- /dev/null
+++ b/yarp-print
@@ -0,0 +1,304 @@
+#!/usr/bin/env python3
+
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
+from yarp import *
+import argparse
+from collections import namedtuple
+import os
+import sys
+
+PROGRAM_NAME = 'yarp-print'
+PROGRAM_VERSION = '1.0.0-beta1'
+
+Arguments = namedtuple('Arguments', [ 'primary_file', 'do_recovery', 'do_deleted' ])
+
def parse_args():
    """Parse command line arguments and return a named tuple (Arguments)."""

    parser = argparse.ArgumentParser(prog = PROGRAM_NAME, description = 'Parse a Windows registry file, print all keys and values.', add_help = False, prefix_chars = '-')

    group_main = parser.add_argument_group('Main arguments')
    group_opt = parser.add_argument_group('Optional arguments')
    group_misc = parser.add_argument_group('Miscellaneous arguments')

    group_main.add_argument('file', help = 'a registry file (primary) to parse')
    group_opt.add_argument('--no-recovery', action = 'store_true', help = 'do not discover and use transaction log files to recover the hive (in memory)')
    group_opt.add_argument('--deleted', action = 'store_true', help = 'include deleted keys and values to the output')

    group_misc.add_argument('--help', action = 'help', help = 'show this help message and exit')
    group_misc.add_argument('--version', action = 'version', help = 'show the version number and exit', version = PROGRAM_VERSION)

    parsed_args = parser.parse_args()

    # Recovery is on by default; '--no-recovery' disables it.
    return Arguments(primary_file = parsed_args.file, do_recovery = not parsed_args.no_recovery, do_deleted = parsed_args.deleted)
+
def print_hive_information(hive):
    """Print hive-wide timestamps; an implausible reorganized timestamp is omitted."""

    print('Last written timestamp (UTC): {}'.format(hive.last_written_timestamp()))

    try:
        print('Last reorganized timestamp (UTC): {}'.format(hive.last_reorganized_timestamp()))
    except (ValueError, OverflowError):
        # The stored timestamp cannot be represented; skip the line.
        pass

    print()
+
def print_value(value):
    """Print one value: name, type, data size, and data in an appropriate form."""

    value_name = value.name()
    print('Default value' if value_name == '' else 'Value name: {}'.format(value_name))

    print('Value type: {}'.format(value.type_str()))
    print('Data size: {}'.format(value.data_size()))

    try:
        data = value.data()
    except UnicodeDecodeError:
        # The stored string is not valid Unicode; fall back to raw bytes.
        data = value.data_raw()

    if type(data) is bytes:
        print('Data (hexdump):')
        print(RegistryHelpers.HexDump(data))
    elif type(data) is list:
        print('Data (one list element per line):')
        for element in data:
            print(element)
    else:
        print('Data (decoded):')
        print(data)

    print()
+
def print_key(key):
    """Print one key (path, class name, timestamp, access bits) followed by its values."""

    key_path = key.path()
    print('Root key' if key_path == '' else 'Key path: {}'.format(key_path))

    classname = key.classname()
    if classname is not None:
        print('Class name: {}'.format(classname))

    print('Last written timestamp (UTC): {}'.format(key.last_written_timestamp()))
    print('Access bits: {}'.format(key.access_bits()))

    print()

    for value in key.values():
        print_value(value)

    print('---')
    print()
+
def print_key_recursive(key):
    """Print 'key' and then, depth-first, every key below it."""

    print_key(key)
    for subkey in key.subkeys():
        print_key_recursive(subkey)
+
def print_deleted_value(value):
    """Print one deleted value; its data may be partially or fully unrecoverable."""

    value_name = value.name()
    print('Default value' if value_name == '' else 'Value name: {}'.format(value_name))

    print('Value type: {}'.format(value.type_str()))
    print('Data size: {}'.format(value.data_size()))

    try:
        data = value.data()
    except Registry.RegistryException:
        # The data cells are gone (or reused); nothing to show.
        data = None
    except UnicodeDecodeError:
        data = value.data_raw()

    if data is None:
        print('Data not recovered')
    elif type(data) is bytes:
        print('Data (hexdump):')
        print(RegistryHelpers.HexDump(data))
    elif type(data) is list:
        print('Data (one list element per line):')
        for element in data:
            print(element)
    else:
        print('Data (decoded):')
        print(data)

    print()
+
def print_deleted_key(key):
    """Print one deleted key; path, class name, timestamp and values may be unrecoverable."""

    try:
        key_path = key.path()
    except Registry.RegistryException:
        key_path = None

    if key_path is None:
        # The full path cannot be rebuilt; show what is known.
        print('Unknown key path')
        print('Partial key path: {}'.format(key.path_partial()))
        print('Key name: {}'.format(key.name()))
    elif key_path == '':
        print('Root key')
    else:
        print('Key path: {}'.format(key_path))

    try:
        classname = key.classname()
    except (Registry.RegistryException, UnicodeDecodeError):
        classname = None

    if classname is not None:
        print('Class name: {}'.format(classname))

    try:
        print('Last written timestamp (UTC): {}'.format(key.last_written_timestamp()))
    except (ValueError, OverflowError):
        print('Last written timestamp is not plausible')

    print('Access bits: {}'.format(key.access_bits()))

    print()

    try:
        for value in key.values():
            print_deleted_value(value)
    except Registry.RegistryException:
        # The values list is unrecoverable past this point; keep what was printed.
        pass

    print('---')
    print()
+
# Currently, we can use functions for deleted keys and values to print keys and values in a truncated hive.
print_truncated_key = print_deleted_key
print_truncated_value = print_deleted_value

args = parse_args()

if not os.path.isfile(args.primary_file):
    print('Primary file does not exist: {}'.format(args.primary_file), file = sys.stderr)
    sys.exit(255)

primary = open(args.primary_file, 'rb')

# Decide whether the primary file parses as a complete hive or only as a
# truncated one. Base-block-level errors are fatal and propagate.
try:
    hive = Registry.RegistryHive(primary)
except (RegistryFile.BaseBlockException, RegistryFile.NotSupportedException):
    raise
except Registry.RegistryException:
    truncated = True
else:
    truncated = False

if truncated:
    print('Primary file seems to be truncated, only available keys and values will be printed', file = sys.stderr)
    hive = Registry.RegistryHiveTruncated(primary)

    print('Hive information:')
    print()
    print_hive_information(hive)

    print('Keys and values (allocated):')
    print()

    # Keys are printed as they are found; values are collected and
    # printed together afterwards.
    all_values = []
    for item in hive.scan():
        if type(item) is Registry.RegistryValue:
            all_values.append(item)
        elif type(item) is Registry.RegistryKey:
            print_truncated_key(item)

    print('All values (allocated):')
    print()
    for value in all_values:
        print_truncated_value(value)

    if args.do_deleted:
        print('Unallocated keys and values (may contain reallocated data):')
        print()

        scanner = RegistryRecover.Scanner(hive, False)
        deleted_values = []

        for item in scanner.scan():
            if type(item) is Registry.RegistryKey:
                print_deleted_key(item)
            elif type(item) is Registry.RegistryValue:
                deleted_values.append(item)

        print('Unallocated values (all, may contain reallocated data):')
        print()
        for value in deleted_values:
            print_deleted_value(value)

    # Nothing more can be done with a truncated hive.
    sys.exit(0)

if args.do_recovery:
    # Discover and open the transaction log files next to the primary file.
    log_files = RegistryHelpers.DiscoverLogFiles(args.primary_file)

    log = open(log_files.log_path, 'rb') if log_files.log_path is not None else None
    log1 = open(log_files.log1_path, 'rb') if log_files.log1_path is not None else None
    log2 = open(log_files.log2_path, 'rb') if log_files.log2_path is not None else None

    try:
        recovery_result = hive.recover_auto(log, log1, log2)
    except Registry.AutoRecoveryException:
        # Best effort: report the failure and print the unrecovered hive.
        print('An error has occurred when recovering a hive using a transaction log', file = sys.stderr)

hive.walk_everywhere()

print('Hive information:')
print()
print_hive_information(hive)

print('Keys and values:')
print()
print_key_recursive(hive.root_key())

if args.do_deleted:
    print('Deleted keys and values (may contain reallocated data):')
    print()

    scanner = RegistryRecover.Scanner(hive)
    deleted_values = []

    for item in scanner.scan():
        if type(item) is Registry.RegistryKey:
            print_deleted_key(item)
        elif type(item) is Registry.RegistryValue:
            deleted_values.append(item)

    print('Deleted values (all, may contain reallocated data):')
    print()
    for value in deleted_values:
        print_deleted_value(value)

hive = None
primary.close()

if args.do_recovery:
    # Close whichever log files were opened for recovery.
    for log_file in (log, log1, log2):
        if log_file is not None:
            log_file.close()
diff --git a/yarp-timeline b/yarp-timeline
new file mode 100755
index 0000000..f74b231
--- /dev/null
+++ b/yarp-timeline
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
+from yarp import *
+import argparse
+from collections import namedtuple
+import os
+import sys
+
PROGRAM_NAME = 'yarp-timeline'
PROGRAM_VERSION = '1.0.0-beta1'

# Parsed command line options: the primary file path and the --fast flag.
Arguments = namedtuple('Arguments', [ 'primary_file', 'faster' ])
# One timeline row: a key path (or a bare key name when the full path cannot be rebuilt),
# the deletion state, and the key's last written timestamp.
TimelineEntry = namedtuple('TimelineEntry', [ 'path_or_name', 'is_deleted', 'is_path_known', 'timestamp' ])
+
def parse_args():
	"""Parse command line arguments and return a named tuple (Arguments)."""

	parser = argparse.ArgumentParser(prog = PROGRAM_NAME, description = 'Parse a Windows registry file, print the timeline for keys (including deleted ones).', add_help = False, prefix_chars = '-')

	# Three help sections: positional input, options, and the usual --help/--version pair.
	group_main = parser.add_argument_group('Main arguments')
	group_opt = parser.add_argument_group('Optional arguments')
	group_misc = parser.add_argument_group('Miscellaneous arguments')

	group_main.add_argument('file', help = 'a registry file (primary) to parse')

	group_opt.add_argument('--fast', action = 'store_true', help = 'do not use intermediate states to extend the timeline when applying a transaction log (new format)')

	group_misc.add_argument('--help', action = 'help', help = 'show this help message and exit')
	group_misc.add_argument('--version', action = 'version', help = 'show the version number and exit', version = PROGRAM_VERSION)

	parsed_args = parser.parse_args()

	return Arguments(primary_file = parsed_args.file, faster = parsed_args.fast)
+
keys_list = [] # Accumulated TimelineEntry tuples (deduplicated), filled by extend_keys_list().

def extend_keys_list(do_deleted = False):
	"""Walk the global 'hive' and append new TimelineEntry tuples to the global 'keys_list'.
	When 'do_deleted' is True, also scan for deleted keys (this calls hive.walk_everywhere() first).
	This function is also installed as 'hive.log_entry_callback', so it may run once per applied log entry.
	"""

	def process_key(key):
		global keys_list

		key_parsed = parse_key(key, False)
		if key_parsed is not None and key_parsed not in keys_list:
			keys_list.append(key_parsed)

		for subkey in key.subkeys():
			# A broken subkey should not abort the whole walk; skip it and continue.
			try:
				process_key(subkey)
			except Registry.RegistryException:
				pass

	global hive

	process_key(hive.root_key())

	if do_deleted:
		global keys_list

		try:
			hive.walk_everywhere()
		except Registry.RegistryException:
			# The hive is inconsistent; do not attempt the deleted-keys scan.
			return

		scanner = RegistryRecover.Scanner(hive)
		for item in scanner.scan():
			if type(item) is Registry.RegistryKey:
				key_parsed = parse_key(item, True)
				if key_parsed is not None and key_parsed not in keys_list:
					keys_list.append(key_parsed)
+
def parse_key(key, is_deleted):
	"""Convert a key into a TimelineEntry tuple; return None if the timestamp cannot be decoded."""

	is_path_known = True
	try:
		path_or_name = key.path()
	except Registry.RegistryException:
		# Fall back to the bare key name when the full path cannot be reconstructed.
		path_or_name = key.name()
		is_path_known = False

	try:
		timestamp = key.last_written_timestamp()
	except (ValueError, OverflowError):
		# The on-disk timestamp does not fit into a datetime object.
		return None

	return TimelineEntry(path_or_name = path_or_name, is_deleted = is_deleted, is_path_known = is_path_known, timestamp = timestamp)
+
def print_timeline_header():
	"""Print the tab-separated header line for the timeline output."""

	columns = [ 'Registry file', 'Key path/name', 'Is deleted', 'Is path known', 'Timestamp (UTC)' ]
	print('\t'.join(columns))
+
def print_timeline_entry(entry, registry_file):
	"""Print one timeline entry (a TimelineEntry) as a tab-separated line, prefixed with the registry file name."""

	fields = ( registry_file, entry.path_or_name, entry.is_deleted, entry.is_path_known, entry.timestamp )
	print('\t'.join([ '{}'.format(field) for field in fields ]))
+
def print_timeline(registry_file):
	"""Print the header line and then every collected entry from the global 'keys_list'."""

	global keys_list

	print_timeline_header()
	for timeline_entry in keys_list:
		print_timeline_entry(timeline_entry, registry_file)
+
args = parse_args()

if not os.path.isfile(args.primary_file):
	print('Primary file does not exist: {}'.format(args.primary_file), file = sys.stderr)
	sys.exit(255)

primary = open(args.primary_file, 'rb')
hive = Registry.RegistryHive(primary)

extend_keys_list(True) # Extend the list of keys (including deleted ones) for the first time, before applying a transaction log.

# Discover transaction log files (.LOG/.LOG1/.LOG2) located next to the primary file.
log_files = RegistryHelpers.DiscoverLogFiles(args.primary_file)

log = None
if log_files.log_path is not None:
	log = open(log_files.log_path, 'rb')

log1 = None
if log_files.log1_path is not None:
	log1 = open(log_files.log1_path, 'rb')

log2 = None
if log_files.log2_path is not None:
	log2 = open(log_files.log2_path, 'rb')

if not args.faster:
	hive.log_entry_callback = extend_keys_list # Extend the list of keys (without deleted ones) each time a log entry has been applied.

try:
	recovery_result = hive.recover_auto(log, log1, log2)
except Registry.AutoRecoveryException:
	print('An error has occurred when recovering a hive using a transaction log', file = sys.stderr)
else:
	if recovery_result.recovered and not recovery_result.is_new_log:
		extend_keys_list() # Finally, extend the list of keys (without deleted ones) after an old transaction log file has been applied.
	elif recovery_result.recovered and recovery_result.is_new_log and args.faster:
		# With --fast, intermediate log states were skipped, so collect the final state now.
		extend_keys_list()

# Newest entries first.
keys_list.sort(key = lambda x: x.timestamp, reverse = True)
print_timeline(args.primary_file)

# Release the hive object before closing the underlying file objects.
hive = None
primary.close()

if log is not None:
	log.close()

if log1 is not None:
	log1.close()

if log2 is not None:
	log2.close()
diff --git a/yarp/Registry.py b/yarp/Registry.py
new file mode 100644
index 0000000..858f156
--- /dev/null
+++ b/yarp/Registry.py
@@ -0,0 +1,858 @@
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
+from __future__ import unicode_literals
+
+from .RegistryFile import RegistryException
+from . import RegistryFile
+from . import RegistryRecords
+from struct import unpack
+from datetime import datetime, timedelta
+from collections import namedtuple
+
# Mapping of on-disk registry value type constants to their canonical Windows API names;
# used when rendering a value type as a string (see RegistryValue.type_str()).
ValueTypes = {
RegistryRecords.REG_NONE: 'REG_NONE',
RegistryRecords.REG_SZ: 'REG_SZ',
RegistryRecords.REG_EXPAND_SZ: 'REG_EXPAND_SZ',
RegistryRecords.REG_BINARY: 'REG_BINARY',
RegistryRecords.REG_DWORD: 'REG_DWORD',
RegistryRecords.REG_DWORD_BIG_ENDIAN: 'REG_DWORD_BIG_ENDIAN',
RegistryRecords.REG_LINK: 'REG_LINK',
RegistryRecords.REG_MULTI_SZ: 'REG_MULTI_SZ',
RegistryRecords.REG_RESOURCE_LIST: 'REG_RESOURCE_LIST',
RegistryRecords.REG_FULL_RESOURCE_DESCRIPTOR: 'REG_FULL_RESOURCE_DESCRIPTOR',
RegistryRecords.REG_RESOURCE_REQUIREMENTS_LIST: 'REG_RESOURCE_REQUIREMENTS_LIST',
RegistryRecords.REG_QWORD: 'REG_QWORD'
}

# Result of RegistryHive.recover_auto(): whether a log was applied, whether it was in the
# new format (None when not recovered), and the log file objects actually used.
AutoRecoveryResult = namedtuple('AutoRecoveryResult', [ 'recovered', 'is_new_log', 'file_objects' ])
+
class WalkException(RegistryException):
	"""This exception is raised when a walk error has occurred.
	A walk error is a generic error when traversing registry records (entities).
	"""

	def __init__(self, value):
		# Keep the original value; it is rendered (via repr) by __str__.
		self._value = value

	def __str__(self):
		return '{!r}'.format(self._value)
+
class AutoRecoveryException(RegistryException):
	"""This exception is raised when a primary file cannot be recovered in the 'auto' mode.
	In particular, when no recovery scheme has been found.
	"""

	def __init__(self, value):
		# Keep the original value; it is rendered (via repr) by __str__.
		self._value = value

	def __str__(self):
		return '{!r}'.format(self._value)
+
def DecodeFiletime(Timestamp):
	"""Decode the FILETIME timestamp and return the datetime object.

	'Timestamp' is an integer number of 100-nanosecond intervals since 1601-01-01 (UTC).
	Integer arithmetic (seconds plus microseconds) is used instead of 'Timestamp / 10',
	because float division loses precision for realistic FILETIME magnitudes; the
	sub-microsecond remainder is truncated. Raises OverflowError for values outside
	the datetime range.
	"""

	return datetime(1601, 1, 1) + timedelta(seconds = Timestamp // 10000000, microseconds = (Timestamp % 10000000) // 10)
+
def DecodeUnicode(Buffer, RemoveGarbage = False):
	"""Decode the Unicode (UTF-16LE) string and return it.
	When 'RemoveGarbage' is True, this function will attempt to sanitize a null-terminated Unicode string.
	"""

	if RemoveGarbage and len(Buffer) > 2:
		# Windows stores null-terminated Unicode strings: cut the buffer at the first
		# UTF-16LE null character, keeping that null in the decoded output.
		for idx in range(0, len(Buffer), 2):
			if Buffer[idx : idx + 2] == b'\x00\x00':
				return Buffer[ : idx + 2].decode('utf-16le')

	return Buffer.decode('utf-16le')
+
def DecodeASCII(Buffer):
	"""Decode the ASCII (extended) string and return it."""

	# Latin-1 maps each byte to the code point with the same number, which is what expanding
	# each byte with a null byte and decoding as UTF-16LE would produce.
	return str(Buffer, 'latin-1')
+
def DecodeUnicodeMulti(Buffer, RemoveGarbage = False):
	"""Decode the Unicode (UTF-16LE) array of null-terminated strings and return it as is.
	When 'RemoveGarbage' is True, this function will attempt to sanitize a null-terminated Unicode array.
	"""

	end_marker = b'\x00\x00\x00\x00' # An empty string terminates the array.

	if RemoveGarbage and len(Buffer) > 4:
		# Drop anything after the end-of-array marker, keeping the marker itself.
		idx = 0
		while idx < len(Buffer):
			if Buffer[idx : idx + 4] == end_marker:
				return DecodeUnicode(Buffer[ : idx + 4])

			idx += 2

	return DecodeUnicode(Buffer)
+
class RegistryHive(object):
	"""This is a high-level class for a registry hive."""

	registry_file = None
	"""A primary file of a hive (a RegistryFile.PrimaryFile object)."""

	log_entry_callback = None
	"""A callback function executed when a log entry has been applied."""

	effective_slack = None
	"""A set of data strings from different slack space locations to be used in the deleted data recovery."""

	def __init__(self, file_object, tolerate_minor_errors = True):
		self.registry_file = RegistryFile.PrimaryFile(file_object, tolerate_minor_errors)
		self.tolerate_minor_errors = tolerate_minor_errors
		self.effective_slack = set()

	def root_key(self):
		"""Get and return a root key node (a RegistryKey object)."""

		# The root key sits at layer 0; see the RegistryKey docstring for the 'layer' semantics.
		return RegistryKey(self.registry_file, self.registry_file.get_root_cell(), 0, self.registry_file.baseblock.effective_root_cell_offset, self.tolerate_minor_errors)

	def last_written_timestamp(self):
		"""Get, decode and return a last written timestamp (a datetime object)."""

		return DecodeFiletime(self.registry_file.baseblock.effective_last_written_timestamp)

	def last_reorganized_timestamp(self):
		"""Get, decode and return a last reorganized timestamp (a datetime object), or None if the hive was never reorganized."""

		timestamp = self.registry_file.baseblock.effective_last_reorganized_timestamp
		if timestamp is not None:
			return DecodeFiletime(timestamp)

	def find_key(self, path):
		"""Find a key node by its path (without a name of a root key), return a key node (a RegistryKey object) or None, if not found."""

		if path == '\\' or len(path) == 0:
			return self.root_key()

		# A leading backslash is optional.
		if path[0] == '\\':
			path = path[1 : ]

		current_key = self.root_key()
		path_components = path.split('\\')

		# Descend one component at a time; subkey() returns None when a component is missing.
		i = 0
		while i < len(path_components) and current_key is not None:
			current_key = current_key.subkey(path_components[i])
			i += 1

		return current_key

	def recover_new(self, file_object_log_or_log1, file_object_log2 = None):
		"""Recover a primary file using a single transaction log file or two transaction log files.
		When 'file_object_log2' is None, a single transaction log file is used.
		Transaction log files should be in the new format.
		"""

		if file_object_log2 is None:
			self.registry_file.apply_new_log_file(file_object_log_or_log1, self.log_entry_callback)
		else:
			self.registry_file.apply_new_log_files(file_object_log_or_log1, file_object_log2, self.log_entry_callback)

	def recover_old(self, file_object_log):
		"""Recover a primary file using a single transaction log file.
		A transaction log file should be in the old format.
		"""

		self.registry_file.apply_old_log_file(file_object_log)

	def recover_auto(self, file_object_log, file_object_log1, file_object_log2):
		"""Recover a primary file using one, two or three candidate transaction log files (the 'auto' mode).
		The format of transaction log files (new or old) and the logging scheme (single-logging or dual-logging) are guessed.
		If a transaction log file with a corresponding extension (.LOG/.LOG1/.LOG2) is not present, use None as an argument for that file.
		If a primary file is not dirty, no exception is raised. A named tuple (AutoRecoveryResult) is returned.
		"""

		def try_log(file_object_log, log_class):
			# Probe a candidate file against a specific log parser class;
			# None means "not a valid log of this format".
			if file_object_log is None:
				return

			try:
				log = log_class(file_object_log)
			except (RegistryFile.ReadException, RegistryFile.BaseBlockException, RegistryFile.FileSizeException, RegistryFile.NotSupportedException, RegistryFile.DirtyVectorException):
				return
			else:
				return log

		if not self.registry_file.baseblock.is_file_dirty:
			# Nothing to recover.
			return AutoRecoveryResult(recovered = False, is_new_log = None, file_objects = None)

		log, log1, log2 = file_object_log, file_object_log1, file_object_log2
		use_log = log is not None

		# .LOG1 and .LOG2 come in pairs in the dual-logging scheme; one without the other is invalid.
		if (log1 is not None and log2 is None) or (log1 is None and log2 is not None):
			raise AutoRecoveryException('No valid recovery scheme possible')

		if use_log and log1 is None and log2 is None:
			# This is the single-logging scheme.
			log_new = try_log(log, RegistryFile.NewLogFile)
			if log_new is not None:
				self.recover_new(log)
				return AutoRecoveryResult(recovered = True, is_new_log = True, file_objects = [log])

			log_old = try_log(log, RegistryFile.OldLogFile)
			if log_old is not None:
				self.recover_old(log)
				return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log])

		# Probe every candidate file in both formats. 'log_new'/'log_old' exist only when
		# 'use_log' is true; all later reads are guarded by 'use_log and ...'.
		if use_log:
			log_new = try_log(log, RegistryFile.NewLogFile)

		log1_new = try_log(log1, RegistryFile.NewLogFile)
		log2_new = try_log(log2, RegistryFile.NewLogFile)

		if use_log:
			log_old = try_log(log, RegistryFile.OldLogFile)

		log1_old = try_log(log1, RegistryFile.OldLogFile)
		log2_old = try_log(log2, RegistryFile.OldLogFile)

		# We prefer the new format and the dual-logging scheme.
		if log1_new is not None and log2_new is not None:
			self.recover_new(log1, log2)
			return AutoRecoveryResult(recovered = True, is_new_log = True, file_objects = [log1, log2])

		if log1_new is not None:
			self.recover_new(log1)
			return AutoRecoveryResult(recovered = True, is_new_log = True, file_objects = [log1])

		if log2_new is not None:
			self.recover_new(log2)
			return AutoRecoveryResult(recovered = True, is_new_log = True, file_objects = [log2])

		# Now, try the single-logging scheme for the new format.
		if use_log and log_new is not None:
			self.recover_new(log)
			return AutoRecoveryResult(recovered = True, is_new_log = True, file_objects = [log])

		# Now, switch to the old format (we still prefer the dual-logging scheme).
		if log1_old is not None and log2_old is not None:
			log1_timestamp = log1_old.baseblock.effective_last_written_timestamp
			log2_timestamp = log2_old.baseblock.effective_last_written_timestamp
			if log1_timestamp >= log2_timestamp: # Select the latest log.
				self.recover_old(log1)
				return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log1])
			else:
				self.recover_old(log2)
				return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log2])

		if log1_old is not None:
			self.recover_old(log1)
			return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log1])

		if log2_old is not None:
			self.recover_old(log2)
			return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log2])

		# Now, try the single-logging scheme.
		if use_log and log_old is not None:
			self.recover_old(log)
			return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log])

		# We failed.
		raise AutoRecoveryException('No obvious recovery scheme found')

	def save_recovered_hive(self, filepath):
		"""Save the recovered hive to a new primary file (using its path)."""

		self.registry_file.save_recovered_hive(filepath)

	def rollback_changes(self):
		"""Discard recovered data and use a primary file as is."""

		self.registry_file.discard_writable_file_object()

	def walk_everywhere(self):
		"""Visit and record each referenced cell, collect the slack space data. This will also ensure that a hive is consistent."""

		def process_key(key):
			# Touch every cell reachable from this key: security, class name, value data, subkeys.
			security = key.security()
			if security is not None:
				security_descriptor = security.descriptor()

			classname = key.classname()

			for value in key.values():
				value_data_raw = value.data_raw()

			for subkey in key.subkeys():
				process_key(subkey)

			for slack in key.effective_slack:
				if len(slack) >= 4: # Skip the slack space data if it is less than 4 bytes.
					self.effective_slack.add(slack)

		self.registry_file.record_referenced_cells = True
		try:
			process_key(self.root_key())
		except RegistryException:
			# Stop recording before propagating the error, leaving the file object in a sane state.
			self.registry_file.record_referenced_cells = False
			raise

		self.registry_file.record_referenced_cells = False

		self.registry_file.build_map_free()
+
class RegistryKey(object):
	"""This is a high-level class for a registry key."""

	registry_file = None
	"""A primary file of a hive (a RegistryFile.PrimaryFile object)."""

	key_node = None
	"""A KeyNode object."""

	effective_slack = None
	"""A set of data strings from different slack space locations to be used in the deleted data recovery."""

	def __init__(self, primary_file, buf, layer, relative_cell_offset, tolerate_minor_errors = False, naive = False):
		"""When working with deleted registry keys, set 'naive' to True, 'relative_cell_offset' and 'layer' to None.
		For a root key, set 'layer' to 0 (increment 'layer' by one when going to subkeys of a current key and decrement it by one when going to a parent key).
		"""

		self.registry_file = primary_file
		self.naive = naive
		if not self.naive:
			self.get_cell = self.registry_file.get_cell
		else:
			# Naive cell access does not require the cell to be referenced (used for deleted keys).
			self.get_cell = self.registry_file.get_cell_naive

		self.key_node = RegistryRecords.KeyNode(buf)
		self.relative_cell_offset = relative_cell_offset
		self.layer = layer
		self.tolerate_minor_errors = tolerate_minor_errors
		self.effective_slack = set()

	def last_written_timestamp(self):
		"""Get, decode and return a last written timestamp (a datetime object)."""

		return DecodeFiletime(self.key_node.get_last_written_timestamp())

	def access_bits(self):
		"""Get and return access bits, or None for hives of version 1 (which do not store them)."""

		if self.registry_file.baseblock.effective_version == 1:
			return

		return self.key_node.get_access_bits()

	def name(self):
		"""Get, decode and return a key name string."""

		name_buf = self.key_node.get_key_name()
		# The KEY_COMP_NAME flag means the name is stored with one byte per character.
		is_ascii = self.registry_file.baseblock.effective_version > 1 and self.key_node.get_flags() & RegistryRecords.KEY_COMP_NAME > 0
		if is_ascii:
			name = DecodeASCII(name_buf)
		else:
			name = DecodeUnicode(name_buf)

		# A backslash is the path separator; it cannot be part of a valid key name.
		if name.find('\\') != -1:
			if not self.naive:
				raise WalkException('Key node does not have a valid name, key path: {}'.format(self.path()))
			else:
				# Do not build the path, if we are trying to recover a key node.
				raise WalkException('Key node does not have a valid name')

		return name

	def classname(self):
		"""Get, decode and return a class name string, or None if the key has no class name."""

		classname_length = self.key_node.get_classname_length()
		if classname_length > 0:
			classname_buf = self.get_cell(self.key_node.get_classname_offset())
			return DecodeUnicode(classname_buf[ : classname_length])

	def parent(self):
		"""Get and return a parent key node (a RegistryKey object), or None for a root key."""

		if self.layer == 0:
			# This is the root key.
			return

		# When the layer is unknown (deleted keys), fall back to the key node flags
		# and the root cell offset to detect the root key.
		if self.layer is None and (self.key_node.get_flags() & RegistryRecords.KEY_HIVE_ENTRY > 0 or self.relative_cell_offset == self.registry_file.baseblock.effective_root_cell_offset):
			# This is the root key.
			return

		parent_offset = self.key_node.get_parent()
		parent_buf = self.get_cell(parent_offset)

		layer_up = None
		if self.layer is not None:
			layer_up = self.layer - 1

		parent_key_node = RegistryKey(self.registry_file, parent_buf, layer_up, parent_offset, self.tolerate_minor_errors, self.naive)

		return parent_key_node

	def path(self, show_root = False):
		"""Construct and return a path to a key node.
		When 'show_root' is True, a name of a root key node is included.
		"""

		path_components = [ self.name() ]

		if self.naive:
			# In the naive mode, parent links may form a loop; track visited offsets.
			track = set()
			track.add(self.key_node.get_parent())

		p = self.parent()
		while p is not None:
			if self.naive:
				p_parent = p.key_node.get_parent()
				if p_parent in track:
					raise WalkException('Invalid path when following parent keys')

				track.add(p_parent)

			path_components.append(p.name())
			p = p.parent()

		path_components.reverse()
		if not show_root:
			path_components = path_components[ 1 : ]

		return '\\'.join(path_components)

	def path_partial(self, show_root = False):
		"""Construct and return a path (possibly a partial one) to a key node.
		When 'show_root' is True, a name of a root key node is included.
		"""

		path_components = [ self.name() ]

		if self.naive:
			# In the naive mode, parent links may form a loop; track visited offsets.
			track = set()
			track.add(self.key_node.get_parent())

		try:
			p = self.parent()
			while p is not None:
				if self.naive:
					p_parent = p.key_node.get_parent()
					if p_parent in track:
						raise WalkException('Invalid path when following parent keys')

					track.add(p_parent)

				path_components.append(p.name())
				p = p.parent()
		except RegistryException:
			# The chain of parent keys is broken; return what we have collected so far.
			root_found = False
		else:
			root_found = True

		path_components.reverse()
		if root_found and not show_root:
			path_components = path_components[ 1 : ]

		return '\\'.join(path_components)

	def subkeys(self):
		"""This method yields subkeys (RegistryKey objects)."""

		subkeys_names = set()

		def process_leaf(leaf_buf):
			# Parse one subkeys list leaf ('li', 'lf' or 'lh') and yield its subkeys,
			# validating that each subkey points back to this key as its parent.
			leaf_signature = leaf_buf[ : 2]

			if leaf_signature == b'li':
				leaf = RegistryRecords.IndexLeaf(leaf_buf)
			elif leaf_signature == b'lf':
				leaf = RegistryRecords.FastLeaf(leaf_buf)
			else: # b'lh'
				leaf = RegistryRecords.HashLeaf(leaf_buf)

			slack = leaf.get_slack()
			self.effective_slack.add(slack)

			layer_down = None
			if self.layer is not None:
				layer_down = self.layer + 1

			# The three loops below are identical except for the leaf type dispatched on.
			if type(leaf) is RegistryRecords.IndexLeaf:
				for leaf_element in leaf.elements():
					subkey_offset = leaf_element.relative_offset

					buf = self.get_cell(subkey_offset)
					subkey = RegistryKey(self.registry_file, buf, layer_down, subkey_offset, self.tolerate_minor_errors, self.naive)
					if self.relative_cell_offset is not None and subkey.key_node.get_parent() != self.relative_cell_offset:
						if not self.naive:
							raise WalkException('Key node does not point to a valid parent key node, key path: {}, name: {}'.format(self.path(), subkey.name()))
						else:
							# Do not build the path, if we are trying to recover a key node.
							raise WalkException('Key node does not point to a valid parent key node')

					yield subkey

			if type(leaf) is RegistryRecords.FastLeaf:
				for leaf_element in leaf.elements():
					subkey_offset = leaf_element.relative_offset

					buf = self.get_cell(subkey_offset)
					subkey = RegistryKey(self.registry_file, buf, layer_down, subkey_offset, self.tolerate_minor_errors, self.naive)
					if self.relative_cell_offset is not None and subkey.key_node.get_parent() != self.relative_cell_offset:
						if not self.naive:
							raise WalkException('Key node does not point to a valid parent key node, key path: {}, name: {}'.format(self.path(), subkey.name()))
						else:
							# Do not build the path, if we are trying to recover a key node.
							raise WalkException('Key node does not point to a valid parent key node')

					yield subkey

			if type(leaf) is RegistryRecords.HashLeaf:
				for leaf_element in leaf.elements():
					subkey_offset = leaf_element.relative_offset

					buf = self.get_cell(subkey_offset)
					subkey = RegistryKey(self.registry_file, buf, layer_down, subkey_offset, self.tolerate_minor_errors, self.naive)
					if self.relative_cell_offset is not None and subkey.key_node.get_parent() != self.relative_cell_offset:
						if not self.naive:
							raise WalkException('Key node does not point to a valid parent key node, key path: {}, name: {}'.format(self.path(), subkey.name()))
						else:
							# Do not build the path, if we are trying to recover a key node.
							raise WalkException('Key node does not point to a valid parent key node')

					yield subkey

		if self.key_node.get_subkeys_count() > 0:
			list_offset = self.key_node.get_subkeys_list_offset()
			list_buf = self.get_cell(list_offset)
			list_signature = list_buf[ : 2]

			# Subkeys must be unique and sorted by their uppercase names; validate while yielding.
			prev_name = None

			if list_signature == b'ri':
				# An index root points to multiple leaves.
				index_root = RegistryRecords.IndexRoot(list_buf)

				slack = index_root.get_slack()
				self.effective_slack.add(slack)

				for leaf_offset in index_root.elements():
					list_buf = self.get_cell(leaf_offset)
					for subkey in process_leaf(list_buf):
						curr_name = subkey.name().upper()
						if curr_name not in subkeys_names:
							subkeys_names.add(curr_name)
						else:
							if not self.naive:
								raise WalkException('Duplicate subkey, key path: {}, name: {}'.format(self.path(), curr_name))
							else:
								# Do not build the path, if we are trying to recover a key node.
								raise WalkException('Duplicate subkey')

						if prev_name is not None and curr_name <= prev_name:
							if not self.naive:
								raise WalkException('Wrong order of subkeys, key path: {}, offending name: {}'.format(self.path(), curr_name))
							else:
								# Do not build the path, if we are trying to recover a key node.
								raise WalkException('Wrong order of subkeys')

						prev_name = curr_name

						yield subkey
			else:
				for subkey in process_leaf(list_buf):
					curr_name = subkey.name().upper()
					if curr_name not in subkeys_names:
						subkeys_names.add(curr_name)
					else:
						if not self.naive:
							raise WalkException('Duplicate subkey, key path: {}, name: {}'.format(self.path(), curr_name))
						else:
							# Do not build the path, if we are trying to recover a key node.
							raise WalkException('Duplicate subkey')

					if prev_name is not None and curr_name <= prev_name:
						if not self.naive:
							raise WalkException('Wrong order of subkeys, key path: {}, offending name: {}'.format(self.path(), curr_name))
						else:
							# Do not build the path, if we are trying to recover a key node.
							raise WalkException('Wrong order of subkeys')

					prev_name = curr_name

					yield subkey

	def subkey(self, name):
		"""This method returns a subkey by its name (a RegistryKey object) or None, if not found."""

		# Key names are compared case-insensitively.
		name = name.lower()
		for current_subkey in self.subkeys():
			curr_name = current_subkey.name().lower()
			if name == curr_name:
				return current_subkey

	def subkeys_count(self):
		"""Get and return a number of subkeys. Volatile subkeys are not counted."""

		return self.key_node.get_subkeys_count()

	def values(self):
		"""This method yields key values (RegistryValue objects)."""

		values_names = set()

		values_count = self.key_node.get_key_values_count()
		# Keys with the predefined-handle flag do not have a usable values list.
		if values_count > 0 and self.key_node.get_flags() & RegistryRecords.KEY_PREDEF_HANDLE == 0:
			list_offset = self.key_node.get_key_values_list_offset()
			list_buf = self.get_cell(list_offset)

			values_list = RegistryRecords.KeyValuesList(list_buf, values_count)

			slack = values_list.get_slack()
			self.effective_slack.add(slack)

			for value_offset in values_list.elements():
				buf = self.get_cell(value_offset)
				curr_value = RegistryValue(self.registry_file, buf, self.naive)
				curr_name = curr_value.name().lower()
				if curr_name not in values_names:
					values_names.add(curr_name)
				else:
					if not self.naive:
						raise WalkException('Duplicate value name, key path: {}, value name: {}'.format(self.path(), curr_name))
					else:
						# Do not build the path, if we are trying to recover a key node.
						raise WalkException('Duplicate value name')

				yield curr_value

	def value(self, name = ''):
		"""This method returns a key value by its name (a RegistryValue object) or None, if not found.
		When 'name' is empty, a default value is returned (if any).
		"""

		# Value names are compared case-insensitively.
		name = name.lower()

		for curr_value in self.values():
			curr_name = curr_value.name().lower()
			if name == curr_name:
				return curr_value

	def values_count(self):
		"""Get and return a number of key values."""

		if self.key_node.get_flags() & RegistryRecords.KEY_PREDEF_HANDLE > 0:
			return 0

		return self.key_node.get_key_values_count()

	def security(self):
		"""Get and return a key security item (a RegistrySecurity object), or None if the key has none."""

		key_security_offset = self.key_node.get_key_security_offset()
		if key_security_offset != RegistryFile.CELL_OFFSET_NIL:
			buf = self.get_cell(key_security_offset)
			return RegistrySecurity(self.registry_file, buf)

	def __str__(self):
		return 'RegistryKey, name: {}, subkeys: {}, values: {}'.format(self.name(), self.subkeys_count(), self.values_count())
+
class RegistrySecurity(object):
	"""This is a high-level class for a key security item."""

	registry_file = None
	"""A primary file of a hive (a RegistryFile.PrimaryFile object)."""

	key_security = None
	"""A KeySecurity object."""

	def __init__(self, primary_file, buf):
		# Parse the raw cell data into a KeySecurity record right away.
		self.registry_file = primary_file
		self.key_security = RegistryRecords.KeySecurity(buf)

	def descriptor(self):
		"""Get and return a security descriptor (as raw bytes)."""

		return self.key_security.get_security_descriptor()
+
class RegistryValue(object):
	"""This is a high-level class for a registry value."""

	registry_file = None
	"""A primary file of a hive (a RegistryFile.PrimaryFile object)."""

	key_value = None
	"""A KeyValue object."""

	def __init__(self, primary_file, buf, naive = False):
		"""When working with deleted registry values, set 'naive' to True."""

		self.registry_file = primary_file
		if not naive:
			self.get_cell = self.registry_file.get_cell
		else:
			# Naive cell access does not require the cell to be referenced (used for deleted values).
			self.get_cell = self.registry_file.get_cell_naive

		self.key_value = RegistryRecords.KeyValue(buf)

	def name(self):
		"""Get, decode and return a value name string."""

		name_buf = self.key_value.get_value_name()
		# The VALUE_COMP_NAME flag means the name is stored with one byte per character.
		is_ascii = self.registry_file.baseblock.effective_version > 1 and self.key_value.get_flags() & RegistryRecords.VALUE_COMP_NAME > 0
		if is_ascii:
			return DecodeASCII(name_buf)

		return DecodeUnicode(name_buf)

	def type_raw(self):
		"""Get and return a value type (as an integer)."""

		return self.key_value.get_data_type()

	def type_str(self):
		"""Get, decode and return a value type (as a string)."""

		value_type = self.key_value.get_data_type()
		if value_type in ValueTypes:
			return ValueTypes[value_type]
		else:
			# An unknown type is rendered as a hexadecimal number.
			return hex(value_type)

	def data_size(self):
		"""Get and return a data size."""

		return self.key_value.get_data_size_real()

	def data_raw(self):
		"""Get and return data (as raw bytes)."""

		if self.key_value.get_data_size_real() == 0:
			return b''

		# Small data can be stored directly in the key value record.
		if self.key_value.is_data_inline():
			return self.key_value.get_inline_data()[ : self.key_value.get_data_size_real()]

		# Hives of version 4+ store data larger than 16344 bytes in big data segments.
		is_big_data = self.registry_file.baseblock.effective_version > 3 and self.key_value.get_data_size_real() > 16344
		if not is_big_data:
			return self.get_cell(self.key_value.get_data_offset())[ : self.key_value.get_data_size_real()]

		big_data_buf = self.get_cell(self.key_value.get_data_offset())
		big_data = RegistryRecords.BigData(big_data_buf)

		segments_list_offset = big_data.get_segments_list_offset()
		segments_count = big_data.get_segments_count()

		segments_list = RegistryRecords.SegmentsList(self.get_cell(segments_list_offset), segments_count)

		# Concatenate the segments; every segment except the last one must carry exactly 16344 bytes.
		data = b''
		data_length = self.key_value.get_data_size_real()
		for segment_offset in segments_list.elements():
			buf = self.get_cell(segment_offset)

			if data_length > 16344:
				data_part = buf[ : 16344]
				if len(data_part) != 16344:
					raise WalkException('Invalid segment size: {} != 16344'.format(len(data_part)))

				data += data_part
				data_length -= 16344
			else:
				data += buf[ : data_length]
				break

		return data

	def data(self):
		"""Get, decode and return data (as an integer, a string, a list of strings, or raw bytes).
		A string may contain a terminating null character.
		Data that does not match its declared type (e.g. a REG_DWORD that is not 4 bytes long)
		is returned as raw bytes.
		"""

		data_raw = self.data_raw()
		data_length = len(data_raw)
		type_int = self.type_raw()

		if type_int == RegistryRecords.REG_DWORD and data_length == 4:
			return unpack('<L', data_raw)[0]

		if type_int == RegistryRecords.REG_DWORD_BIG_ENDIAN and data_length == 4:
			return unpack('>L', data_raw)[0]

		if type_int == RegistryRecords.REG_QWORD and data_length == 8:
			return unpack('<Q', data_raw)[0]

		if type_int in [ RegistryRecords.REG_SZ, RegistryRecords.REG_EXPAND_SZ ] and data_length % 2 == 0 and data_length > 1:
			# Sanitize the string: cut it at the first null character, if any.
			return DecodeUnicode(data_raw, True)

		if type_int == RegistryRecords.REG_LINK and data_length % 2 == 0 and data_length > 1:
			return DecodeUnicode(data_raw)

		if type_int == RegistryRecords.REG_MULTI_SZ and data_length % 2 == 0 and data_length > 1:
			sz_list_data = DecodeUnicodeMulti(data_raw, True)
			if sz_list_data == '\x00':
				# An empty array (just the terminator).
				return []

			if len(sz_list_data) > 2 and sz_list_data[-1] == '\x00' and sz_list_data[-2] == '\x00':
				# Drop the final (empty-string) terminator, then split on the per-string terminators.
				sz_list = sz_list_data[ : -1].split('\x00')

				i = 0
				while i < len(sz_list):
					sz_list[i] += '\x00' # Restore the terminating null characters.
					i += 1

				return sz_list

		return data_raw

	def __str__(self):
		name = self.name()
		if len(name) > 0:
			return 'RegistryValue, name: {}, data type: {}, data size: {}'.format(name, self.type_str(), self.data_size())
		else:
			return 'RegistryValue, default value (no name), data type: {}, data size: {}'.format(self.type_str(), self.data_size())
+
class RegistryHiveTruncated(object):
	"""This is a high-level class for a truncated registry hive."""

	registry_file = None
	"""A primary file of a hive (a RegistryFile.PrimaryFileTruncated object)."""

	def __init__(self, file_object):
		self.registry_file = RegistryFile.PrimaryFileTruncated(file_object)
		self.effective_slack = set()

	def last_written_timestamp(self):
		"""Get, decode and return a last written timestamp (a datetime object)."""

		return DecodeFiletime(self.registry_file.baseblock.effective_last_written_timestamp)

	def last_reorganized_timestamp(self):
		"""Get, decode and return a last reorganized timestamp (a datetime object), or None if the hive was never reorganized."""

		timestamp = self.registry_file.baseblock.effective_last_reorganized_timestamp
		if timestamp is not None:
			return DecodeFiletime(timestamp)

	def scan(self):
		"""This method yields RegistryKey objects for keys and RegistryValue objects for values.
		Cells that do not parse as a valid key or value are silently skipped.
		"""

		for cell in self.registry_file.cells():
			cell_absolute_size = cell.get_absolute_size()
			if cell_absolute_size > 76: # A key node with at least one character in the name.
				cell_data = cell.get_cell_data()
				try:
					# Use the naive mode ('naive = True'): the RegistryKey docstring requires it when
					# 'layer' and 'relative_cell_offset' are None, and it enables loop protection when
					# building paths of possibly-corrupted keys (the original passed False here).
					key = RegistryKey(self.registry_file, cell_data, None, None, True, True)
					key_name = key.name()
				except (RegistryException, UnicodeDecodeError):
					pass
				else:
					yield key
			elif cell_absolute_size >= 20: # A key value with no name (at least).
				cell_data = cell.get_cell_data()
				try:
					# Use the naive mode for values too, for the same reason as above.
					value = RegistryValue(self.registry_file, cell_data, True)
					value_name = value.name()
				except (RegistryException, UnicodeDecodeError):
					pass
				else:
					yield value
diff --git a/yarp/RegistryCarve.py b/yarp/RegistryCarve.py
new file mode 100644
index 0000000..59a2c52
--- /dev/null
+++ b/yarp/RegistryCarve.py
@@ -0,0 +1,188 @@
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
+from __future__ import unicode_literals
+
+from . import RegistryFile
+from .Registry import DecodeUnicode
+from struct import unpack
+from collections import namedtuple
+
+# Result tuples returned by the validation and carving routines in this module.
+CarveResult = namedtuple('CarveResult', [ 'offset', 'size', 'truncated', 'truncation_point', 'truncation_scenario', 'filename' ])
+BaseBlockCheckResult = namedtuple('BaseBlockCheckResult', [ 'is_valid', 'hbins_data_size', 'filename', 'old_cells' ])
+HiveBinCheckResult = namedtuple('HiveBinCheckResult', [ 'is_valid', 'size' ])
+CellsCheckResult = namedtuple('CellsCheckResult', [ 'are_valid', 'truncation_point_relative' ])
+
+SECTOR_SIZE = 512 # This is an assumed sector size.
+FILE_MARGIN_SIZE = 4*1024*1024 # We will read more bytes than specified in the base block to account possible damage scenarios.
+FILE_SIZE_MAX_MIB = 500 # We do not expect primary files to be larger than this (in MiB).
+
+def CheckBaseBlockOfPrimaryFile(Buffer):
+ """Check if Buffer contains a valid base block of a primary file and a hive bin, return a named tuple (BaseBlockCheckResult)."""
+
+ # Too small to hold a base block plus the first hive bin header, reject early.
+ if len(Buffer) < RegistryFile.BASE_BLOCK_LENGTH_PRIMARY + RegistryFile.HIVE_BIN_SIZE_ALIGNMENT:
+ return BaseBlockCheckResult(is_valid = False, hbins_data_size = None, filename = None, old_cells = None)
+
+ # Parse the first 48 bytes of the base block. The skipped (__) fields are presumably the two sequence numbers, the last written timestamp (Q) and the root cell offset -- TODO confirm against RegistryFile's base block layout.
+ signature, __, __, __, major_version, minor_version, file_type, file_format, __, hbins_data_size, clustering_factor = unpack('<4sLLQLLLLLLL', Buffer[ : 48])
+
+ if (signature == b'regf' and major_version in RegistryFile.MAJOR_VERSION_NUMBERS_SUPPORTED and minor_version in RegistryFile.MINOR_VERSION_NUMBERS_SUPPORTED and
+ file_type == RegistryFile.FILE_TYPE_PRIMARY and file_format == RegistryFile.FILE_FORMAT_DIRECT_MEMORY_LOAD and clustering_factor == RegistryFile.FILE_CLUSTERING_FACTOR and
+ hbins_data_size >= RegistryFile.HIVE_BIN_SIZE_ALIGNMENT and hbins_data_size % RegistryFile.HIVE_BIN_SIZE_ALIGNMENT == 0 and
+ RegistryFile.BASE_BLOCK_LENGTH_PRIMARY + hbins_data_size <= FILE_SIZE_MAX_MIB * 1024 * 1024):
+
+ # Exclude buffers that carry transaction log data ('DIRT' or 'HvLE') right after a 512-byte log base block, and require a 'hbin' signature where the first hive bin of a primary file should start.
+ log_signature = Buffer[RegistryFile.BASE_BLOCK_LENGTH_LOG : RegistryFile.BASE_BLOCK_LENGTH_LOG + 4]
+ hbin_signature = Buffer[RegistryFile.BASE_BLOCK_LENGTH_PRIMARY : RegistryFile.BASE_BLOCK_LENGTH_PRIMARY + 4]
+ if log_signature != b'DIRT' and log_signature != b'HvLE' and hbin_signature == b'hbin':
+ try:
+ # The filename field occupies 64 bytes at offset 48; an undecodable field disqualifies the candidate (falls through to the failure return below).
+ filename = DecodeUnicode(Buffer[48 : 48 + 64], True).rstrip('\x00')
+ except UnicodeDecodeError:
+ pass
+ else:
+ old_cells = minor_version in RegistryFile.MINOR_VERSION_NUMBERS_FOR_OLD_CELL_FORMAT
+ return BaseBlockCheckResult(is_valid = True, hbins_data_size = hbins_data_size, filename = filename, old_cells = old_cells)
+
+ return BaseBlockCheckResult(is_valid = False, hbins_data_size = None, filename = None, old_cells = None)
+
+def CheckHiveBin(Buffer, ExpectedOffsetRelative):
+ """Check if Buffer contains a valid hive bin (without checking its cells), return a named tuple (HiveBinCheckResult)."""
+
+ if len(Buffer) < RegistryFile.HIVE_BIN_SIZE_ALIGNMENT:
+ return HiveBinCheckResult(is_valid = False, size = None)
+
+ # A hive bin header records its own offset relative to the start of the hive bins data; it must match the expected one, and the size must be a positive multiple of the alignment.
+ signature, offset, size = unpack('<4sLL', Buffer[ : 12])
+ if signature == b'hbin' and offset == ExpectedOffsetRelative and size >= RegistryFile.HIVE_BIN_SIZE_ALIGNMENT and size % RegistryFile.HIVE_BIN_SIZE_ALIGNMENT == 0:
+ return HiveBinCheckResult(is_valid = True, size = size)
+
+ return HiveBinCheckResult(is_valid = False, size = None)
+
+def CheckCellsOfHiveBin(Buffer, OldCells = False):
+ """Check if Buffer contains a hive bin with valid cells (new format), return a named tuple (CellsCheckResult). A hive bin's header is not checked."""
+
+ curr_pos_relative = 32
+ while curr_pos_relative < len(Buffer):
+ four_bytes = Buffer[curr_pos_relative : curr_pos_relative + 4]
+ if len(four_bytes) < 4:
+ return CellsCheckResult(are_valid = False, truncation_point_relative = curr_pos_relative)
+
+ cell_size, = unpack(' regf_size:
+ regf_size = curr_pos_relative # Adjust the file size to include an unforeseeably large hive bin.
+ break
+
+ hbin_buf_partial = regf_buf[curr_pos_relative : curr_pos_relative + RegistryFile.HIVE_BIN_SIZE_ALIGNMENT]
+ check_result_hbin = CheckHiveBin(hbin_buf_partial, expected_hbin_offset_relative)
+ if not check_result_hbin.is_valid:
+ truncation_point = regf_offset + curr_pos_relative
+ regf_size = curr_pos_relative # Adjust the file size according to the truncation point.
+ break
+
+ last_hbin_buf = regf_buf[curr_pos_relative : curr_pos_relative + check_result_hbin.size]
+
+ curr_pos_relative += check_result_hbin.size
+ expected_hbin_offset_relative += check_result_hbin.size
+
+ if last_hbin_buf is None:
+ # No valid hive bins found.
+ pos += SECTOR_SIZE
+ continue
+
+ if truncation_point is None:
+ # Probably no truncation.
+ check_result_cells = CheckCellsOfHiveBin(last_hbin_buf, check_result.old_cells)
+ if check_result_cells.are_valid:
+ # No truncation.
+ yield CarveResult(offset = regf_offset, size = regf_size, truncated = False, truncation_point = None, truncation_scenario = 0,
+ filename = check_result.filename)
+ else:
+ # Truncation within the last hive bin.
+ truncation_point = regf_offset + regf_size - len(last_hbin_buf) + check_result_cells.truncation_point_relative
+ truncation_point = truncation_point // SECTOR_SIZE * SECTOR_SIZE # Adjust the truncation point according to the sector size.
+ regf_size = truncation_point - regf_offset # Adjust the file size according to the truncation point.
+
+ yield CarveResult(offset = regf_offset, size = regf_size, truncated = True, truncation_point = truncation_point, truncation_scenario = 2,
+ filename = check_result.filename)
+ else:
+ # Obvious truncation.
+ check_result_cells = CheckCellsOfHiveBin(last_hbin_buf, check_result.old_cells)
+ if check_result_cells.are_valid:
+ # Truncation at a boundary of a hive bin.
+ yield CarveResult(offset = regf_offset, size = regf_size, truncated = True, truncation_point = truncation_point, truncation_scenario = 1,
+ filename = check_result.filename)
+ else:
+ # Truncation within a hive bin.
+ truncation_point = regf_offset + regf_size - len(last_hbin_buf) + check_result_cells.truncation_point_relative
+ truncation_point = truncation_point // SECTOR_SIZE * SECTOR_SIZE # Adjust the truncation point according to the sector size.
+ regf_size = truncation_point - regf_offset # Adjust the file size according to the truncation point.
+
+ yield CarveResult(offset = regf_offset, size = regf_size, truncated = True, truncation_point = truncation_point, truncation_scenario = 3,
+ filename = check_result.filename)
+
+ if regf_size % SECTOR_SIZE == 0:
+ pos += regf_size
+ else:
+ pos += regf_size + SECTOR_SIZE - regf_size % SECTOR_SIZE
+
+ continue
+
+ pos += SECTOR_SIZE
diff --git a/yarp/RegistryFile.py b/yarp/RegistryFile.py
new file mode 100644
index 0000000..27a4e3d
--- /dev/null
+++ b/yarp/RegistryFile.py
@@ -0,0 +1,1290 @@
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
+from __future__ import unicode_literals
+
+from struct import unpack, pack
+from ctypes import c_uint32
+from io import BytesIO
+from shutil import copyfileobj
+from collections import namedtuple
+
+# Supported hive format version numbers; the minor version also selects the cell format (old vs new).
+MAJOR_VERSION_NUMBERS_SUPPORTED = set([1])
+MINOR_VERSION_NUMBERS_SUPPORTED = set([1, 2, 3, 4, 5, 6])
+
+MINOR_VERSION_NUMBERS_FOR_OLD_CELL_FORMAT = set([1])
+MINOR_VERSION_NUMBERS_FOR_NEW_CELL_FORMAT = set(MINOR_VERSION_NUMBERS_SUPPORTED - MINOR_VERSION_NUMBERS_FOR_OLD_CELL_FORMAT)
+
+FILE_TYPE_PRIMARY = 0 # Primary (normal) file.
+FILE_TYPE_LOG_OLD = 1 # Transaction log file (old format).
+FILE_TYPE_LOG_VERYOLD = 2 # Transaction log file (the same old format, but with a different type number).
+FILE_TYPE_LOG_NEW = 6 # Transaction log file (new format).
+FILE_TYPES_SUPPORTED = set([FILE_TYPE_PRIMARY, FILE_TYPE_LOG_OLD, FILE_TYPE_LOG_VERYOLD, FILE_TYPE_LOG_NEW])
+
+FILE_FORMAT_DIRECT_MEMORY_LOAD = 1
+
+BASE_BLOCK_LENGTH_PRIMARY = 4096
+FILE_CLUSTERING_FACTOR = 1 # This is the only value expected (even when the sector size is not 512 bytes).
+BASE_BLOCK_LENGTH_LOG = 512 * FILE_CLUSTERING_FACTOR
+
+MARVIN32_SEED = 0x82EF4D887A4E55C5 # This is the seed for log entries.
+
+HIVE_BIN_SIZE_ALIGNMENT = 4096
+
+# CELL_OFFSET_NIL marks an absent cell offset; CELL_SIZE_MAX_NAIVE is a sanity cap used when reading cells without a cell map.
+CELL_OFFSET_NIL = 0xFFFFFFFF
+CELL_SIZE_MAX_NAIVE = 10 * 1024 * 1024
+
+# Location descriptors for dirty pages recorded in transaction log files.
+DirtyPageMeta = namedtuple('DirtyPageMeta', [ 'relative_offset_primary', 'relative_offset_log' ])
+DirtyPageReference = namedtuple('DirtyPageReference', [ 'relative_offset_primary', 'size' ])
+
+def Marvin32(Buffer, Seed = MARVIN32_SEED):
+ """Calculate and return the Marvin32 hash (64 bits) of Buffer."""
+
+ def ROTL(X, N, W):
+ return (X.value << N) | (X.value >> (W - N))
+
+ def Mix(State, Val):
+ lo, hi = State
+ lo.value += Val.value
+ hi.value ^= lo.value
+ lo.value = ROTL(lo, 20, 32) + hi.value
+ hi.value = ROTL(hi, 9, 32) ^ lo.value
+ lo.value = ROTL(lo, 27, 32) + hi.value
+ hi.value = ROTL(hi, 19, 32)
+ return (lo, hi)
+
+ lo = c_uint32(Seed)
+ hi = c_uint32(Seed >> 32)
+ state = (lo, hi)
+
+ length = len(Buffer)
+ pos = 0
+ val = c_uint32()
+
+ while length >= 4:
+ val.value = unpack(' 0:
+ if BaseBlockFlags & 1 == 0:
+ BaseBlockFlags += 1
+ else:
+ if BaseBlockFlags & 1 > 0:
+ BaseBlockFlags -= 1
+
+ return BaseBlockFlags
+
+# Exception hierarchy: every specific error below derives from RegistryException, so callers can catch the base class alone. Each subclass stores a descriptive value and exposes it via repr().
+class RegistryException(Exception):
+ """This is a top-level exception for this module."""
+
+ pass
+
+class ReadException(RegistryException):
+ """This exception is raised when a read error has occurred.
+ This exception does not supersede standard I/O exceptions.
+ """
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class NotSupportedException(RegistryException):
+ """This exception is raised when something is not supported."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class BaseBlockException(RegistryException):
+ """This exception is raised when something is invalid in a base block."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class FileSizeException(RegistryException):
+ """This exception is raised when a file has an obviously invalid size."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class HiveBinException(RegistryException):
+ """This exception is raised when something is invalid in a hive bin."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class HiveCellException(RegistryException):
+ """This exception is raised when something is wrong with a hive cell."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class DirtyVectorException(RegistryException):
+ """This exception is raised when something is invalid in a dirty vector."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class DirtyPageException(RegistryException):
+ """This exception is raised when a dirty page is invalid (truncated)."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class LogEntryException(RegistryException):
+ """This exception is raised when a log entry is invalid."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class RecoveryException(RegistryException):
+ """This exception is raised when a recovery error has occurred."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class NotEligibleException(RegistryException):
+ """This exception is raised when a transaction log file cannot be applied to a primary file."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class CellOffsetException(RegistryException):
+ """This exception is raised when an invalid cell has been requested."""
+
+ def __init__(self, value):
+ self._value = value
+
+ def __str__(self):
+ return repr(self._value)
+
+class RegistryFile(object):
+ """This is a generic class for registry files, it provides low-level methods for reading, parsing, and writing data.
+ All methods are self-explanatory.
+ """
+
+ def __init__(self, file_object, file_offset = 0):
+ # All read/write positions in this object are taken relative to file_offset.
+ self.file_object = file_object
+ self.file_offset = file_offset
+
+ def get_file_size(self):
+ # Seek to the end of the underlying file object (whence = 2) and report the position.
+ self.file_object.seek(0, 2)
+ return self.file_object.tell()
+
+ def read_binary(self, pos, length):
+ self.file_object.seek(self.file_offset + pos)
+ b = self.file_object.read(length)
+ if len(b) == length:
+ return b
+
+ # A short read is an error here, unlike in the plain file protocol.
+ raise ReadException('Cannot read data (expected: {} bytes, read: {} bytes)'.format(length, len(b)))
+
+ def write_binary(self, pos, data):
+ self.file_object.seek(self.file_offset + pos)
+ self.file_object.write(data)
+
+ def read_uint32(self, pos):
+ b = self.read_binary(pos, 4)
+ return unpack('> (bit_pos % 8)) & 1) != 0
+ if is_bit_set:
+ dirty_page_meta = DirtyPageMeta(relative_offset_primary = bit_pos * 512, relative_offset_log = i * 512)
+ yield dirty_page_meta
+ i += 1
+
+ bit_pos += 1
+
+class DirtyPage(RegistryFile):
+ """This is a class for a dirty page, describing its location and bytes (data)."""
+
+ primary_file_offset = None
+ log_file_offset = None
+ page_size = None
+
+ def __init__(self, file_object, log_file_offset, page_size, primary_file_offset):
+ # The page's bytes are read from the log file at log_file_offset; they are destined for the primary file at primary_file_offset.
+ super(DirtyPage, self).__init__(file_object, log_file_offset)
+
+ self.page_size = page_size
+ self.primary_file_offset = primary_file_offset
+ self.log_file_offset = log_file_offset
+
+ def get_bytes(self):
+ bytes_ = self.read_binary(0, self.page_size)
+ # NOTE(review): read_binary() already raises ReadException on a short read, so this length check appears to be unreachable; kept as a safety net.
+ if len(bytes_) != self.page_size:
+ raise DirtyPageException('Truncated dirty page')
+
+ return bytes_
+
+class OldLogFile(object):
+ """This is a class for a transaction log file (old format)."""
+
+ baseblock = None
+ """A base block in a log file (a BaseBlock object)."""
+
+ dirtyvector = None
+ """A dirty vector in a log file (a DirtyVector object)."""
+
+ def __init__(self, file_object):
+ self.file_object = file_object
+
+ self.baseblock = BaseBlock(self.file_object)
+
+ # Both old-format type numbers describe the same on-disk layout.
+ if self.baseblock.get_file_type() != FILE_TYPE_LOG_OLD and self.baseblock.get_file_type() != FILE_TYPE_LOG_VERYOLD:
+ raise BaseBlockException('Invalid file type')
+
+ if self.baseblock.is_file_dirty:
+ raise BaseBlockException('Dirty state')
+
+ file_size = self.baseblock.get_file_size()
+ if file_size < self.get_dirty_pages_starting_offset() + 512: # Check if at least one dirty page (512 bytes) can be present in the file.
+ raise FileSizeException('Invalid file size: {}'.format(file_size))
+
+ self.dirtyvector = DirtyVector(self.file_object, BASE_BLOCK_LENGTH_LOG, self.baseblock.effective_hbins_data_size)
+
+ def get_dirty_pages_starting_offset(self):
+ # The dirty vector follows the log base block: a 'DIRT' signature, then a bitmap with one bit per 512-byte page of the hive bins data (hbins_data_size // 4096 bytes). The pages themselves start at the next sector boundary.
+ offset_unaligned = BASE_BLOCK_LENGTH_LOG + len(b'DIRT') + self.baseblock.effective_hbins_data_size // 4096
+ sector_size = 512 # We do not expect other values (even when the sector size is not 512 bytes).
+
+ if offset_unaligned % sector_size == 0:
+ offset_aligned = offset_unaligned
+ else:
+ offset_aligned = offset_unaligned + sector_size - offset_unaligned % sector_size
+
+ return offset_aligned
+
+ def dirty_pages(self):
+ """This method yields DirtyPage objects."""
+
+ # Old-format pages are always 512 bytes; primary file offsets are relative to the end of its 4096-byte base block.
+ log_file_base = self.get_dirty_pages_starting_offset()
+ primary_file_base = BASE_BLOCK_LENGTH_PRIMARY
+
+ for dirty_page_meta in self.dirtyvector.dirty_pages_meta():
+ log_file_offset = dirty_page_meta.relative_offset_log + log_file_base
+ primary_file_offset = dirty_page_meta.relative_offset_primary + primary_file_base
+
+ dirty_page = DirtyPage(self.file_object, log_file_offset, 512, primary_file_offset)
+ yield dirty_page
+
+class LogEntry(RegistryFile):
+ """This is a class for a log entry, it provides methods to read dirty pages references and to map dirty pages.
+ Most methods are self-explanatory.
+ """
+
+ def __init__(self, file_object, file_offset, expected_sequence_number):
+ # Validate the entry up front: signature, sizes, dirty pages count, both hashes, and the expected sequence number. Any failure raises LogEntryException.
+ super(LogEntry, self).__init__(file_object, file_offset)
+
+ signature = self.get_signature()
+ if signature != b'HvLE':
+ raise LogEntryException('Invalid signature: {}'.format(signature))
+
+ size = self.get_size()
+ if size < 512 or size % 512 != 0:
+ raise LogEntryException('Invalid size: {}'.format(size))
+
+ hbins_data_size = self.get_hbins_data_size()
+ if hbins_data_size < HIVE_BIN_SIZE_ALIGNMENT or hbins_data_size % HIVE_BIN_SIZE_ALIGNMENT != 0:
+ raise LogEntryException('Invalid hive bins data size: {}'.format(hbins_data_size))
+
+ dirty_pages_count = self.get_dirty_pages_count()
+ if dirty_pages_count == 0:
+ raise LogEntryException('Invalid dirty pages count: {}'.format(dirty_pages_count))
+
+ # NOTE(review): the .format(dirty_pages_count) call below is a no-op -- the message has no placeholder. Either the argument should be dropped or the message should include it.
+ if not self.validate_hashes():
+ raise LogEntryException('Invalid hashes'.format(dirty_pages_count))
+
+ sequence_number = self.get_sequence_number()
+ if sequence_number != expected_sequence_number:
+ raise LogEntryException('Unexpected sequence number: {} != {}'.format(sequence_number, expected_sequence_number))
+
+ def get_signature(self):
+ return self.read_binary(0, 4)
+
+ def get_size(self):
+ return self.read_uint32(4)
+
+ def get_flags(self):
+ return self.read_uint32(8)
+
+ def get_sequence_number(self):
+ return self.read_uint32(12)
+
+ def get_hbins_data_size(self):
+ return self.read_uint32(16)
+
+ def get_dirty_pages_count(self):
+ return self.read_uint32(20)
+
+ def get_hash_1(self):
+ return self.read_uint64(24)
+
+ def get_hash_2(self):
+ return self.read_uint64(32)
+
+ def calculate_hash_1(self):
+ # Marvin32 over everything after the 40-byte header (the references table and the page data).
+ b = bytearray(self.read_binary(40, self.get_size() - 40))
+ return Marvin32(b)
+
+ def calculate_hash_2(self):
+ # Marvin32 over the first 32 bytes of the header (everything before the hash fields themselves).
+ b = bytearray(self.read_binary(0, 32))
+ return Marvin32(b)
+
+ def validate_hashes(self):
+ """Compare calculated hashes to hashes recorded in a log entry."""
+
+ return self.get_hash_2() == self.calculate_hash_2() and self.get_hash_1() == self.calculate_hash_1()
+
+ def get_dirty_pages_starting_offset(self):
+ # The page data follows the header (40 bytes) and the references table (8 bytes per reference).
+ return 40 + self.get_dirty_pages_count() * 8
+
+ def dirty_pages_references(self):
+ """This method yields DirtyPageReference tuples."""
+
+ # Each reference is a pair of uint32 values: a primary-file-relative offset and a page size.
+ curr_pos = 40
+ i = 0
+ while i < self.get_dirty_pages_count():
+ primary_file_offset_relative = self.read_uint32(curr_pos)
+ page_size = self.read_uint32(curr_pos + 4)
+
+ dirty_page_reference = DirtyPageReference(relative_offset_primary = primary_file_offset_relative, size = page_size)
+ yield dirty_page_reference
+
+ curr_pos += 8
+ i += 1
+
+ def dirty_pages(self):
+ """This method yields DirtyPage objects."""
+
+ # Page data is stored back-to-back after the references table, in reference order (tracked via delta).
+ log_file_base = self.file_offset + self.get_dirty_pages_starting_offset()
+ primary_file_base = BASE_BLOCK_LENGTH_PRIMARY
+
+ delta = 0
+ for dirty_page_reference in self.dirty_pages_references():
+ primary_file_offset = dirty_page_reference.relative_offset_primary + primary_file_base
+ page_size = dirty_page_reference.size
+
+ log_file_offset = log_file_base + delta
+
+ dirty_page = DirtyPage(self.file_object, log_file_offset, page_size, primary_file_offset)
+ yield dirty_page
+
+ delta += page_size
+
+class NewLogFile(object):
+ """This is a class for a transaction log file (new format)."""
+
+ baseblock = None
+ """A base block in a log file (a BaseBlock object)."""
+
+ def __init__(self, file_object):
+ self.file_object = file_object
+
+ self.baseblock = BaseBlock(self.file_object)
+
+ if self.baseblock.get_file_type() != FILE_TYPE_LOG_NEW:
+ raise BaseBlockException('Invalid file type')
+
+ if self.baseblock.is_file_dirty:
+ raise BaseBlockException('Dirty state')
+
+ self.file_size = self.baseblock.get_file_size()
+ if self.file_size <= BASE_BLOCK_LENGTH_LOG + 40: # Check if at least one log entry can be present in the file.
+ raise FileSizeException('Invalid file size: {}'.format(self.file_size))
+
+ def log_entries(self):
+ """This method yields LogEntry objects."""
+
+ # Entries are laid out back-to-back after the 512-byte base block and must carry consecutive sequence numbers, starting from the primary sequence number of the log's base block.
+ current_sequence_number = self.baseblock.get_primary_sequence_number()
+
+ curr_pos = BASE_BLOCK_LENGTH_LOG
+ while curr_pos < self.file_size:
+ try:
+ curr_logentry = LogEntry(self.file_object, curr_pos, current_sequence_number)
+ except (LogEntryException, ReadException):
+ break # We could read garbage at the end of the file, this is normal.
+
+ yield curr_logentry
+
+ curr_pos += curr_logentry.get_size()
+ current_sequence_number = c_uint32(current_sequence_number + 1).value # Handle a possible overflow.
+
+class PrimaryFile(object):
+ """This is a class for a primary file, it provides methods to read the file, to build the maps of cells, and to recover the file using a transaction log."""
+
+ file = None
+ """A RegistryFile object for a primary file."""
+
+ baseblock = None
+ """A base block in a primary file (a BaseBlock object)."""
+
+ cell_map_allocated = None
+ """A map of allocated cells."""
+
+ cell_map_unallocated = None
+ """A map of unallocated cells."""
+
+ record_referenced_cells = False
+ """When True, the get_cell() method will add a requested cell to a map of allocated and referenced cells."""
+
+ cell_map_referenced = None
+ """A map of allocated and referenced cells (empty by default)."""
+
+ cell_map_free = None
+ """A map of free (unallocated, unreferenced) cells (empty by default, see the build_map_free() method)."""
+
+ def __init__(self, file_object, tolerate_minor_errors = True):
+ self.file_object = file_object
+ self.writable = False
+ self.file = RegistryFile(file_object)
+ self.tolerate_minor_errors = tolerate_minor_errors
+
+ # Recovery state: parsed log file objects, how many logs were applied, and the last applied sequence number.
+ self.old_log_file = None
+ self.new_log_file = None
+ self.log_apply_count = 0
+ self.last_sequence_number = None
+
+ self.baseblock = BaseBlock(self.file_object)
+ if not self.baseblock.is_primary_file:
+ raise NotSupportedException('Invalid file type')
+
+ self.build_cell_maps()
+
+ def hive_bins(self):
+ """This method yields HiveBin objects."""
+
+ # Walk hive bins until the effective hive bins data size is covered.
+ curr_pos = BASE_BLOCK_LENGTH_PRIMARY
+ while curr_pos - BASE_BLOCK_LENGTH_PRIMARY < self.baseblock.effective_hbins_data_size:
+ try:
+ curr_hivebin = HiveBin(self.file_object, curr_pos, self.tolerate_minor_errors, self.baseblock.use_old_cell_format)
+ except (HiveBinException, ReadException):
+ if self.baseblock.is_file_dirty and self.log_apply_count == 0:
+ # We could read garbage at the end of the dirty file, this is normal.
+ self.baseblock.effective_hbins_data_size = curr_pos - BASE_BLOCK_LENGTH_PRIMARY
+ break
+ else:
+ raise # If the file is not dirty (or we recovered the data), this is a serious error.
+
+ yield curr_hivebin
+
+ curr_pos += curr_hivebin.get_size()
+
+ def build_cell_maps(self):
+ """Build the maps of allocated and unallocated cells, clear other maps."""
+
+ self.cell_map_allocated = set()
+ self.cell_map_unallocated = set()
+
+ for hbin in self.hive_bins():
+ for cell in hbin.cells:
+ cell_file_offset = cell.file_offset
+ if cell.is_allocated():
+ self.cell_map_allocated.add(cell_file_offset)
+ else:
+ self.cell_map_unallocated.add(cell_file_offset)
+
+ # The derived maps are reset here; a caller must redo the reference walk and build_map_free() if needed.
+ self.cell_map_free = set()
+ self.cell_map_referenced = set()
+
+ def build_map_free(self):
+ """Build the map of free cells."""
+
+ # Free cells = allocated-but-unreferenced ones (only when a reference walk was recorded) plus unallocated ones.
+ self.cell_map_free = set()
+
+ if len(self.cell_map_referenced) > 0:
+ self.cell_map_free = self.cell_map_allocated - self.cell_map_referenced
+
+ self.cell_map_free.update(self.cell_map_unallocated)
+
+ def get_root_cell(self):
+ """Get and return data from a root cell."""
+
+ return self.get_cell(self.baseblock.effective_root_cell_offset)
+
+ def get_cell(self, cell_relative_offset):
+ """Get and return data from a cell. The cell must be in the map of allocated cells."""
+
+ if cell_relative_offset == CELL_OFFSET_NIL:
+ raise CellOffsetException('Got CELL_OFFSET_NIL')
+
+ # When the allocated map is empty, the membership check is skipped.
+ cell_file_offset = BASE_BLOCK_LENGTH_PRIMARY + cell_relative_offset
+ if len(self.cell_map_allocated) > 0 and cell_file_offset not in self.cell_map_allocated:
+ raise CellOffsetException('There is no valid cell starting at this offset (relative): {}'.format(cell_relative_offset))
+
+ if self.record_referenced_cells:
+ self.cell_map_referenced.add(cell_file_offset)
+
+ cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format)
+ return cell.get_cell_data()
+
+ def get_cell_naive(self, cell_relative_offset):
+ """Get and return data from a cell naively."""
+
+ if cell_relative_offset == CELL_OFFSET_NIL:
+ raise CellOffsetException('Got CELL_OFFSET_NIL')
+
+ cell_file_offset = BASE_BLOCK_LENGTH_PRIMARY + cell_relative_offset
+
+ cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format)
+
+ # No map lookup here; only an upper bound on the cell size guards against bogus offsets.
+ size = cell.get_absolute_size()
+ if size > CELL_SIZE_MAX_NAIVE:
+ raise CellOffsetException('Got an obviously invalid offset (relative)')
+
+ return cell.get_cell_data()
+
+ def create_writable_file_object(self):
+ """Create a writable copy of a file object (used to recover a primary file)."""
+
+ if self.writable:
+ return
+
+ # Subsequent writes go to an in-memory copy; the original file object is kept for discard_writable_file_object().
+ new_file_object = BytesIO()
+
+ # Copy data to the new writable file object.
+ self.file_object.seek(0)
+ copyfileobj(self.file_object, new_file_object)
+
+ self.original_file_object = self.file_object
+ self.file_object = new_file_object
+ self.file = RegistryFile(self.file_object)
+
+ self.writable = True
+
+ def discard_writable_file_object(self):
+ """Discard a writable copy of a file object."""
+
+ if not self.writable:
+ return
+
+ # Re-initialize this object against the original (unmodified) file object.
+ self.file_object.close()
+ self.file_object = self.original_file_object
+ self.__init__(self.file_object, self.tolerate_minor_errors)
+
+ def save_recovered_hive(self, filepath):
+ """Save the recovered hive to a new primary file."""
+
+ if self.log_apply_count == 0:
+ raise NotSupportedException('Cannot save a hive that was not recovered')
+
+ if self.baseblock.is_baseblock_valid:
+ # The base block is valid, use it.
+ self.file_object.seek(0)
+ baseblock_bytes = self.file_object.read(BASE_BLOCK_LENGTH_PRIMARY)
+ else:
+ # The base block is invalid, use another one from a transaction log file.
+ if self.old_log_file is not None:
+ self.old_log_file.file_object.seek(0)
+ baseblock_bytes = self.old_log_file.file_object.read(BASE_BLOCK_LENGTH_LOG)
+ elif self.new_log_file is not None:
+ self.new_log_file.file_object.seek(0)
+ baseblock_bytes = self.new_log_file.file_object.read(BASE_BLOCK_LENGTH_LOG)
+ else:
+ raise NotSupportedException('Cannot find a log file to be used to recover the base block')
+
+ # Create a file object for the base block.
+ baseblock_object = BytesIO(b'\x00' * BASE_BLOCK_LENGTH_PRIMARY)
+
+ # Write the base block to the new file object.
+ baseblock_object.seek(0)
+ baseblock_object.write(baseblock_bytes)
+
+ # Create a new BaseBlock object.
+ baseblock = BaseBlock(baseblock_object, True)
+
+ # Update various fields in the base block.
+ if self.last_sequence_number is not None:
+ baseblock.write_synchronized_sequence_numbers(self.last_sequence_number)
+ else:
+ baseblock.write_synchronized_sequence_numbers(baseblock.get_primary_sequence_number())
+
+ baseblock.write_hbins_data_size(self.baseblock.effective_hbins_data_size)
+ baseblock.write_flags(self.baseblock.effective_flags)
+ baseblock.write_file_type(FILE_TYPE_PRIMARY)
+ baseblock.update_checksum()
+
+ with open(filepath, 'wb') as f:
+ # Copy the old base block and the recovered hive bins data to a file.
+ self.file_object.seek(0)
+ copyfileobj(self.file_object, f)
+
+ # Copy the new base block over the old one.
+ baseblock_object.seek(0)
+ f.seek(0)
+ f.write(baseblock_object.read())
+
+ # Close the file object.
+ baseblock_object.close()
+
+ def apply_old_log_file(self, log_file_object):
+ """Apply a transaction log file (old format) to a primary file."""
+
+ if self.log_apply_count > 0:
+ raise RecoveryException('A log file has been already applied')
+
+ if not self.baseblock.is_file_dirty:
+ raise RecoveryException('There is no need to apply the log file')
+
+ # Eligibility: the log must not be older than the primary file (compared by last written timestamps).
+ self.old_log_file = OldLogFile(log_file_object)
+ log_timestamp = self.old_log_file.baseblock.effective_last_written_timestamp
+ primary_timestamp = self.baseblock.effective_last_written_timestamp
+
+ if log_timestamp < primary_timestamp:
+ raise NotEligibleException('This log file cannot be applied')
+
+ # Adopt the effective geometry and metadata from the log's base block.
+ self.baseblock.effective_hbins_data_size = self.old_log_file.baseblock.effective_hbins_data_size
+ self.baseblock.effective_root_cell_offset = self.old_log_file.baseblock.effective_root_cell_offset
+ self.baseblock.effective_version = self.old_log_file.baseblock.effective_version
+ self.baseblock.use_old_cell_format = self.baseblock.effective_version in MINOR_VERSION_NUMBERS_FOR_OLD_CELL_FORMAT
+ self.baseblock.effective_last_reorganized_timestamp = self.old_log_file.baseblock.effective_last_reorganized_timestamp
+ self.baseblock.effective_last_written_timestamp = self.old_log_file.baseblock.effective_last_written_timestamp
+ self.baseblock.effective_flags = self.old_log_file.baseblock.effective_flags
+
+ self.create_writable_file_object()
+
+ for dirty_page in self.old_log_file.dirty_pages(): # Apply dirty pages.
+ self.file.write_binary(dirty_page.primary_file_offset, dirty_page.get_bytes())
+
+ self.log_apply_count += 1
+ self.build_cell_maps()
+
+ def apply_new_log_file(self, log_file_object, callback = None):
+ """Apply a single transaction log file (new format) to a primary file.
+ After a log entry has been applied, call an optional callback function.
+ """
+
+ if self.log_apply_count >= 2:
+ raise RecoveryException('No more than two log files can be applied')
+
+ if not self.baseblock.is_file_dirty:
+ raise RecoveryException('There is no need to apply the log file')
+
+ self.new_log_file = NewLogFile(log_file_object)
+
+ # Eligibility: sequence numbers must move forward relative to any previously applied log and to a valid primary base block.
+ if self.last_sequence_number is not None and self.last_sequence_number >= self.new_log_file.baseblock.get_primary_sequence_number():
+ raise RecoveryException('This log file cannot be applied')
+
+ if self.baseblock.is_baseblock_valid and self.new_log_file.baseblock.get_primary_sequence_number() < self.baseblock.get_secondary_sequence_number():
+ raise NotEligibleException('This log file cannot be applied')
+
+ self.baseblock.effective_root_cell_offset = self.new_log_file.baseblock.effective_root_cell_offset
+ self.baseblock.effective_version = self.new_log_file.baseblock.effective_version
+ self.baseblock.use_old_cell_format = self.baseblock.effective_version in MINOR_VERSION_NUMBERS_FOR_OLD_CELL_FORMAT
+ self.baseblock.effective_last_reorganized_timestamp = self.new_log_file.baseblock.effective_last_reorganized_timestamp
+ self.baseblock.effective_last_written_timestamp = self.new_log_file.baseblock.effective_last_written_timestamp
+ self.baseblock.effective_flags = self.new_log_file.baseblock.effective_flags
+
+ self.create_writable_file_object()
+
+ # Each log entry updates the sequence number, flags and hive bins data size before its dirty pages are written.
+ for log_entry in self.new_log_file.log_entries():
+ self.last_sequence_number = log_entry.get_sequence_number()
+ self.baseblock.effective_flags = LogEntryFlagsToBaseBlockFlags(log_entry.get_flags(), self.baseblock.effective_flags)
+ self.baseblock.effective_hbins_data_size = log_entry.get_hbins_data_size()
+
+ for dirty_page in log_entry.dirty_pages(): # Apply dirty pages.
+ self.file.write_binary(dirty_page.primary_file_offset, dirty_page.get_bytes())
+
+ if callback is not None:
+ self.build_cell_maps()
+ callback()
+
+ self.log_apply_count += 1
+
+ # Without a callback, the cell maps are rebuilt once at the end (instead of after every entry).
+ if callback is None:
+ self.build_cell_maps()
+
+ def apply_new_log_files(self, log_file_object_1, log_file_object_2, callback = None):
+ """Apply two transaction log files (new format) to a primary file.
+ After a log entry has been applied, call an optional callback function.
+ """
+
+ # Compare sequence numbers modulo 2**32: the log that is behind (by less than 2**31) is the starting one and is applied first.
+ def is_starting_log(this_sequence_number, another_sequence_number):
+ if this_sequence_number >= another_sequence_number:
+ delta = this_sequence_number - another_sequence_number
+ starting = False
+ else:
+ delta = another_sequence_number - this_sequence_number
+ starting = True
+
+ if c_uint32(delta).value <= 0x7FFFFFFF:
+ return starting
+ else:
+ return not starting # Sequence numbers did overflow.
+
+
+ new_log_file_1 = NewLogFile(log_file_object_1)
+ sequence_number_1 = new_log_file_1.baseblock.get_primary_sequence_number()
+
+ new_log_file_2 = NewLogFile(log_file_object_2)
+ sequence_number_2 = new_log_file_2.baseblock.get_primary_sequence_number()
+
+ if is_starting_log(sequence_number_1, sequence_number_2):
+ first = log_file_object_1
+ second = log_file_object_2
+ else:
+ first = log_file_object_2
+ second = log_file_object_1
+
+ if self.baseblock.is_baseblock_valid:
+ # The first log is optional (it may be ineligible); the second one must apply.
+ try:
+ self.apply_new_log_file(first, callback)
+ except NotEligibleException:
+ pass
+
+ self.apply_new_log_file(second, callback)
+ else:
+ self.apply_new_log_file(second, callback) # This is how Windows works.
+
+class PrimaryFileTruncated(object):
+ """This is a class for a truncated primary file, it provides methods to read the truncated file, to build the maps of cells, and to yield each cell."""
+
+ file = None
+ """A RegistryFile object for a primary file."""
+
+ baseblock = None
+ """A base block in a primary file (a BaseBlock object)."""
+
+ cell_map_allocated = None
+ """A map of allocated cells."""
+
+ cell_map_unallocated = None
+ """A map of unallocated cells."""
+
+ cell_map_free = None
+ """A map of free (unallocated only) cells."""
+
+ def __init__(self, file_object):
+ self.file_object = file_object
+ self.writable = False
+ self.file = RegistryFile(file_object)
+
+ self.baseblock = BaseBlock(self.file_object)
+ if not self.baseblock.is_primary_file:
+ raise NotSupportedException('Invalid file type')
+
+ self.build_cell_maps()
+
+ def hive_bins(self):
+ """This method yields HiveBin objects."""
+
+ # Hive bins are always parsed leniently here (tolerate_minor_errors = True) and the walk ends silently at the truncation point.
+ curr_pos = BASE_BLOCK_LENGTH_PRIMARY
+ while curr_pos - BASE_BLOCK_LENGTH_PRIMARY < self.baseblock.effective_hbins_data_size:
+ try:
+ curr_hivebin = HiveBin(self.file_object, curr_pos, True, self.baseblock.use_old_cell_format)
+ except (HiveBinException, ReadException):
+ break # Since we expect a truncation point, stop here.
+
+ yield curr_hivebin
+
+ curr_pos += curr_hivebin.get_size()
+
+ def build_cell_maps(self):
+ """Build the maps of allocated and unallocated cells."""
+
+ self.cell_map_allocated = set()
+ self.cell_map_unallocated = set()
+
+ for hbin in self.hive_bins():
+ for cell in hbin.cells:
+ cell_file_offset = cell.file_offset
+ if cell.is_allocated():
+ self.cell_map_allocated.add(cell_file_offset)
+ else:
+ self.cell_map_unallocated.add(cell_file_offset)
+
+ # With a truncated file, free cells are simply the unallocated ones (no reference walk is done).
+ self.cell_map_free = self.cell_map_unallocated
+
+ def get_cell(self, cell_relative_offset):
+ """Get and return data from a cell. The cell must be in the map of allocated cell or in the map of unallocated cells."""
+
+ if cell_relative_offset == CELL_OFFSET_NIL:
+ raise CellOffsetException('Got CELL_OFFSET_NIL')
+
+ # Unlike PrimaryFile.get_cell(), unallocated cells are also served here.
+ cell_file_offset = BASE_BLOCK_LENGTH_PRIMARY + cell_relative_offset
+ if cell_file_offset not in self.cell_map_allocated and cell_file_offset not in self.cell_map_unallocated:
+ raise CellOffsetException('There is no valid cell starting at this offset (relative): {}'.format(cell_relative_offset))
+
+ cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format)
+ return cell.get_cell_data()
+
+ def get_cell_naive(self, cell_relative_offset):
+ """Get and return data from a cell naively."""
+
+ if cell_relative_offset == CELL_OFFSET_NIL:
+ raise CellOffsetException('Got CELL_OFFSET_NIL')
+
+ cell_file_offset = BASE_BLOCK_LENGTH_PRIMARY + cell_relative_offset
+
+ cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format)
+ return cell.get_cell_data()
+
+ def cells(self, yield_unallocated_cells = False):
+ """This method yields a HiveCell object for each cell."""
+
+ for cell_file_offset in self.cell_map_allocated:
+ cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format)
+ yield cell
+
+ # Unallocated cells are yielded only on request.
+ if yield_unallocated_cells:
+ for cell_file_offset in self.cell_map_unallocated:
+ cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format)
+ yield cell
diff --git a/yarp/RegistryHelpers.py b/yarp/RegistryHelpers.py
new file mode 100644
index 0000000..770356a
--- /dev/null
+++ b/yarp/RegistryHelpers.py
@@ -0,0 +1,105 @@
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
+from __future__ import unicode_literals
+
+from os import path, linesep
+from collections import namedtuple
+
DiscoveredLogFiles = namedtuple('DiscoveredLogFiles', [ 'log_path', 'log1_path', 'log2_path' ])

def DiscoverLogFiles(PrimaryPath):
	"""Return a named tuple (DiscoveredLogFiles) describing a path to each transaction log file of a supplied primary file.

	Each field ('log_path', 'log1_path', 'log2_path') is either a path to an existing file or None.
	Uppercase extensions ('.LOG', '.LOG1', '.LOG2') are preferred over lowercase ones; the original,
	lowercased and uppercased variants of the primary file name are tried in turn.
	"""

	def DiscoverLogFilesInternal(PrimaryPath):
		# Probe the uppercase extensions first, then the lowercase ones.
		# (The two branches were previously duplicated; this loop keeps the behavior identical.)
		for extensions in [ ('.LOG', '.LOG1', '.LOG2'), ('.log', '.log1', '.log2') ]:
			candidates = [ PrimaryPath + extension for extension in extensions ]
			if any(path.isfile(candidate) for candidate in candidates):
				# At least one file with this set of extensions is present, use it and others (if present).
				log, log1, log2 = [ candidate if path.isfile(candidate) else None for candidate in candidates ]
				return DiscoveredLogFiles(log_path = log, log1_path = log1, log2_path = log2)

		# Nothing was found for this primary path.
		return None

	directory, filename = path.split(PrimaryPath)
	filenames = sorted(set([ filename, filename.lower(), filename.upper() ]))
	for filename in filenames:
		result = DiscoverLogFilesInternal(path.join(directory, filename))
		if result is not None:
			return result

	# Give up.
	return DiscoveredLogFiles(log_path = None, log1_path = None, log2_path = None)
+
def HexDump(Buffer):
	"""Return bytes from Buffer as a hexdump-like string (16 bytes per line).

	Each line consists of a zero-padded hexadecimal offset, a hexadecimal dump of up to
	16 bytes (a dash separates the 8th and the 9th bytes, short lines are padded with
	spaces), and an ASCII dump (non-printable bytes are shown as dots). Lines are
	separated (not terminated) by 'os.linesep'.
	"""

	if type(Buffer) is not bytearray:
		Buffer = bytearray(Buffer)

	# Collect lines in a list and join them once at the end
	# (repeated string concatenation would be quadratic).
	output_lines = []

	for line_offset in range(0, len(Buffer), 16):
		bytes_line = Buffer[line_offset : line_offset + 16]

		address = '{:08X}'.format(line_offset)

		hex_chars = []
		for k, single_byte in enumerate(bytes_line):
			hex_chars.append('{:02X}'.format(single_byte))
			if k != len(bytes_line) - 1:
				# A dash splits the hexadecimal dump into two halves of 8 bytes each.
				hex_chars.append('-' if k == 7 else ' ')

		# A full line is 47 characters wide; pad a short (final) line to that width.
		hex_line = ''.join(hex_chars).ljust(47)

		ascii_line = ''.join(chr(b) if 32 <= b <= 126 else '.' for b in bytes_line)

		output_lines.append(address + '  ' + hex_line + '  ' + ascii_line)

	return linesep.join(output_lines)
diff --git a/yarp/RegistryRecords.py b/yarp/RegistryRecords.py
new file mode 100644
index 0000000..ed1bfa4
--- /dev/null
+++ b/yarp/RegistryRecords.py
@@ -0,0 +1,524 @@
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
+from __future__ import unicode_literals
+
+from struct import unpack
+from collections import namedtuple
+from .RegistryFile import RegistryException
+
+# Key node flags.
+KEY_VOLATILE = 0x0001
+KEY_HIVE_EXIT = 0x0002
+KEY_HIVE_ENTRY = 0x0004
+KEY_NO_DELETE = 0x0008
+KEY_SYM_LINK = 0x0010
+KEY_COMP_NAME = 0x0020
+KEY_PREDEF_HANDLE = 0x0040
+KEY_VIRT_SOURCE = 0x0080
+KEY_VIRT_TARGET = 0x0100
+KEY_VIRT_STORE = 0x0200
+
+# User flags for a key node.
+KEY_FLAG_32BIT = 0x1
+KEY_FLAG_REFLECTED = 0x2
+KEY_FLAG_EXEMPT_REFLECTION = 0x4
+KEY_FLAG_OWNERSHIP_REFLECTION = 0x8
+
+# Virtualization control flags for a key node.
+REG_KEY_DONT_VIRTUALIZE = 0x2
+REG_KEY_DONT_SILENT_FAIL = 0x4
+REG_KEY_RECURSE_FLAG = 0x8
+
+# Debug flags for a key node.
+BREAK_ON_OPEN = 0x01
+BREAK_ON_DELETE = 0x02
+BREAK_ON_SECURITY_CHANGE = 0x04
+BREAK_ON_CREATE_SUBKEY = 0x08
+BREAK_ON_DELETE_SUBKEY = 0x10
+BREAK_ON_SET_VALUE = 0x20
+BREAK_ON_DELETE_VALUE = 0x40
+BREAK_ON_KEY_VIRTUALIZE = 0x80
+
+# Key value flags.
+VALUE_COMP_NAME = 0x0001
+VALUE_TOMBSTONE = 0x0002
+
+# Data types for a key value.
+REG_NONE = 0x00000000
+REG_SZ = 0x00000001
+REG_EXPAND_SZ = 0x00000002
+REG_BINARY = 0x00000003
+REG_DWORD = 0x00000004
+REG_DWORD_LITTLE_ENDIAN = REG_DWORD
+REG_DWORD_BIG_ENDIAN = 0x00000005
+REG_LINK = 0x00000006
+REG_MULTI_SZ = 0x00000007
+REG_RESOURCE_LIST = 0x00000008
+REG_FULL_RESOURCE_DESCRIPTOR = 0x00000009
+REG_RESOURCE_REQUIREMENTS_LIST = 0x0000000a
+REG_QWORD = 0x0000000b
+REG_QWORD_LITTLE_ENDIAN = REG_QWORD
+
+LeafElement = namedtuple('LeafElement', [ 'relative_offset', 'name_hint', 'name_hash' ])
+
class ParseException(RegistryException):
	"""This exception is raised when a registry record is invalid."""

	def __init__(self, value):
		# Keep the description (typically a 'str') for later display via __str__.
		self._value = value

	def __str__(self):
		return repr(self._value)
+
class MemoryBlock(object):
	"""This is a generic class for a memory block (cell data), it provides low-level methods for reading and parsing data.
	All methods are self-explanatory.
	"""

	def __init__(self, buf):
		# buf: raw data (a bytes-like object) of this memory block.
		self.buf = buf
+
+ def read_binary(self, pos, length = None):
+ if length is None:
+ b = self.buf[pos : ]
+ return b
+
+ b = self.buf[pos : pos + length]
+ if len(b) != length:
+ raise ParseException('Cannot read data (expected: {} bytes, read: {} bytes)'.format(length, len(b)))
+
+ return b
+
+ def read_uint8(self, pos):
+ b = self.read_binary(pos, 1)
+ return unpack('> 12
+
	def get_user_flags_new(self):
		"""Return the user flags (new location: the lower 4 bits of the combined flags field)."""
		return self.get_virtualization_control_and_user_flags() & 0xF
+
	def get_virtualization_control_flags(self):
		"""Return the virtualization control flags (the combined flags field shifted right by 4 bits)."""
		return self.get_virtualization_control_and_user_flags() >> 4
+
	def get_debug(self):
		"""Return the debug value (a byte at offset 55; presumably a bitmask of the BREAK_ON_* flags)."""
		return self.read_uint8(55)
+
	def get_largest_subkey_classname_length(self):
		"""Return the length of the largest subkey class name (a 32-bit field at offset 56)."""
		return self.read_uint32(56)
+
	def get_largest_value_name_length(self):
		"""Return the length of the largest value name (a 32-bit field at offset 60)."""
		return self.read_uint32(60)
+
	def get_largest_value_data_size(self):
		"""Return the size of the largest value data (a 32-bit field at offset 64)."""
		return self.read_uint32(64)
+
	def get_workvar(self):
		"""Return the WorkVar field (a 32-bit field at offset 68)."""
		return self.read_uint32(68)
+
	def get_key_name_length(self):
		"""Return the length of the key name (a 16-bit field at offset 72; used as a byte count by get_key_name())."""
		return self.read_uint16(72)
+
	def get_classname_length(self):
		"""Return the length of the class name (a 16-bit field at offset 74)."""
		return self.read_uint16(74)
+
	def get_key_name(self):
		"""Get and return a key name string (as raw bytes)."""

		# The name starts at offset 76 and runs for get_key_name_length() bytes.
		return self.read_binary(76, self.get_key_name_length())
+
	def get_slack(self):
		"""Return bytes left in this memory block after the key name (slack space)."""
		return self.read_binary(76 + self.get_key_name_length())
+
class KeyValuesList(MemoryBlock):
	"""This is a class for a key values list, it provides methods to read this list."""

	def __init__(self, buf, elements_count):
		super(KeyValuesList, self).__init__(buf)

		# elements_count: the number of 32-bit offsets stored in this list.
		self.elements_count = elements_count

	def elements(self):
		"""This method yields key value offsets."""

		for index in range(self.elements_count):
			yield self.read_uint32(index * 4)

	def get_slack(self):
		"""Return bytes left in this memory block after the list elements (slack space)."""
		return self.read_binary(self.elements_count * 4)
+
class KeyValue(MemoryBlock):
	"""This is a class for a key value, it provides methods to access various fields of the key value.
	Most methods are self-explanatory.
	"""

	def __init__(self, buf):
		super(KeyValue, self).__init__(buf)

		# Validate the record signature before any other field is used.
		signature = self.get_signature()
		if signature != b'vk':
			raise ParseException('Invalid signature: {}'.format(signature))

	def get_signature(self):
		"""Return the two-byte record signature (b'vk')."""
		return self.read_binary(0, 2)

	def get_value_name_length(self):
		"""Return the length of the value name (a 16-bit field at offset 2)."""
		return self.read_uint16(2)

	def get_data_size(self):
		"""Return the raw data size field (a 32-bit field at offset 4; the most significant bit marks inline data)."""
		return self.read_uint32(4)

	def get_data_size_real(self):
		"""Get and return a real size of data (the most significant bit is ignored)."""

		# Masking off bit 31 is equivalent to subtracting 0x80000000 when that bit is set.
		return self.get_data_size() & 0x7FFFFFFF

	def is_data_inline(self):
		"""Return True if data is stored inline (in the data offset field)."""

		return bool(self.get_data_size() & 0x80000000)

	def get_inline_data(self):
		"""Return the four bytes of the data offset field interpreted as inline data."""
		return self.read_binary(8, 4)

	def get_data_offset(self):
		"""Return the data offset (a 32-bit field at offset 8)."""
		return self.read_uint32(8)

	def get_data_type(self):
		"""Return the data type (a 32-bit field at offset 12; one of the REG_* constants)."""
		return self.read_uint32(12)

	def get_flags(self):
		"""Return the value flags (a 16-bit field at offset 16)."""
		return self.read_uint16(16)

	def get_spare(self):
		"""Return the spare field (a 16-bit field at offset 18)."""
		return self.read_uint16(18)

	def get_title_index(self):
		"""Return the title index (a 32-bit read at offset 16, overlapping the flags and spare fields)."""
		return self.read_uint32(16)

	def get_value_name(self):
		"""Get and return a value name string (as raw bytes)."""

		return self.read_binary(20, self.get_value_name_length())

	def get_slack(self):
		"""Return bytes left in this memory block after the value name (slack space)."""
		return self.read_binary(20 + self.get_value_name_length())
+
class KeySecurity(MemoryBlock):
	"""This is a class for a key security item, it provides methods to access various fields of the key security item.
	All methods are self-explanatory.
	"""

	def __init__(self, buf):
		super(KeySecurity, self).__init__(buf)

		# Validate the record signature before any other field is used.
		signature = self.get_signature()
		if signature != b'sk':
			raise ParseException('Invalid signature: {}'.format(signature))

		# A security descriptor is required to be present.
		if self.get_security_descriptor_size() == 0:
			raise ParseException('Empty security descriptor')

	def get_signature(self):
		"""Return the two-byte record signature (b'sk')."""
		return self.read_binary(0, 2)

	def get_reserved(self):
		"""Return the reserved field (a 16-bit field at offset 2)."""
		return self.read_uint16(2)

	def get_flink(self):
		"""Return the forward link (a 32-bit field at offset 4)."""
		return self.read_uint32(4)

	def get_blink(self):
		"""Return the backward link (a 32-bit field at offset 8)."""
		return self.read_uint32(8)

	def get_reference_count(self):
		"""Return the reference count (a 32-bit field at offset 12)."""
		return self.read_uint32(12)

	def get_security_descriptor_size(self):
		"""Return the size of the security descriptor (a 32-bit field at offset 16)."""
		return self.read_uint32(16)

	def get_security_descriptor(self):
		"""Get and return a security descriptor (as raw bytes)."""

		return self.read_binary(20, self.get_security_descriptor_size())

	def get_slack(self):
		"""Return bytes left in this memory block after the security descriptor (slack space)."""
		return self.read_binary(20 + self.get_security_descriptor_size())
+
class SegmentsList(MemoryBlock):
	"""This is a class for a segments list (big data), it provides a method to read this list."""

	def __init__(self, buf, elements_count):
		super(SegmentsList, self).__init__(buf)

		# elements_count: the number of 32-bit segment offsets stored in this list.
		self.elements_count = elements_count

	def elements(self):
		"""This method yields segment offsets."""

		for index in range(self.elements_count):
			yield self.read_uint32(index * 4)

	def get_slack(self):
		"""Return bytes left in this memory block after the list elements (slack space)."""
		return self.read_binary(self.elements_count * 4)
+
class BigData(MemoryBlock):
	"""This is a class for a big data record, it provides methods to access various fields of the big data record.
	All methods are self-explanatory.
	"""

	def __init__(self, buf):
		super(BigData, self).__init__(buf)

		# Validate the record signature before any other field is used.
		signature = self.get_signature()
		if signature != b'db':
			raise ParseException('Invalid signature: {}'.format(signature))

		# A big data record is expected to reference at least two segments.
		segments_count = self.get_segments_count()
		if segments_count < 2:
			raise ParseException('Invalid number of segments: {}'.format(segments_count))

	def get_signature(self):
		"""Return the two-byte record signature (b'db')."""
		return self.read_binary(0, 2)

	def get_segments_count(self):
		"""Return the number of segments (a 16-bit field at offset 2)."""
		return self.read_uint16(2)

	def get_segments_list_offset(self):
		"""Return the offset of the segments list (a 32-bit field at offset 4)."""
		return self.read_uint32(4)

	def get_slack(self):
		"""Return bytes left in this memory block after the fixed fields (slack space)."""
		return self.read_binary(8)
diff --git a/yarp/RegistryRecover.py b/yarp/RegistryRecover.py
new file mode 100644
index 0000000..a1f1b5f
--- /dev/null
+++ b/yarp/RegistryRecover.py
@@ -0,0 +1,143 @@
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
+from __future__ import unicode_literals
+
+from . import Registry
+from . import RegistryFile
+
# Plausibility thresholds used by ValidateKey() and ValidateValue() when checking recovered (deleted) records.
MAX_PLAUSIBLE_SUBKEYS_COUNT = 80000
MAX_PLAUSIBLE_VALUES_COUNT = 70000
MAX_PLAUSIBLE_NAME_LENGTH = 2048
+
def ValidateKey(Key):
	"""Check whether or not a key looks plausible. If not, an exception (Registry.RegistryException) is raised."""

	if len(Key.name()) > MAX_PLAUSIBLE_NAME_LENGTH:
		raise Registry.RegistryException('Implausible name length')

	subkeys_implausible = (Key.subkeys_count() > MAX_PLAUSIBLE_SUBKEYS_COUNT or
		Key.key_node.get_volatile_subkeys_count() > MAX_PLAUSIBLE_SUBKEYS_COUNT)
	if subkeys_implausible:
		raise Registry.RegistryException('Implausible number of subkeys reported')

	if Key.values_count() > MAX_PLAUSIBLE_VALUES_COUNT:
		raise Registry.RegistryException('Implausible number of values reported')

	# A timestamp far outside of this range indicates garbage data, not a real key.
	year = Key.last_written_timestamp().year
	if not (1970 <= year <= 2100):
		raise Registry.RegistryException('Implausible last written timestamp')
+
def ValidateValue(Value):
	"""Check whether or not a value looks plausible. If not, an exception (Registry.RegistryException) is raised."""

	if len(Value.name()) > MAX_PLAUSIBLE_NAME_LENGTH:
		raise Registry.RegistryException('Implausible name length')

	# Inline data is stored in the 4-byte data offset field, so it cannot be larger than 4 bytes.
	if Value.key_value.is_data_inline() and Value.key_value.get_data_size_real() > 4:
		raise Registry.RegistryException('Value data is too large to be stored inline')
+
class Scanner(object):
	"""This class is used to scan free cells for deleted keys and values."""

	hive = None
	"""A RegistryHive object."""

	def __init__(self, hive, scan_remnant_data = True, scan_slack_space = True):
		"""Arguments:
		- hive: a RegistryHive object;
		- scan_remnant_data: when True, also scan remnant data within a primary file;
		- scan_slack_space: when True, also scan slack space collected in 'hive.effective_slack'.
		"""

		self.hive = hive
		self.scan_remnant_data = scan_remnant_data
		self.scan_slack_space = scan_slack_space

	def virtual_cell(self):
		"""Get and return remnant data within a primary file as a virtual cell (if any, else return None)."""

		if not self.scan_remnant_data:
			return

		# Remnant data starts right after the effective end of the hive bins data.
		offset = RegistryFile.BASE_BLOCK_LENGTH_PRIMARY + self.hive.registry_file.baseblock.effective_hbins_data_size
		self.hive.registry_file.file_object.seek(offset)
		data = self.hive.registry_file.file_object.read()
		if len(data) == 0:
			return

		return data

	def process_cell(self, cell):
		"""Scan data of a cell for deleted keys and values, yield them as RegistryKey and RegistryValue objects."""

		# Walk the cell two bytes at a time, looking for record signatures ('nk' and 'vk').
		pos = 0
		while pos < len(cell):
			if pos < len(cell) - 76: # A key node with at least one character in the name.
				two_bytes = cell[pos : pos + 2]
				if two_bytes == b'nk':
					candidate_nk = cell[pos : ]
					try:
						key = Registry.RegistryKey(self.hive.registry_file, candidate_nk, None, None, True, True)
						ValidateKey(key)
					except (Registry.RegistryException, UnicodeDecodeError):
						# Not a plausible key node, keep scanning.
						pass
					else:
						yield key

						# Skip past the fixed-size part and the key name, keeping the position even.
						pos += 76 + key.key_node.get_key_name_length()
						if pos % 2 != 0:
							pos += 1

						continue

					pos += 2
					continue

			if pos <= len(cell) - 20: # A key value with no name (at least).
				two_bytes = cell[pos : pos + 2]
				if two_bytes == b'vk':
					candidate_vk = cell[pos : ]
					try:
						value = Registry.RegistryValue(self.hive.registry_file, candidate_vk, True)
						ValidateValue(value)
					except (Registry.RegistryException, UnicodeDecodeError):
						# Not a plausible key value, keep scanning.
						pass
					else:
						yield value

						# Skip past the fixed-size part and the value name, keeping the position even.
						pos += 20 + value.key_value.get_value_name_length()
						if pos % 2 != 0:
							pos += 1

						continue

					pos += 2
					continue

			pos += 2

	def scan(self):
		"""This method yields RegistryKey objects for deleted keys and RegistryValue objects for deleted values.
		A hive is required to have the free map built (or nothing will be recovered).
		"""

		# Scan every free (unallocated) cell known to the hive.
		for file_offset in self.hive.registry_file.cell_map_free:
			cell = self.hive.registry_file.get_cell_naive(file_offset - RegistryFile.BASE_BLOCK_LENGTH_PRIMARY)

			for result in self.process_cell(cell):
				yield result

		# Scan remnant data past the hive bins (if enabled and present).
		virtual_cell = self.virtual_cell()
		if virtual_cell is not None:
			for result in self.process_cell(virtual_cell):
				yield result

		if self.scan_slack_space:
			for slack in self.hive.effective_slack:
				if len(slack) % 2 != 0:
					# Make the length even (presumably to preserve the alignment used by process_cell()).
					virtual_cell = slack[ 1 : ]
				else:
					virtual_cell = slack

				for result in self.process_cell(virtual_cell):
					yield result
diff --git a/yarp/__init__.py b/yarp/__init__.py
new file mode 100644
index 0000000..7e8344d
--- /dev/null
+++ b/yarp/__init__.py
@@ -0,0 +1,5 @@
+# yarp: yet another registry parser
+# (c) Maxim Suhanov
+
__version__ = '1.0.0-beta1'
# Submodules exported via 'from yarp import *'.
__all__ = [ 'Registry', 'RegistryFile', 'RegistryRecords', 'RegistryRecover', 'RegistryCarve', 'RegistryHelpers' ]