diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..5eadd86 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,3 @@ +Version: 1.0.0-beta1 + +Initial release (beta). diff --git a/Example b/Example new file mode 100644 index 0000000..c79ee67 --- /dev/null +++ b/Example @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 + +from yarp import * + +# A primary file is specified here. +primary_path = '<...>/SYSTEM' + +# Discover transaction log files to be used to recover the primary file, if required. +transaction_logs = RegistryHelpers.DiscoverLogFiles(primary_path) + +# Open the primary file and each transaction log file discovered. +primary_file = open(primary_path, 'rb') + +if transaction_logs.log_path is not None: + log_file = open(transaction_logs.log_path, 'rb') +else: + log_file = None + +if transaction_logs.log1_path is not None: + log1_file = open(transaction_logs.log1_path, 'rb') +else: + log1_file = None + +if transaction_logs.log2_path is not None: + log2_file = open(transaction_logs.log2_path, 'rb') +else: + log2_file = None + +# Open the hive and recover it, if required. +hive = Registry.RegistryHive(primary_file) +recovery_result = hive.recover_auto(log_file, log1_file, log2_file) +if recovery_result.recovered: + print('The hive has been recovered') + +# Print basic information about the hive. +print('Last written timestamp: {}'.format(hive.last_written_timestamp())) +print('Last reorganized timestamp: {}'.format(hive.last_reorganized_timestamp())) + +# Find an existing key. +key = hive.find_key('controlset001\\services') +print('Found a key: {}'.format(key.path())) + +# Print information about its subkeys. +for sk in key.subkeys(): + print(sk) + +# Pick an existing subkey. +key = key.subkey('exfat') + +# Print information about it. +print('Found a subkey: {}'.format(key.name())) +print('Last written timestamp: {}'.format(key.last_written_timestamp())) + +# Print information about its values. +for v in key.values(): + print(v) + +# Pick an existing value. 
+v = key.value('description') + +# Print more information about this value. +print('Some information about a specific value:') +print('Value name is \'{}\''.format(v.name())) +print('Value type is {} as a string (or {} as an integer)'.format(v.type_str(), v.type_raw())) +print('Value data is:') +print(v.data()) +print('The same data as raw bytes:') +print(v.data_raw()) + +# Close everything. +hive = None +primary_file.close() +if log_file is not None: + log_file.close() +if log1_file is not None: + log1_file.close() +if log2_file is not None: + log2_file.close() diff --git a/License b/License new file mode 100644 index 0000000..94a9ed0 --- /dev/null +++ b/License @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. 
+ + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. 
+States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. 
+ + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. 
However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. 
+ + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. 
+ + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/ReadMe b/ReadMe new file mode 100644 index 0000000..a506dcc --- /dev/null +++ b/ReadMe @@ -0,0 +1,33 @@ +yarp: yet another registry parser + +1. Project goals: the library and tools + +- Parse Windows registry files in a proper way (with forensics in mind). +- Expose values of all fields of underlying registry structures. +- Support for truncated registry files. +- Support for recovering deleted keys and values. +- Support for carving of registry hives. +- Support for transaction log files. + +2. Hive version numbers supported + +- Full support: 1.1-1.6. +- No support: 1.0. + +In general, full support is available for hive files from installations of +Windows NT 3.1 and later versions of Windows NT (including Windows 10); +hive files from installations of pre-release versions of Windows NT 3.1 are +not supported. + +3. Documentation + +See the docstrings in the module. +For a basic usage example, see the 'Example' file. + +4. License + +This project is made available under the terms of the GNU GPL, version 3. +See the 'License' file. 
+ +--- +(c) Maxim Suhanov diff --git a/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive b/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive new file mode 100644 index 0000000..40e4f34 Binary files /dev/null and b/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive differ diff --git a/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive.LOG1 b/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive.LOG1 new file mode 100644 index 0000000..a68ef6f Binary files /dev/null and b/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive.LOG1 differ diff --git a/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive.LOG2 b/hives_for_tests/BadBaseBlockHive/BadBaseBlockHive.LOG2 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/BadListHive b/hives_for_tests/BadListHive new file mode 100644 index 0000000..f32bf61 Binary files /dev/null and b/hives_for_tests/BadListHive differ diff --git a/hives_for_tests/BadLogHive1/BadLogHive b/hives_for_tests/BadLogHive1/BadLogHive new file mode 100644 index 0000000..1c3745f Binary files /dev/null and b/hives_for_tests/BadLogHive1/BadLogHive differ diff --git a/hives_for_tests/BadLogHive1/BadLogHive.LOG1 b/hives_for_tests/BadLogHive1/BadLogHive.LOG1 new file mode 100644 index 0000000..a0c7e00 Binary files /dev/null and b/hives_for_tests/BadLogHive1/BadLogHive.LOG1 differ diff --git a/hives_for_tests/BadLogHive1/BadLogHive.LOG2 b/hives_for_tests/BadLogHive1/BadLogHive.LOG2 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/BadLogHive2/BadLogHive b/hives_for_tests/BadLogHive2/BadLogHive new file mode 100644 index 0000000..1c3745f Binary files /dev/null and b/hives_for_tests/BadLogHive2/BadLogHive differ diff --git a/hives_for_tests/BadLogHive2/BadLogHive.LOG1 b/hives_for_tests/BadLogHive2/BadLogHive.LOG1 new file mode 100644 index 0000000..00548c4 Binary files /dev/null and b/hives_for_tests/BadLogHive2/BadLogHive.LOG1 differ diff --git a/hives_for_tests/BadLogHive2/BadLogHive.LOG2 
b/hives_for_tests/BadLogHive2/BadLogHive.LOG2 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/BadLogHive3/BadLogHive b/hives_for_tests/BadLogHive3/BadLogHive new file mode 100644 index 0000000..3c7c1cd Binary files /dev/null and b/hives_for_tests/BadLogHive3/BadLogHive differ diff --git a/hives_for_tests/BadLogHive3/BadLogHive.LOG1 b/hives_for_tests/BadLogHive3/BadLogHive.LOG1 new file mode 100644 index 0000000..dce0485 Binary files /dev/null and b/hives_for_tests/BadLogHive3/BadLogHive.LOG1 differ diff --git a/hives_for_tests/BadLogHive3/BadLogHive.LOG2 b/hives_for_tests/BadLogHive3/BadLogHive.LOG2 new file mode 100644 index 0000000..d0dd640 Binary files /dev/null and b/hives_for_tests/BadLogHive3/BadLogHive.LOG2 differ diff --git a/hives_for_tests/BadSubkeyHive b/hives_for_tests/BadSubkeyHive new file mode 100644 index 0000000..aafc82a Binary files /dev/null and b/hives_for_tests/BadSubkeyHive differ diff --git a/hives_for_tests/BigDataHive b/hives_for_tests/BigDataHive new file mode 100644 index 0000000..7b0df9d Binary files /dev/null and b/hives_for_tests/BigDataHive differ diff --git a/hives_for_tests/BogusKeyNamesHive b/hives_for_tests/BogusKeyNamesHive new file mode 100644 index 0000000..8bbc0eb Binary files /dev/null and b/hives_for_tests/BogusKeyNamesHive differ diff --git a/hives_for_tests/Carving/0 b/hives_for_tests/Carving/0 new file mode 100644 index 0000000..a95c189 Binary files /dev/null and b/hives_for_tests/Carving/0 differ diff --git a/hives_for_tests/Carving/512 b/hives_for_tests/Carving/512 new file mode 100644 index 0000000..a0a7ee5 Binary files /dev/null and b/hives_for_tests/Carving/512 differ diff --git a/hives_for_tests/CompHive b/hives_for_tests/CompHive new file mode 100644 index 0000000..768c846 Binary files /dev/null and b/hives_for_tests/CompHive differ diff --git a/hives_for_tests/DeletedDataHive b/hives_for_tests/DeletedDataHive new file mode 100644 index 0000000..1b2f486 Binary files /dev/null and 
b/hives_for_tests/DeletedDataHive differ diff --git a/hives_for_tests/DeletedTreeHive b/hives_for_tests/DeletedTreeHive new file mode 100644 index 0000000..49417d6 Binary files /dev/null and b/hives_for_tests/DeletedTreeHive differ diff --git a/hives_for_tests/DeletedTreeNoRootFlagHive b/hives_for_tests/DeletedTreeNoRootFlagHive new file mode 100644 index 0000000..a165f0a Binary files /dev/null and b/hives_for_tests/DeletedTreeNoRootFlagHive differ diff --git a/hives_for_tests/DeletedTreePartialPathHive b/hives_for_tests/DeletedTreePartialPathHive new file mode 100644 index 0000000..1e36ae3 Binary files /dev/null and b/hives_for_tests/DeletedTreePartialPathHive differ diff --git a/hives_for_tests/Discovery/1/aa b/hives_for_tests/Discovery/1/aa new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/1/aa.LOG b/hives_for_tests/Discovery/1/aa.LOG new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/1/aa.LOG1 b/hives_for_tests/Discovery/1/aa.LOG1 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/1/aa.LOG2 b/hives_for_tests/Discovery/1/aa.LOG2 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/2/AA b/hives_for_tests/Discovery/2/AA new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/2/aa.LOG1 b/hives_for_tests/Discovery/2/aa.LOG1 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/2/aa.LOG2 b/hives_for_tests/Discovery/2/aa.LOG2 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/3/AA b/hives_for_tests/Discovery/3/AA new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/3/aa.log b/hives_for_tests/Discovery/3/aa.log new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/3/aa.log1 b/hives_for_tests/Discovery/3/aa.log1 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/4/AA 
b/hives_for_tests/Discovery/4/AA new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/4/aa.LOG b/hives_for_tests/Discovery/4/aa.LOG new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/4/aa.log1 b/hives_for_tests/Discovery/4/aa.log1 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/4/aa.log2 b/hives_for_tests/Discovery/4/aa.log2 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/Discovery/5/aa b/hives_for_tests/Discovery/5/aa new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/DuplicateSubkeysHive b/hives_for_tests/DuplicateSubkeysHive new file mode 100644 index 0000000..046194c Binary files /dev/null and b/hives_for_tests/DuplicateSubkeysHive differ diff --git a/hives_for_tests/EffectiveSizeHive b/hives_for_tests/EffectiveSizeHive new file mode 100644 index 0000000..1f2f021 Binary files /dev/null and b/hives_for_tests/EffectiveSizeHive differ diff --git a/hives_for_tests/EmptyHive b/hives_for_tests/EmptyHive new file mode 100644 index 0000000..83cb26a Binary files /dev/null and b/hives_for_tests/EmptyHive differ diff --git a/hives_for_tests/ExtendedASCIIHive b/hives_for_tests/ExtendedASCIIHive new file mode 100644 index 0000000..0da7d2f Binary files /dev/null and b/hives_for_tests/ExtendedASCIIHive differ diff --git a/hives_for_tests/GarbageHive b/hives_for_tests/GarbageHive new file mode 100644 index 0000000..2b1a6fa Binary files /dev/null and b/hives_for_tests/GarbageHive differ diff --git a/hives_for_tests/HealedHive b/hives_for_tests/HealedHive new file mode 100644 index 0000000..1dcde22 Binary files /dev/null and b/hives_for_tests/HealedHive differ diff --git a/hives_for_tests/InvalidParentHive b/hives_for_tests/InvalidParentHive new file mode 100644 index 0000000..e9618a6 Binary files /dev/null and b/hives_for_tests/InvalidParentHive differ diff --git a/hives_for_tests/ManySubkeysHive b/hives_for_tests/ManySubkeysHive new 
file mode 100644 index 0000000..321c21f Binary files /dev/null and b/hives_for_tests/ManySubkeysHive differ diff --git a/hives_for_tests/MultiSzHive b/hives_for_tests/MultiSzHive new file mode 100644 index 0000000..d61cc41 Binary files /dev/null and b/hives_for_tests/MultiSzHive differ diff --git a/hives_for_tests/NewDirtyHive1/NewDirtyHive b/hives_for_tests/NewDirtyHive1/NewDirtyHive new file mode 100644 index 0000000..3c7c1cd Binary files /dev/null and b/hives_for_tests/NewDirtyHive1/NewDirtyHive differ diff --git a/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG1 b/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG1 new file mode 100644 index 0000000..af3b067 Binary files /dev/null and b/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG1 differ diff --git a/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG2 b/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG2 new file mode 100644 index 0000000..15f0a49 Binary files /dev/null and b/hives_for_tests/NewDirtyHive1/NewDirtyHive.LOG2 differ diff --git a/hives_for_tests/NewDirtyHive1/RecoveredHive_Windows10 b/hives_for_tests/NewDirtyHive1/RecoveredHive_Windows10 new file mode 100644 index 0000000..f352712 Binary files /dev/null and b/hives_for_tests/NewDirtyHive1/RecoveredHive_Windows10 differ diff --git a/hives_for_tests/NewDirtyHive2/NewDirtyHive b/hives_for_tests/NewDirtyHive2/NewDirtyHive new file mode 100644 index 0000000..233e7ee Binary files /dev/null and b/hives_for_tests/NewDirtyHive2/NewDirtyHive differ diff --git a/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG1 b/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG1 new file mode 100644 index 0000000..af3b067 Binary files /dev/null and b/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG1 differ diff --git a/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG2 b/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG2 new file mode 100644 index 0000000..15f0a49 Binary files /dev/null and b/hives_for_tests/NewDirtyHive2/NewDirtyHive.LOG2 differ diff --git a/hives_for_tests/NewFlagsHive 
b/hives_for_tests/NewFlagsHive new file mode 100644 index 0000000..2d09bd9 Binary files /dev/null and b/hives_for_tests/NewFlagsHive differ diff --git a/hives_for_tests/OldDirtyHive/OldDirtyHive b/hives_for_tests/OldDirtyHive/OldDirtyHive new file mode 100644 index 0000000..1c3745f Binary files /dev/null and b/hives_for_tests/OldDirtyHive/OldDirtyHive differ diff --git a/hives_for_tests/OldDirtyHive/OldDirtyHive.LOG1 b/hives_for_tests/OldDirtyHive/OldDirtyHive.LOG1 new file mode 100644 index 0000000..a68ef6f Binary files /dev/null and b/hives_for_tests/OldDirtyHive/OldDirtyHive.LOG1 differ diff --git a/hives_for_tests/OldDirtyHive/OldDirtyHive.LOG2 b/hives_for_tests/OldDirtyHive/OldDirtyHive.LOG2 new file mode 100644 index 0000000..e69de29 diff --git a/hives_for_tests/OldDirtyHive/RecoveredHive_Windows7 b/hives_for_tests/OldDirtyHive/RecoveredHive_Windows7 new file mode 100644 index 0000000..fafd5aa Binary files /dev/null and b/hives_for_tests/OldDirtyHive/RecoveredHive_Windows7 differ diff --git a/hives_for_tests/RemnantsHive b/hives_for_tests/RemnantsHive new file mode 100644 index 0000000..2c0a541 Binary files /dev/null and b/hives_for_tests/RemnantsHive differ diff --git a/hives_for_tests/SlackHive b/hives_for_tests/SlackHive new file mode 100644 index 0000000..9cf34e2 Binary files /dev/null and b/hives_for_tests/SlackHive differ diff --git a/hives_for_tests/StringValuesHive b/hives_for_tests/StringValuesHive new file mode 100644 index 0000000..19fe29f Binary files /dev/null and b/hives_for_tests/StringValuesHive differ diff --git a/hives_for_tests/TruncatedHive b/hives_for_tests/TruncatedHive new file mode 100644 index 0000000..5c71d65 Binary files /dev/null and b/hives_for_tests/TruncatedHive differ diff --git a/hives_for_tests/TruncatedNameHive b/hives_for_tests/TruncatedNameHive new file mode 100644 index 0000000..bb2d158 Binary files /dev/null and b/hives_for_tests/TruncatedNameHive differ diff --git a/hives_for_tests/UnicodeHive 
b/hives_for_tests/UnicodeHive new file mode 100644 index 0000000..a95c189 Binary files /dev/null and b/hives_for_tests/UnicodeHive differ diff --git a/hives_for_tests/WrongOrderHive b/hives_for_tests/WrongOrderHive new file mode 100644 index 0000000..88463c3 Binary files /dev/null and b/hives_for_tests/WrongOrderHive differ diff --git a/records_for_tests/dummy_db b/records_for_tests/dummy_db new file mode 100644 index 0000000..980239e Binary files /dev/null and b/records_for_tests/dummy_db differ diff --git a/records_for_tests/dummy_lf b/records_for_tests/dummy_lf new file mode 100644 index 0000000..17ff433 Binary files /dev/null and b/records_for_tests/dummy_lf differ diff --git a/records_for_tests/dummy_lh b/records_for_tests/dummy_lh new file mode 100644 index 0000000..26531f6 Binary files /dev/null and b/records_for_tests/dummy_lh differ diff --git a/records_for_tests/dummy_li b/records_for_tests/dummy_li new file mode 100644 index 0000000..7fb9ae7 Binary files /dev/null and b/records_for_tests/dummy_li differ diff --git a/records_for_tests/dummy_list b/records_for_tests/dummy_list new file mode 100644 index 0000000..6ec4164 Binary files /dev/null and b/records_for_tests/dummy_list differ diff --git a/records_for_tests/dummy_nk b/records_for_tests/dummy_nk new file mode 100644 index 0000000..fa137e8 Binary files /dev/null and b/records_for_tests/dummy_nk differ diff --git a/records_for_tests/dummy_ri b/records_for_tests/dummy_ri new file mode 100644 index 0000000..a90d6d6 Binary files /dev/null and b/records_for_tests/dummy_ri differ diff --git a/records_for_tests/dummy_sk b/records_for_tests/dummy_sk new file mode 100644 index 0000000..1126354 Binary files /dev/null and b/records_for_tests/dummy_sk differ diff --git a/records_for_tests/dummy_vk b/records_for_tests/dummy_vk new file mode 100644 index 0000000..3702200 Binary files /dev/null and b/records_for_tests/dummy_vk differ diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..d018eb2 --- 
/dev/null +++ b/setup.py @@ -0,0 +1,19 @@ +from distutils.core import setup +from yarp import __version__ + +setup( + name = 'yarp', + version = __version__, + license = 'GPLv3', + packages = [ 'yarp' ], + scripts = [ 'yarp-carver', 'yarp-print', 'yarp-timeline' ], + description = 'Yet another registry parser', + author = 'Maxim Suhanov', + author_email = 'no.spam.c@mail.ru', + classifiers = [ + 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3', + 'Development Status :: 4 - Beta' + ] +) diff --git a/test_cases.py b/test_cases.py new file mode 100644 index 0000000..45e95ca --- /dev/null +++ b/test_cases.py @@ -0,0 +1,1021 @@ +# coding: utf-8 + +# yarp: yet another registry parser +# (c) Maxim Suhanov + +import pytest +from io import BytesIO +from os import path, remove +from hashlib import md5 +from yarp import Registry, RegistryFile, RegistryRecords, RegistryRecover, RegistryCarve, RegistryHelpers + +HIVES_DIR = 'hives_for_tests' +RECORDS_DIR = 'records_for_tests' + +hive_empty = path.join(HIVES_DIR, 'EmptyHive') +hive_bigdata = path.join(HIVES_DIR, 'BigDataHive') +hive_many_subkeys = path.join(HIVES_DIR, 'ManySubkeysHive') +hive_garbage = path.join(HIVES_DIR, 'GarbageHive') +hive_duplicate_subkeys = path.join(HIVES_DIR, 'DuplicateSubkeysHive') + +hive_dirty_new1 = path.join(HIVES_DIR, 'NewDirtyHive1', 'NewDirtyHive') +hive_dirty_new1_log1 = path.join(HIVES_DIR, 'NewDirtyHive1', 'NewDirtyHive.LOG1') +hive_dirty_new1_log2 = path.join(HIVES_DIR, 'NewDirtyHive1', 'NewDirtyHive.LOG2') +hive_dirty_new1_recovered = path.join(HIVES_DIR, 'NewDirtyHive1', 'RecoveredHive_Windows10') + +hive_dirty_new2 = path.join(HIVES_DIR, 'NewDirtyHive2', 'NewDirtyHive') +hive_dirty_new2_log1 = path.join(HIVES_DIR, 'NewDirtyHive2', 'NewDirtyHive.LOG1') +hive_dirty_new2_log2 = path.join(HIVES_DIR, 'NewDirtyHive2', 'NewDirtyHive.LOG2') + +hive_dirty_old = path.join(HIVES_DIR, 'OldDirtyHive', 
'OldDirtyHive') +hive_dirty_old_log = path.join(HIVES_DIR, 'OldDirtyHive', 'OldDirtyHive.LOG1') +hive_dirty_old_recovered = path.join(HIVES_DIR, 'OldDirtyHive', 'RecoveredHive_Windows7') + +hive_unicode = path.join(HIVES_DIR, 'UnicodeHive') +hive_extended_ascii = path.join(HIVES_DIR, 'ExtendedASCIIHive') +hive_invalid_parent = path.join(HIVES_DIR, 'InvalidParentHive') +hive_bad_list = path.join(HIVES_DIR, 'BadListHive') +hive_bad_subkey = path.join(HIVES_DIR, 'BadSubkeyHive') + +hive_bad_baseblock = path.join(HIVES_DIR, 'BadBaseBlockHive', 'BadBaseBlockHive') +hive_bad_baseblock_log1 = path.join(HIVES_DIR, 'BadBaseBlockHive', 'BadBaseBlockHive.LOG1') +hive_bad_baseblock_log2 = path.join(HIVES_DIR, 'BadBaseBlockHive', 'BadBaseBlockHive.LOG2') + +hive_bad_log1 = path.join(HIVES_DIR, 'BadLogHive1', 'BadLogHive') +hive_bad_log1_log1 = path.join(HIVES_DIR, 'BadLogHive1', 'BadLogHive.LOG1') +hive_bad_log1_log2 = path.join(HIVES_DIR, 'BadLogHive1', 'BadLogHive.LOG2') + +hive_bad_log2 = path.join(HIVES_DIR, 'BadLogHive2', 'BadLogHive') +hive_bad_log2_log1 = path.join(HIVES_DIR, 'BadLogHive2', 'BadLogHive.LOG1') +hive_bad_log2_log2 = path.join(HIVES_DIR, 'BadLogHive2', 'BadLogHive.LOG2') + +hive_bad_log3 = path.join(HIVES_DIR, 'BadLogHive3', 'BadLogHive') +hive_bad_log3_log1 = path.join(HIVES_DIR, 'BadLogHive3', 'BadLogHive.LOG1') +hive_bad_log3_log2 = path.join(HIVES_DIR, 'BadLogHive3', 'BadLogHive.LOG2') + +hive_bogus_keynames = path.join(HIVES_DIR, 'BogusKeyNamesHive') +hive_new_flags = path.join(HIVES_DIR, 'NewFlagsHive') +hive_multisz = path.join(HIVES_DIR, 'MultiSzHive') +hive_strings = path.join(HIVES_DIR, 'StringValuesHive') +hive_wrong_order = path.join(HIVES_DIR, 'WrongOrderHive') +hive_truncated_name = path.join(HIVES_DIR, 'TruncatedNameHive') +hive_healed = path.join(HIVES_DIR, 'HealedHive') +hive_deleted_data = path.join(HIVES_DIR, 'DeletedDataHive') +hive_deleted_tree = path.join(HIVES_DIR, 'DeletedTreeHive') +hive_comp = path.join(HIVES_DIR, 'CompHive') 
+hive_remnants = path.join(HIVES_DIR, 'RemnantsHive') +hive_truncated = path.join(HIVES_DIR, 'TruncatedHive') +hive_effective_size = path.join(HIVES_DIR, 'EffectiveSizeHive') +hive_deleted_tree_no_root_flag = path.join(HIVES_DIR, 'DeletedTreeNoRootFlagHive') +hive_deleted_tree_partial_path = path.join(HIVES_DIR, 'DeletedTreePartialPathHive') +hive_slack = path.join(HIVES_DIR, 'SlackHive') + +hive_carving0 = path.join(HIVES_DIR, 'Carving', '0') +hive_carving512 = path.join(HIVES_DIR, 'Carving', '512') + +log_discovery = [ + path.join(HIVES_DIR, 'Discovery', '1', 'aa'), + path.join(HIVES_DIR, 'Discovery', '2', 'AA'), + path.join(HIVES_DIR, 'Discovery', '3', 'AA'), + path.join(HIVES_DIR, 'Discovery', '4', 'AA'), + path.join(HIVES_DIR, 'Discovery', '5', 'aa') +] + +record_nk = path.join(RECORDS_DIR, 'dummy_nk') +record_vk = path.join(RECORDS_DIR, 'dummy_vk') +record_sk = path.join(RECORDS_DIR, 'dummy_sk') +record_li = path.join(RECORDS_DIR, 'dummy_li') +record_lf = path.join(RECORDS_DIR, 'dummy_lf') +record_lh = path.join(RECORDS_DIR, 'dummy_lh') +record_ri = path.join(RECORDS_DIR, 'dummy_ri') +record_list = path.join(RECORDS_DIR, 'dummy_list') +record_db = path.join(RECORDS_DIR, 'dummy_db') + +def test_empty(): + with open(hive_empty, 'rb') as f: + hive = Registry.RegistryHive(f) + + assert hive.root_key().subkeys_count() == 0 + for key in hive.root_key().subkeys(): + assert False + + assert hive.root_key().path() == '' + assert hive.root_key().path(True) != '' + + timestamp = hive.last_written_timestamp() + assert timestamp.year == 2017 + assert timestamp.month == 3 + assert timestamp.day == 4 + assert timestamp.hour == 16 + assert timestamp.minute == 37 + assert timestamp.second == 31 + + timestamp = hive.last_reorganized_timestamp() + assert timestamp is None + +def test_bigdata(): + with open(hive_bigdata, 'rb') as f: + hive = Registry.RegistryHive(f) + + key = hive.root_key().subkey('key_with_bigdata') + assert key.values_count() == 2 + + value = key.value() + 
assert hive.registry_file.get_cell(value.key_value.get_data_offset())[ : 2] == b'db' + + data = value.data() + assert len(data) == 16345 + for c in data.decode('windows-1252'): + assert c == '1' + + value = key.value('V') + assert hive.registry_file.get_cell(value.key_value.get_data_offset())[ : 2] == b'db' + + data = value.data() + assert len(data) == 81725 + for c in data.decode('windows-1252'): + assert c == '2' + + assert key.value('dont_exist') is None + +def test_many_subkeys(): + with open(hive_many_subkeys, 'rb') as f: + hive = Registry.RegistryHive(f) + + key = hive.find_key('key_with_many_subkeys') + assert key.subkeys_count() == 5000 + + assert hive.registry_file.get_cell(key.key_node.get_subkeys_list_offset())[ : 2] == b'ri' + + allowed_range = range(1, 5000 + 1) + for subkey in key.subkeys(): + assert int(subkey.name()) in allowed_range + + key = hive.find_key('key_with_MAny_subkeys\\2119\\find_me') + assert key.path() == 'key_with_many_subkeys\\2119\\find_me' + assert key.path_partial() == key.path() + + key = hive.find_key('\\key_with_maNY_sUBkeys\\2119\\Find_me') + assert key.path() == 'key_with_many_subkeys\\2119\\find_me' + assert key.path_partial() == key.path() + + key = hive.find_key('key_with_many_subkeys\\2119\\find_me') + assert key.path() == 'key_with_many_subkeys\\2119\\find_me' + assert key.path_partial() == key.path() + + key = hive.find_key('key_with_many_subkeys\\3000') + assert key is not None + + key = hive.find_key('key_with_many_subkeys\\3000\\dont_exist') + assert key is None + + key = hive.find_key('key_with_many_subkeys\\dont_exist\\dont_exist') + assert key is None + +def test_garbage(): + with open(hive_garbage, 'rb') as f: + hive = Registry.RegistryHive(f) + + assert hive.registry_file.baseblock.get_hbins_data_size() == hive.registry_file.baseblock.effective_hbins_data_size == 4096 + + cnt = 0 + for hive_bin in hive.registry_file.hive_bins(): + cnt += 1 + + assert cnt == 1 + +def test_duplicate_subkeys(): + with 
open(hive_duplicate_subkeys, 'rb') as f: + hive = Registry.RegistryHive(f) + + with pytest.raises(Registry.WalkException): + key = hive.root_key().subkey('key_with_many_subkeys') + + assert key is not None + + for subkey in key.subkeys(): + pass + +@pytest.mark.parametrize('reverse', [False, True]) +def test_dirty_new1(reverse): + + def log_entry_counter(): + log_entry_counter.c += 1 + + with open(hive_dirty_new1, 'rb') as primary, open(hive_dirty_new1_log1, 'rb') as log1, open(hive_dirty_new1_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + + key_1 = hive.find_key('Key1') + key_21 = hive.find_key('Key2\\Key2_1') + key_22 = hive.find_key('Key2\\Key2_2') + assert key_1 is not None + assert key_21 is not None + assert key_22 is not None + + key_bad = hive.find_key('Key2\\Key2_2\\dont_exist') + assert key_bad is None + + value = key_1.value() + value_data = value.data() + assert len(value_data) == 6001 + for c in value_data[ : -1]: + assert c == '1' + + assert value_data[-1] == '\x00' + + assert len(hive.find_key('KEY2').value('v').data()) == 9 + assert hive.find_key('key2').value('V').data() == 'testTEST\x00' + + assert hive.registry_file.log_apply_count == 0 + + hive.log_entry_callback = log_entry_counter + log_entry_counter.c = 0 + + if not reverse: + hive.recover_new(log1, log2) + else: + hive.recover_new(log2, log1) + + assert log_entry_counter.c == 4 + + assert hive.registry_file.log_apply_count == 2 + + hive.registry_file.file_object.seek(4096) + recovered_data_1 = hive.registry_file.file_object.read() + md5_1 = md5(recovered_data_1).hexdigest() + + with open(hive_dirty_new1_recovered, 'rb') as f: + f.seek(4096) + recovered_data_2 = f.read() + md5_2 = md5(recovered_data_2).hexdigest() + + assert md5_1 == md5_2 + + key_1 = hive.find_key('Key1') + key_21 = hive.find_key('Key2\\Key2_1') + key_22 = hive.find_key('key2\\key2_2') + assert key_1 is None + assert key_21 is None + assert key_22 is None + + key_3 = hive.find_key('Key3') + key_31 = 
hive.find_key('Key3\\Key3_1') + key_32 = hive.find_key('Key3\\Key3_2') + key_33 = hive.find_key('key3\\key3_3') + assert key_3 is not None + assert key_31 is not None + assert key_32 is not None + assert key_33 is not None + + key_bad = hive.find_key('Key3\\Key3_2\\dont_exist') + assert key_bad is None + + value = key_3.value() + value_data = value.data() + assert len(value_data) == 1441 + for c in value_data[ : -1]: + assert c == '1' + + assert value_data[-1] == '\x00' + +def test_dirty_new2(): + with open(hive_dirty_new2, 'rb') as primary, open(hive_dirty_new2_log1, 'rb') as log1, open(hive_dirty_new2_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + + assert hive.registry_file.baseblock.validate_checksum() + assert hive.registry_file.log_apply_count == 0 + hive.recover_new(log1, log2) + assert hive.registry_file.log_apply_count == 1 + assert hive.registry_file.last_sequence_number == 5 + +def test_dirty_old(): + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + + key_1 = hive.find_key('\\key_with_many_subkeys\\1') + assert key_1 is not None + + key_5000_1 = hive.find_key('key_with_many_subkeys\\5000\\find_me_in_log') + assert key_5000_1 is None + + value_4500 = hive.find_key('key_with_many_subkeys\\4500').value('v') + assert value_4500 is None + + hive.recover_old(log) + + key_1 = hive.find_key('\\key_with_many_subkeys\\1') + assert key_1 is None + + key_5000_1 = hive.find_key('key_with_many_subkeys\\5000\\find_me_in_log') + assert key_5000_1 is not None + timestamp_1 = key_5000_1.last_written_timestamp() + + value_4500 = hive.find_key('key_with_many_subkeys\\4500').value('V') + assert value_4500 is not None + + assert value_4500.data() == [ 'a\x00', 'bb\x00', 'ccc\x00', '\x00' ] + + with open(hive_dirty_old_recovered, 'rb') as recovered: + hive_r = Registry.RegistryHive(recovered) + + key_5000_1_r = hive_r.find_key('key_with_many_subkeys\\5000\\find_me_in_log') + timestamp_2 
= key_5000_1_r.last_written_timestamp() + + value_4500_r = hive_r.find_key('key_with_many_subkeys\\4500').value('v') + + assert timestamp_1 == timestamp_2 + assert value_4500.data() == value_4500_r.data() + +def test_dirty_old_rollback_changes(): + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + + hive.recover_old(log) + assert hive.registry_file.log_apply_count == 1 + hive.rollback_changes() + assert hive.registry_file.log_apply_count == 0 + + key_1 = hive.find_key('\\key_with_many_subkeys\\1') + assert key_1 is not None + + key_5000_1 = hive.find_key('key_with_many_subkeys\\5000\\find_me_in_log') + assert key_5000_1 is None + + value_4500 = hive.find_key('key_with_many_subkeys\\4500').value('v') + assert value_4500 is None + +def test_recovery_not_required(): + with open(hive_dirty_old_recovered, 'rb') as recovered: + hive = Registry.RegistryHive(recovered) + dummy = BytesIO() + + with pytest.raises(RegistryFile.RecoveryException): + hive.recover_old(dummy) + + with pytest.raises(RegistryFile.RecoveryException): + hive.recover_new(dummy) + +def test_unicode(): + with open(hive_unicode, 'rb') as f: + hive = Registry.RegistryHive(f) + + key = hive.find_key(u'ПриВет\\КлюЧ') + assert key is not None + assert key.path() == u'Привет\\Ключ' + assert key.path_partial() == key.path() + + key = hive.find_key(u'\\ПриВет\\КлюЧ') + assert key is not None + assert key.path() == u'Привет\\Ключ' + assert key.path_partial() == key.path() + + key = hive.find_key(u'привет') + assert key is not None + assert key.path().lower() == u'привет' + assert key.path_partial() == key.path() + + key = hive.find_key(u'\\привеТ') + assert key is not None + assert key.path() == u'Привет' + assert key.path_partial() == key.path() + +def test_extended_ascii(): + with open(hive_extended_ascii, 'rb') as f: + hive = Registry.RegistryHive(f) + + key = hive.find_key(u'ëigenaardig') + assert key is not None + assert 
key.key_node.get_flags() & RegistryRecords.KEY_COMP_NAME > 0 + assert key.path() == u'ëigenaardig' + assert key.path_partial() == key.path() + + value = key.value(u'ëigenaardig') + assert value.key_value.get_flags() & RegistryRecords.VALUE_COMP_NAME > 0 + assert value.data() == u'ëigenaardig\x00' + +def test_autorecovery(): + def convert_tuple(t): + assert t.recovered + file_objects = t.file_objects + assert len(file_objects) < 3 and len(file_objects) > 0 + if len(file_objects) == 1: + return (t.is_new_log, t.file_objects[0]) + else: + return (t.is_new_log, t.file_objects[0], t.file_objects[1]) + + dummy = BytesIO() + + with open(hive_dirty_new1, 'rb') as primary, open(hive_dirty_new1_log1, 'rb') as log1, open(hive_dirty_new1_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + t = hive.recover_auto(dummy, log1, log2) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 2 + assert len(t) == 3 + assert t[0] + assert t[1] == log1 and t[2] == log2 + + with open(hive_dirty_new1, 'rb') as primary, open(hive_dirty_new1_log1, 'rb') as log1, open(hive_dirty_new1_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + t = hive.recover_auto(None, log1, log2) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 2 + assert len(t) == 3 + assert t[0] + assert t[1] == log1 and t[2] == log2 + + with open(hive_dirty_new2, 'rb') as primary, open(hive_dirty_new2_log1, 'rb') as log1, open(hive_dirty_new2_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + t = hive.recover_auto(dummy, log1, log2) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 1 + assert len(t) == 3 + assert t[0] + assert t[1] == log1 and t[2] == log2 + + with open(hive_dirty_new2, 'rb') as primary, open(hive_dirty_new2_log1, 'rb') as log1, open(hive_dirty_new2_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + t = hive.recover_auto(None, log1, log2) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 1 + 
assert len(t) == 3 + assert t[0] + assert t[1] == log1 and t[2] == log2 + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + t = hive.recover_auto(log, dummy, dummy) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 1 + assert len(t) == 2 + assert not t[0] + assert t[1] == log + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + t = hive.recover_auto(dummy, log, dummy) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 1 + assert len(t) == 2 + assert not t[0] + assert t[1] == log + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + t = hive.recover_auto(dummy, dummy, log) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 1 + assert len(t) == 2 + assert not t[0] + assert t[1] == log + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + t = hive.recover_auto(dummy, log, log) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 1 + assert len(t) == 2 + assert not t[0] + assert t[1] == log + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + t = hive.recover_auto(log, log, log) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 1 + assert len(t) == 2 + assert not t[0] + assert t[1] == log + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + t = hive.recover_auto(None, dummy, log) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 1 + assert len(t) == 2 + assert not t[0] + assert t[1] == log + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = 
Registry.RegistryHive(primary) + t = hive.recover_auto(log, None, None) + t = convert_tuple(t) + assert hive.registry_file.log_apply_count == 1 + assert len(t) == 2 + assert not t[0] + assert t[1] == log + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.AutoRecoveryException): + hive.recover_auto(dummy, dummy, dummy) + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.AutoRecoveryException): + hive.recover_auto(dummy, log, None) + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.AutoRecoveryException): + hive.recover_auto(dummy, None, log) + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.AutoRecoveryException): + hive.recover_auto(None, None, log) + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.AutoRecoveryException): + hive.recover_auto(log, None, log) + +def test_invalid_parent(): + with open(hive_invalid_parent, 'rb') as primary: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.WalkException): + for subkey_1 in hive.root_key().subkeys(): + for subkey_2 in subkey_1.subkeys(): + pass +def test_bad_list(): + with open(hive_bad_list, 'rb') as primary: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.WalkException): + for subkey_1 in hive.root_key().subkeys(): + for subkey_2 in subkey_1.subkeys(): + pass + +def test_bad_subkey(): + with open(hive_bad_subkey, 'rb') as primary: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.WalkException): + for subkey_1 in 
hive.root_key().subkeys(): + for subkey_2 in subkey_1.subkeys(): + pass + +def test_access_bits(): + with open(hive_dirty_new1, 'rb') as primary: + hive = Registry.RegistryHive(primary) + key = hive.find_key('\\key2\\key2_2') + assert key.access_bits() == 2 + +def test_bad_baseblock(): + with open(hive_bad_baseblock, 'rb') as primary, open(hive_bad_baseblock_log1, 'rb') as log1, open(hive_bad_baseblock_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + + assert hive.registry_file.log_apply_count == 0 + assert hive.registry_file.baseblock.effective_version == 1 + + with pytest.raises(RegistryFile.CellOffsetException): + hive.find_key('key_with_many_subkeys') + + t = hive.recover_auto(None, log1, log2) + + assert hive.registry_file.log_apply_count == 1 + assert hive.registry_file.baseblock.effective_version == 3 + assert not t.is_new_log + assert t.file_objects == [log1] + assert hive.find_key('key_with_many_subkeys') is not None + +def test_bad_log1(): + with open(hive_bad_log1, 'rb') as primary, open(hive_bad_log1_log1, 'rb') as log1, open(hive_bad_log1_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.AutoRecoveryException): + hive.recover_auto(None, log1, log2) +def test_bad_log2(): + with open(hive_bad_log2, 'rb') as primary, open(hive_bad_log2_log1, 'rb') as log1, open(hive_bad_log2_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.AutoRecoveryException): + hive.recover_auto(None, log1, log2) + +def test_bad_log3(): + with open(hive_bad_log3, 'rb') as primary, open(hive_bad_log3_log1, 'rb') as log1, open(hive_bad_log3_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + with pytest.raises(Registry.AutoRecoveryException): + hive.recover_auto(None, log1, log2) +def test_writable(): + with open(hive_empty, 'rb') as primary: + hive = Registry.RegistryHive(primary) + + assert not hive.registry_file.writable + hive.registry_file.create_writable_file_object() + 
assert hive.registry_file.writable + hive.registry_file.discard_writable_file_object() + + assert not hive.registry_file.writable + hive.registry_file.create_writable_file_object() + assert hive.registry_file.writable + hive.registry_file.create_writable_file_object() + assert hive.registry_file.writable + hive.registry_file.discard_writable_file_object() + assert not hive.registry_file.writable + hive.registry_file.discard_writable_file_object() + hive.registry_file.discard_writable_file_object() + hive.registry_file.discard_writable_file_object() + hive.registry_file.discard_writable_file_object() + assert not hive.registry_file.writable + +def test_bogus_keynames(): + with open(hive_bogus_keynames, 'rb') as primary: + hive = Registry.RegistryHive(primary) + for k in hive.root_key().subkeys(): + assert k.name() == 'testnew\r\nne' or k.name() == 'testnu\x00l' + + assert hive.find_key('testnew\r\nne') is not None + assert hive.find_key('testnu\x00l') is not None + +def test_new_flags(): + with open(hive_new_flags, 'rb') as primary: + hive = Registry.RegistryHive(primary) + + key_1 = hive.find_key('1') + assert key_1 is not None + key_2 = hive.find_key('1\\2') + assert key_2 is not None + + assert key_1.key_node.get_virtualization_control_flags() == 0 + assert key_1.key_node.get_user_flags_new() == 0 + assert key_2.key_node.get_virtualization_control_flags() == 0 + assert key_2.key_node.get_user_flags_new() == RegistryRecords.KEY_FLAG_32BIT + assert key_2.key_node.get_user_flags_old() == 0 + +def test_multisz(): + with open(hive_multisz, 'rb') as primary: + hive = Registry.RegistryHive(primary) + key = hive.find_key('key') + value_1 = key.value('1') + value_2 = key.value('2') + assert key.value() is None + + assert value_1.data() == [] + l = value_2.data() + assert len(l) == 3 and l[0] == u'привет\x00' and l[1] == u'как дела?\x00' and l[2] == '\x00' + +def test_strings(): + with open(hive_strings, 'rb') as primary: + hive = Registry.RegistryHive(primary) + key = 
hive.find_key('key') + + assert key.value().data() == u'test тест\x00' + assert key.value('1').data() == b'test' + assert key.value('2').data() == u'test тест\x00' + assert key.value('3').data() == u'test тест \x00' + +def test_unicode_garbage(): + s = b'a\x00b\x00\x00\x00c\x00d\x00' + assert Registry.DecodeUnicode(s, True) == u'ab\x00' + assert Registry.DecodeUnicode(s, False) == u'ab\x00cd' + + s = b'a\x00b\x00\x00\x00c\x00d\x00e' + assert Registry.DecodeUnicode(s, True) == u'ab\x00' + with pytest.raises(UnicodeDecodeError): + Registry.DecodeUnicode(s, False) + + s = b'a\x00\x00\x00b\x00\x00\x00\x00\x00' + assert Registry.DecodeUnicodeMulti(s, True) == u'a\x00b\x00\x00' + +def test_security(): + with open(hive_unicode, 'rb') as f: + hive = Registry.RegistryHive(f) + sec = hive.root_key().security() + assert len(sec.descriptor()) == 144 + +def test_wrong_order(): + with open(hive_wrong_order, 'rb') as f: + hive = Registry.RegistryHive(f) + + c = 0 + with pytest.raises(Registry.WalkException): + for subkey in hive.find_key('1').subkeys(): + c += 1 + + assert c == 1 + + with pytest.raises(Registry.WalkException): + for subkey in hive.find_key('2').subkeys(): + c += 1 + + assert c == 4 + +def test_truncated_name(): + with open(hive_truncated_name, 'rb') as f: + hive = Registry.RegistryHive(f) + + with pytest.raises(RegistryRecords.ParseException): + for subkey in hive.root_key().subkeys(): + pass + +@pytest.mark.parametrize('walk_everywhere', [True, False]) +def test_unreferenced(walk_everywhere): + with open(hive_healed, 'rb') as f: + hive = Registry.RegistryHive(f) + + if walk_everywhere: + hive.walk_everywhere() + assert len(hive.registry_file.cell_map_allocated - hive.registry_file.cell_map_referenced) == 5 + else: + hive.registry_file.build_map_free() + assert len(hive.registry_file.cell_map_referenced) == 0 + assert len(hive.registry_file.cell_map_free) == len(hive.registry_file.cell_map_unallocated) + + with open(hive_bigdata, 'rb') as f: + hive = 
Registry.RegistryHive(f) + + if walk_everywhere: + hive.walk_everywhere() + assert len(hive.registry_file.cell_map_allocated - hive.registry_file.cell_map_referenced) == 0 + else: + hive.registry_file.build_map_free() + assert len(hive.registry_file.cell_map_referenced) == 0 + assert len(hive.registry_file.cell_map_free) == len(hive.registry_file.cell_map_unallocated) + +def test_deleted(): + with open(hive_deleted_data, 'rb') as f: + hive = Registry.RegistryHive(f) + + hive.walk_everywhere() + + scanner = RegistryRecover.Scanner(hive) + cnt_key_values = 0 + cnt_key_nodes = 0 + for i in scanner.scan(): + if type(i) is Registry.RegistryValue: + cnt_key_values += 1 + + assert i.type_raw() == RegistryRecords.REG_SZ + + if i.name() == 'v2': + assert i.data() == '456\x00' + elif i.name() == 'v': + assert i.data() == '123456\x00' + else: + assert False + + elif type(i) is Registry.RegistryKey: + cnt_key_nodes += 1 + + assert i.name() == '456' + + c = 0 + for v in i.values(): + c += 1 + assert v.name() == 'v' + assert v.type_raw() == RegistryRecords.REG_SZ + assert v.data() == '123456\x00' + + assert c == 1 + + assert cnt_key_values == 2 + assert cnt_key_nodes == 1 + + with open(hive_deleted_tree, 'rb') as f: + hive = Registry.RegistryHive(f) + + hive.walk_everywhere() + + scanner = RegistryRecover.Scanner(hive) + for i in scanner.scan(): + assert type(i) is Registry.RegistryKey + assert i.path() in [ '1\\2\\3', '1\\2\\3\\4', '1\\2\\3\\4\\5', '1\\2\\3\\4\\New Key #1' ] + assert i.path_partial() == i.path() + + with open(hive_healed, 'rb') as f: + hive = Registry.RegistryHive(f) + + hive.walk_everywhere() + + scanner = RegistryRecover.Scanner(hive) + for i in scanner.scan(): + if type(i) is Registry.RegistryKey: + assert i.name() == 'cccc' + for v in i.values(): + assert v.name() == '123' + assert v.type_raw() == RegistryRecords.REG_SZ + assert v.data() == 'test\x00' + + elif type(i) is Registry.RegistryValue: + assert i.name() == '123' + assert i.type_raw() == 
RegistryRecords.REG_SZ + assert i.data() == 'test\x00' + +def test_comp(): + with open(hive_comp, 'rb') as f: + hive = Registry.RegistryHive(f) + hive.walk_everywhere() + +def test_carving(): + with open(hive_carving0, 'rb') as f: + carver = RegistryCarve.Carver(f) + for i in carver.carve(): + assert i.offset == 0 + assert i.size == 8192 + assert not i.truncated + assert i.truncation_scenario == 0 + + with open(hive_carving512, 'rb') as f: + carver = RegistryCarve.Carver(f) + for i in carver.carve(): + assert i.offset == 512 + assert i.size == 8192 + assert not i.truncated + assert i.truncation_scenario == 0 + +def test_remnants(): + with open(hive_remnants, 'rb') as f: + hive = Registry.RegistryHive(f) + + hive.walk_everywhere() + + scanner = RegistryRecover.Scanner(hive) + + c = 0 + for i in scanner.scan(): + assert type(i) is Registry.RegistryValue + assert i.name() == '' + assert i.type_raw() == RegistryRecords.REG_DWORD + assert i.data() == 1 + c += 1 + + assert c == 1 + +def test_truncated(): + with open(hive_truncated, 'rb') as f: + hive = Registry.RegistryHiveTruncated(f) + + for i in hive.scan(): + assert type(i) is Registry.RegistryKey + assert i.name() in [ '{6214ff27-7b1b-41a3-9ae4-5fb851ffed63}', 'key_with_many_subkeys' ] or int(i.name()) > 0 + +def test_effective_hbins_data_size(): + with open(hive_effective_size, 'rb') as f: + hive = Registry.RegistryHive(f) + + assert hive.registry_file.baseblock.effective_hbins_data_size == 487424 + assert hive.registry_file.baseblock.get_hbins_data_size() != hive.registry_file.baseblock.effective_hbins_data_size + +def test_log_discovery(): + for i in range(len(log_discovery)): + p = log_discovery[i] + a = RegistryHelpers.DiscoverLogFiles(p) + + assert a is not None + + if i == 0: + assert path.normcase(path.basename(a.log_path)) == path.normcase('aa.LOG') + assert path.normcase(path.basename(a.log1_path)) == path.normcase('aa.LOG1') + assert path.normcase(path.basename(a.log2_path)) == path.normcase('aa.LOG2') + 
elif i == 1: + assert a.log_path is None + assert path.normcase(path.basename(a.log1_path)) == path.normcase('aa.LOG1') + assert path.normcase(path.basename(a.log2_path)) == path.normcase('aa.LOG2') + elif i == 2: + assert path.normcase(path.basename(a.log_path)) == path.normcase('aa.log') + assert path.normcase(path.basename(a.log1_path)) == path.normcase('aa.log1') + assert a.log2_path is None + elif i == 3: + assert path.normcase(path.basename(a.log_path)) == path.normcase('aa.LOG') + + # These properties should be None if the file system is case-sensitive. + assert a.log1_path is None or path.normcase(path.basename(a.log1_path)) == path.normcase('aa.log1') + assert a.log2_path is None or path.normcase(path.basename(a.log2_path)) == path.normcase('aa.log2') + elif i == 4: + assert a.log_path is None + assert a.log1_path is None + assert a.log2_path is None + else: + assert False + +def test_deleted_tree_no_root_flag(): + with open(hive_deleted_tree_no_root_flag, 'rb') as f: + hive = Registry.RegistryHive(f) + + assert hive.root_key().key_node.get_flags() & RegistryRecords.KEY_HIVE_ENTRY == 0 + hive.walk_everywhere() + + scanner = RegistryRecover.Scanner(hive) + for i in scanner.scan(): + assert type(i) is Registry.RegistryKey + assert i.path() in [ '1\\2\\3', '1\\2\\3\\4', '1\\2\\3\\4\\5', '1\\2\\3\\4\\New Key #1' ] + assert i.path_partial() == i.path() + +def test_deleted_tree_partial_path(): + with open(hive_deleted_tree_partial_path, 'rb') as f: + hive = Registry.RegistryHive(f) + + hive.walk_everywhere() + + scanner = RegistryRecover.Scanner(hive) + for i in scanner.scan(): + assert type(i) is Registry.RegistryKey + assert i.path_partial() in [ '3', '3\\4', '3\\4\\5', '3\\4\\New Key #1' ] + +def test_flags_converter(): + cases = [ + {'log_entry_flags': 0, 'baseblock_flags': 0, 'result': 0}, + {'log_entry_flags': 1, 'baseblock_flags': 0, 'result': 1}, + {'log_entry_flags': 1, 'baseblock_flags': 1, 'result': 1}, + {'log_entry_flags': 0, 'baseblock_flags': 1, 
'result': 0}, + {'log_entry_flags': 0, 'baseblock_flags': 3, 'result': 2}, + {'log_entry_flags': 1, 'baseblock_flags': 3, 'result': 3}, + {'log_entry_flags': 1, 'baseblock_flags': 2, 'result': 3} + ] + + for case in cases: + assert RegistryFile.LogEntryFlagsToBaseBlockFlags(case['log_entry_flags'], case['baseblock_flags']) == case['result'] + +def test_hive_save(): + def check_saved_hive(filepath): + with open(filepath, 'rb') as recovered: + hive_recovered = Registry.RegistryHive(recovered) + assert not hive_recovered.registry_file.baseblock.is_file_dirty + hive_recovered.walk_everywhere() + + tmp_file = path.join(HIVES_DIR, 'temphive_delete_me') + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + with pytest.raises(RegistryFile.NotSupportedException): + hive.registry_file.save_recovered_hive(tmp_file) + + with open(hive_dirty_old, 'rb') as primary, open(hive_dirty_old_log, 'rb') as log: + hive = Registry.RegistryHive(primary) + hive.recover_old(log) + hive.registry_file.save_recovered_hive(tmp_file) + check_saved_hive(tmp_file) + + with open(hive_dirty_new1, 'rb') as primary, open(hive_dirty_new1_log1, 'rb') as log1, open(hive_dirty_new1_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + hive.recover_new(log1, log2) + hive.registry_file.save_recovered_hive(tmp_file) + check_saved_hive(tmp_file) + + with open(hive_dirty_new2, 'rb') as primary, open(hive_dirty_new2_log1, 'rb') as log1, open(hive_dirty_new2_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + hive.recover_new(log1, log2) + hive.registry_file.save_recovered_hive(tmp_file) + check_saved_hive(tmp_file) + + with open(hive_bad_baseblock, 'rb') as primary, open(hive_bad_baseblock_log1, 'rb') as log1, open(hive_bad_baseblock_log2, 'rb') as log2: + hive = Registry.RegistryHive(primary) + hive.recover_auto(None, log1, log2) + hive.registry_file.save_recovered_hive(tmp_file) + check_saved_hive(tmp_file) + + 
remove(tmp_file) + +def test_slack(): + with open(record_nk, 'rb') as f: + buf = f.read() + r = RegistryRecords.KeyNode(buf) + assert r.get_slack() == b'SLCK' + + with open(record_vk, 'rb') as f: + buf = f.read() + r = RegistryRecords.KeyValue(buf) + assert r.get_slack() == b'SLCK' + + with open(record_sk, 'rb') as f: + buf = f.read() + r = RegistryRecords.KeySecurity(buf) + assert r.get_slack() == b'SLCK' + + with open(record_li, 'rb') as f: + buf = f.read() + r = RegistryRecords.IndexLeaf(buf) + assert r.get_slack() == b'SLCK' + + with open(record_lh, 'rb') as f: + buf = f.read() + r = RegistryRecords.HashLeaf(buf) + assert r.get_slack() == b'SLCK' + + with open(record_lf, 'rb') as f: + buf = f.read() + r = RegistryRecords.FastLeaf(buf) + assert r.get_slack() == b'SLCK' + + with open(record_ri, 'rb') as f: + buf = f.read() + r = RegistryRecords.IndexRoot(buf) + assert r.get_slack() == b'SLCK' + + with open(record_list, 'rb') as f: + buf = f.read() + r = RegistryRecords.KeyValuesList(buf, 3) + assert r.get_slack() == b'SLCK' + + r = RegistryRecords.SegmentsList(buf, 3) + assert r.get_slack() == b'SLCK' + + with open(record_db, 'rb') as f: + buf = f.read() + r = RegistryRecords.BigData(buf) + assert r.get_slack() == b'SLCK' + +def test_hive_slack(): + with open(hive_slack, 'rb') as f: + hive = Registry.RegistryHive(f) + + assert len(hive.effective_slack) == 0 + hive.walk_everywhere() + assert len(hive.effective_slack) > 0 + assert b'SLCK' in hive.effective_slack diff --git a/yarp-carver b/yarp-carver new file mode 100755 index 0000000..e785cc3 --- /dev/null +++ b/yarp-carver @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +# yarp: yet another registry parser +# (c) Maxim Suhanov + +from yarp import RegistryCarve +import argparse +from collections import namedtuple +import os +import sys + +PROGRAM_NAME = 'yarp-carver' +PROGRAM_VERSION = '1.0.0-beta1' + +Arguments = namedtuple('Arguments', [ 'source_file', 'output_dir' ]) + +def parse_args(): + """Parse command line 
arguments and return a named tuple (Arguments).""" + + parser = argparse.ArgumentParser(prog = PROGRAM_NAME, description = 'Carve Windows registry files from a disk image (or a similar source).', add_help = False, prefix_chars = '-') + + group_main = parser.add_argument_group('Main arguments') + group_misc = parser.add_argument_group('Miscellaneous arguments') + + group_main.add_argument('file', help = 'a disk image') + group_main.add_argument('outdir', help = 'an output directory') + + group_misc.add_argument('--help', action = 'help', help = 'show this help message and exit') + group_misc.add_argument('--version', action = 'version', help = 'show the version number and exit', version = PROGRAM_VERSION) + + parsed_args = parser.parse_args() + + source_file = parsed_args.file + output_dir = parsed_args.outdir + + return Arguments(source_file = source_file, output_dir = output_dir) + +def make_sane_filename(filename): + for bad_char in [ '\x00', '/', '\\', ':' ]: + filename = filename.replace(bad_char, '') + + if filename == '': + filename = 'unknown' + + return filename + +args = parse_args() + +if not os.path.isdir(args.output_dir): + print('Output directory does not exist: {}'.format(args.output_dir), file = sys.stderr) + sys.exit(255) + +try: + f = open(args.source_file, 'rb') +except (OSError, IOError): + print('Source file cannot be opened: {}'.format(args.source_file), file = sys.stderr) + sys.exit(255) + +carver = RegistryCarve.Carver(f) +print('Offset\tSize\tTruncated\tFile name') +for carve_result in carver.carve(): + print('{}\t{}\t{}\t{}'.format(carve_result.offset, carve_result.size, carve_result.truncated, carve_result.filename)) + + regf_filename = carve_result.filename + if regf_filename.rfind('\\') != -1: + regf_filename = regf_filename.split('\\')[-1] + regf_filename = make_sane_filename(regf_filename) + + if carve_result.truncated: + output_filename = '{}_{}-truncated'.format(carve_result.offset, regf_filename) + else: + output_filename = 
'{}_{}'.format(carve_result.offset, regf_filename) + + output_file = os.path.join(args.output_dir, output_filename) + + with open(output_file, 'wb') as out_f: + f.seek(carve_result.offset) + buf = f.read(carve_result.size) + out_f.write(buf) + +f.close() diff --git a/yarp-print b/yarp-print new file mode 100755 index 0000000..a5f6240 --- /dev/null +++ b/yarp-print @@ -0,0 +1,304 @@ +#!/usr/bin/env python3 + +# yarp: yet another registry parser +# (c) Maxim Suhanov + +from yarp import * +import argparse +from collections import namedtuple +import os +import sys + +PROGRAM_NAME = 'yarp-print' +PROGRAM_VERSION = '1.0.0-beta1' + +Arguments = namedtuple('Arguments', [ 'primary_file', 'do_recovery', 'do_deleted' ]) + +def parse_args(): + """Parse command line arguments and return a named tuple (Arguments).""" + + parser = argparse.ArgumentParser(prog = PROGRAM_NAME, description = 'Parse a Windows registry file, print all keys and values.', add_help = False, prefix_chars = '-') + + group_main = parser.add_argument_group('Main arguments') + group_opt = parser.add_argument_group('Optional arguments') + group_misc = parser.add_argument_group('Miscellaneous arguments') + + group_main.add_argument('file', help = 'a registry file (primary) to parse') + group_opt.add_argument('--no-recovery', action = 'store_true', help = 'do not discover and use transaction log files to recover the hive (in memory)') + group_opt.add_argument('--deleted', action = 'store_true', help = 'include deleted keys and values to the output') + + group_misc.add_argument('--help', action = 'help', help = 'show this help message and exit') + group_misc.add_argument('--version', action = 'version', help = 'show the version number and exit', version = PROGRAM_VERSION) + + parsed_args = parser.parse_args() + + primary_file = parsed_args.file + do_recovery = not parsed_args.no_recovery + do_deleted = parsed_args.deleted + + return Arguments(primary_file = primary_file, do_recovery = do_recovery, do_deleted = 
do_deleted) + +def print_hive_information(hive): + print('Last written timestamp (UTC): {}'.format(hive.last_written_timestamp())) + try: + print('Last reorganized timestamp (UTC): {}'.format(hive.last_reorganized_timestamp())) + except (ValueError, OverflowError): + pass + + print() + +def print_value(value): + value_name = value.name() + if value_name == '': + print('Default value') + else: + print('Value name: {}'.format(value_name)) + + print('Value type: {}'.format(value.type_str())) + print('Data size: {}'.format(value.data_size())) + + try: + data = value.data() + except UnicodeDecodeError: + data = value.data_raw() + + if type(data) is bytes: + print('Data (hexdump):') + print(RegistryHelpers.HexDump(data)) + elif type(data) is list: + print('Data (one list element per line):') + for element in data: + print(element) + else: + print('Data (decoded):') + print(data) + + print() + +def print_key(key): + key_path = key.path() + if key_path == '': + print('Root key') + else: + print('Key path: {}'.format(key_path)) + + classname = key.classname() + if classname is not None: + print('Class name: {}'.format(classname)) + + print('Last written timestamp (UTC): {}'.format(key.last_written_timestamp())) + print('Access bits: {}'.format(key.access_bits())) + + print() + + for value in key.values(): + print_value(value) + + print('---') + print() + +def print_key_recursive(key): + print_key(key) + + for subkey in key.subkeys(): + print_key_recursive(subkey) + +def print_deleted_value(value): + value_name = value.name() + if value_name == '': + print('Default value') + else: + print('Value name: {}'.format(value_name)) + + print('Value type: {}'.format(value.type_str())) + print('Data size: {}'.format(value.data_size())) + + try: + data = value.data() + except Registry.RegistryException: + data = None + except UnicodeDecodeError: + data = value.data_raw() + + if data is None: + print('Data not recovered') + else: + if type(data) is bytes: + print('Data (hexdump):') + 
print(RegistryHelpers.HexDump(data)) + elif type(data) is list: + print('Data (one list element per line):') + for element in data: + print(element) + else: + print('Data (decoded):') + print(data) + + print() + +def print_deleted_key(key): + try: + key_path = key.path() + except Registry.RegistryException: + key_path = None + + if key_path is None: + print('Unknown key path') + print('Partial key path: {}'.format(key.path_partial())) + print('Key name: {}'.format(key.name())) + else: + if key_path == '': + print('Root key') + else: + print('Key path: {}'.format(key_path)) + + try: + classname = key.classname() + except (Registry.RegistryException, UnicodeDecodeError): + classname = None + + if classname is not None: + print('Class name: {}'.format(classname)) + + try: + print('Last written timestamp (UTC): {}'.format(key.last_written_timestamp())) + except (ValueError, OverflowError): + print('Last written timestamp is not plausible') + + print('Access bits: {}'.format(key.access_bits())) + + print() + + try: + for value in key.values(): + print_deleted_value(value) + except Registry.RegistryException: + pass + + print('---') + print() + +# Currently, we can use functions for deleted keys and values to print keys and values in a truncated hive. 
+print_truncated_key = print_deleted_key +print_truncated_value = print_deleted_value + +args = parse_args() + +if not os.path.isfile(args.primary_file): + print('Primary file does not exist: {}'.format(args.primary_file), file = sys.stderr) + sys.exit(255) + +primary = open(args.primary_file, 'rb') + +try: + hive = Registry.RegistryHive(primary) +except (RegistryFile.BaseBlockException, RegistryFile.NotSupportedException): + raise +except Registry.RegistryException: + truncated = True +else: + truncated = False + +if truncated: + print('Primary file seems to be truncated, only available keys and values will be printed', file = sys.stderr) + hive = Registry.RegistryHiveTruncated(primary) + + print('Hive information:') + print() + print_hive_information(hive) + + print('Keys and values (allocated):') + print() + + all_values = [] + for item in hive.scan(): + if type(item) is Registry.RegistryValue: + all_values.append(item) + elif type(item) is Registry.RegistryKey: + print_truncated_key(item) + + print('All values (allocated):') + print() + for value in all_values: + print_truncated_value(value) + + if args.do_deleted: + print('Unallocated keys and values (may contain reallocated data):') + print() + + scanner = RegistryRecover.Scanner(hive, False) + deleted_values = [] + + for item in scanner.scan(): + if type(item) is Registry.RegistryKey: + print_deleted_key(item) + elif type(item) is Registry.RegistryValue: + deleted_values.append(item) + + print('Unallocated values (all, may contain reallocated data):') + print() + for value in deleted_values: + print_deleted_value(value) + + sys.exit(0) + +if args.do_recovery: + log_files = RegistryHelpers.DiscoverLogFiles(args.primary_file) + + log = None + if log_files.log_path is not None: + log = open(log_files.log_path, 'rb') + + log1 = None + if log_files.log1_path is not None: + log1 = open(log_files.log1_path, 'rb') + + log2 = None + if log_files.log2_path is not None: + log2 = open(log_files.log2_path, 'rb') + + try: 
+ recovery_result = hive.recover_auto(log, log1, log2) + except Registry.AutoRecoveryException: + print('An error has occurred when recovering a hive using a transaction log', file = sys.stderr) + +hive.walk_everywhere() + +print('Hive information:') +print() +print_hive_information(hive) + +print('Keys and values:') +print() +print_key_recursive(hive.root_key()) + +if args.do_deleted: + print('Deleted keys and values (may contain reallocated data):') + print() + + scanner = RegistryRecover.Scanner(hive) + deleted_values = [] + + for item in scanner.scan(): + if type(item) is Registry.RegistryKey: + print_deleted_key(item) + elif type(item) is Registry.RegistryValue: + deleted_values.append(item) + + print('Deleted values (all, may contain reallocated data):') + print() + for value in deleted_values: + print_deleted_value(value) + +hive = None +primary.close() + +if args.do_recovery: + if log is not None: + log.close() + + if log1 is not None: + log1.close() + + if log2 is not None: + log2.close() diff --git a/yarp-timeline b/yarp-timeline new file mode 100755 index 0000000..f74b231 --- /dev/null +++ b/yarp-timeline @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 + +# yarp: yet another registry parser +# (c) Maxim Suhanov + +from yarp import * +import argparse +from collections import namedtuple +import os +import sys + +PROGRAM_NAME = 'yarp-timeline' +PROGRAM_VERSION = '1.0.0-beta1' + +Arguments = namedtuple('Arguments', [ 'primary_file', 'faster' ]) +TimelineEntry = namedtuple('TimelineEntry', [ 'path_or_name', 'is_deleted', 'is_path_known', 'timestamp' ]) + +def parse_args(): + """Parse command line arguments and return a named tuple (Arguments).""" + + parser = argparse.ArgumentParser(prog = PROGRAM_NAME, description = 'Parse a Windows registry file, print the timeline for keys (including deleted ones).', add_help = False, prefix_chars = '-') + + group_main = parser.add_argument_group('Main arguments') + group_opt = parser.add_argument_group('Optional arguments') + 
group_misc = parser.add_argument_group('Miscellaneous arguments') + + group_main.add_argument('file', help = 'a registry file (primary) to parse') + + group_opt.add_argument('--fast', action = 'store_true', help = 'do not use intermediate states to extend the timeline when applying a transaction log (new format)') + + group_misc.add_argument('--help', action = 'help', help = 'show this help message and exit') + group_misc.add_argument('--version', action = 'version', help = 'show the version number and exit', version = PROGRAM_VERSION) + + parsed_args = parser.parse_args() + + primary_file = parsed_args.file + faster = parsed_args.fast + + return Arguments(primary_file = primary_file, faster = faster) + +keys_list = [] +def extend_keys_list(do_deleted = False): + def process_key(key): + global keys_list + + key_parsed = parse_key(key, False) + if key_parsed is not None and key_parsed not in keys_list: + keys_list.append(key_parsed) + + for subkey in key.subkeys(): + try: + process_key(subkey) + except Registry.RegistryException: + pass + + global hive + + process_key(hive.root_key()) + + if do_deleted: + global keys_list + + try: + hive.walk_everywhere() + except Registry.RegistryException: + return + + scanner = RegistryRecover.Scanner(hive) + for item in scanner.scan(): + if type(item) is Registry.RegistryKey: + key_parsed = parse_key(item, True) + if key_parsed is not None and key_parsed not in keys_list: + keys_list.append(key_parsed) + +def parse_key(key, is_deleted): + try: + path_or_name = key.path() + except Registry.RegistryException: + path_or_name = key.name() + is_path_known = False + else: + is_path_known = True + + try: + timestamp = key.last_written_timestamp() + except (ValueError, OverflowError): + return + + return TimelineEntry(path_or_name = path_or_name, is_deleted = is_deleted, is_path_known = is_path_known, timestamp = timestamp) + +def print_timeline_header(): + print('Registry file\tKey path/name\tIs deleted\tIs path known\tTimestamp 
(UTC)') + +def print_timeline_entry(entry, registry_file): + print('{}\t{}\t{}\t{}\t{}'.format(registry_file, entry.path_or_name, entry.is_deleted, entry.is_path_known, entry.timestamp)) + +def print_timeline(registry_file): + global keys_list + + print_timeline_header() + for entry in keys_list: + print_timeline_entry(entry, registry_file) + +args = parse_args() + +if not os.path.isfile(args.primary_file): + print('Primary file does not exist: {}'.format(args.primary_file), file = sys.stderr) + sys.exit(255) + +primary = open(args.primary_file, 'rb') +hive = Registry.RegistryHive(primary) + +extend_keys_list(True) # Extend the list of keys (including deleted ones) for the first time, before applying a transaction log. + +log_files = RegistryHelpers.DiscoverLogFiles(args.primary_file) + +log = None +if log_files.log_path is not None: + log = open(log_files.log_path, 'rb') + +log1 = None +if log_files.log1_path is not None: + log1 = open(log_files.log1_path, 'rb') + +log2 = None +if log_files.log2_path is not None: + log2 = open(log_files.log2_path, 'rb') + +if not args.faster: + hive.log_entry_callback = extend_keys_list # Extend the list of keys (without deleted ones) each time a log entry has been applied. + +try: + recovery_result = hive.recover_auto(log, log1, log2) +except Registry.AutoRecoveryException: + print('An error has occurred when recovering a hive using a transaction log', file = sys.stderr) +else: + if recovery_result.recovered and not recovery_result.is_new_log: + extend_keys_list() # Finally, extend the list of keys (without deleted ones) after an old transaction log file has been applied. 
+ elif recovery_result.recovered and recovery_result.is_new_log and args.faster: + extend_keys_list() + +keys_list.sort(key = lambda x: x.timestamp, reverse = True) +print_timeline(args.primary_file) + +hive = None +primary.close() + +if log is not None: + log.close() + +if log1 is not None: + log1.close() + +if log2 is not None: + log2.close() diff --git a/yarp/Registry.py b/yarp/Registry.py new file mode 100644 index 0000000..858f156 --- /dev/null +++ b/yarp/Registry.py @@ -0,0 +1,858 @@ +# yarp: yet another registry parser +# (c) Maxim Suhanov + +from __future__ import unicode_literals + +from .RegistryFile import RegistryException +from . import RegistryFile +from . import RegistryRecords +from struct import unpack +from datetime import datetime, timedelta +from collections import namedtuple + +ValueTypes = { +RegistryRecords.REG_NONE: 'REG_NONE', +RegistryRecords.REG_SZ: 'REG_SZ', +RegistryRecords.REG_EXPAND_SZ: 'REG_EXPAND_SZ', +RegistryRecords.REG_BINARY: 'REG_BINARY', +RegistryRecords.REG_DWORD: 'REG_DWORD', +RegistryRecords.REG_DWORD_BIG_ENDIAN: 'REG_DWORD_BIG_ENDIAN', +RegistryRecords.REG_LINK: 'REG_LINK', +RegistryRecords.REG_MULTI_SZ: 'REG_MULTI_SZ', +RegistryRecords.REG_RESOURCE_LIST: 'REG_RESOURCE_LIST', +RegistryRecords.REG_FULL_RESOURCE_DESCRIPTOR: 'REG_FULL_RESOURCE_DESCRIPTOR', +RegistryRecords.REG_RESOURCE_REQUIREMENTS_LIST: 'REG_RESOURCE_REQUIREMENTS_LIST', +RegistryRecords.REG_QWORD: 'REG_QWORD' +} + +AutoRecoveryResult = namedtuple('AutoRecoveryResult', [ 'recovered', 'is_new_log', 'file_objects' ]) + +class WalkException(RegistryException): + """This exception is raised when a walk error has occurred. + A walk error is a generic error when traversing registry records (entities). + """ + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class AutoRecoveryException(RegistryException): + """This exception is raised when a primary file cannot be recovered in the 'auto' mode. 
+ In particular, when no recovery scheme has been found. + """ + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +def DecodeFiletime(Timestamp): + """Decode the FILETIME timestamp and return the datetime object.""" + + return datetime(1601, 1, 1) + timedelta(microseconds = Timestamp / 10) + +def DecodeUnicode(Buffer, RemoveGarbage = False): + """Decode the Unicode (UTF-16LE) string and return it. + When 'RemoveGarbage' is True, this function will attempt to sanitize a null-terminated Unicode string. + """ + + if RemoveGarbage and len(Buffer) > 2: + # Windows is using null-terminated Unicode strings, so we want to remove garbage, if any, after the end of the string. + pos = 0 + while pos < len(Buffer): + two_bytes = Buffer[pos : pos + 2] + if two_bytes == b'\x00\x00': + return Buffer[ : pos + 2].decode('utf-16le') # Include the null character to the output string. + + pos += 2 + + return Buffer.decode('utf-16le') + +def DecodeASCII(Buffer): + """Decode the ASCII (extended) string and return it.""" + + return Buffer.decode('latin-1') # This is equal to adding a null byte after each character, and then running .decode('utf-16le'). + +def DecodeUnicodeMulti(Buffer, RemoveGarbage = False): + """Decode the Unicode (UTF-16LE) array of null-terminated strings and return it as is. + When 'RemoveGarbage' is True, this function will attempt to sanitize a null-terminated Unicode array. + """ + + if RemoveGarbage and len(Buffer) > 4: + # We want to remove garbage, if any, after the end of the array (marker: 0x00 0x00 0x00 0x00). + pos = 0 + while pos < len(Buffer): + four_bytes = Buffer[pos : pos + 4] + if four_bytes == b'\x00\x00\x00\x00': + return DecodeUnicode(Buffer[ : pos + 4]) # Include the null characters to the output string. 
+ + pos += 2 + + return DecodeUnicode(Buffer) + +class RegistryHive(object): + """This is a high-level class for a registry hive.""" + + registry_file = None + """A primary file of a hive (a RegistryFile.PrimaryFile object).""" + + log_entry_callback = None + """A callback function executed when a log entry has been applied.""" + + effective_slack = None + """A set of data strings from different slack space locations to be used in the deleted data recovery.""" + + def __init__(self, file_object, tolerate_minor_errors = True): + self.registry_file = RegistryFile.PrimaryFile(file_object, tolerate_minor_errors) + self.tolerate_minor_errors = tolerate_minor_errors + self.effective_slack = set() + + def root_key(self): + """Get and return a root key node (a RegistryKey object).""" + + return RegistryKey(self.registry_file, self.registry_file.get_root_cell(), 0, self.registry_file.baseblock.effective_root_cell_offset, self.tolerate_minor_errors) + + def last_written_timestamp(self): + """Get, decode and return a last written timestamp (a datetime object).""" + + return DecodeFiletime(self.registry_file.baseblock.effective_last_written_timestamp) + + def last_reorganized_timestamp(self): + """Get, decode and return a last reorganized timestamp (a datetime object).""" + + timestamp = self.registry_file.baseblock.effective_last_reorganized_timestamp + if timestamp is not None: + return DecodeFiletime(timestamp) + + def find_key(self, path): + """Find a key node by its path (without a name of a root key), return a key node (a RegistryKey object) or None, if not found.""" + + if path == '\\' or len(path) == 0: + return self.root_key() + + if path[0] == '\\': + path = path[1 : ] + + current_key = self.root_key() + path_components = path.split('\\') + + i = 0 + while i < len(path_components) and current_key is not None: + current_key = current_key.subkey(path_components[i]) + i += 1 + + return current_key + + def recover_new(self, file_object_log_or_log1, file_object_log2 = 
None): + """Recover a primary file using a single transaction log file or two transaction log files. + When 'file_object_log2' is None, a single transaction log file is used. + Transaction log files should be in the new format. + """ + + if file_object_log2 is None: + self.registry_file.apply_new_log_file(file_object_log_or_log1, self.log_entry_callback) + else: + self.registry_file.apply_new_log_files(file_object_log_or_log1, file_object_log2, self.log_entry_callback) + + def recover_old(self, file_object_log): + """Recover a primary file using a single transaction log file. + A transaction log file should be in the old format. + """ + + self.registry_file.apply_old_log_file(file_object_log) + + def recover_auto(self, file_object_log, file_object_log1, file_object_log2): + """Recover a primary file using one, two or three candidate transaction log files (the 'auto' mode). + The format of transaction log files (new or old) and the logging scheme (single-logging or dual-logging) are guessed. + If a transaction log file with a corresponding extension (.LOG/.LOG1/.LOG2) is not present, use None as an argument for that file. + If a primary file is not dirty, no exception is raised. A named tuple (AutoRecoveryResult) is returned. 
+ """ + + def try_log(file_object_log, log_class): + if file_object_log is None: + return + + try: + log = log_class(file_object_log) + except (RegistryFile.ReadException, RegistryFile.BaseBlockException, RegistryFile.FileSizeException, RegistryFile.NotSupportedException, RegistryFile.DirtyVectorException): + return + else: + return log + + if not self.registry_file.baseblock.is_file_dirty: + return AutoRecoveryResult(recovered = False, is_new_log = None, file_objects = None) + + log, log1, log2 = file_object_log, file_object_log1, file_object_log2 + use_log = log is not None + + if (log1 is not None and log2 is None) or (log1 is None and log2 is not None): + raise AutoRecoveryException('No valid recovery scheme possible') + + if use_log and log1 is None and log2 is None: + # This is the single-logging scheme. + log_new = try_log(log, RegistryFile.NewLogFile) + if log_new is not None: + self.recover_new(log) + return AutoRecoveryResult(recovered = True, is_new_log = True, file_objects = [log]) + + log_old = try_log(log, RegistryFile.OldLogFile) + if log_old is not None: + self.recover_old(log) + return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log]) + + if use_log: + log_new = try_log(log, RegistryFile.NewLogFile) + log1_new = try_log(log1, RegistryFile.NewLogFile) + log2_new = try_log(log2, RegistryFile.NewLogFile) + + if use_log: + log_old = try_log(log, RegistryFile.OldLogFile) + log1_old = try_log(log1, RegistryFile.OldLogFile) + log2_old = try_log(log2, RegistryFile.OldLogFile) + + # We prefer the new format and the dual-logging scheme. 
+ if log1_new is not None and log2_new is not None: + self.recover_new(log1, log2) + return AutoRecoveryResult(recovered = True, is_new_log = True, file_objects = [log1, log2]) + + if log1_new is not None: + self.recover_new(log1) + return AutoRecoveryResult(recovered = True, is_new_log = True, file_objects = [log1]) + + if log2_new is not None: + self.recover_new(log2) + return AutoRecoveryResult(recovered = True, is_new_log = True, file_objects = [log2]) + + # Now, try the single-logging scheme for the new format. + if use_log and log_new is not None: + self.recover_new(log) + return AutoRecoveryResult(recovered = True, is_new_log = True, file_objects = [log]) + + # Now, switch to the old format (we still prefer the dual-logging scheme). + if log1_old is not None and log2_old is not None: + log1_timestamp = log1_old.baseblock.effective_last_written_timestamp + log2_timestamp = log2_old.baseblock.effective_last_written_timestamp + if log1_timestamp >= log2_timestamp: # Select the latest log. + self.recover_old(log1) + return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log1]) + else: + self.recover_old(log2) + return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log2]) + + if log1_old is not None: + self.recover_old(log1) + return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log1]) + + if log2_old is not None: + self.recover_old(log2) + return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log2]) + + # Now, try the single-logging scheme. + if use_log and log_old is not None: + self.recover_old(log) + return AutoRecoveryResult(recovered = True, is_new_log = False, file_objects = [log]) + + # We failed. 
+ raise AutoRecoveryException('No obvious recovery scheme found') + + def save_recovered_hive(self, filepath): + """Save the recovered hive to a new primary file (using its path).""" + + self.registry_file.save_recovered_hive(filepath) + + def rollback_changes(self): + """Discard recovered data and use a primary file as is.""" + + self.registry_file.discard_writable_file_object() + + def walk_everywhere(self): + """Visit and record each referenced cell, collect the slack space data. This will also ensure that a hive is consistent.""" + + def process_key(key): + security = key.security() + if security is not None: + security_descriptor = security.descriptor() + + classname = key.classname() + + for value in key.values(): + value_data_raw = value.data_raw() + + for subkey in key.subkeys(): + process_key(subkey) + + for slack in key.effective_slack: + if len(slack) >= 4: # Skip the slack space data if it is less than 4 bytes. + self.effective_slack.add(slack) + + self.registry_file.record_referenced_cells = True + try: + process_key(self.root_key()) + except RegistryException: + self.registry_file.record_referenced_cells = False + raise + + self.registry_file.record_referenced_cells = False + + self.registry_file.build_map_free() + +class RegistryKey(object): + """This is a high-level class for a registry key.""" + + registry_file = None + """A primary file of a hive (a RegistryFile.PrimaryFile object).""" + + key_node = None + """A KeyNode object.""" + + effective_slack = None + """A set of data strings from different slack space locations to be used in the deleted data recovery.""" + + def __init__(self, primary_file, buf, layer, relative_cell_offset, tolerate_minor_errors = False, naive = False): + """When working with deleted registry keys, set 'naive' to True, 'relative_cell_offset' and 'layer' to None. + For a root key, set 'layer' to 0 (increment 'layer' by one when going to subkeys of a current key and decrement it by one when going to a parent key). 
+ """ + + self.registry_file = primary_file + self.naive = naive + if not self.naive: + self.get_cell = self.registry_file.get_cell + else: + self.get_cell = self.registry_file.get_cell_naive + + self.key_node = RegistryRecords.KeyNode(buf) + self.relative_cell_offset = relative_cell_offset + self.layer = layer + self.tolerate_minor_errors = tolerate_minor_errors + self.effective_slack = set() + + def last_written_timestamp(self): + """Get, decode and return a last written timestamp (a datetime object).""" + + return DecodeFiletime(self.key_node.get_last_written_timestamp()) + + def access_bits(self): + """Get and return access bits.""" + + if self.registry_file.baseblock.effective_version == 1: + return + + return self.key_node.get_access_bits() + + def name(self): + """Get, decode and return a key name string.""" + + name_buf = self.key_node.get_key_name() + is_ascii = self.registry_file.baseblock.effective_version > 1 and self.key_node.get_flags() & RegistryRecords.KEY_COMP_NAME > 0 + if is_ascii: + name = DecodeASCII(name_buf) + else: + name = DecodeUnicode(name_buf) + + if name.find('\\') != -1: + if not self.naive: + raise WalkException('Key node does not have a valid name, key path: {}'.format(self.path())) + else: + # Do not build the path, if we are trying to recover a key node. + raise WalkException('Key node does not have a valid name') + + return name + + def classname(self): + """Get, decode and return a class name string.""" + + classname_length = self.key_node.get_classname_length() + if classname_length > 0: + classname_buf = self.get_cell(self.key_node.get_classname_offset()) + return DecodeUnicode(classname_buf[ : classname_length]) + + def parent(self): + """Get and return a parent key node (a RegistryKey object).""" + + if self.layer == 0: + # This is the root key. 
+ return + + if self.layer is None and (self.key_node.get_flags() & RegistryRecords.KEY_HIVE_ENTRY > 0 or self.relative_cell_offset == self.registry_file.baseblock.effective_root_cell_offset): + # This is the root key. + return + + parent_offset = self.key_node.get_parent() + parent_buf = self.get_cell(parent_offset) + + layer_up = None + if self.layer is not None: + layer_up = self.layer - 1 + + parent_key_node = RegistryKey(self.registry_file, parent_buf, layer_up, parent_offset, self.tolerate_minor_errors, self.naive) + + return parent_key_node + + def path(self, show_root = False): + """Construct and return a path to a key node. + When 'show_root' is True, a name of a root key node is included. + """ + + path_components = [ self.name() ] + + if self.naive: + track = set() + track.add(self.key_node.get_parent()) + + p = self.parent() + while p is not None: + if self.naive: + p_parent = p.key_node.get_parent() + if p_parent in track: + raise WalkException('Invalid path when following parent keys') + + track.add(p_parent) + + path_components.append(p.name()) + p = p.parent() + + path_components.reverse() + if not show_root: + path_components = path_components[ 1 : ] + + return '\\'.join(path_components) + + def path_partial(self, show_root = False): + """Construct and return a path (possibly a partial one) to a key node. + When 'show_root' is True, a name of a root key node is included. 
+ """ + + path_components = [ self.name() ] + + if self.naive: + track = set() + track.add(self.key_node.get_parent()) + + try: + p = self.parent() + while p is not None: + if self.naive: + p_parent = p.key_node.get_parent() + if p_parent in track: + raise WalkException('Invalid path when following parent keys') + + track.add(p_parent) + + path_components.append(p.name()) + p = p.parent() + except RegistryException: + root_found = False + else: + root_found = True + + path_components.reverse() + if root_found and not show_root: + path_components = path_components[ 1 : ] + + return '\\'.join(path_components) + + def subkeys(self): + """This method yields subkeys (RegistryKey objects).""" + + subkeys_names = set() + + def process_leaf(leaf_buf): + leaf_signature = leaf_buf[ : 2] + + if leaf_signature == b'li': + leaf = RegistryRecords.IndexLeaf(leaf_buf) + elif leaf_signature == b'lf': + leaf = RegistryRecords.FastLeaf(leaf_buf) + else: # b'lh' + leaf = RegistryRecords.HashLeaf(leaf_buf) + + slack = leaf.get_slack() + self.effective_slack.add(slack) + + layer_down = None + if self.layer is not None: + layer_down = self.layer + 1 + + if type(leaf) is RegistryRecords.IndexLeaf: + for leaf_element in leaf.elements(): + subkey_offset = leaf_element.relative_offset + + buf = self.get_cell(subkey_offset) + subkey = RegistryKey(self.registry_file, buf, layer_down, subkey_offset, self.tolerate_minor_errors, self.naive) + if self.relative_cell_offset is not None and subkey.key_node.get_parent() != self.relative_cell_offset: + if not self.naive: + raise WalkException('Key node does not point to a valid parent key node, key path: {}, name: {}'.format(self.path(), subkey.name())) + else: + # Do not build the path, if we are trying to recover a key node. 
+ raise WalkException('Key node does not point to a valid parent key node') + + yield subkey + + if type(leaf) is RegistryRecords.FastLeaf: + for leaf_element in leaf.elements(): + subkey_offset = leaf_element.relative_offset + + buf = self.get_cell(subkey_offset) + subkey = RegistryKey(self.registry_file, buf, layer_down, subkey_offset, self.tolerate_minor_errors, self.naive) + if self.relative_cell_offset is not None and subkey.key_node.get_parent() != self.relative_cell_offset: + if not self.naive: + raise WalkException('Key node does not point to a valid parent key node, key path: {}, name: {}'.format(self.path(), subkey.name())) + else: + # Do not build the path, if we are trying to recover a key node. + raise WalkException('Key node does not point to a valid parent key node') + + yield subkey + + if type(leaf) is RegistryRecords.HashLeaf: + for leaf_element in leaf.elements(): + subkey_offset = leaf_element.relative_offset + + buf = self.get_cell(subkey_offset) + subkey = RegistryKey(self.registry_file, buf, layer_down, subkey_offset, self.tolerate_minor_errors, self.naive) + if self.relative_cell_offset is not None and subkey.key_node.get_parent() != self.relative_cell_offset: + if not self.naive: + raise WalkException('Key node does not point to a valid parent key node, key path: {}, name: {}'.format(self.path(), subkey.name())) + else: + # Do not build the path, if we are trying to recover a key node. 
+ raise WalkException('Key node does not point to a valid parent key node') + + yield subkey + + + if self.key_node.get_subkeys_count() > 0: + list_offset = self.key_node.get_subkeys_list_offset() + list_buf = self.get_cell(list_offset) + list_signature = list_buf[ : 2] + + prev_name = None + + if list_signature == b'ri': + index_root = RegistryRecords.IndexRoot(list_buf) + + slack = index_root.get_slack() + self.effective_slack.add(slack) + + for leaf_offset in index_root.elements(): + list_buf = self.get_cell(leaf_offset) + for subkey in process_leaf(list_buf): + curr_name = subkey.name().upper() + if curr_name not in subkeys_names: + subkeys_names.add(curr_name) + else: + if not self.naive: + raise WalkException('Duplicate subkey, key path: {}, name: {}'.format(self.path(), curr_name)) + else: + # Do not build the path, if we are trying to recover a key node. + raise WalkException('Duplicate subkey') + + if prev_name is not None and curr_name <= prev_name: + if not self.naive: + raise WalkException('Wrong order of subkeys, key path: {}, offending name: {}'.format(self.path(), curr_name)) + else: + # Do not build the path, if we are trying to recover a key node. + raise WalkException('Wrong order of subkeys') + + prev_name = curr_name + + yield subkey + else: + for subkey in process_leaf(list_buf): + curr_name = subkey.name().upper() + if curr_name not in subkeys_names: + subkeys_names.add(curr_name) + else: + if not self.naive: + raise WalkException('Duplicate subkey, key path: {}, name: {}'.format(self.path(), curr_name)) + else: + # Do not build the path, if we are trying to recover a key node. + raise WalkException('Duplicate subkey') + + if prev_name is not None and curr_name <= prev_name: + if not self.naive: + raise WalkException('Wrong order of subkeys, key path: {}, offending name: {}'.format(self.path(), curr_name)) + else: + # Do not build the path, if we are trying to recover a key node. 
+ raise WalkException('Wrong order of subkeys') + + prev_name = curr_name + + yield subkey + + def subkey(self, name): + """This method returns a subkey by its name (a RegistryKey object) or None, if not found.""" + + name = name.lower() + for current_subkey in self.subkeys(): + curr_name = current_subkey.name().lower() + if name == curr_name: + return current_subkey + + def subkeys_count(self): + """Get and return a number of subkeys. Volatile subkeys are not counted.""" + + return self.key_node.get_subkeys_count() + + def values(self): + """This method yields key values (RegistryValue objects).""" + + values_names = set() + + values_count = self.key_node.get_key_values_count() + if values_count > 0 and self.key_node.get_flags() & RegistryRecords.KEY_PREDEF_HANDLE == 0: + list_offset = self.key_node.get_key_values_list_offset() + list_buf = self.get_cell(list_offset) + + values_list = RegistryRecords.KeyValuesList(list_buf, values_count) + + slack = values_list.get_slack() + self.effective_slack.add(slack) + + for value_offset in values_list.elements(): + buf = self.get_cell(value_offset) + curr_value = RegistryValue(self.registry_file, buf, self.naive) + curr_name = curr_value.name().lower() + if curr_name not in values_names: + values_names.add(curr_name) + else: + if not self.naive: + raise WalkException('Duplicate value name, key path: {}, value name: {}'.format(self.path(), curr_name)) + else: + # Do not build the path, if we are trying to recover a key node. + raise WalkException('Duplicate value name') + + yield curr_value + + def value(self, name = ''): + """This method returns a key value by its name (a RegistryValue object) or None, if not found. + When 'name' is empty, a default value is returned (if any). 
+ """ + + name = name.lower() + + for curr_value in self.values(): + curr_name = curr_value.name().lower() + if name == curr_name: + return curr_value + + def values_count(self): + """Get and return a number of key values.""" + + if self.key_node.get_flags() & RegistryRecords.KEY_PREDEF_HANDLE > 0: + return 0 + + return self.key_node.get_key_values_count() + + def security(self): + """Get and return a key security item (a RegistrySecurity object).""" + + key_security_offset = self.key_node.get_key_security_offset() + if key_security_offset != RegistryFile.CELL_OFFSET_NIL: + buf = self.get_cell(key_security_offset) + return RegistrySecurity(self.registry_file, buf) + + def __str__(self): + return 'RegistryKey, name: {}, subkeys: {}, values: {}'.format(self.name(), self.subkeys_count(), self.values_count()) + +class RegistrySecurity(object): + """This is a high-level class for a key security item.""" + + registry_file = None + """A primary file of a hive (a RegistryFile.PrimaryFile object).""" + + key_security = None + """A KeySecurity object.""" + + def __init__(self, primary_file, buf): + self.registry_file = primary_file + + self.key_security = RegistryRecords.KeySecurity(buf) + + def descriptor(self): + """Get and return a security descriptor (as raw bytes).""" + + return self.key_security.get_security_descriptor() + +class RegistryValue(object): + """This is a high-level class for a registry value.""" + + registry_file = None + """A primary file of a hive (a RegistryFile.PrimaryFile object).""" + + key_value = None + """A KeyValue object.""" + + def __init__(self, primary_file, buf, naive = False): + """When working with deleted registry values, set 'naive' to True.""" + + self.registry_file = primary_file + if not naive: + self.get_cell = self.registry_file.get_cell + else: + self.get_cell = self.registry_file.get_cell_naive + + self.key_value = RegistryRecords.KeyValue(buf) + + def name(self): + """Get, decode and return a value name string.""" + + name_buf = 
self.key_value.get_value_name() + is_ascii = self.registry_file.baseblock.effective_version > 1 and self.key_value.get_flags() & RegistryRecords.VALUE_COMP_NAME > 0 + if is_ascii: + return DecodeASCII(name_buf) + + return DecodeUnicode(name_buf) + + def type_raw(self): + """Get and return a value type (as an integer).""" + + return self.key_value.get_data_type() + + def type_str(self): + """Get, decode and return a value type (as a string).""" + + value_type = self.key_value.get_data_type() + if value_type in ValueTypes.keys(): + return ValueTypes[value_type] + else: + return hex(value_type) + + def data_size(self): + """Get and return a data size.""" + + return self.key_value.get_data_size_real() + + def data_raw(self): + """Get and return data (as raw bytes).""" + + if self.key_value.get_data_size_real() == 0: + return b'' + + if self.key_value.is_data_inline(): + return self.key_value.get_inline_data()[ : self.key_value.get_data_size_real()] + + is_big_data = self.registry_file.baseblock.effective_version > 3 and self.key_value.get_data_size_real() > 16344 + if not is_big_data: + return self.get_cell(self.key_value.get_data_offset())[ : self.key_value.get_data_size_real()] + + big_data_buf = self.get_cell(self.key_value.get_data_offset()) + big_data = RegistryRecords.BigData(big_data_buf) + + segments_list_offset = big_data.get_segments_list_offset() + segments_count = big_data.get_segments_count() + + segments_list = RegistryRecords.SegmentsList(self.get_cell(segments_list_offset), segments_count) + + data = b'' + data_length = self.key_value.get_data_size_real() + for segment_offset in segments_list.elements(): + buf = self.get_cell(segment_offset) + + if data_length > 16344: + data_part = buf[ : 16344] + if len(data_part) != 16344: + raise WalkException('Invalid segment size: {} != 16344'.format(len(data_part))) + + data += data_part + data_length -= 16344 + else: + data += buf[ : data_length] + break + + return data + + def data(self): + """Get, decode and 
return data (as an integer, a string, a list of strings, or raw bytes). + A string may contain a terminating null character. + """ + + data_raw = self.data_raw() + data_length = len(data_raw) + type_int = self.type_raw() + + if type_int == RegistryRecords.REG_DWORD and data_length == 4: + return unpack('L', data_raw)[0] + + if type_int == RegistryRecords.REG_QWORD and data_length == 8: + return unpack(' 1: + return DecodeUnicode(data_raw, True) + + if type_int == RegistryRecords.REG_LINK and data_length % 2 == 0 and data_length > 1: + return DecodeUnicode(data_raw) + + if type_int == RegistryRecords.REG_MULTI_SZ and data_length % 2 == 0 and data_length > 1: + sz_list_data = DecodeUnicodeMulti(data_raw, True) + if sz_list_data == '\x00': + return [] + + if len(sz_list_data) > 2 and sz_list_data[-1] == '\x00' and sz_list_data[-2] == '\x00': + sz_list = sz_list_data[ : -1].split('\x00') + + i = 0 + while i < len(sz_list): + sz_list[i] += '\x00' # Restore the terminating null characters. + i += 1 + + return sz_list + + return data_raw + + def __str__(self): + name = self.name() + if len(name) > 0: + return 'RegistryValue, name: {}, data type: {}, data size: {}'.format(name, self.type_str(), self.data_size()) + else: + return 'RegistryValue, default value (no name), data type: {}, data size: {}'.format(self.type_str(), self.data_size()) + +class RegistryHiveTruncated(object): + """This is a high-level class for a truncated registry hive.""" + + registry_file = None + """A primary file of a hive (a RegistryFile.PrimaryFileTruncated object).""" + + def __init__(self, file_object): + self.registry_file = RegistryFile.PrimaryFileTruncated(file_object) + self.effective_slack = set() + + def last_written_timestamp(self): + """Get, decode and return a last written timestamp (a datetime object).""" + + return DecodeFiletime(self.registry_file.baseblock.effective_last_written_timestamp) + + def last_reorganized_timestamp(self): + """Get, decode and return a last reorganized 
timestamp (a datetime object).""" + + timestamp = self.registry_file.baseblock.effective_last_reorganized_timestamp + if timestamp is not None: + return DecodeFiletime(timestamp) + + def scan(self): + """This method yields RegistryKey objects for keys and RegistryValue objects for values.""" + + for cell in self.registry_file.cells(): + cell_absolute_size = cell.get_absolute_size() + if cell_absolute_size > 76: # A key node with at least one character in the name. + cell_data = cell.get_cell_data() + try: + key = RegistryKey(self.registry_file, cell_data, None, None, True, False) + key_name = key.name() + except (RegistryException, UnicodeDecodeError): + pass + else: + yield key + elif cell_absolute_size >= 20: # A key value with no name (at least). + cell_data = cell.get_cell_data() + try: + value = RegistryValue(self.registry_file, cell_data, False) + value_name = value.name() + except (RegistryException, UnicodeDecodeError): + pass + else: + yield value diff --git a/yarp/RegistryCarve.py b/yarp/RegistryCarve.py new file mode 100644 index 0000000..59a2c52 --- /dev/null +++ b/yarp/RegistryCarve.py @@ -0,0 +1,188 @@ +# yarp: yet another registry parser +# (c) Maxim Suhanov + +from __future__ import unicode_literals + +from . import RegistryFile +from .Registry import DecodeUnicode +from struct import unpack +from collections import namedtuple + +CarveResult = namedtuple('CarveResult', [ 'offset', 'size', 'truncated', 'truncation_point', 'truncation_scenario', 'filename' ]) +BaseBlockCheckResult = namedtuple('BaseBlockCheckResult', [ 'is_valid', 'hbins_data_size', 'filename', 'old_cells' ]) +HiveBinCheckResult = namedtuple('HiveBinCheckResult', [ 'is_valid', 'size' ]) +CellsCheckResult = namedtuple('CellsCheckResult', [ 'are_valid', 'truncation_point_relative' ]) + +SECTOR_SIZE = 512 # This is an assumed sector size. +FILE_MARGIN_SIZE = 4*1024*1024 # We will read more bytes than specified in the base block to account possible damage scenarios. 
+FILE_SIZE_MAX_MIB = 500 # We do not expect primary files to be larger than this (in MiB). + +def CheckBaseBlockOfPrimaryFile(Buffer): + """Check if Buffer contains a valid base block of a primary file and a hive bin, return a named tuple (BaseBlockCheckResult).""" + + if len(Buffer) < RegistryFile.BASE_BLOCK_LENGTH_PRIMARY + RegistryFile.HIVE_BIN_SIZE_ALIGNMENT: + return BaseBlockCheckResult(is_valid = False, hbins_data_size = None, filename = None, old_cells = None) + + signature, __, __, __, major_version, minor_version, file_type, file_format, __, hbins_data_size, clustering_factor = unpack('<4sLLQLLLLLLL', Buffer[ : 48]) + + if (signature == b'regf' and major_version in RegistryFile.MAJOR_VERSION_NUMBERS_SUPPORTED and minor_version in RegistryFile.MINOR_VERSION_NUMBERS_SUPPORTED and + file_type == RegistryFile.FILE_TYPE_PRIMARY and file_format == RegistryFile.FILE_FORMAT_DIRECT_MEMORY_LOAD and clustering_factor == RegistryFile.FILE_CLUSTERING_FACTOR and + hbins_data_size >= RegistryFile.HIVE_BIN_SIZE_ALIGNMENT and hbins_data_size % RegistryFile.HIVE_BIN_SIZE_ALIGNMENT == 0 and + RegistryFile.BASE_BLOCK_LENGTH_PRIMARY + hbins_data_size <= FILE_SIZE_MAX_MIB * 1024 * 1024): + + log_signature = Buffer[RegistryFile.BASE_BLOCK_LENGTH_LOG : RegistryFile.BASE_BLOCK_LENGTH_LOG + 4] + hbin_signature = Buffer[RegistryFile.BASE_BLOCK_LENGTH_PRIMARY : RegistryFile.BASE_BLOCK_LENGTH_PRIMARY + 4] + if log_signature != b'DIRT' and log_signature != b'HvLE' and hbin_signature == b'hbin': + try: + filename = DecodeUnicode(Buffer[48 : 48 + 64], True).rstrip('\x00') + except UnicodeDecodeError: + pass + else: + old_cells = minor_version in RegistryFile.MINOR_VERSION_NUMBERS_FOR_OLD_CELL_FORMAT + return BaseBlockCheckResult(is_valid = True, hbins_data_size = hbins_data_size, filename = filename, old_cells = old_cells) + + return BaseBlockCheckResult(is_valid = False, hbins_data_size = None, filename = None, old_cells = None) + +def CheckHiveBin(Buffer, ExpectedOffsetRelative): + 
"""Check if Buffer contains a valid hive bin (without checking its cells), return a named tuple (HiveBinCheckResult).""" + + if len(Buffer) < RegistryFile.HIVE_BIN_SIZE_ALIGNMENT: + return HiveBinCheckResult(is_valid = False, size = None) + + signature, offset, size = unpack('<4sLL', Buffer[ : 12]) + if signature == b'hbin' and offset == ExpectedOffsetRelative and size >= RegistryFile.HIVE_BIN_SIZE_ALIGNMENT and size % RegistryFile.HIVE_BIN_SIZE_ALIGNMENT == 0: + return HiveBinCheckResult(is_valid = True, size = size) + + return HiveBinCheckResult(is_valid = False, size = None) + +def CheckCellsOfHiveBin(Buffer, OldCells = False): + """Check if Buffer contains a hive bin with valid cells (new format), return a named tuple (CellsCheckResult). A hive bin's header is not checked.""" + + curr_pos_relative = 32 + while curr_pos_relative < len(Buffer): + four_bytes = Buffer[curr_pos_relative : curr_pos_relative + 4] + if len(four_bytes) < 4: + return CellsCheckResult(are_valid = False, truncation_point_relative = curr_pos_relative) + + cell_size, = unpack(' regf_size: + regf_size = curr_pos_relative # Adjust the file size to include an unforeseeably large hive bin. + break + + hbin_buf_partial = regf_buf[curr_pos_relative : curr_pos_relative + RegistryFile.HIVE_BIN_SIZE_ALIGNMENT] + check_result_hbin = CheckHiveBin(hbin_buf_partial, expected_hbin_offset_relative) + if not check_result_hbin.is_valid: + truncation_point = regf_offset + curr_pos_relative + regf_size = curr_pos_relative # Adjust the file size according to the truncation point. + break + + last_hbin_buf = regf_buf[curr_pos_relative : curr_pos_relative + check_result_hbin.size] + + curr_pos_relative += check_result_hbin.size + expected_hbin_offset_relative += check_result_hbin.size + + if last_hbin_buf is None: + # No valid hive bins found. + pos += SECTOR_SIZE + continue + + if truncation_point is None: + # Probably no truncation. 
+ check_result_cells = CheckCellsOfHiveBin(last_hbin_buf, check_result.old_cells) + if check_result_cells.are_valid: + # No truncation. + yield CarveResult(offset = regf_offset, size = regf_size, truncated = False, truncation_point = None, truncation_scenario = 0, + filename = check_result.filename) + else: + # Truncation within the last hive bin. + truncation_point = regf_offset + regf_size - len(last_hbin_buf) + check_result_cells.truncation_point_relative + truncation_point = truncation_point // SECTOR_SIZE * SECTOR_SIZE # Adjust the truncation point according to the sector size. + regf_size = truncation_point - regf_offset # Adjust the file size according to the truncation point. + + yield CarveResult(offset = regf_offset, size = regf_size, truncated = True, truncation_point = truncation_point, truncation_scenario = 2, + filename = check_result.filename) + else: + # Obvious truncation. + check_result_cells = CheckCellsOfHiveBin(last_hbin_buf, check_result.old_cells) + if check_result_cells.are_valid: + # Truncation at a boundary of a hive bin. + yield CarveResult(offset = regf_offset, size = regf_size, truncated = True, truncation_point = truncation_point, truncation_scenario = 1, + filename = check_result.filename) + else: + # Truncation within a hive bin. + truncation_point = regf_offset + regf_size - len(last_hbin_buf) + check_result_cells.truncation_point_relative + truncation_point = truncation_point // SECTOR_SIZE * SECTOR_SIZE # Adjust the truncation point according to the sector size. + regf_size = truncation_point - regf_offset # Adjust the file size according to the truncation point. 
+ + yield CarveResult(offset = regf_offset, size = regf_size, truncated = True, truncation_point = truncation_point, truncation_scenario = 3, + filename = check_result.filename) + + if regf_size % SECTOR_SIZE == 0: + pos += regf_size + else: + pos += regf_size + SECTOR_SIZE - regf_size % SECTOR_SIZE + + continue + + pos += SECTOR_SIZE diff --git a/yarp/RegistryFile.py b/yarp/RegistryFile.py new file mode 100644 index 0000000..27a4e3d --- /dev/null +++ b/yarp/RegistryFile.py @@ -0,0 +1,1290 @@ +# yarp: yet another registry parser +# (c) Maxim Suhanov + +from __future__ import unicode_literals + +from struct import unpack, pack +from ctypes import c_uint32 +from io import BytesIO +from shutil import copyfileobj +from collections import namedtuple + +MAJOR_VERSION_NUMBERS_SUPPORTED = set([1]) +MINOR_VERSION_NUMBERS_SUPPORTED = set([1, 2, 3, 4, 5, 6]) + +MINOR_VERSION_NUMBERS_FOR_OLD_CELL_FORMAT = set([1]) +MINOR_VERSION_NUMBERS_FOR_NEW_CELL_FORMAT = set(MINOR_VERSION_NUMBERS_SUPPORTED - MINOR_VERSION_NUMBERS_FOR_OLD_CELL_FORMAT) + +FILE_TYPE_PRIMARY = 0 # Primary (normal) file. +FILE_TYPE_LOG_OLD = 1 # Transaction log file (old format). +FILE_TYPE_LOG_VERYOLD = 2 # Transaction log file (the same old format, but with a different type number). +FILE_TYPE_LOG_NEW = 6 # Transaction log file (new format). +FILE_TYPES_SUPPORTED = set([FILE_TYPE_PRIMARY, FILE_TYPE_LOG_OLD, FILE_TYPE_LOG_VERYOLD, FILE_TYPE_LOG_NEW]) + +FILE_FORMAT_DIRECT_MEMORY_LOAD = 1 + +BASE_BLOCK_LENGTH_PRIMARY = 4096 +FILE_CLUSTERING_FACTOR = 1 # This is the only value expected (even when the sector size is not 512 bytes). +BASE_BLOCK_LENGTH_LOG = 512 * FILE_CLUSTERING_FACTOR + +MARVIN32_SEED = 0x82EF4D887A4E55C5 # This is the seed for log entries. 
+ +HIVE_BIN_SIZE_ALIGNMENT = 4096 + +CELL_OFFSET_NIL = 0xFFFFFFFF +CELL_SIZE_MAX_NAIVE = 10 * 1024 * 1024 + +DirtyPageMeta = namedtuple('DirtyPageMeta', [ 'relative_offset_primary', 'relative_offset_log' ]) +DirtyPageReference = namedtuple('DirtyPageReference', [ 'relative_offset_primary', 'size' ]) + +def Marvin32(Buffer, Seed = MARVIN32_SEED): + """Calculate and return the Marvin32 hash (64 bits) of Buffer.""" + + def ROTL(X, N, W): + return (X.value << N) | (X.value >> (W - N)) + + def Mix(State, Val): + lo, hi = State + lo.value += Val.value + hi.value ^= lo.value + lo.value = ROTL(lo, 20, 32) + hi.value + hi.value = ROTL(hi, 9, 32) ^ lo.value + lo.value = ROTL(lo, 27, 32) + hi.value + hi.value = ROTL(hi, 19, 32) + return (lo, hi) + + lo = c_uint32(Seed) + hi = c_uint32(Seed >> 32) + state = (lo, hi) + + length = len(Buffer) + pos = 0 + val = c_uint32() + + while length >= 4: + val.value = unpack(' 0: + if BaseBlockFlags & 1 == 0: + BaseBlockFlags += 1 + else: + if BaseBlockFlags & 1 > 0: + BaseBlockFlags -= 1 + + return BaseBlockFlags + +class RegistryException(Exception): + """This is a top-level exception for this module.""" + + pass + +class ReadException(RegistryException): + """This exception is raised when a read error has occurred. + This exception does not supersede standard I/O exceptions. 
+ """ + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class NotSupportedException(RegistryException): + """This exception is raised when something is not supported.""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class BaseBlockException(RegistryException): + """This exception is raised when something is invalid in a base block.""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class FileSizeException(RegistryException): + """This exception is raised when a file has an obviously invalid size.""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class HiveBinException(RegistryException): + """This exception is raised when something is invalid in a hive bin.""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class HiveCellException(RegistryException): + """This exception is raised when something is wrong with a hive cell.""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class DirtyVectorException(RegistryException): + """This exception is raised when something is invalid in a dirty vector.""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class DirtyPageException(RegistryException): + """This exception is raised when a dirty page is invalid (truncated).""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class LogEntryException(RegistryException): + """This exception is raised when a log entry is invalid.""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class RecoveryException(RegistryException): + """This exception is raised when a recovery error 
has occurred.""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class NotEligibleException(RegistryException): + """This exception is raised when a transaction log file cannot be applied to a primary file.""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class CellOffsetException(RegistryException): + """This exception is raised when an invalid cell has been requested.""" + + def __init__(self, value): + self._value = value + + def __str__(self): + return repr(self._value) + +class RegistryFile(object): + """This is a generic class for registry files, it provides low-level methods for reading, parsing, and writing data. + All methods are self-explanatory. + """ + + def __init__(self, file_object, file_offset = 0): + self.file_object = file_object + self.file_offset = file_offset + + def get_file_size(self): + self.file_object.seek(0, 2) + return self.file_object.tell() + + def read_binary(self, pos, length): + self.file_object.seek(self.file_offset + pos) + b = self.file_object.read(length) + if len(b) == length: + return b + + raise ReadException('Cannot read data (expected: {} bytes, read: {} bytes)'.format(length, len(b))) + + def write_binary(self, pos, data): + self.file_object.seek(self.file_offset + pos) + self.file_object.write(data) + + def read_uint32(self, pos): + b = self.read_binary(pos, 4) + return unpack('> (bit_pos % 8)) & 1) != 0 + if is_bit_set: + dirty_page_meta = DirtyPageMeta(relative_offset_primary = bit_pos * 512, relative_offset_log = i * 512) + yield dirty_page_meta + i += 1 + + bit_pos += 1 + +class DirtyPage(RegistryFile): + """This is a class for a dirty page, describing its location and bytes (data).""" + + primary_file_offset = None + log_file_offset = None + page_size = None + + def __init__(self, file_object, log_file_offset, page_size, primary_file_offset): + super(DirtyPage, self).__init__(file_object, 
log_file_offset) + + self.page_size = page_size + self.primary_file_offset = primary_file_offset + self.log_file_offset = log_file_offset + + def get_bytes(self): + bytes_ = self.read_binary(0, self.page_size) + if len(bytes_) != self.page_size: + raise DirtyPageException('Truncated dirty page') + + return bytes_ + +class OldLogFile(object): + """This is a class for a transaction log file (old format).""" + + baseblock = None + """A base block in a log file (a BaseBlock object).""" + + dirtyvector = None + """A dirty vector in a log file (a DirtyVector object).""" + + def __init__(self, file_object): + self.file_object = file_object + + self.baseblock = BaseBlock(self.file_object) + + if self.baseblock.get_file_type() != FILE_TYPE_LOG_OLD and self.baseblock.get_file_type() != FILE_TYPE_LOG_VERYOLD: + raise BaseBlockException('Invalid file type') + + if self.baseblock.is_file_dirty: + raise BaseBlockException('Dirty state') + + file_size = self.baseblock.get_file_size() + if file_size < self.get_dirty_pages_starting_offset() + 512: # Check if at least one dirty page (512 bytes) can be present in the file. + raise FileSizeException('Invalid file size: {}'.format(file_size)) + + self.dirtyvector = DirtyVector(self.file_object, BASE_BLOCK_LENGTH_LOG, self.baseblock.effective_hbins_data_size) + + def get_dirty_pages_starting_offset(self): + offset_unaligned = BASE_BLOCK_LENGTH_LOG + len(b'DIRT') + self.baseblock.effective_hbins_data_size // 4096 + sector_size = 512 # We do not expect other values (even when the sector size is not 512 bytes). 
+ + if offset_unaligned % sector_size == 0: + offset_aligned = offset_unaligned + else: + offset_aligned = offset_unaligned + sector_size - offset_unaligned % sector_size + + return offset_aligned + + def dirty_pages(self): + """This method yields DirtyPage objects.""" + + log_file_base = self.get_dirty_pages_starting_offset() + primary_file_base = BASE_BLOCK_LENGTH_PRIMARY + + for dirty_page_meta in self.dirtyvector.dirty_pages_meta(): + log_file_offset = dirty_page_meta.relative_offset_log + log_file_base + primary_file_offset = dirty_page_meta.relative_offset_primary + primary_file_base + + dirty_page = DirtyPage(self.file_object, log_file_offset, 512, primary_file_offset) + yield dirty_page + +class LogEntry(RegistryFile): + """This is a class for a log entry, it provides methods to read dirty pages references and to map dirty pages. + Most methods are self-explanatory. + """ + + def __init__(self, file_object, file_offset, expected_sequence_number): + super(LogEntry, self).__init__(file_object, file_offset) + + signature = self.get_signature() + if signature != b'HvLE': + raise LogEntryException('Invalid signature: {}'.format(signature)) + + size = self.get_size() + if size < 512 or size % 512 != 0: + raise LogEntryException('Invalid size: {}'.format(size)) + + hbins_data_size = self.get_hbins_data_size() + if hbins_data_size < HIVE_BIN_SIZE_ALIGNMENT or hbins_data_size % HIVE_BIN_SIZE_ALIGNMENT != 0: + raise LogEntryException('Invalid hive bins data size: {}'.format(hbins_data_size)) + + dirty_pages_count = self.get_dirty_pages_count() + if dirty_pages_count == 0: + raise LogEntryException('Invalid dirty pages count: {}'.format(dirty_pages_count)) + + if not self.validate_hashes(): + raise LogEntryException('Invalid hashes'.format(dirty_pages_count)) + + sequence_number = self.get_sequence_number() + if sequence_number != expected_sequence_number: + raise LogEntryException('Unexpected sequence number: {} != {}'.format(sequence_number, 
expected_sequence_number)) + + def get_signature(self): + return self.read_binary(0, 4) + + def get_size(self): + return self.read_uint32(4) + + def get_flags(self): + return self.read_uint32(8) + + def get_sequence_number(self): + return self.read_uint32(12) + + def get_hbins_data_size(self): + return self.read_uint32(16) + + def get_dirty_pages_count(self): + return self.read_uint32(20) + + def get_hash_1(self): + return self.read_uint64(24) + + def get_hash_2(self): + return self.read_uint64(32) + + def calculate_hash_1(self): + b = bytearray(self.read_binary(40, self.get_size() - 40)) + return Marvin32(b) + + def calculate_hash_2(self): + b = bytearray(self.read_binary(0, 32)) + return Marvin32(b) + + def validate_hashes(self): + """Compare calculated hashes to hashes recorded in a log entry.""" + + return self.get_hash_2() == self.calculate_hash_2() and self.get_hash_1() == self.calculate_hash_1() + + def get_dirty_pages_starting_offset(self): + return 40 + self.get_dirty_pages_count() * 8 + + def dirty_pages_references(self): + """This method yields DirtyPageReference tuples.""" + + curr_pos = 40 + i = 0 + while i < self.get_dirty_pages_count(): + primary_file_offset_relative = self.read_uint32(curr_pos) + page_size = self.read_uint32(curr_pos + 4) + + dirty_page_reference = DirtyPageReference(relative_offset_primary = primary_file_offset_relative, size = page_size) + yield dirty_page_reference + + curr_pos += 8 + i += 1 + + def dirty_pages(self): + """This method yields DirtyPage objects.""" + + log_file_base = self.file_offset + self.get_dirty_pages_starting_offset() + primary_file_base = BASE_BLOCK_LENGTH_PRIMARY + + delta = 0 + for dirty_page_reference in self.dirty_pages_references(): + primary_file_offset = dirty_page_reference.relative_offset_primary + primary_file_base + page_size = dirty_page_reference.size + + log_file_offset = log_file_base + delta + + dirty_page = DirtyPage(self.file_object, log_file_offset, page_size, primary_file_offset) + yield 
dirty_page + + delta += page_size + +class NewLogFile(object): + """This is a class for a transaction log file (new format).""" + + baseblock = None + """A base block in a log file (a BaseBlock object).""" + + def __init__(self, file_object): + self.file_object = file_object + + self.baseblock = BaseBlock(self.file_object) + + if self.baseblock.get_file_type() != FILE_TYPE_LOG_NEW: + raise BaseBlockException('Invalid file type') + + if self.baseblock.is_file_dirty: + raise BaseBlockException('Dirty state') + + self.file_size = self.baseblock.get_file_size() + if self.file_size <= BASE_BLOCK_LENGTH_LOG + 40: # Check if at least one log entry can be present in the file. + raise FileSizeException('Invalid file size: {}'.format(self.file_size)) + + def log_entries(self): + """This method yields LogEntry objects.""" + + current_sequence_number = self.baseblock.get_primary_sequence_number() + + curr_pos = BASE_BLOCK_LENGTH_LOG + while curr_pos < self.file_size: + try: + curr_logentry = LogEntry(self.file_object, curr_pos, current_sequence_number) + except (LogEntryException, ReadException): + break # We could read garbage at the end of the file, this is normal. + + yield curr_logentry + + curr_pos += curr_logentry.get_size() + current_sequence_number = c_uint32(current_sequence_number + 1).value # Handle a possible overflow. 
class PrimaryFile(object):
	"""This is a class for a primary file, it provides methods to read the file, to build the maps of cells, and to recover the file using a transaction log.

	Typical usage: construct with a file object opened in binary mode; if the hive is
	dirty, call one of the apply_*_log_file*() methods; optionally call
	save_recovered_hive() to persist the result.
	"""

	file = None
	"""A RegistryFile object for a primary file."""

	baseblock = None
	"""A base block in a primary file (a BaseBlock object)."""

	cell_map_allocated = None
	"""A map of allocated cells."""

	cell_map_unallocated = None
	"""A map of unallocated cells."""

	record_referenced_cells = False
	"""When True, the get_cell() method will add a requested cell to a map of allocated and referenced cells."""

	cell_map_referenced = None
	"""A map of allocated and referenced cells (empty by default)."""

	cell_map_free = None
	"""A map of free (unallocated, unreferenced) cells (empty by default, see the build_map_free() method)."""

	def __init__(self, file_object, tolerate_minor_errors = True):
		"""Open a primary file and build the initial cell maps.

		Raises NotSupportedException when the base block does not describe a primary file.
		"""

		self.file_object = file_object
		self.writable = False  # Becomes True after create_writable_file_object() copies the data to a BytesIO.
		self.file = RegistryFile(file_object)
		self.tolerate_minor_errors = tolerate_minor_errors

		# Recovery state: which log files were applied, how many times, and the last
		# log entry sequence number applied (new-format logs only).
		self.old_log_file = None
		self.new_log_file = None
		self.log_apply_count = 0
		self.last_sequence_number = None

		self.baseblock = BaseBlock(self.file_object)
		if not self.baseblock.is_primary_file:
			raise NotSupportedException('Invalid file type')

		self.build_cell_maps()

	def hive_bins(self):
		"""This method yields HiveBin objects."""

		# Hive bins data starts right after the base block and spans
		# effective_hbins_data_size bytes.
		curr_pos = BASE_BLOCK_LENGTH_PRIMARY
		while curr_pos - BASE_BLOCK_LENGTH_PRIMARY < self.baseblock.effective_hbins_data_size:
			try:
				curr_hivebin = HiveBin(self.file_object, curr_pos, self.tolerate_minor_errors, self.baseblock.use_old_cell_format)
			except (HiveBinException, ReadException):
				if self.baseblock.is_file_dirty and self.log_apply_count == 0:
					# We could read garbage at the end of the dirty file, this is normal.
					self.baseblock.effective_hbins_data_size = curr_pos - BASE_BLOCK_LENGTH_PRIMARY
					break
				else:
					raise # If the file is not dirty (or we recovered the data), this is a serious error.

			yield curr_hivebin

			curr_pos += curr_hivebin.get_size()

	def build_cell_maps(self):
		"""Build the maps of allocated and unallocated cells, clear other maps."""

		self.cell_map_allocated = set()
		self.cell_map_unallocated = set()

		# Each map entry is a cell's absolute file offset.
		for hbin in self.hive_bins():
			for cell in hbin.cells:
				cell_file_offset = cell.file_offset
				if cell.is_allocated():
					self.cell_map_allocated.add(cell_file_offset)
				else:
					self.cell_map_unallocated.add(cell_file_offset)

		# The referenced/free maps are reset; they are filled later by get_cell()
		# (when record_referenced_cells is True) and build_map_free().
		self.cell_map_free = set()
		self.cell_map_referenced = set()

	def build_map_free(self):
		"""Build the map of free cells."""

		self.cell_map_free = set()

		# Allocated-but-unreferenced cells are treated as free only when a referenced
		# map has been recorded; otherwise only unallocated cells are considered free.
		if len(self.cell_map_referenced) > 0:
			self.cell_map_free = self.cell_map_allocated - self.cell_map_referenced

		self.cell_map_free.update(self.cell_map_unallocated)

	def get_root_cell(self):
		"""Get and return data from a root cell."""

		return self.get_cell(self.baseblock.effective_root_cell_offset)

	def get_cell(self, cell_relative_offset):
		"""Get and return data from a cell. The cell must be in the map of allocated cells.

		Raises CellOffsetException for a nil offset or an offset not present in the
		map of allocated cells (the map check is skipped when the map is empty).
		"""

		if cell_relative_offset == CELL_OFFSET_NIL:
			raise CellOffsetException('Got CELL_OFFSET_NIL')

		# Cell offsets are relative to the start of the hive bins data.
		cell_file_offset = BASE_BLOCK_LENGTH_PRIMARY + cell_relative_offset
		if len(self.cell_map_allocated) > 0 and cell_file_offset not in self.cell_map_allocated:
			raise CellOffsetException('There is no valid cell starting at this offset (relative): {}'.format(cell_relative_offset))

		if self.record_referenced_cells:
			self.cell_map_referenced.add(cell_file_offset)

		cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format)
		return cell.get_cell_data()

	def get_cell_naive(self, cell_relative_offset):
		"""Get and return data from a cell naively.

		Unlike get_cell(), the cell maps are not consulted; only the cell size is
		sanity-checked (against CELL_SIZE_MAX_NAIVE). Used for deleted data.
		"""

		if cell_relative_offset == CELL_OFFSET_NIL:
			raise CellOffsetException('Got CELL_OFFSET_NIL')

		cell_file_offset = BASE_BLOCK_LENGTH_PRIMARY + cell_relative_offset

		cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format)

		size = cell.get_absolute_size()
		if size > CELL_SIZE_MAX_NAIVE:
			raise CellOffsetException('Got an obviously invalid offset (relative)')

		return cell.get_cell_data()

	def create_writable_file_object(self):
		"""Create a writable copy of a file object (used to recover a primary file)."""

		if self.writable:
			return  # Already writable, nothing to do.

		new_file_object = BytesIO()

		# Copy data to the new writable file object.
		self.file_object.seek(0)
		copyfileobj(self.file_object, new_file_object)

		# Keep the original read-only object so it can be restored later
		# (see discard_writable_file_object()).
		self.original_file_object = self.file_object
		self.file_object = new_file_object
		self.file = RegistryFile(self.file_object)

		self.writable = True

	def discard_writable_file_object(self):
		"""Discard a writable copy of a file object."""

		if not self.writable:
			return

		self.file_object.close()
		self.file_object = self.original_file_object
		# Re-initialize from the original (unmodified) file object, dropping any
		# recovery state accumulated so far.
		self.__init__(self.file_object, self.tolerate_minor_errors)

	def save_recovered_hive(self, filepath):
		"""Save the recovered hive to a new primary file.

		Raises NotSupportedException when no log file was applied or when the base
		block is invalid and no log file is available to rebuild it.
		"""

		if self.log_apply_count == 0:
			raise NotSupportedException('Cannot save a hive that was not recovered')

		if self.baseblock.is_baseblock_valid:
			# The base block is valid, use it.
			self.file_object.seek(0)
			baseblock_bytes = self.file_object.read(BASE_BLOCK_LENGTH_PRIMARY)
		else:
			# The base block is invalid, use another one from a transaction log file.
			if self.old_log_file is not None:
				self.old_log_file.file_object.seek(0)
				baseblock_bytes = self.old_log_file.file_object.read(BASE_BLOCK_LENGTH_LOG)
			elif self.new_log_file is not None:
				self.new_log_file.file_object.seek(0)
				baseblock_bytes = self.new_log_file.file_object.read(BASE_BLOCK_LENGTH_LOG)
			else:
				raise NotSupportedException('Cannot find a log file to be used to recover the base block')

		# Create a file object for the base block.
		# A log base block is shorter than a primary one, so the buffer is
		# pre-filled with zero bytes to the full primary base block length.
		baseblock_object = BytesIO(b'\x00' * BASE_BLOCK_LENGTH_PRIMARY)

		# Write the base block to the new file object.
		baseblock_object.seek(0)
		baseblock_object.write(baseblock_bytes)

		# Create a new BaseBlock object.
		baseblock = BaseBlock(baseblock_object, True)

		# Update various fields in the base block.
		# Sequence numbers are synchronized so the saved hive is not dirty.
		if self.last_sequence_number is not None:
			baseblock.write_synchronized_sequence_numbers(self.last_sequence_number)
		else:
			baseblock.write_synchronized_sequence_numbers(baseblock.get_primary_sequence_number())

		baseblock.write_hbins_data_size(self.baseblock.effective_hbins_data_size)
		baseblock.write_flags(self.baseblock.effective_flags)
		baseblock.write_file_type(FILE_TYPE_PRIMARY)
		baseblock.update_checksum()

		with open(filepath, 'wb') as f:
			# Copy the old base block and the recovered hive bins data to a file.
			self.file_object.seek(0)
			copyfileobj(self.file_object, f)

			# Copy the new base block over the old one.
			baseblock_object.seek(0)
			f.seek(0)
			f.write(baseblock_object.read())

		# Close the file object.
		baseblock_object.close()

	def apply_old_log_file(self, log_file_object):
		"""Apply a transaction log file (old format) to a primary file.

		Raises RecoveryException when a log was already applied or the hive is not
		dirty, and NotEligibleException when the log is older than the primary file.
		"""

		if self.log_apply_count > 0:
			raise RecoveryException('A log file has been already applied')

		if not self.baseblock.is_file_dirty:
			raise RecoveryException('There is no need to apply the log file')

		self.old_log_file = OldLogFile(log_file_object)
		log_timestamp = self.old_log_file.baseblock.effective_last_written_timestamp
		primary_timestamp = self.baseblock.effective_last_written_timestamp

		# An old-format log is only eligible when it is not older than the
		# primary file (compared by the last written timestamp).
		if log_timestamp < primary_timestamp:
			raise NotEligibleException('This log file cannot be applied')

		# Adopt the effective base block fields from the log file's base block.
		self.baseblock.effective_hbins_data_size = self.old_log_file.baseblock.effective_hbins_data_size
		self.baseblock.effective_root_cell_offset = self.old_log_file.baseblock.effective_root_cell_offset
		self.baseblock.effective_version = self.old_log_file.baseblock.effective_version
		self.baseblock.use_old_cell_format = self.baseblock.effective_version in MINOR_VERSION_NUMBERS_FOR_OLD_CELL_FORMAT
		self.baseblock.effective_last_reorganized_timestamp = self.old_log_file.baseblock.effective_last_reorganized_timestamp
		self.baseblock.effective_last_written_timestamp = self.old_log_file.baseblock.effective_last_written_timestamp
		self.baseblock.effective_flags = self.old_log_file.baseblock.effective_flags

		self.create_writable_file_object()

		for dirty_page in self.old_log_file.dirty_pages(): # Apply dirty pages.
			self.file.write_binary(dirty_page.primary_file_offset, dirty_page.get_bytes())

		self.log_apply_count += 1
		self.build_cell_maps()

	def apply_new_log_file(self, log_file_object, callback = None):
		"""Apply a single transaction log file (new format) to a primary file.
		After a log entry has been applied, call an optional callback function.

		Raises RecoveryException when more than two logs would be applied, the hive
		is not dirty, or the log's sequence numbers are out of order; raises
		NotEligibleException when the log is behind the primary file.
		"""

		if self.log_apply_count >= 2:
			raise RecoveryException('No more than two log files can be applied')

		if not self.baseblock.is_file_dirty:
			raise RecoveryException('There is no need to apply the log file')

		self.new_log_file = NewLogFile(log_file_object)

		# A second log must continue where the first one stopped.
		if self.last_sequence_number is not None and self.last_sequence_number >= self.new_log_file.baseblock.get_primary_sequence_number():
			raise RecoveryException('This log file cannot be applied')

		if self.baseblock.is_baseblock_valid and self.new_log_file.baseblock.get_primary_sequence_number() < self.baseblock.get_secondary_sequence_number():
			raise NotEligibleException('This log file cannot be applied')

		# Adopt the effective base block fields from the log file's base block
		# (hbins_data_size and flags are updated per log entry below).
		self.baseblock.effective_root_cell_offset = self.new_log_file.baseblock.effective_root_cell_offset
		self.baseblock.effective_version = self.new_log_file.baseblock.effective_version
		self.baseblock.use_old_cell_format = self.baseblock.effective_version in MINOR_VERSION_NUMBERS_FOR_OLD_CELL_FORMAT
		self.baseblock.effective_last_reorganized_timestamp = self.new_log_file.baseblock.effective_last_reorganized_timestamp
		self.baseblock.effective_last_written_timestamp = self.new_log_file.baseblock.effective_last_written_timestamp
		self.baseblock.effective_flags = self.new_log_file.baseblock.effective_flags

		self.create_writable_file_object()

		for log_entry in self.new_log_file.log_entries():
			self.last_sequence_number = log_entry.get_sequence_number()
			self.baseblock.effective_flags = LogEntryFlagsToBaseBlockFlags(log_entry.get_flags(), self.baseblock.effective_flags)
			self.baseblock.effective_hbins_data_size = log_entry.get_hbins_data_size()

			for dirty_page in log_entry.dirty_pages(): # Apply dirty pages.
				self.file.write_binary(dirty_page.primary_file_offset, dirty_page.get_bytes())

			if callback is not None:
				# Rebuild the maps after each entry so the callback sees a
				# consistent intermediate state of the hive.
				self.build_cell_maps()
				callback()

		self.log_apply_count += 1

		if callback is None:
			self.build_cell_maps()

	def apply_new_log_files(self, log_file_object_1, log_file_object_2, callback = None):
		"""Apply two transaction log files (new format) to a primary file.
		After a log entry has been applied, call an optional callback function.
		"""

		def is_starting_log(this_sequence_number, another_sequence_number):
			# Return True when 'this' log should be applied first. Sequence numbers
			# are compared modulo 2**32: a delta larger than 0x7FFFFFFF means the
			# numbers wrapped around, so the apparent order is inverted.
			if this_sequence_number >= another_sequence_number:
				delta = this_sequence_number - another_sequence_number
				starting = False
			else:
				delta = another_sequence_number - this_sequence_number
				starting = True

			if c_uint32(delta).value <= 0x7FFFFFFF:
				return starting
			else:
				return not starting # Sequence numbers did overflow.


		new_log_file_1 = NewLogFile(log_file_object_1)
		sequence_number_1 = new_log_file_1.baseblock.get_primary_sequence_number()

		new_log_file_2 = NewLogFile(log_file_object_2)
		sequence_number_2 = new_log_file_2.baseblock.get_primary_sequence_number()

		# Order the two logs by their starting sequence numbers.
		if is_starting_log(sequence_number_1, sequence_number_2):
			first = log_file_object_1
			second = log_file_object_2
		else:
			first = log_file_object_2
			second = log_file_object_1

		if self.baseblock.is_baseblock_valid:
			try:
				self.apply_new_log_file(first, callback)
			except NotEligibleException:
				# The first log is too old; continue with the second one only.
				pass

			self.apply_new_log_file(second, callback)
		else:
			self.apply_new_log_file(second, callback) # This is how Windows works.
+ +class PrimaryFileTruncated(object): + """This is a class for a truncated primary file, it provides methods to read the truncated file, to build the maps of cells, and to yield each cell.""" + + file = None + """A RegistryFile object for a primary file.""" + + baseblock = None + """A base block in a primary file (a BaseBlock object).""" + + cell_map_allocated = None + """A map of allocated cells.""" + + cell_map_unallocated = None + """A map of unallocated cells.""" + + cell_map_free = None + """A map of free (unallocated only) cells.""" + + def __init__(self, file_object): + self.file_object = file_object + self.writable = False + self.file = RegistryFile(file_object) + + self.baseblock = BaseBlock(self.file_object) + if not self.baseblock.is_primary_file: + raise NotSupportedException('Invalid file type') + + self.build_cell_maps() + + def hive_bins(self): + """This method yields HiveBin objects.""" + + curr_pos = BASE_BLOCK_LENGTH_PRIMARY + while curr_pos - BASE_BLOCK_LENGTH_PRIMARY < self.baseblock.effective_hbins_data_size: + try: + curr_hivebin = HiveBin(self.file_object, curr_pos, True, self.baseblock.use_old_cell_format) + except (HiveBinException, ReadException): + break # Since we expect a truncation point, stop here. + + yield curr_hivebin + + curr_pos += curr_hivebin.get_size() + + def build_cell_maps(self): + """Build the maps of allocated and unallocated cells.""" + + self.cell_map_allocated = set() + self.cell_map_unallocated = set() + + for hbin in self.hive_bins(): + for cell in hbin.cells: + cell_file_offset = cell.file_offset + if cell.is_allocated(): + self.cell_map_allocated.add(cell_file_offset) + else: + self.cell_map_unallocated.add(cell_file_offset) + + self.cell_map_free = self.cell_map_unallocated + + def get_cell(self, cell_relative_offset): + """Get and return data from a cell. 
The cell must be in the map of allocated cell or in the map of unallocated cells.""" + + if cell_relative_offset == CELL_OFFSET_NIL: + raise CellOffsetException('Got CELL_OFFSET_NIL') + + cell_file_offset = BASE_BLOCK_LENGTH_PRIMARY + cell_relative_offset + if cell_file_offset not in self.cell_map_allocated and cell_file_offset not in self.cell_map_unallocated: + raise CellOffsetException('There is no valid cell starting at this offset (relative): {}'.format(cell_relative_offset)) + + cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format) + return cell.get_cell_data() + + def get_cell_naive(self, cell_relative_offset): + """Get and return data from a cell naively.""" + + if cell_relative_offset == CELL_OFFSET_NIL: + raise CellOffsetException('Got CELL_OFFSET_NIL') + + cell_file_offset = BASE_BLOCK_LENGTH_PRIMARY + cell_relative_offset + + cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format) + return cell.get_cell_data() + + def cells(self, yield_unallocated_cells = False): + """This method yields a HiveCell object for each cell.""" + + for cell_file_offset in self.cell_map_allocated: + cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format) + yield cell + + if yield_unallocated_cells: + for cell_file_offset in self.cell_map_unallocated: + cell = HiveCell(self.file_object, cell_file_offset, self.baseblock.use_old_cell_format) + yield cell diff --git a/yarp/RegistryHelpers.py b/yarp/RegistryHelpers.py new file mode 100644 index 0000000..770356a --- /dev/null +++ b/yarp/RegistryHelpers.py @@ -0,0 +1,105 @@ +# yarp: yet another registry parser +# (c) Maxim Suhanov + +from __future__ import unicode_literals + +from os import path, linesep +from collections import namedtuple + +DiscoveredLogFiles = namedtuple('DiscoveredLogFiles', [ 'log_path', 'log1_path', 'log2_path' ]) + +def DiscoverLogFiles(PrimaryPath): + """Return a named tuple (DiscoveredLogFiles) describing a path 
to each transaction log file of a supplied primary file.""" + + def DiscoverLogFilesInternal(PrimaryPath): + # We prefer uppercase extensions. + log = PrimaryPath + '.LOG' + log1 = PrimaryPath + '.LOG1' + log2 = PrimaryPath + '.LOG2' + + if path.isfile(log) or path.isfile(log1) or path.isfile(log2): + # At least one file has an uppercase extension, use it and others (if present). + if not path.isfile(log): + log = None + if not path.isfile(log1): + log1 = None + if not path.isfile(log2): + log2 = None + + return DiscoveredLogFiles(log_path = log, log1_path = log1, log2_path = log2) + + # Now, switch to lowercase extensions. + log = PrimaryPath + '.log' + log1 = PrimaryPath + '.log1' + log2 = PrimaryPath + '.log2' + + if path.isfile(log) or path.isfile(log1) or path.isfile(log2): + # At least one file has a lowercase extension, use it and others (if present). + if not path.isfile(log): + log = None + if not path.isfile(log1): + log1 = None + if not path.isfile(log2): + log2 = None + + return DiscoveredLogFiles(log_path = log, log1_path = log1, log2_path = log2) + + directory, filename = path.split(PrimaryPath) + filenames = sorted(set([ filename, filename.lower(), filename.upper() ])) + for filename in filenames: + result = DiscoverLogFilesInternal(path.join(directory, filename)) + if result is not None: + return result + + # Give up. 
+ return DiscoveredLogFiles(log_path = None, log1_path = None, log2_path = None) + +def HexDump(Buffer): + """Return bytes from Buffer as a hexdump-like string (16 bytes per line).""" + + def int2hex(i): + return '{:02X}'.format(i) + + if type(Buffer) is not bytearray: + Buffer = bytearray(Buffer) + + output_lines = '' + + i = 0 + while i < len(Buffer): + bytes_line = Buffer[i : i + 16] + + address = int2hex(i) + address = str(address).zfill(8) + hex_line = '' + ascii_line = '' + + k = 0 + while k < len(bytes_line): + single_byte = bytes_line[k] + + hex_line += int2hex(single_byte) + if k == 7 and k != len(bytes_line) - 1: + hex_line += '-' + elif k != len(bytes_line) - 1: + hex_line += ' ' + + if single_byte >= 32 and single_byte <= 126: + ascii_line += chr(single_byte) + else: + ascii_line += '.' + + k += 1 + + padding_count = 16 - k + if padding_count > 0: + hex_line += ' ' * 3 * padding_count + + output_lines += address + ' ' * 2 + hex_line + ' ' * 2 + ascii_line + + i += 16 + + if i < len(Buffer): + output_lines += linesep + + return output_lines diff --git a/yarp/RegistryRecords.py b/yarp/RegistryRecords.py new file mode 100644 index 0000000..ed1bfa4 --- /dev/null +++ b/yarp/RegistryRecords.py @@ -0,0 +1,524 @@ +# yarp: yet another registry parser +# (c) Maxim Suhanov + +from __future__ import unicode_literals + +from struct import unpack +from collections import namedtuple +from .RegistryFile import RegistryException + +# Key node flags. +KEY_VOLATILE = 0x0001 +KEY_HIVE_EXIT = 0x0002 +KEY_HIVE_ENTRY = 0x0004 +KEY_NO_DELETE = 0x0008 +KEY_SYM_LINK = 0x0010 +KEY_COMP_NAME = 0x0020 +KEY_PREDEF_HANDLE = 0x0040 +KEY_VIRT_SOURCE = 0x0080 +KEY_VIRT_TARGET = 0x0100 +KEY_VIRT_STORE = 0x0200 + +# User flags for a key node. +KEY_FLAG_32BIT = 0x1 +KEY_FLAG_REFLECTED = 0x2 +KEY_FLAG_EXEMPT_REFLECTION = 0x4 +KEY_FLAG_OWNERSHIP_REFLECTION = 0x8 + +# Virtualization control flags for a key node. 
REG_KEY_DONT_VIRTUALIZE = 0x2
REG_KEY_DONT_SILENT_FAIL = 0x4
REG_KEY_RECURSE_FLAG = 0x8

# Debug flags for a key node.
BREAK_ON_OPEN = 0x01
BREAK_ON_DELETE = 0x02
BREAK_ON_SECURITY_CHANGE = 0x04
BREAK_ON_CREATE_SUBKEY = 0x08
BREAK_ON_DELETE_SUBKEY = 0x10
BREAK_ON_SET_VALUE = 0x20
BREAK_ON_DELETE_VALUE = 0x40
BREAK_ON_KEY_VIRTUALIZE = 0x80

# Key value flags.
VALUE_COMP_NAME = 0x0001
VALUE_TOMBSTONE = 0x0002

# Data types for a key value.
REG_NONE = 0x00000000
REG_SZ = 0x00000001
REG_EXPAND_SZ = 0x00000002
REG_BINARY = 0x00000003
REG_DWORD = 0x00000004
REG_DWORD_LITTLE_ENDIAN = REG_DWORD
REG_DWORD_BIG_ENDIAN = 0x00000005
REG_LINK = 0x00000006
REG_MULTI_SZ = 0x00000007
REG_RESOURCE_LIST = 0x00000008
REG_FULL_RESOURCE_DESCRIPTOR = 0x00000009
REG_RESOURCE_REQUIREMENTS_LIST = 0x0000000a
REG_QWORD = 0x0000000b
REG_QWORD_LITTLE_ENDIAN = REG_QWORD

# A single element of a subkeys list (relative cell offset plus an optional
# name hint or name hash, depending on the list type).
LeafElement = namedtuple('LeafElement', [ 'relative_offset', 'name_hint', 'name_hash' ])

class ParseException(RegistryException):
	"""This exception is raised when a registry record is invalid."""

	def __init__(self, value):
		self._value = value

	def __str__(self):
		return repr(self._value)

class MemoryBlock(object):
	"""This is a generic class for a memory block (cell data), it provides low-level methods for reading and parsing data.
	All methods are self-explanatory.
	"""

	def __init__(self, buf):
		self.buf = buf

	def read_binary(self, pos, length = None):
		# With no length given, return everything from 'pos' to the end of the buffer.
		if length is None:
			b = self.buf[pos : ]
			return b

		b = self.buf[pos : pos + length]
		if len(b) != length:
			raise ParseException('Cannot read data (expected: {} bytes, read: {} bytes)'.format(length, len(b)))

		return b

	def read_uint8(self, pos):
		b = self.read_binary(pos, 1)
		return unpack('> 12
		# NOTE(review): the source text is corrupted at this point. Everything from
		# the struct format string above (presumably "'<B', b)[0]") up to a ">> 12"
		# expression in a later method was lost, most likely because "<...>" spans
		# were stripped from the text. The missing span should contain the remaining
		# read_uint16/read_uint32/... helpers of MemoryBlock and the beginning of a
		# key node record class. Restore from upstream; do not reconstruct by guessing.

	# NOTE(review): the accessors below read fields at key node offsets (name length
	# at 72, name bytes at 76, and so on); they appear to be the tail of a KeyNode
	# class whose header was lost in the corrupted span above -- confirm upstream.
	def get_user_flags_new(self):
		# The low 4 bits of the combined field hold the user flags.
		return self.get_virtualization_control_and_user_flags() & 0xF

	def get_virtualization_control_flags(self):
		# The bits above the user flags hold the virtualization control flags.
		return self.get_virtualization_control_and_user_flags() >> 4

	def get_debug(self):
		return self.read_uint8(55)

	def get_largest_subkey_classname_length(self):
		return self.read_uint32(56)

	def get_largest_value_name_length(self):
		return self.read_uint32(60)

	def get_largest_value_data_size(self):
		return self.read_uint32(64)

	def get_workvar(self):
		return self.read_uint32(68)

	def get_key_name_length(self):
		return self.read_uint16(72)

	def get_classname_length(self):
		return self.read_uint16(74)

	def get_key_name(self):
		"""Get and return a key name string (as raw bytes)."""

		return self.read_binary(76, self.get_key_name_length())

	def get_slack(self):
		# Everything after the variable-length key name is slack space.
		return self.read_binary(76 + self.get_key_name_length())

class KeyValuesList(MemoryBlock):
	"""This is a class for a key values list, it provides methods to read this list."""

	def __init__(self, buf, elements_count):
		super(KeyValuesList, self).__init__(buf)

		self.elements_count = elements_count

	def elements(self):
		"""This method yields key value offsets."""

		# Each element is a 4-byte offset.
		i = 0
		while i < self.elements_count:
			yield self.read_uint32(i * 4)
			i += 1

	def get_slack(self):
		return self.read_binary(self.elements_count * 4)

class KeyValue(MemoryBlock):
	"""This is a class for a key value, it provides methods to access various fields of the key value.
	Most methods are self-explanatory.
+ """ + + def __init__(self, buf): + super(KeyValue, self).__init__(buf) + + signature = self.get_signature() + if signature != b'vk': + raise ParseException('Invalid signature: {}'.format(signature)) + + def get_signature(self): + return self.read_binary(0, 2) + + def get_value_name_length(self): + return self.read_uint16(2) + + def get_data_size(self): + return self.read_uint32(4) + + def get_data_size_real(self): + """Get and return a real size of data (the most significant bit is ignored).""" + + size = self.get_data_size() + if size >= 0x80000000: + size -= 0x80000000 + + return size + + def is_data_inline(self): + """Return True if data is stored inline (in the data offset field).""" + + return self.get_data_size() >= 0x80000000 + + def get_inline_data(self): + return self.read_binary(8, 4) + + def get_data_offset(self): + return self.read_uint32(8) + + def get_data_type(self): + return self.read_uint32(12) + + def get_flags(self): + return self.read_uint16(16) + + def get_spare(self): + return self.read_uint16(18) + + def get_title_index(self): + return self.read_uint32(16) # The same offset as above. + + def get_value_name(self): + """Get and return a value name string (as raw bytes).""" + + return self.read_binary(20, self.get_value_name_length()) + + def get_slack(self): + return self.read_binary(20 + self.get_value_name_length()) + +class KeySecurity(MemoryBlock): + """This is a class for a key security item, it provides methods to access various fields of the key security item. + All methods are self-explanatory. 
+ """ + + def __init__(self, buf): + super(KeySecurity, self).__init__(buf) + + signature = self.get_signature() + if signature != b'sk': + raise ParseException('Invalid signature: {}'.format(signature)) + + security_descriptor_size = self.get_security_descriptor_size() + if security_descriptor_size == 0: + raise ParseException('Empty security descriptor') + + def get_signature(self): + return self.read_binary(0, 2) + + def get_reserved(self): + return self.read_uint16(2) + + def get_flink(self): + return self.read_uint32(4) + + def get_blink(self): + return self.read_uint32(8) + + def get_reference_count(self): + return self.read_uint32(12) + + def get_security_descriptor_size(self): + return self.read_uint32(16) + + def get_security_descriptor(self): + """Get and return a security descriptor (as raw bytes).""" + + return self.read_binary(20, self.get_security_descriptor_size()) + + def get_slack(self): + return self.read_binary(20 + self.get_security_descriptor_size()) + +class SegmentsList(MemoryBlock): + """This is a class for a segments list (big data), it provides a method to read this list.""" + + def __init__(self, buf, elements_count): + super(SegmentsList, self).__init__(buf) + + self.elements_count = elements_count + + def elements(self): + """This method yields segment offsets.""" + + i = 0 + while i < self.elements_count: + yield self.read_uint32(i * 4) + i += 1 + + def get_slack(self): + return self.read_binary(self.elements_count * 4) + +class BigData(MemoryBlock): + """This is a class for a big data record, it provides methods to access various fields of the big data record. + All methods are self-explanatory. 
+ """ + + def __init__(self, buf): + super(BigData, self).__init__(buf) + + signature = self.get_signature() + if signature != b'db': + raise ParseException('Invalid signature: {}'.format(signature)) + + segments_count = self.get_segments_count() + if segments_count < 2: + raise ParseException('Invalid number of segments: {}'.format(segments_count)) + + def get_signature(self): + return self.read_binary(0, 2) + + def get_segments_count(self): + return self.read_uint16(2) + + def get_segments_list_offset(self): + return self.read_uint32(4) + + def get_slack(self): + return self.read_binary(8) diff --git a/yarp/RegistryRecover.py b/yarp/RegistryRecover.py new file mode 100644 index 0000000..a1f1b5f --- /dev/null +++ b/yarp/RegistryRecover.py @@ -0,0 +1,143 @@ +# yarp: yet another registry parser +# (c) Maxim Suhanov + +from __future__ import unicode_literals + +from . import Registry +from . import RegistryFile + +MAX_PLAUSIBLE_SUBKEYS_COUNT = 80000 +MAX_PLAUSIBLE_VALUES_COUNT = 70000 +MAX_PLAUSIBLE_NAME_LENGTH = 2048 + +def ValidateKey(Key): + """Check whether or not a key looks plausible. If not, an exception is raised.""" + + key_name = Key.name() + if len(key_name) > MAX_PLAUSIBLE_NAME_LENGTH: + raise Registry.RegistryException('Implausible name length') + + if Key.subkeys_count() > MAX_PLAUSIBLE_SUBKEYS_COUNT or Key.key_node.get_volatile_subkeys_count() > MAX_PLAUSIBLE_SUBKEYS_COUNT: + raise Registry.RegistryException('Implausible number of subkeys reported') + + if Key.values_count() > MAX_PLAUSIBLE_VALUES_COUNT: + raise Registry.RegistryException('Implausible number of values reported') + + timestamp_year = Key.last_written_timestamp().year + if timestamp_year < 1970 or timestamp_year > 2100: + raise Registry.RegistryException('Implausible last written timestamp') + +def ValidateValue(Value): + """Check whether or not a value looks plausible. 
If not, an exception is raised.""" + + value_name = Value.name() + if len(value_name) > MAX_PLAUSIBLE_NAME_LENGTH: + raise Registry.RegistryException('Implausible name length') + + if Value.key_value.is_data_inline() and Value.key_value.get_data_size_real() > 4: + raise Registry.RegistryException('Value data is too large to be stored inline') + +class Scanner(object): + """This class is used to scan free cells for deleted keys and values.""" + + hive = None + """A RegistryHive object.""" + + def __init__(self, hive, scan_remnant_data = True, scan_slack_space = True): + """Argument: + - hive: a RegistryHive object; + - scan_remnant_data: when True, also scan remnant data within a primary file. + """ + + self.hive = hive + self.scan_remnant_data = scan_remnant_data + self.scan_slack_space = scan_slack_space + + def virtual_cell(self): + """Get and return remnant data within a primary file as a virtual cell (if any, else return None).""" + + if not self.scan_remnant_data: + return + + offset = RegistryFile.BASE_BLOCK_LENGTH_PRIMARY + self.hive.registry_file.baseblock.effective_hbins_data_size + self.hive.registry_file.file_object.seek(offset) + data = self.hive.registry_file.file_object.read() + if len(data) == 0: + return + + return data + + def process_cell(self, cell): + """Scan data of a cell for deleted keys and values, yield them as RegistryKey and RegistryValue objects.""" + + pos = 0 + while pos < len(cell): + if pos < len(cell) - 76: # A key node with at least one character in the name. + two_bytes = cell[pos : pos + 2] + if two_bytes == b'nk': + candidate_nk = cell[pos : ] + try: + key = Registry.RegistryKey(self.hive.registry_file, candidate_nk, None, None, True, True) + ValidateKey(key) + except (Registry.RegistryException, UnicodeDecodeError): + pass + else: + yield key + + pos += 76 + key.key_node.get_key_name_length() + if pos % 2 != 0: + pos += 1 + + continue + + pos += 2 + continue + + if pos <= len(cell) - 20: # A key value with no name (at least). 
+ two_bytes = cell[pos : pos + 2] + if two_bytes == b'vk': + candidate_vk = cell[pos : ] + try: + value = Registry.RegistryValue(self.hive.registry_file, candidate_vk, True) + ValidateValue(value) + except (Registry.RegistryException, UnicodeDecodeError): + pass + else: + yield value + + pos += 20 + value.key_value.get_value_name_length() + if pos % 2 != 0: + pos += 1 + + continue + + pos += 2 + continue + + pos += 2 + + def scan(self): + """This method yields RegistryKey objects for deleted keys and RegistryValue objects for deleted values. + A hive is required to have the free map built (or nothing will be recovered). + """ + + for file_offset in self.hive.registry_file.cell_map_free: + cell = self.hive.registry_file.get_cell_naive(file_offset - RegistryFile.BASE_BLOCK_LENGTH_PRIMARY) + + for result in self.process_cell(cell): + yield result + + virtual_cell = self.virtual_cell() + if virtual_cell is not None: + for result in self.process_cell(virtual_cell): + yield result + + if self.scan_slack_space: + for slack in self.hive.effective_slack: + if len(slack) % 2 != 0: + virtual_cell = slack[ 1 : ] + else: + virtual_cell = slack + + for result in self.process_cell(virtual_cell): + yield result diff --git a/yarp/__init__.py b/yarp/__init__.py new file mode 100644 index 0000000..7e8344d --- /dev/null +++ b/yarp/__init__.py @@ -0,0 +1,5 @@ +# yarp: yet another registry parser +# (c) Maxim Suhanov + +__version__ = '1.0.0-beta1' +__all__ = [ 'Registry', 'RegistryFile', 'RegistryRecords', 'RegistryRecover', 'RegistryCarve', 'RegistryHelpers' ]