Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Push out CLI and GUI interface for latest version
- Loading branch information
Showing
5 changed files
with
230 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
#sqlparse.py | ||
# | ||
#This program parses an SQLite3 database for deleted entries and | ||
#places the output into either a TSV file or a text file | ||
# | ||
#The SQLite file format, offsets etc is described at | ||
#sqlite.org/fileformat.html | ||
# | ||
# | ||
# Copyright (C) 2015 Mari DeGrazia (arizona4n6@gmail.com) | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You can view the GNU General Public License at <http://www.gnu.org/licenses/> | ||
# | ||
# Version History: | ||
# v1.1 2013-11-05 | ||
# | ||
# v1.2 2015-06-20 | ||
#support added in to print out non b-tree pages | ||
# | ||
# v.1.3 2015-06-21 | ||
#minor changes / comments etc. | ||
# | ||
# | ||
#Find a bug???? Please let me know and I'll try to fix it (if you ask nicely....) | ||
# | ||
|
||
import struct | ||
from optparse import OptionParser | ||
import sys | ||
|
||
#function to remove the non-printable characters, tabs and white spaces | ||
def remove_ascii_non_printable(chunk):
    """Return *chunk* reduced to printable ASCII with whitespace collapsed.

    Runs of whitespace (spaces, tabs, newlines, ...) are collapsed to a
    single space and leading/trailing whitespace is dropped. After that,
    every character outside the printable ASCII range 0x20-0x7E is removed.

    chunk -- the data to clean; a text string, or a bytes/bytearray object
             as returned by a file opened in binary mode.

    Returns a text string (possibly empty).
    """
    if isinstance(chunk, (bytes, bytearray)) and not isinstance(chunk, str):
        # Python 3 binary data: collapse ASCII whitespace at the byte level,
        # then map each byte 1:1 onto a character so ord() works below.
        chunk = b' '.join(bytes(chunk).split()).decode('latin-1')
    else:
        chunk = ' '.join(chunk.split())

    # 32..126 inclusive is the full printable ASCII range. The upper bound
    # includes '~' (126). Tabs never reach this point because the split/join
    # above has already turned them into single spaces.
    return ''.join([ch for ch in chunk if 32 <= ord(ch) <= 126])
|
||
|
||
# Command-line handling and input validation.
#
# After this section runs the following names are available to the rest of
# the script:
#   options  -- parsed command-line options (infile, outfile, raw, printpages)
#   f        -- the database file, opened for binary reading
#   output   -- the report file, opened for writing
#   filesize -- size of the database file in bytes
#   pagesize -- database page size in bytes, as stored in the file header
# On any usage or validation error a message is printed and the script exits.

usage = ("Parse deleted records from an SQLite file into a TSV File or text file \n"
         "Examples:\n"
         "-f /home/sanforensics/smsmms.db -o report.tsv\n"
         "-f /home/sanforensics/smssms.db -r -o report.txt \n")

parser = OptionParser(usage=usage)

parser.add_option("-f", "--file", dest="infile", help="sqlite database file", metavar="smsmms.db")
parser.add_option("-o", "--output", dest="outfile", help="Output to a tsv file. Strips white space, tabs and non-printable characters from data field", metavar="output.tsv")
parser.add_option("-r", "--raw", action="store_true", dest="raw", help="Optional. Will out put data field in a raw format and text file.", metavar="output.tsv")
parser.add_option("-p", "--printpages", action="store_true", dest="printpages", help="Optional. Will print any printable non-whitespace chars from all non-leaf b-tree pages (in case page has been re-purposed). WARNING: May output a lot of string data.")

(options, args) = parser.parse_args()

# no arguments given by user, exit
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(0)

# if the input or output file is missing, exit
if options.infile is None or options.outfile is None:
    parser.print_help()
    print("Filename or Output file not given")
    sys.exit(0)

# open the database file for binary reading
try:
    f = open(options.infile, "rb")
except IOError:
    print("File not Found")
    sys.exit(0)

try:
    output = open(options.outfile, 'w')
except IOError:
    f.close()  # do not leak the input handle on the way out
    print("Error opening output file")
    sys.exit(0)

# write the column header if not outputting to text file
if not options.raw:
    output.write("Type\tOffset\tLength\tData\n")

# get the file size from the filesystem rather than reading the whole file
import os
filesize = os.stat(options.infile).st_size

# be kind, rewind (to the beginning of the file, that is)
f.seek(0)

# verify the file is an sqlite db; read the first 16 bytes for the header
header = f.read(16)

if b"SQLite" not in header:
    print("File does not appear to be an SQLite File")
    output.close()
    f.close()
    sys.exit(0)

# OK, lets get started. The SQLite database is made up of multiple pages and
# we need the size of each page. It is stored at offset 16 as a 2-byte
# big-endian integer, which is exactly where the file position is now.
raw_pagesize = f.read(2)
pagesize = struct.unpack('>H', raw_pagesize)[0] if len(raw_pagesize) == 2 else 0

# Per the SQLite file format, the stored value 1 stands for a 65536-byte page
# (65536 does not fit in two bytes).
if pagesize == 1:
    pagesize = 65536

# A truncated header or a page size of 0 cannot be processed: stepping
# through the file by 0 bytes per page would never terminate.
if pagesize == 0:
    print("File does not appear to be an SQLite File")
    output.close()
    f.close()
    sys.exit(0)
|
||
#According to SQLite.org/fileformat.html, all the data is contained in the table-b-trees leaves. | ||
#Let's go to each Page, read the B-Tree Header, and see if it is a table b-tree, which is designated by the flag 13 | ||
|
||
#set the offset to 0, so we can also process any strings in the first page | ||
offset = 0

# Walk the file one page at a time until the end of the file is reached.
# For every table b-tree leaf page (flag 13) the unallocated gap after the
# cell pointer array and every freeblock on the page are written to the
# report. With -p, every other page is reduced to its printable strings and
# written as well. Both files are closed even if a malformed page makes
# parsing fail part-way through.
try:
    while offset < filesize:

        # Move to the beginning of the page and read the b-tree flag; 13
        # means a leaf table b-tree page and we want to process it.
        # The flag is deliberately kept signed so that the type labels
        # written for non-leaf pages are the same as in earlier versions.
        f.seek(offset)
        flag = struct.unpack('>b', f.read(1))[0]

        if flag == 13:

            # The rest of the 8-byte page header: first freeblock offset,
            # number of cells, start of the cell content area and the
            # fragmented-free-bytes count (not needed here). The fields are
            # unsigned; reading them signed yields negative offsets on pages
            # of 32 KiB or more.
            page_header = f.read(7)
            if len(page_header) < 7:
                break  # truncated final page, nothing more to parse

            freeblock_offset, num_cells, cell_offset, _ = struct.unpack('>HHHB', page_header)

            # Per the SQLite file format a stored cell content offset of 0
            # stands for 65536.
            if cell_offset == 0:
                cell_offset = 65536

            # Unallocated is the space after the header information and
            # before the first cell starts: skip the 8-byte header and the
            # cell pointer array (2 bytes per cell).
            start = 8 + (num_cells * 2)

            # Clamp at 0: on a corrupt page the difference can be negative,
            # and a negative size would make read() return the whole rest
            # of the file.
            length = max(cell_offset - start, 0)

            f.seek(offset + start)
            unallocated = f.read(length)

            if options.raw:
                output.write("Unallocated, Offset " + str(offset + start) + " Length " + str(length) + "\n")
                output.write("Data:\n")
                output.write(unallocated)
                output.write("\n\n")
            else:
                # clean this up so it is mainly the strings - remove white
                # space and tabs too
                unallocated = remove_ascii_non_printable(unallocated)
                if unallocated != "":
                    output.write("Unallocated" + "\t" + str(offset + start) + "\t" + str(length) + "\t" + str(unallocated) + "\n")

            # If there are freeblocks, pull their data. Offsets already
            # visited are remembered so that a corrupt, cyclic chain cannot
            # loop forever.
            seen_freeblocks = set()
            while freeblock_offset != 0 and freeblock_offset not in seen_freeblocks:
                seen_freeblocks.add(freeblock_offset)

                # A freeblock starts with two 2-byte big-endian integers:
                # the offset of the next freeblock in the chain (0 if this
                # is the last one) and the size of this freeblock.
                f.seek(offset + freeblock_offset)
                fb_header = f.read(4)
                if len(fb_header) < 4:
                    break  # chain points past the end of the file

                next_fb_offset, free_block_size = struct.unpack('>HH', fb_header)

                # go back to the start of the freeblock and read all of it
                # (the 4 header bytes are part of the reported data)
                f.seek(offset + freeblock_offset)
                free_block = f.read(free_block_size)

                if options.raw:
                    output.write("Free Block, Offset " + str(offset + freeblock_offset) + ", Length " + str(free_block_size) + "\n")
                    output.write("Data:\n")
                    output.write(free_block)
                    output.write("\n\n")
                else:
                    # clean this up so it is mainly the strings - remove
                    # white space and tabs too
                    free_block = remove_ascii_non_printable(free_block)
                    # Decide on this freeblock's own content, not on the
                    # unallocated data read earlier from the page.
                    if free_block != "":
                        output.write("Free Block" + "\t" + str(offset + freeblock_offset) + "\t" + str(free_block_size) + "\t" + str(free_block) + "\n")

                freeblock_offset = next_fb_offset

        # Cheeky's Change: extract strings from non-leaf-table b-tree pages
        # to handle re-purposed/re-used pages. According to the docs, valid
        # flag values are 2, 5, 10, 13 BUT pages containing string data have
        # also been observed with flag = 0, so just print strings from all
        # pages whose flag is not 13.
        elif options.printpages:
            # read the rest of the page (the flag byte has already been
            # read), filter out unprintables, then write it
            pagestring = f.read(pagesize - 1)
            printable_pagestring = remove_ascii_non_printable(pagestring)

            if options.raw:
                output.write("Non-Leaf-Table-Btree-Type_" + str(flag) + ", Offset " + str(offset) + ", Length " + str(pagesize) + "\n")
                output.write("Data: (ONLY PRINTABLE STRINGS ARE SHOWN HERE. FOR RAW DATA, CHECK FILE IN HEX VIEWER AT ABOVE LISTED OFFSET):\n\n")
                output.write(printable_pagestring)
                output.write("\n\n")
            else:
                output.write("Non-Leaf-Table-Btree-Type_" + str(flag) + "\t" + str(offset) + "\t" + str(pagesize) + "\t" + printable_pagestring + "\n")

        # increase the offset by one pagesize and loop
        offset += pagesize
finally:
    output.close()
    f.close()
|
||
#end | ||
|