Skip to content

Commit

Permalink
sql updates
Browse files Browse the repository at this point in the history
  • Loading branch information
killswitch-GUI committed Jul 1, 2016
1 parent 9fac802 commit 9efd5c7
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Expand Up @@ -58,4 +58,4 @@ docs/_build/
target/

# db
.db
*.db
61 changes: 59 additions & 2 deletions Common/TaskController.py
Expand Up @@ -11,13 +11,15 @@
import subprocess
import logging
import datetime
# internal modules
from Helpers import helpers
from Helpers import messages
from Helpers import HtmlBootStrapTheme
from Helpers import VerifyEmails
from Helpers import Connect6
from Helpers import EmailFormat
from Helpers import LinkedinNames
import sql_opperations


class Conducter(object):
Expand Down Expand Up @@ -47,6 +49,7 @@ def __init__(self):
self.JsonList = []
self.Tasks = []
self.ResultsList = []
self.search_id = 0
self.logger = logging.getLogger("SimplyEmail.TaskController")
try:
config = configparser.ConfigParser()
Expand Down Expand Up @@ -116,7 +119,6 @@ def _execute_get_task(self, task_queue):
except:
self.logger.warning("_execute_get_task: task_queue.get() failed (unkown reason)")
return None


# Handler for each Process that will call all the modules in the Queue
def ExecuteModule(self, Task_queue, Results_queue, Html_queue, Json_queue, domain, verbose=False):
Expand Down Expand Up @@ -299,17 +301,20 @@ def CleanJsonResults(self, domain, scope=False):
FinalList = []
if scope:
for item in self.JsonList:
# add emails to sql db
self._tasking_sql_add_email(item['email'].lower(),self.search_id,domain)
SecondList.append(item)
else:
for item in self.JsonList:
if domain.lower() in item['email'].lower():
# add scoped emails to sql db
self._tasking_sql_add_email(item['email'].lower(),self.search_id,domain)
SecondList.append(item)
for item in SecondList:
if item not in FinalList:
FinalList.append(item)
return FinalList


def Consumer(self, Results_queue, verbose):
while True:
try:
Expand Down Expand Up @@ -391,6 +396,56 @@ def _json_queue_start(self):
except:
self.logger.critical("_json_queue_start: FAILED to start Json_queue")

def _tasking_sql_reporting(self, domain):
    """
    Insert the initial reporting row for an email search and
    store the returned key on self.search_id for the whole scrape.

    domain = domain about to be scraped
    """
    self.logger.debug("_tasking_sql_reporting: adding initial row for reporting")
    db = sql_opperations.database()
    self.search_id = db.add_reporting(domain)

def _tasking_sql_reporting_finish(self, emails_found, emails_unique, emails_domain):
    """
    Close out the reporting row for the current scrape (self.search_id).

    emails_found  = total emails collected during the scrape
    emails_unique = total emails in the final de-duplicated list
    emails_domain = final count passed by the caller for the target domain
    """
    self.logger.debug("_tasking_sql_reporting: updating row for reporting")
    db = sql_opperations.database()
    db.update_reporting(emails_found, emails_unique, emails_domain, self.search_id)

def _tasking_sql_add_email(self, email_address, search_id, domain):
    """
    Insert one discovered email into the email table.

    email_address = the email address found
    search_id     = key linking this email back to its search
    domain        = domain the search was scoped to
    returns:
        email_id = key for further operations on this email row
    """
    self.logger.debug("_tasking_sql_add_email: adding email to table")
    db = sql_opperations.database()
    return db.set_email(email_address, search_id, domain)

def _tasking_sql_check_email(self, email_address):
    """
    Check the current db for a previously seen email.

    email_address = address to look up
    returns:
        result = boolean value
    """
    # TODO(review): stub -- there is no body yet, so callers actually
    # receive None rather than the documented boolean. Implement the
    # lookup before depending on this method.

def TaskSelector(self, domain, verbose=False, scope=False, Names=False, json="", Verify=False):
# Here it will check the Queue for the next task to be completed
Expand Down Expand Up @@ -533,6 +588,7 @@ def TaskSelector(self, domain, verbose=False, scope=False, Names=False, json="",
# Helps with testing only one module at a time. Helping with proper
# Module Dev and testing before integration
def TestModule(self, domain, module, verbose=False, scope=False, Names=False, json='', Verify=False):
self._tasking_sql_reporting(domain)
self.logger.debug("Starting TaskSelector for: " + str(domain))
Config = configparser.ConfigParser()
Config.read("Common/SimplyEmail.ini")
Expand Down Expand Up @@ -658,6 +714,7 @@ def TestModule(self, domain, module, verbose=False, scope=False, Names=False, js
str(e)
print helpers.color(error, warning=True)
if not json:
self._tasking_sql_reporting_finish(len(self.ConsumerList), len(FinalEmailList), FinalCount)
self.CompletedScreen(FinalCount, BuiltNameCount, domain)

def NameBuilder(self, domain, emaillist, Verbose=False):
Expand Down
115 changes: 68 additions & 47 deletions Common/sql_opperations.py
Expand Up @@ -46,13 +46,13 @@ class database(object):
"emails_unique", # INT - emails in the final count that are unique
]

def __init__(self, MainMenu, args=None):
def __init__(self, args=None):
    """Open the backing sqlite database and keep the connection on self.conn."""
    # args is accepted for interface compatibility but not used here.
    self.conn = self.database_connect()

def database_connect():
def database_connect(self):
"""
Connect with the backend ./simplyemail.db sqlite database and return the
connection object.
Expand Down Expand Up @@ -106,19 +106,39 @@ def get_emails(self):
cur.close()
return results

def set_email(self, email_json):
def set_email(self, email_address, search_id, domain):
    """
    Insert a newly discovered email into the email table.

    email_address = address being recorded
    search_id     = key of the search that found it (not stored in this
                    table; presumably linked via email_search_ids -- confirm)
    domain        = domain the search was scoped to
    returns:
        email_id = the id unique to email for life
    """
    # NOTE(review): helpers.get_searchid() is second-resolution wall-clock
    # time, so every email inserted in the same second shares an email_id.
    # Confirm that is acceptable or switch to a genuinely unique id.
    email_id = helpers.get_searchid()
    first_seen = helpers.get_datetime()
    cur = self.conn.cursor()
    cur.execute("""INSERT INTO email (email_address,
                                email_id,
                                domain,
                                first_seen,
                                last_seen,
                                instances_seen,
                                first_name,
                                last_name,
                                name_generated_email,
                                email_verified,
                                score)
                VALUES (?,?,?,?,?,?,?,?,?,?,?)""",
                (email_address, email_id, domain, first_seen, '', 0, '', '',
                 False, False, 0))
    cur.close()
    # Commit explicitly: sqlite3 wraps DML in an implicit transaction
    # (unless the connection is in autocommit mode), and these short-lived
    # per-call connections would otherwise roll the insert back on close.
    self.conn.commit()
    return email_id

def get_reporting_id(self):
"""
takes a primay key id
and returns the reporting object.
"""

def _add_reporting(self, domain, modules_enabled):
def add_reporting(self, domain, modules_enabled=0):
    """
    Create the initial reporting row for a new search.

    domain          = domain being scraped
    modules_enabled = key into the modules table (defaults to 0 / none)
    returns:
        search_id = key used to update this row when the search finishes
    """
    search_id = helpers.get_searchid()
    cur = self.conn.cursor()
    # The column list names 8 fields, so 8 placeholders and 8 values are
    # required -- the previous 7-value form raised sqlite3.OperationalError
    # on every call.
    cur.execute("""INSERT INTO reporting (search_id,
                            domain,
                            start_time,
                            end_time,
                            modules_enabled,
                            modules_enabled_key,
                            emails_found,
                            emails_unique)
                VALUES (?,?,?,?,?,?,?,?)""",
                (search_id, domain, helpers.get_datetime(), '', 0,
                 modules_enabled, 0, 0))
    cur.close()
    # Commit so the row survives this short-lived connection (sqlite3 uses
    # an implicit transaction around DML unless in autocommit mode).
    self.conn.commit()
    return search_id

def update_reporting(self, emails_found, emails_unique):
def update_reporting(self, emails_found, emails_unique, emails_domain, search_id):
    """
    Finalize the reporting row for a finished search.

    emails_found  = total emails found during the scrape
    emails_unique = unique emails in the final list
    emails_domain = final in-domain count supplied by the caller
    search_id     = key of the reporting row to update
    """
    cur = self.conn.cursor()
    cur.execute("UPDATE reporting SET end_time = ?, emails_found = ?, emails_unique = ?, emails_domain = ? WHERE search_id=?", [helpers.get_datetime(), emails_found, emails_unique, emails_domain, search_id])
    cur.close()
    # Commit so the update survives this short-lived connection (sqlite3
    # wraps DML in an implicit transaction unless in autocommit mode).
    self.conn.commit()


def _add_modules(self, search_id):
"""
builds the intial row for the modules
table during reporting row creation.
"""
f = False
cur = self.conn.cursor()
cur.execute("""INSERT INTO modules (search_id
"ask_search" ,
"canario_api" ,
"cannary_search" ,
"emailhunter_search" ,
"exaled_doc" ,
"exaled_docx" ,
"exaled_pdf" ,
"exaled_pptx" ,
"exaled_search" ,
"flickr_search" ,
"github_code" ,
"github_gist" ,
"github_user" ,
"google_csv" ,
"google_doc" ,
"google_docx" ,
"google_pdf" ,
"google_pptx" ,
"google_search" ,
"google_xlsx" ,
"html_scrape" ,
"oninstagram" ,
"pastebin_search" ,
"reddit_search" ,
"pgp_search" ,
"whois_api" ,
"whoisolgy_search" ,
"yahoo_search"
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", (search_id,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f)
cur.close()
# def add_modules(self, search_id):
# """
# builds the intial row for the modules
# table during reporting row creation.
# """
# f = False
# cur = self.conn.cursor()
# cur.execute("""INSERT INTO modules (search_id
# "ask_search" ,
# "canario_api" ,
# "cannary_search" ,
# "emailhunter_search" ,
# "exaled_doc" ,
# "exaled_docx" ,
# "exaled_pdf" ,
# "exaled_pptx" ,
# "exaled_search" ,
# "flickr_search" ,
# "github_code" ,
# "github_gist" ,
# "github_user" ,
# "google_csv" ,
# "google_doc" ,
# "google_docx" ,
# "google_pdf" ,
# "google_pptx" ,
# "google_search" ,
# "google_xlsx" ,
# "html_scrape" ,
# "oninstagram" ,
# "pastebin_search" ,
# "reddit_search" ,
# "pgp_search" ,
# "whois_api" ,
# "whoisolgy_search" ,
# "yahoo_search"
# VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", (search_id,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f)
# cur.close()



Expand Down
12 changes: 12 additions & 0 deletions Helpers/helpers.py
Expand Up @@ -21,6 +21,18 @@ def dictToJson(inputDict):
obj += json.dumps(item)
return obj

def get_searchid():
    """Return a wall-clock search id formatted DDMMYYYY + HHMMSS (14 digits)."""
    # Single strftime call; equivalent to formatting the date and time
    # pieces separately and concatenating them.
    # NOTE(review): ids collide when two searches start in the same second.
    return str(time.strftime("%d%m%Y%H%M%S"))

def get_datetime():
    """Return the current local time as 'DD/MM/YYYY HH:MM:SS'."""
    # One strftime call instead of concatenating date and time parts;
    # also avoids a local variable shadowing the stdlib name `datetime`.
    return str(time.strftime("%d/%m/%Y %H:%M:%S"))

def JsonListToJsonObj(inputJsonList, domain):
"""
Takes a list of json objects,
Expand Down
Empty file added data/__init__.py
Empty file.
48 changes: 31 additions & 17 deletions setup/setup_db.py → setup/reset_db.py
Expand Up @@ -42,34 +42,32 @@
c.execute('''CREATE TABLE "email" (
"id" integer PRIMARY KEY,
"email_address" text,
"email_id" integer,
"domain" text,
"first_seen" text,
"last_seen" text,
"instances_seen" integer,
"sources" text,
"first_name" text,
"last_name" text,
"name_generated_email" boolean,
"email_verified" boolean,
"score" integer
)''')

# domains will be a json blob of data
# email_ids = email primary keys id
# email_count = email count of ids
# search_ids = search_ids correclated to the domain
c.execute('DROP TABLE IF EXISTS "domain"')
c.execute('''CREATE TABLE "domain" (
c.execute('DROP TABLE IF EXISTS "url"')
c.execute('''CREATE TABLE "url" (
"id" integer PRIMARY KEY,
"domain" text,
"email_ids" text,
"email_count" integer,
"urls" text,
"last_scrapped" text,
"email_id" integer,
"url" text,
"datetime" text
)''')

c.execute('DROP TABLE IF EXISTS "email_search_ids"')
c.execute('''CREATE TABLE "email_search_ids" (
"id" integer PRIMARY KEY,
"email_id" integer,
"search_ids" text,
"webmail" boolean,
"pattern" text,
"allows_verification" boolean
"datetime" text
)''')

# type = hash, plaintext, token
Expand All @@ -94,11 +92,27 @@
"start_time" text,
"end_time" text,
"emails_found" integer,
"modules_enabled" text,
"modules_enabled_key" integer,
"emails_unique" integer,
"emails_domain" integer,
"emails_domain" integer
)''')

# domains will be a json blob of data
c.execute('DROP TABLE IF EXISTS "domain"')
c.execute('''CREATE TABLE "domain" (
"id" integer PRIMARY KEY,
"domain" text,
"email_ids" text,
"email_count" integer,
"urls" text,
"last_scrapped" text,
"search_ids" text,
"webmail" boolean,
"pattern" text,
"allows_verification" boolean
)''')


# table for modules used during search
c.execute('DROP TABLE IF EXISTS "modules"')
c.execute('''CREATE TABLE "modules" (
Expand Down

0 comments on commit 9efd5c7

Please sign in to comment.