In [93]:

# parses the log file and returns a dictionary with the keys being the dates and the values being dictionaries
# containing users as key and a list of their sessions on that day as values
def parse_logfile(file):
    """Parse a whitespace-delimited session log into a nested dictionary.

    Every non-blank line must hold at least five whitespace-separated
    fields, in this order: date, time, activity, server, user.
    Blank (whitespace-only) lines are skipped.

    Args:
        file: Path of the log file to read.

    Returns:
        A dict keyed by date, inserted in ascending (string) order. Each
        value is a dict keyed by user (in order of first appearance on that
        date) whose value is a list of session dicts with the keys "time",
        "activity" and "server", sorted by the "time" string.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line has fewer than five fields.
    """
    grouped = {}
    # The context manager guarantees the handle is closed even if a line
    # turns out to be malformed halfway through the file.
    with open(file, "r") as fh:
        for line in fh:
            items = line.split()
            if not items:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            session = {"time": items[1], "activity": items[2], "server": items[3]}
            grouped.setdefault(items[0], {}).setdefault(items[4], []).append(session)

    # Rebuild with dates in sorted order and each user's sessions ordered by
    # time; sorted() is stable, so equal timestamps keep their file order.
    return {
        date: {
            user: sorted(sessions, key=lambda entry: entry["time"])
            for user, sessions in grouped[date].items()
        }
        for date in sorted(grouped)
    }

def susp_activities(date_log):
    """Collect, per date, the users whose sessions look suspicious.

    A user's sessions are walked in list order and the user is flagged for
    that date as soon as one of these is seen:
      * the fifth entry whose activity is 'login', or
      * an entry whose activity is not 'login' and whose hour (the first
        two characters of 'time') lies between 5 and 12 inclusive.

    NOTE(review): the hour window is never applied to 'login' entries;
    confirm that this is the intended rule.

    Args:
        date_log: dict of date -> {user: [session dict, ...]}.

    Returns:
        dict of date -> {user: that user's full session list}. Every date
        of date_log is present, with an empty dict when nobody is flagged.
    """
    flagged = {}
    for date, users in date_log.items():
        flagged_today = {}
        flagged[date] = flagged_today
        for user, sessions in users.items():
            login_total = 0
            for entry in sessions:
                if entry['activity'] != 'login':
                    hour = int(entry['time'][:2])
                    if 5 <= hour <= 12:
                        flagged_today[user] = sessions
                        break
                    continue
                login_total += 1
                if login_total == 5:
                    flagged_today[user] = sessions
                    break
    return flagged

def irresp_behavior(date_log):
    """Collect, per date, the users with more logins than other entries.

    Any session whose activity is not 'login' is counted as a logout.

    Args:
        date_log: dict of date -> {user: [session dict, ...]}.

    Returns:
        dict of date -> {user: that user's full session list} for users
        whose login count exceeds their non-login count on that date.
        Every date of date_log is present, possibly with an empty dict.
    """
    report = {}
    for date, users in date_log.items():
        offenders = {}
        report[date] = offenders
        for user, sessions in users.items():
            login_total = sum(1 for entry in sessions if entry['activity'] == 'login')
            # Everything that is not a login falls into the logout bucket.
            logout_total = len(sessions) - login_total
            if login_total > logout_total:
                offenders[user] = sessions
    return report

def system_glitch(date_log):
    """Collect, per date, the users with fewer logins than other entries.

    Any session whose activity is not 'login' is counted as a logout.

    Args:
        date_log: dict of date -> {user: [session dict, ...]}.

    Returns:
        dict of date -> {user: that user's full session list} for users
        whose login count is below their non-login count on that date.
        Every date of date_log is present, possibly with an empty dict.
    """
    glitches = {}
    for date, users in date_log.items():
        affected = {}
        glitches[date] = affected
        for user, sessions in users.items():
            login_total = sum(1 for entry in sessions if entry['activity'] == 'login')
            # Everything that is not a login falls into the logout bucket.
            logout_total = len(sessions) - login_total
            if login_total < logout_total:
                affected[user] = sessions
    return glitches
                    

# This seems to be a useful recurring pattern. It may not be efficient, but it works for now.
#for date in date_log:
#    for user in date_log[date]:
#        for session in date_log[date][user]: 

# returns all the domains and the users for each domain
def domain_count(file):
    """Map each e-mail domain found in the log to the set of its users.

    The fifth whitespace-separated field of every non-blank line is read as
    ``user@domain``; the text before the first '@' is the user and the text
    between the first and second '@' (or the end) is the domain.
    Blank (whitespace-only) lines are skipped.

    Args:
        file: Path of the log file to read.

    Returns:
        dict of domain -> set of user names seen with that domain.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line has fewer than five fields or its
            fifth field contains no '@'.
    """
    domains = {}
    # The context manager closes the file exactly once, also on errors.
    with open(file, "r") as fh:
        for line in fh:
            items = line.split()
            if not items:
                continue
            address_parts = items[4].split('@')
            domains.setdefault(address_parts[1], set()).add(address_parts[0])
    return domains

def write_report(file_name, data, behavior):
    """Write a per-user report of flagged sessions to a text file.

    The file is created or overwritten. It starts with a three-line banner
    holding the behavior name and the total number of (date, user) cases,
    followed by one section per user (in order of first appearance): a line
    with the user and the number of dates they appear on, then for each of
    those dates a DATE line and one line per session.

    Args:
        file_name: Path of the report file to (over)write.
        data: dict of date -> {user: [session dict, ...]}, where each
            session dict has the keys 'time', 'activity' and 'server'.
        behavior: Title shown in the banner.

    Raises:
        OSError: If the file cannot be opened for writing.
        KeyError: If a session dict lacks one of the expected keys.
    """
    total = sum(len(users) for users in data.values())

    # Build every user's section before touching the file, so malformed
    # data cannot leave a half-written report behind.
    user_strings = {}
    user_count = {}
    for date, users in data.items():
        for user, sessions in users.items():
            user_count[user] = user_count.get(user, 0) + 1
            lines = user_strings.setdefault(user, [])
            lines.append(f"\tDATE: [{date}]---\n")
            lines.extend(
                f"\t\t\t{session['time']}\t\t{session['activity']}\t\t{session['server']}\n"
                for session in sessions
            )

    # Mode "w" already truncates an existing file; no separate reset needed.
    with open(file_name, "w") as fh:
        fh.write("==============================\n")
        fh.write(f"=== {behavior} ({total} cases) ===\n")
        fh.write("==============================\n")
        for user, lines in user_strings.items():
            fh.write(f"{user}\t\t\t{user_count[user]}\n")
            fh.write("".join(lines))

def write_domain_report(file_name, domains):
    """Write a table of domains and their user counts to a text file.

    The file is created or overwritten. After a three-line banner showing
    the number of domains, one line is written per domain: the domain
    right-justified to 20 characters followed by its number of users
    right-justified to 12 characters.

    Args:
        file_name: Path of the report file to (over)write.
        domains: dict of domain name (str) -> collection of users.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # The context manager closes (and therefore flushes) the file, so the
    # report is fully on disk when this function returns.
    with open(file_name, "w") as fh:
        fh.write("==============================\n")
        fh.write(f"=== DOMAIN COUNT ({len(domains)} DOMAINS) ===\n")
        fh.write("==============================\n")
        for domain, users in domains.items():
            fh.write(f"{domain.rjust(20)}{str(len(users)).rjust(12)}\n")
        
# Additional insight: check whether there are any users who exhibit normal behavior according to the already defined report issues.
# Returns True if at least one user is normal, else False.
def normal_behavior(data_log):
    """Report the users who are never flagged by any behavior check.

    A user counts as normal when they do not appear, on any date, in the
    results of susp_activities, irresp_behavior or system_glitch. The
    normal users are written, sorted by name, to "normal_report.txt" in the
    current working directory (the file is created or overwritten).

    Args:
        data_log: dict of date -> {user: [session dict, ...]}.

    Returns:
        True if at least one user is normal, otherwise False.

    Raises:
        OSError: If "normal_report.txt" cannot be opened for writing.
    """
    # Start from every user in the log that was passed in (the argument,
    # not a module-level variable).
    users = {user for day in data_log.values() for user in day}

    # Drop every user who shows up in any of the problem reports.
    reports = (
        susp_activities(data_log),
        irresp_behavior(data_log),
        system_glitch(data_log),
    )
    for report in reports:
        for flagged in report.values():
            users.difference_update(flagged)

    # Sort so the report is reproducible; set iteration order is arbitrary.
    user_list = sorted(users)
    with open("normal_report.txt", "w") as fh:
        fh.write("==============================\n")
        fh.write(f"=== CHECK BEHAVIOR ({len(user_list)} CASES) ===\n")
        fh.write("==============================\n")
        for user in user_list:
            fh.write(f"{user}\n")

    return bool(user_list)

     
# Parse the log once; the report cells below reuse this date-keyed dictionary.
date_log = parse_logfile("userlog.log")

In [94]:
# checking if sorted by date
# for date in date_log:
#    print(date)

In [95]:
#checking if member sessions are sorted by time
# for date in date_log:
#    for user in date_log[date]:
#        for session in date_log[date][user]:
#            print(session['time'])
#        print(f"====={user}======")

In [96]:
# Generate the report files: one per behavior category computed from date_log,
# plus a domain report that re-reads the raw log file.
write_report("suspicious_report.txt", susp_activities(date_log), "SUSPICIOUS ACTIVITIES")
write_report("irresponsible_report.txt", irresp_behavior(date_log), "IRRESPONSIBLE BEHAVIOR")
write_report("glitch_report.txt", system_glitch(date_log), "SYSTEM GLITCH")
write_domain_report("domain_report.txt", domain_count("userlog.log"))

In [97]:
# Also writes normal_report.txt as a side effect; the returned boolean is the cell output.
normal_behavior(date_log) # checking if there are any users who do not exhibit any issues

False