Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removes some redundant code from nicktracker for slack and scummvm #266

Merged
merged 1 commit into from Feb 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
136 changes: 26 additions & 110 deletions lib/scummvm/nickTracker.py
Expand Up @@ -3,7 +3,8 @@
import lib.util as util
from datetime import date

def nick_tracker(log_dict, track_users_on_channels = False):

def nick_tracker(log_dict):
"""
Tracks all nicks and identifies the nicks which point to the same user

Expand All @@ -16,143 +17,58 @@ def nick_tracker(log_dict, track_users_on_channels = False):

"""
nicks = [] # list of all the nicknames
nick_same_list = [[] for i in range(config.MAX_EXPECTED_DIFF_NICKS)]
nick_channel_dict = []
channels_for_user = []
nicks_hash = []
channels_hash = []

#Getting all the nicknames in a list
nick_same_list = [[] for i in xrange(config.MAX_EXPECTED_DIFF_NICKS)]

def nick_append(nick, nicks, nicks_today_on_this_channel, track_users_on_channels):
if track_users_on_channels and (nick not in nicks_today_on_this_channel):
nicks_today_on_this_channel.append(nick) #not nicks as there are same nicks spread across multiple channels
nicks.append(nick)
elif nick not in nicks:
# Getting all the nicknames in a list
def nick_append(nick, nicks):
if nick not in nicks:
nicks.append(nick)
return nicks, nicks_today_on_this_channel

return nicks

for day_content_all_channels in log_dict.values():
#traverse over data of different channels for that day

channels_for_user_day = {}#empty for next day usage

# traverse over data of different channels for that day
for day_content in day_content_all_channels:

day_log = day_content["log_data"]
channel_name = day_content["auxiliary_data"]["channel"]
nicks_today_on_this_channel = []
day_logs = day_content["log_data"]

for i in day_log:
for day_log in day_logs:
# use regex to get the string between <> and append it to the nicks list
if(util.check_if_msg_line (i)):
m = re.search(r"\<(.*?)\>", i)
if (util.check_if_msg_line(day_log)):
m = re.search(r"\<(.*?)\>", day_log)
nick = util.correctLastCharCR(m.group(0)[1:-1])
nicks, nicks_today_on_this_channel = nick_append(nick, nicks, nicks_today_on_this_channel, track_users_on_channels)
nicks = nick_append(nick, nicks)

''' Forming list of lists for avoiding nickname duplicacy '''
for line in day_log:
if("Nick change:" in line):
for line in day_logs:
if ("Nick change:" in line):
old_nick = line.split()[3]
new_nick = line.split()[5]
nicks, nicks_today_on_this_channel = nick_append(old_nick, nicks, nicks_today_on_this_channel, track_users_on_channels)
nicks, nicks_today_on_this_channel = nick_append(new_nick, nicks, nicks_today_on_this_channel, track_users_on_channels)

#nicks.append(new_nick)
for i in range(config.MAX_EXPECTED_DIFF_NICKS):
nicks = nick_append(old_nick, nicks)
nicks = nick_append(new_nick, nicks)

for i in xrange(config.MAX_EXPECTED_DIFF_NICKS):
if old_nick in nick_same_list[i] or new_nick in nick_same_list[i]:
if old_nick not in nick_same_list[i]:
nick_same_list[i].append(old_nick)
if new_nick not in nick_same_list[i]:
nick_same_list[i].append(new_nick)
break
if not nick_same_list[i]:
if old_nick not in nick_same_list[i]:
nick_same_list[i].append(old_nick)
if new_nick not in nick_same_list[i]:
nick_same_list[i].append(new_nick)
nick_same_list[i].append(old_nick)
nick_same_list[i].append(new_nick)
break

if track_users_on_channels:
'''
Creating list of dictionaries nick_channel_dict of the format :
[{'nickname':'rohan', 'channels':[['#abc', 0],['#bcd', 0]]},{}]
'''
considered_nicks = []
if config.DEBUGGER:
print "Analysis on", (str(day_content["auxiliary_data"]["day"]) + "-" + str(day_content["auxiliary_data"]["month"])), channel_name

for user in nicks_today_on_this_channel:
f = 1
for nick_tuple in nick_same_list:
if user in nick_tuple:
user_nick = nick_tuple[0]
f = 0
break
if f:
user_nick = user

'''for channels of user on a day'''
if channels_for_user_day.has_key(user_nick) and channel_name not in channels_for_user_day[user_nick]:
channels_for_user_day[user_nick].append(channel_name)
else:
channels_for_user_day[user_nick] = [channel_name]

flag = 1
for dictionary in nick_channel_dict:
if dictionary['nickname'] == user_nick and user_nick not in considered_nicks:
index = searchChannel(channel_name, dictionary['channels'])
if index == -1:
dictionary['channels'].append([channel_name,1])
else:
dictionary['channels'][index][1]+=1
flag = 0
considered_nicks.append(user_nick)
break
if flag:
nick_channel_dict.append({'nickname':user_nick, 'channels': [[channel_name, 1]]})
considered_nicks.append(user_nick)

channels_for_user.append(channels_for_user_day)


for nick in nicks:
for index in range(config.MAX_EXPECTED_DIFF_NICKS):
for index in xrange(config.MAX_EXPECTED_DIFF_NICKS):
if nick in nick_same_list[index]:
break
if not nick_same_list[index]:
nick_same_list[index].append(nick)
break

if config.DEBUGGER:
print "========> 30 on " + str(len(nicks)) + " nicks"
print "========> 30 on {} nicks".format(len(nicks))
print nicks[:30]
print "========> 30 on " + str(len(nick_same_list)) + " nick_same_list"
print "========> 30 on {} nick_same_list".format(len(nick_same_list))
print nick_same_list[:30]

if not track_users_on_channels:
return [nicks, nick_same_list]

else:
for dicts in nick_channel_dict:
nick = dicts['nickname']
if nick not in nicks_hash:
nicks_hash.append(nick)

for channel in dicts['channels']:
if channel[0] not in channels_hash:
channels_hash.append(channel[0])

return [nicks, nick_same_list, channels_for_user, nick_channel_dict, nicks_hash, channels_hash]


def searchChannel(channel, channel_list):
ans = -1
i = 0
for c_tuple in channel_list:
if c_tuple[0] == channel:
ans = i
break
i += 1
return ans
return [nicks, nick_same_list]
131 changes: 23 additions & 108 deletions lib/slack/nickTracker.py
@@ -1,9 +1,9 @@
import re
import lib.slack.config as config
import lib.slack.util as util
from datetime import date

def nick_tracker(log_dict, track_users_on_channels = False):

def nick_tracker(log_dict):
"""
Tracks all nicks and identifies the nicks which point to the same user

Expand All @@ -16,143 +16,58 @@ def nick_tracker(log_dict, track_users_on_channels = False):

"""
nicks = [] # list of all the nicknames
nick_same_list = [[] for i in range(config.MAX_EXPECTED_DIFF_NICKS)]
nick_channel_dict = []
channels_for_user = []
nicks_hash = []
channels_hash = []

#Getting all the nicknames in a list
nick_same_list = [[] for i in xrange(config.MAX_EXPECTED_DIFF_NICKS)]

def nick_append(nick, nicks, nicks_today_on_this_channel, track_users_on_channels):
if track_users_on_channels and (nick not in nicks_today_on_this_channel):
nicks_today_on_this_channel.append(nick) #not nicks as there are same nicks spread across multiple channels
nicks.append(nick)
elif nick not in nicks:
# Getting all the nicknames in a list
def nick_append(nick, nicks):
if nick not in nicks:
nicks.append(nick)
return nicks, nicks_today_on_this_channel

return nicks

for day_content_all_channels in log_dict.values():
#traverse over data of different channels for that day

channels_for_user_day = {}#empty for next day usage

# traverse over data of different channels for that day
for day_content in day_content_all_channels:

day_log = day_content["log_data"]
channel_name = day_content["auxiliary_data"]["channel"]
nicks_today_on_this_channel = []
day_logs = day_content["log_data"]

for i in day_log:
for day_log in day_logs:
# use regex to get the string between <> and append it to the nicks list
if(util.check_if_msg_line (i)):
m = re.search(r"\<(.*?)\>", i)
if(util.check_if_msg_line (day_log)):
m = re.search(r"\<(.*?)\>", day_log)
nick = util.correctLastCharCR(m.group(0)[1:-1])
nicks, nicks_today_on_this_channel = nick_append(nick, nicks, nicks_today_on_this_channel, track_users_on_channels)
nicks = nick_append(nick, nicks)

''' Forming list of lists for avoiding nickname duplicacy '''
for line in day_log:
for line in day_logs:
if("Nick change:" in line):
old_nick = line.split()[3]
new_nick = line.split()[5]
nicks, nicks_today_on_this_channel = nick_append(old_nick, nicks, nicks_today_on_this_channel, track_users_on_channels)
nicks, nicks_today_on_this_channel = nick_append(new_nick, nicks, nicks_today_on_this_channel, track_users_on_channels)
nicks = nick_append(old_nick, nicks)
nicks = nick_append(new_nick, nicks)

#nicks.append(new_nick)
for i in range(config.MAX_EXPECTED_DIFF_NICKS):
for i in xrange(config.MAX_EXPECTED_DIFF_NICKS):
if old_nick in nick_same_list[i] or new_nick in nick_same_list[i]:
if old_nick not in nick_same_list[i]:
nick_same_list[i].append(old_nick)
if new_nick not in nick_same_list[i]:
nick_same_list[i].append(new_nick)
break
if not nick_same_list[i]:
if old_nick not in nick_same_list[i]:
nick_same_list[i].append(old_nick)
if new_nick not in nick_same_list[i]:
nick_same_list[i].append(new_nick)
nick_same_list[i].append(old_nick)
nick_same_list[i].append(new_nick)
break

if track_users_on_channels:
'''
Creating list of dictionaries nick_channel_dict of the format :
[{'nickname':'rohan', 'channels':[['#abc', 0],['#bcd', 0]]},{}]
'''
considered_nicks = []
if config.DEBUGGER:
print "Analysis on", (str(day_content["auxiliary_data"]["day"]) + "-" + str(day_content["auxiliary_data"]["month"])), channel_name

for user in nicks_today_on_this_channel:
f = 1
for nick_tuple in nick_same_list:
if user in nick_tuple:
user_nick = nick_tuple[0]
f = 0
break
if f:
user_nick = user

'''for channels of user on a day'''
if channels_for_user_day.has_key(user_nick) and channel_name not in channels_for_user_day[user_nick]:
channels_for_user_day[user_nick].append(channel_name)
else:
channels_for_user_day[user_nick] = [channel_name]

flag = 1
for dictionary in nick_channel_dict:
if dictionary['nickname'] == user_nick and user_nick not in considered_nicks:
index = searchChannel(channel_name, dictionary['channels'])
if index == -1:
dictionary['channels'].append([channel_name,1])
else:
dictionary['channels'][index][1]+=1
flag = 0
considered_nicks.append(user_nick)
break
if flag:
nick_channel_dict.append({'nickname':user_nick, 'channels': [[channel_name, 1]]})
considered_nicks.append(user_nick)

channels_for_user.append(channels_for_user_day)


for nick in nicks:
for index in range(config.MAX_EXPECTED_DIFF_NICKS):
for index in xrange(config.MAX_EXPECTED_DIFF_NICKS):
if nick in nick_same_list[index]:
break
if not nick_same_list[index]:
nick_same_list[index].append(nick)
break

if config.DEBUGGER:
print "========> 30 on " + str(len(nicks)) + " nicks"
print "========> 30 on {} nicks".format(len(nicks))
print nicks[:30]
print "========> 30 on " + str(len(nick_same_list)) + " nick_same_list"
print "========> 30 on {} nick_same_list".format(len(nick_same_list))
print nick_same_list[:30]

if not track_users_on_channels:
return [nicks, nick_same_list]

else:
for dicts in nick_channel_dict:
nick = dicts['nickname']
if nick not in nicks_hash:
nicks_hash.append(nick)

for channel in dicts['channels']:
if channel[0] not in channels_hash:
channels_hash.append(channel[0])

return [nicks, nick_same_list, channels_for_user, nick_channel_dict, nicks_hash, channels_hash]


def searchChannel(channel, channel_list):
ans = -1
i = 0
for c_tuple in channel_list:
if c_tuple[0] == channel:
ans = i
break
i += 1
return ans
return [nicks, nick_same_list]
2 changes: 1 addition & 1 deletion scummvm.py
Expand Up @@ -182,7 +182,7 @@
starting_date = date[0]
ending_date = date[1]
log_data = reader.linux_input(log_directory, channel_name, starting_date, ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data, False)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)
for cutoff in cut_offs:
print("dynamic community analysis for", starting_date, "with cutoff=", cutoff,
"started at: ", datetime.datetime.now(), file=exec_times_file)
Expand Down
2 changes: 1 addition & 1 deletion slack.py
Expand Up @@ -185,7 +185,7 @@
starting_date = date[0]
ending_date = date[1]
log_data = reader.linux_input_slack(log_directory, starting_date, ending_date)
nicks, nick_same_list = nickTracker.nick_tracker(log_data, False)
nicks, nick_same_list = nickTracker.nick_tracker(log_data)
for cutoff in cut_offs:
print("dynamic community analysis for", starting_date, "with cutoff=", cutoff,
"started at: ", datetime.datetime.now(), file=exec_times_file)
Expand Down