In [73]:
"""
identify users posting number of comments above a specific threshold any given hour of the day
"""

'\nidentify users posting number of comments above a specific threshold any given hour of the day\n'

In [113]:
import requests
from datetime import datetime, timedelta
import math
import time

In [75]:
class CustomError(Exception):
    """
    custom exception class
    """
    pass

In [131]:
def get_date_time(day_delta):
    """
    returns datetime object for start-of-day day_delta days from the current datetime
    """
    dtime = datetime.now()
    dtime = dtime.replace(minute = 0, hour = 0, second = 0, microsecond = 0)
    dtime = dtime + timedelta(days = day_delta)
    return dtime

In [133]:
allowed_channels = "ET,TOI"
min_doc_count = 30

def get_agg_bucket(start_date_time, end_date_time):
    """
    params - 
    start_date_time, end_date_time -> time range to query in a specific period (in milliseconds)
    return - 
    the aggregation bucket list of json with 'doc_count' and 'key' (as returned by elastic search native api)
    """
    
    base_agg_url = "http://commentmoderator.indiatimes.com/mytimes/elasticCommentQuery?appKey={}&sort=desc&from=0&size=0&aggField=F_ADD&aggSize=150&aggMinDocCount={}&sDateEpoch={:.0f}&eDateEpoch={:.0f}"
    url = base_agg_url.format(allowed_channels, min_doc_count, start_date_time, end_date_time)

    print(base_agg_url)
    bucket = None
    try:
        resp = requests.get(url)
        if (resp.status_code == 200):
            bucket = resp.json()['aggregations']['2']['buckets']
        else:
            raise CustomError('Aggregation API call failed : Status code : {}'.format(resp.status_code))
    except Exception as ex:
        template = "An exception of type {} occurred. Message: {}"
        #message = template.format(type(ex).__name__, ex.args)
        message = template.format(type(ex).__name__, ex)
        print (message)
    
    return bucket

def get_date_time_from_epoch(time_epoch):
    """
    returns time string formatted in the given format for the given epochs in milliseconds
    """
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time_epoch/1000))

In [134]:
def get_list_faulty_users(datetime_obj):
    net_list_faulty_users = []
    n_hour_interval = 1
    n_total_hours = 24

    print('date considering : {}'.format(datetime_obj))
    #datetime(2019,5,8,0,0)
    start_date_time = datetime_obj.timestamp()*1000

    for hour_num in range(0, n_total_hours, n_hour_interval):
        #calculating end_date_time as n_hour_interval hours from the start_date_time
        end_date_time = start_date_time + n_hour_interval*3600*1000
        hour_duration = "{} - {}".format(get_date_time_from_epoch(start_date_time), get_date_time_from_epoch(end_date_time))

        print("duration : {}".format(hour_duration))

        for x in get_agg_bucket(start_date_time, end_date_time):
            x['duration'] = hour_duration
            print(x)
            net_list_faulty_users.append(x)
        start_date_time = end_date_time
    return net_list_faulty_users

In [135]:
net_list_faulty_users = get_list_faulty_users(get_date_time(-1))

date considering : 2019-06-17 00:00:00
duration : 2019-06-17 00:00:00 - 2019-06-17 01:00:00
http://commentmoderator.indiatimes.com/mytimes/elasticCommentQuery?appKey={}&sort=desc&from=0&size=0&aggField=F_ADD&aggSize=150&aggMinDocCount={}&sDateEpoch={:.0f}&eDateEpoch={:.0f}
duration : 2019-06-17 01:00:00 - 2019-06-17 02:00:00
http://commentmoderator.indiatimes.com/mytimes/elasticCommentQuery?appKey={}&sort=desc&from=0&size=0&aggField=F_ADD&aggSize=150&aggMinDocCount={}&sDateEpoch={:.0f}&eDateEpoch={:.0f}
duration : 2019-06-17 02:00:00 - 2019-06-17 03:00:00
http://commentmoderator.indiatimes.com/mytimes/elasticCommentQuery?appKey={}&sort=desc&from=0&size=0&aggField=F_ADD&aggSize=150&aggMinDocCount={}&sDateEpoch={:.0f}&eDateEpoch={:.0f}
duration : 2019-06-17 03:00:00 - 2019-06-17 04:00:00
http://commentmoderator.indiatimes.com/mytimes/elasticCommentQuery?appKey={}&sort=desc&from=0&size=0&aggField=F_ADD&aggSize=150&aggMinDocCount={}&sDateEpoch={:.0f}&eDateEpoch={:.0f}
duration : 2019-06-17

In [136]:
#sorting the list of users in reverse doc_count
net_list_faulty_users.sort(key = lambda x: x["doc_count"], reverse=True)

In [137]:
for x in net_list_faulty_users:
    print(x)

{'doc_count': 62, 'key': 'suriyabhai127@gmail.com', 'duration': '2019-06-17 06:00:00 - 2019-06-17 07:00:00'}
{'doc_count': 54, 'key': 'suriyabhai127@gmail.com', 'duration': '2019-06-17 07:00:00 - 2019-06-17 08:00:00'}
{'doc_count': 54, 'key': 'rama@perfectvips.com', 'duration': '2019-06-17 12:00:00 - 2019-06-17 13:00:00'}
{'doc_count': 49, 'key': 'dude33745@gmail.com', 'duration': '2019-06-17 23:00:00 - 2019-06-18 00:00:00'}
{'doc_count': 43, 'key': 'diligentgemini@gmail.com', 'duration': '2019-06-17 08:00:00 - 2019-06-17 09:00:00'}
{'doc_count': 43, 'key': 'suriyabhai127@gmail.com', 'duration': '2019-06-17 12:00:00 - 2019-06-17 13:00:00'}
{'doc_count': 30, 'key': 'myntracustomerc@gmail.com', 'duration': '2019-06-17 12:00:00 - 2019-06-17 13:00:00'}


In [150]:
def get_mail_body(list_faulty_users):    
    html = "Comment Rate Exceed User Alert<br>http://commentmoderator.indiatimes.com/<br><br>" \
           + "<table>" \
            + "<tr><td><b>Comment Count</b></td><td><b>Email Id</b></td><td><b>Duration</b></td></tr><tr></tr>"

    for x in list_faulty_users:
        html = html + "<tr><td>{}</td><td>{}</td><td>{}</td></tr>".format(x['doc_count'], x['key'], x['duration'])
    html = html + "</table>"
    return html

In [146]:
get_mail_body(net_list_faulty_users)

'Comment Rate Exceed User Alert<br>http://commentmoderator.indiatimes.com/<br><br><table><tr><td><b>Comment Count</b></td><td><b>Email Id</b></td><td><b>Duration</b></td></tr><tr></tr><tr><td>62</td><td>suriyabhai127@gmail.com</td><td>2019-06-17 06:00:00 - 2019-06-17 07:00:00</td></tr><tr><td>54</td><td>suriyabhai127@gmail.com</td><td>2019-06-17 07:00:00 - 2019-06-17 08:00:00</td></tr><tr><td>54</td><td>rama@perfectvips.com</td><td>2019-06-17 12:00:00 - 2019-06-17 13:00:00</td></tr><tr><td>49</td><td>dude33745@gmail.com</td><td>2019-06-17 23:00:00 - 2019-06-18 00:00:00</td></tr><tr><td>43</td><td>diligentgemini@gmail.com</td><td>2019-06-17 08:00:00 - 2019-06-17 09:00:00</td></tr><tr><td>43</td><td>suriyabhai127@gmail.com</td><td>2019-06-17 12:00:00 - 2019-06-17 13:00:00</td></tr><tr><td>30</td><td>myntracustomerc@gmail.com</td><td>2019-06-17 12:00:00 - 2019-06-17 13:00:00</td></tr></table>'

In [90]:
# try:
#     resp = requests.get(url)
#     if (resp.status_code == 200):
#         bucket = resp.json()['aggregations']['2']['buckets']
#     else:
#         error_msg = 'failure in aggregation API call : status code - {}'.format(resp.status_code)
# except requests.exceptions.Timeout:
#     error_msg = "Internal Api Error : connection timeout"
# except requests.exceptions.ConnectionError:
#     error_msg = "Internal Api Error : connection error"
# except requests.exceptions.RequestException as e:
#     logging.error('Exception in get_single_ms_by_api for query_word : {}, error: {}'.format(query_word, e))
#     error_msg = "Internal Api Error : request exception : {}".format(e)
# except JSONDecodeError as ex:
#     error_msg = 'unable to parse the aggregation API response as json : {}'.format(ex)
# except KeyError as key_error:
#     error_msg = 'failure in retrieving json key from response : {}'.format(key_error)

In [153]:
def get_date_time_hour(hour_delta):
    """
    returns datetime object for start-of-day day_delta days from the current datetime
    """
    dtime = datetime.now()
    dtime = dtime.replace(minute = 0, second = 0, microsecond = 0)
    dtime = dtime + timedelta(hours = hour_delta)
    return dtime

In [156]:
get_date_time_hour(-1)

datetime.datetime(2019, 6, 18, 16, 0)