In [10]:
!export AWS_DEFAULT_REGION=us-east-1

In [11]:
import base64
import json
from collections import defaultdict
from datetime import datetime, timedelta, timezone

import awswrangler as wr
import boto3
from botocore.config import Config
from tqdm.notebook import tqdm

In [12]:
# Configure boto3's default session to use the us-east-1 region.
boto3.setup_default_session(region_name="us-east-1")

In [19]:
# Collect "Login" actions whose user agent contains "okhttp", created on or
# after 2023-12-01, for a single user, oldest first.
# NOTE(review): `ActionLog` is not imported anywhere in this notebook; it is
# presumably injected by the kernel (e.g. a Django shell) — confirm.
login_logs = (
    ActionLog.objects
    .filter(
        action="Login",
        user_agent__icontains="okhttp",
        created_at__gte="2023-12-01",
        user__username='edgar.silva',
    )
    .exclude(user__username="luan.poletti.ccr")
    .exclude(user__username="marina.carpes.ccr")
    # Fetch the related user in the same query; otherwise reading
    # `log.user.username` below issues one extra query per row.
    .select_related("user")
    .order_by("created_at")
)

# One record per login: the username and the login time as a POSIX timestamp
# (seconds, float).
action_logs = [
    {
        "username": log.user.username,
        "created_at": log.created_at.timestamp(),
    }
    for log in login_logs
]

In [18]:
# Display the collected login records.
action_logs

[{'username': 'edgar.silva', 'created_at': '01/12/2023'},
 {'username': 'edgar.silva', 'created_at': '01/12/2023'},
 {'username': 'edgar.silva', 'created_at': '01/12/2023'},
 {'username': 'edgar.silva', 'created_at': '01/12/2023'},
 {'username': 'edgar.silva', 'created_at': '04/12/2023'},
 {'username': 'edgar.silva', 'created_at': '07/12/2023'},
 {'username': 'edgar.silva', 'created_at': '07/12/2023'}]

In [23]:
# For every login in `action_logs`, query the Lambda's CloudWatch logs around
# the login time and group the matching request rows by Authorization header.
token_groups = defaultdict(list)
# Log-event pointers already stored per token. Query windows of nearby logins
# can overlap, so the same event may come back from more than one query.
seen_ptrs = defaultdict(set)

for login in tqdm(action_logs):
    username = login['username']
    # Base64 of the JSON fragment `","username":"<username>`, minus its last
    # four characters, used as a substring filter on the Authorization header.
    # The summary cell decodes the token's second segment as base64 JSON with
    # a `username` key, which is what this is presumably matching.
    # NOTE(review): a base64 substring only matches when the fragment starts on
    # a 3-byte boundary of the encoded payload — confirm for this token layout.
    username_filter = base64.b64encode(
        ('","username":"' + username).encode('utf-8')
    )[:-4].decode("utf-8")

    # Timezone-aware UTC datetimes keep the query window independent of the
    # machine's local timezone. This replaces the former hard-coded 3-hour
    # correction applied to naive `utcfromtimestamp` values.
    login_time = datetime.fromtimestamp(login['created_at'], tz=timezone.utc)
    start_time = login_time - timedelta(seconds=30)
    end_time = login_time + timedelta(minutes=30)

    df = wr.cloudwatch.read_logs(
        log_group_names=["/aws/lambda/roadlabsspotway-production"],
        start_time=start_time,
        end_time=end_time,
        query="""
            fields @timestamp, httpMethod, path, `headers.User-Agent`, `headers.Authorization`, `queryStringParameters.company`
            | sort @timestamp asc
            | filter headers.Authorization like "{}"
        """.format(username_filter),
        limit=2000
    )

    for row in df.to_dict('records'):
        user_agent = row.get('headers.User-Agent')
        path = row.get('path')
        # Rows lacking a user agent or path cannot be classified; skip them
        # instead of failing on a non-string value.
        if not isinstance(user_agent, str) or not isinstance(path, str):
            continue
        if "okhttp" not in user_agent:
            continue
        # Drop GETs on longer /Reporting/ paths (more than 20 characters).
        if "/Reporting/" in path and len(path) > 20 and row['httpMethod'] == "GET":
            continue
        if path in ["/ApprovalTransition/", "/WmDBSync/"] and row['httpMethod'] == "GET":
            continue

        token = row['headers.Authorization']
        if row['ptr'] not in seen_ptrs[token]:
            seen_ptrs[token].add(row['ptr'])
            token_groups[token].append(row)

  0%|          | 0/7 [00:00<?, ?it/s]

AccessDeniedException: An error occurred (AccessDeniedException) when calling the StartQuery operation: User with accountId: 608592777334 is not authorized to perform StartQuery on resources /aws/lambda/roadlabsspotway-production.

In [10]:
# Print one tab-separated summary line per token group, ordered by the time of
# each group's first request. Columns: username, first request time, elapsed
# time, FULL/PARTIAL, reporting download estimate, reportings created,
# reportings edited, files created.
ordered_stuff = sorted(
    (
        (token, sorted(group, key=lambda x: x['timestamp']))
        for token, group in token_groups.items()
    ),
    key=lambda x: x[1][0]['timestamp'],
)

for token, group in ordered_stuff:
    # The header's last space-separated part is split on "." and its second
    # segment is decoded as base64 JSON (looks like a JWT payload). JWT
    # segments use the URL-safe alphabet without padding; the standard decoder
    # would silently drop "-" and "_", so decode URL-safe and pad exactly.
    payload_b64 = token.split(" ")[-1].split(".")[1]
    payload_b64 += "=" * (-len(payload_b64) % 4)
    token_obj = json.loads(base64.urlsafe_b64decode(payload_b64.encode("utf-8")))
    username = token_obj['username']

    # Elapsed time between the first and last request of the group. Only a
    # leading "0 days " is removed, so multi-day spans such as "10 days ..."
    # are not mangled. Fractional seconds are dropped.
    time_spent = str(group[-1]['timestamp'] - group[0]['timestamp'])
    if time_spent.startswith("0 days "):
        time_spent = time_spent[len("0 days "):]
    time_spent = time_spent.split(".")[0]

    reporting_created = sum(
        1 for b in group if b['path'] == "/Reporting/" and b['httpMethod'] == "POST"
    )
    reporting_edited = sum(
        1 for b in group if "/Reporting/" in b['path'] and b['httpMethod'] == "PATCH"
    )
    files_created = sum(
        1 for b in group if b['path'] == "/ReportingFile/" and b['httpMethod'] == "POST"
    )
    reporting_reqs = sum(
        1 for b in group if b['path'] == "/Reporting/" and b['httpMethod'] == "GET"
    )
    # A group counts as FULL when any request path contains "Road/".
    is_full = any("Road/" in b['path'] for b in group)

    print("\t".join([
        username,
        group[0]['timestamp'].strftime("%d/%m/%Y %H:%M"),
        time_spent,
        "FULL" if is_full else "PARTIAL",
        # NOTE(review): 300 is presumably the /Reporting/ page size. With no
        # GET /Reporting/ requests this prints "300", while exactly one
        # request prints "0" — confirm that is intended.
        str((reporting_reqs-1) * 300) if reporting_reqs else "300",
        str(reporting_created),
        str(reporting_edited),
        str(files_created)
    ]))

marcos.hollerweger.ccr	13/12/2023 21:03	00:01:40	FULL	3300	0	0	0
marcos.hollerweger.ccr	13/12/2023 21:06	00:01:35	FULL	3300	0	0	0
marcos.hollerweger.ccr	13/12/2023 21:08	00:01:40	FULL	3300	0	0	0
marcos.hollerweger.ccr	13/12/2023 21:28	00:01:40	FULL	3300	0	0	0
marcos.hollerweger.ccr	13/12/2023 21:30	00:00:22	FULL	0	0	0	0
fabio.boton	14/12/2023 03:11	00:00:21	FULL	300	0	0	0
fabio.boton	14/12/2023 03:12	00:00:19	FULL	300	0	0	0
fabio.boton	14/12/2023 03:14	00:00:23	FULL	600	0	0	0
thiago.susuki@grupoccr.com.br	14/12/2023 11:37	00:00:31	FULL	0	0	0	0
thiago.susuki@grupoccr.com.br	14/12/2023 11:45	00:00:01	PARTIAL	300	0	0	0
thiago.susuki@grupoccr.com.br	14/12/2023 19:58	00:00:21	FULL	0	0	0	0
