In [1]:
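# AWS region per environment (kept for reference; the region actually used is
# set through boto3 in a later cell).
# Shared environment:
# !export AWS_DEFAULT_REGION=us-east-1
# CCR/Engie:
# !export AWS_DEFAULT_REGION=sa-east-1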

In [1]:
import base64
import json
from collections import defaultdict
from datetime import datetime, timedelta, timezone

import awswrangler as wr
import boto3
from botocore.config import Config
from tqdm.notebook import tqdm

In [2]:
# Configure boto3's default session to use the sa-east-1 region.
# NOTE(review): presumably awswrangler picks up this default session for the
# CloudWatch queries below — confirm.
boto3.setup_default_session(region_name="sa-east-1")

In [3]:
# Collect the "Login" actions of one user whose user agent contains "okhttp",
# on or after 2024-04-23, oldest first. Each entry keeps the username and the
# login time as epoch seconds.
#
# NOTE(review): `ActionLog` is not imported anywhere in this notebook; it is
# presumably injected by the kernel (e.g. a Django shell) — confirm.
#
# select_related("user") loads the related user in the same query, so reading
# `log.user.username` below does not trigger one extra query per row.
action_logs = [
    {
        "username": log.user.username,
        "created_at": log.created_at.timestamp(),
    }
    for log in (
        ActionLog.objects
        .filter(
            action="Login",
            user_agent__icontains="okhttp",
            created_at__gte="2024-04-23",
            user__username="viviane.ssilva",
        )
        .select_related("user")
        .order_by("created_at")
    )
]



In [4]:
# Display the collected login events (username and login time in epoch seconds).
action_logs

[{'username': 'viviane.ssilva', 'created_at': 1713881853.630359},
 {'username': 'viviane.ssilva', 'created_at': 1713899993.843307},
 {'username': 'viviane.ssilva', 'created_at': 1713900202.705483},
 {'username': 'viviane.ssilva', 'created_at': 1713900288.073672},
 {'username': 'viviane.ssilva', 'created_at': 1713900340.75479},
 {'username': 'viviane.ssilva', 'created_at': 1713973155.444548},
 {'username': 'viviane.ssilva', 'created_at': 1713973236.718531},
 {'username': 'viviane.ssilva', 'created_at': 1713973271.742873},
 {'username': 'viviane.ssilva', 'created_at': 1713973285.458257},
 {'username': 'viviane.ssilva', 'created_at': 1713973301.79944},
 {'username': 'viviane.ssilva', 'created_at': 1713973312.166073},
 {'username': 'viviane.ssilva', 'created_at': 1713973325.403897},
 {'username': 'viviane.ssilva', 'created_at': 1713973338.128191},
 {'username': 'viviane.ssilva', 'created_at': 1713973352.235408},
 {'username': 'viviane.ssilva', 'created_at': 1713973363.354774},
 {'username'

In [5]:
# Maps each Authorization header value (i.e. each token) to the de-duplicated
# list of CloudWatch log rows that carried it.
token_groups = defaultdict(list)

# CloudWatch record pointers already stored for each token. Logins that are
# close in time produce overlapping query windows, so the same log row can be
# returned more than once; a set makes the duplicate check O(1) per row.
seen_ptrs = defaultdict(set)

for login in tqdm(action_logs):
    username = login['username']

    # The Authorization header carries a JWT whose payload is base64url-encoded
    # JSON, so the logs are searched for the base64url form of the JSON fragment
    # `","username":"<username>`. The last 4 characters are dropped because they
    # depend on whatever bytes follow the fragment inside the real payload.
    # NOTE(review): dropping them also means the final 1-3 characters of the
    # username are not matched, and the match only works when the fragment
    # starts at a byte offset that is a multiple of 3 inside the payload.
    username_filter = base64.urlsafe_b64encode(
        ('","username":"' + username).encode('utf-8')
    )[:-4].decode("utf-8")

    # `created_at` is an epoch timestamp. Building timezone-aware datetimes
    # makes the query window independent of the machine's local timezone
    # (the previous code subtracted a hard-coded 3 hours from a naive UTC
    # datetime, which is only right on a UTC-3 machine).
    login_time = datetime.fromtimestamp(login['created_at'], tz=timezone.utc)
    start_time = login_time - timedelta(seconds=30)
    end_time = login_time + timedelta(minutes=30)

    # NOTE(review): `limit=2000` caps the number of returned rows; a window
    # with more matching requests than that would be cut short.
    df = wr.cloudwatch.read_logs(
        log_group_names=["/aws/lambda/ccr-production-ccr-production"],
        start_time=start_time,
        end_time=end_time,
        query="""
            fields @timestamp, httpMethod, path, `headers.User-Agent`, `headers.Authorization`, `queryStringParameters.company`
            | sort @timestamp asc
            | filter headers.Authorization like "{}"
        """.format(username_filter),
        limit=2000
    )

    for row in df.to_dict('records'):
        user_agent = row.get('headers.User-Agent')
        path = row.get('path')

        # Missing fields are not strings (pandas typically fills them with
        # NaN), so check the type before doing substring tests.
        if not isinstance(user_agent, str) or "okhttp" not in user_agent:
            continue
        if not isinstance(path, str):
            continue

        if row['httpMethod'] == "GET":
            # Skip GETs on "/Reporting/..." paths longer than 20 characters.
            if "/Reporting/" in path and len(path) > 20:
                continue
            # Skip GETs on these two endpoints.
            if path in ("/ApprovalTransition/", "/WmDBSync/"):
                continue

        token = row['headers.Authorization']
        if row['ptr'] not in seen_ptrs[token]:
            seen_ptrs[token].add(row['ptr'])
            token_groups[token].append(row)

  0%|          | 0/39 [00:00<?, ?it/s]

In [7]:
# Optional settings. The defaults keep every token group and print only the
# one-line summary per group.
COMPANY_FILTER = None      # when set, keep only groups with a request for this company id
MIN_GROUP_SIZE = 0         # skip groups with fewer logged requests than this
DUMP_REQUESTS_FOR = set()  # usernames whose individual requests are also printed
# Multiplier applied to the number of "GET /Reporting/" requests in the summary.
# NOTE(review): presumably the number of reportings returned per request — confirm.
PAGE_SIZE = 300


def decode_token_payload(authorization):
    """Return the decoded JSON payload of the JWT in an Authorization header value.

    The value is split on spaces and the last piece is treated as a
    "<header>.<payload>.<signature>" token. JWT segments are base64url-encoded
    without padding, so the padding is restored and the URL-safe alphabet is
    used (plain `b64decode` silently drops "-" and "_" and corrupts the payload).
    """
    payload = authorization.split(" ")[-1].split(".")[1]
    padded = payload + "=" * (-len(payload) % 4)
    return json.loads(base64.urlsafe_b64decode(padded))


def format_duration(delta):
    """Format a time difference without fractional seconds.

    A leading "0 days " is removed; it is only stripped as a prefix so that
    durations such as "10 days 01:00:00" are not mangled.
    """
    text = str(delta)
    if text.startswith("0 days "):
        text = text[len("0 days "):]
    return text.split(".")[0]


def count_requests(rows, method, path, exact):
    """Count rows with the given HTTP method whose path equals (or contains) `path`."""
    total = 0
    for row in rows:
        row_path = row['path']
        if row['httpMethod'] != method or not isinstance(row_path, str):
            continue
        if (row_path == path) if exact else (path in row_path):
            total += 1
    return total


# One (token, rows) pair per token: rows sorted by time, pairs ordered by the
# time of each token's first request.
ordered_stuff = sorted(
    (
        (token, sorted(rows, key=lambda row: row['timestamp']))
        for token, rows in token_groups.items()
    ),
    key=lambda pair: pair[1][0]['timestamp'],
)

for token, group in ordered_stuff:
    token_obj = decode_token_payload(token)
    username = token_obj['username']

    if len(group) < MIN_GROUP_SIZE:
        continue

    if COMPANY_FILTER is not None:
        # The company field is only a string when the request carried it.
        companies = {
            row['queryStringParameters.company']
            for row in group
            if isinstance(row['queryStringParameters.company'], str)
        }
        if COMPANY_FILTER not in companies:
            continue

    time_spent = group[-1]['timestamp'] - group[0]['timestamp']

    reporting_created = count_requests(group, "POST", "/Reporting/", exact=True)
    reporting_edited = count_requests(group, "PATCH", "/Reporting/", exact=False)
    files_created = count_requests(group, "POST", "/ReportingFile/", exact=True)
    reporting_reqs = count_requests(group, "GET", "/Reporting/", exact=True)

    # A group is labelled FULL when any of its requests touched a "Road/" path.
    is_full = any(
        isinstance(row['path'], str) and "Road/" in row['path'] for row in group
    )

    # Tab-separated columns: username, first request time, duration,
    # FULL/PARTIAL, reporting figure, reportings created, reportings edited,
    # files created.
    print("\t".join([
        username,
        group[0]['timestamp'].strftime("%d/%m/%Y %H:%M"),
        format_duration(time_spent),
        "FULL" if is_full else "PARTIAL",
        str((reporting_reqs - 1) * PAGE_SIZE) if reporting_reqs else str(PAGE_SIZE),
        str(reporting_created),
        str(reporting_edited),
        str(files_created)
    ]))

    if username in DUMP_REQUESTS_FOR:
        for log in group:
            print(log['timestamp'], log['httpMethod'], log['path'])
        print()
        print()

In [8]:
username = "samuel.nunes"

# The Authorization header carries a JWT whose payload is base64url-encoded
# JSON, so the logs are searched for the base64url form of the JSON fragment
# `","username":"<username>`. The last 4 characters are dropped because they
# depend on whatever bytes follow the fragment inside the real payload.
username_filter = base64.urlsafe_b64encode(
    ('","username":"' + username).encode('utf-8')
)[:-4].decode("utf-8")

# Query the last 100 hours. The clock is read once so the window is exactly
# 100 hours long.
end_time = datetime.now()
start_time = end_time - timedelta(hours=100)

# NOTE(review): the default boto3 session was set to sa-east-1 earlier in the
# notebook; confirm this log group lives in that region.
df = wr.cloudwatch.read_logs(
    log_group_names=["/aws/lambda/roadlabsspotway-production"],
    start_time=start_time,
    end_time=end_time,
    query="""
        fields @timestamp, httpMethod, path, body, `headers.User-Agent`, `queryStringParameters.company`
        | sort @timestamp asc
        | filter headers.Authorization like "{}"
    """.format(username_filter),
    limit=2000
)

In [9]:
# Print every logged request with its base64-decoded body.
for row in df.to_dict('records'):
    body = row.get('body')
    # Requests without a body do not have a string here (pandas typically
    # fills missing fields with NaN), and b64decode would raise on it, so
    # those rows are printed with None instead.
    decoded_body = base64.b64decode(body) if isinstance(body, str) else None
    print(row['httpMethod'], row['path'], row['timestamp'], decoded_body)
    print()