## Usage

Run `report_historic_ocurrences` to create or update an `OcurrenceReport` for the end of every day, from Aug 15, 2020 (the start date hard-coded in the function) up to yesterday.

In [19]:
import arrow
from datetime import datetime
from typing import List, Optional

from apps.patterns.git import repo_at
from apps.patterns.models import OcurrenceReport
from apps.patterns.analyzer import analyze_repo_with_pattern

def to_arrow(in_datetime: datetime, tz: str = "America/Santiago") -> arrow.Arrow:
    """Convert a ``datetime`` into an ``arrow.Arrow`` expressed in ``tz``.

    Timezone-aware inputs are converted to ``tz``, so the result denotes the
    same instant as the input. Naive inputs carry no offset, so their
    wall-clock fields are taken as already being local time in ``tz``.
    Sub-second precision is discarded in both cases.

    Args:
        in_datetime: The datetime to convert (aware or naive).
        tz: Timezone expression understood by arrow, e.g. an IANA name.

    Returns:
        An ``arrow.Arrow`` in ``tz`` with ``microsecond`` set to 0.
    """
    if in_datetime.utcoffset() is not None:
        # Aware input: honour its own offset instead of relabelling the
        # wall-clock fields, which would shift the instant.
        converted = arrow.Arrow.fromdatetime(in_datetime).to(tz)
        return converted.replace(microsecond=0)

    return arrow.Arrow(
        year=in_datetime.year,
        month=in_datetime.month,
        day=in_datetime.day,
        hour=in_datetime.hour,
        minute=in_datetime.minute,
        second=in_datetime.second,
        tzinfo=tz,
    )


def get_end_of_day(arrow_date, days_ago=1, tz="America/Santiago"):
    """Return 23:59:59 of the day ``days_ago`` days before ``arrow_date``, in ``tz``.

    ``arrow_date`` is first converted to ``tz`` so the calendar day is chosen
    in that timezone even when the input is expressed in another one. The
    ``microsecond`` field of the input is left untouched.

    Args:
        arrow_date: Reference moment (an ``arrow.Arrow``).
        days_ago: How many days to go back from ``arrow_date``.
        tz: Timezone expression understood by arrow.

    Returns:
        An ``arrow.Arrow`` in ``tz`` with hour, minute and second set to
        23:59:59.
    """
    # Convert (rather than relabel) the timezone before picking the day.
    target_day = arrow_date.to(tz).shift(days=-days_ago)
    return target_day.replace(hour=23, minute=59, second=59)

def _iter_branch_commits(repo, branch, page_size):
    """Yield the commits of ``branch`` from ``repo.iter_commits``, page by page."""
    skip = 0
    while True:
        fetched = 0
        page = repo.iter_commits(branch, max_count=page_size, skip=skip)
        for fetched, commit in enumerate(page, start=1):
            yield commit
        if fetched < page_size:
            # A short (or empty) page means there is nothing left to fetch.
            return
        skip += page_size


def _find_last_commits_of_days(repo, now, days_to_analyze):
    """Collect, for each analyzed day, the first listed commit before that day's end.

    Days are walked from yesterday backwards. A commit that already satisfies
    the next (older) day's cut-off is reused instead of being skipped, and is
    recorded only once, so a day without commits does not cause an older
    commit of an already-covered day to be selected as well.
    """
    commits = _iter_branch_commits(repo, "devel", 1000)
    selected = []
    commit = next(commits, None)
    for days_ago in range(1, days_to_analyze):
        critic_moment = get_end_of_day(now, days_ago=days_ago)

        # Drop commits that are not strictly before the end of this day.
        while commit is not None and not (
            to_arrow(commit.committed_datetime) < critic_moment
        ):
            commit = next(commits, None)

        if commit is None:
            # History exhausted before reaching the oldest requested day.
            break

        if not selected or selected[-1] is not commit:
            commit_datetime = to_arrow(commit.committed_datetime)
            selected.append(commit)
            print(f"added new study commit for {commit_datetime}")
    return selected


def report_historic_ocurrences(study_app: str, ignore_paths: Optional[List[str]] = None):
    """Create or update end-of-day ``OcurrenceReport`` rows for ``study_app``.

    The ``devel`` branch is scanned from yesterday back to the hard-coded
    start date (2020-08-15). For every selected commit, an existing report
    for ``(commit, study_app)`` is flagged with ``last_of_day=True``;
    otherwise the repository is moved to that commit, analyzed with the
    pattern ``apps.<study_app>.models`` and a new report is created.

    Args:
        study_app: App name used to build the analyzed pattern and stored in
            the report's ``app_name``.
        ignore_paths: Paths forwarded to ``analyze_repo_with_pattern``.
            Defaults to an empty list.
    """
    # Fresh list per call: avoids sharing one default list across calls.
    ignore_paths = [] if ignore_paths is None else list(ignore_paths)

    now = arrow.now()
    max_datetime = arrow.Arrow(year=2020, month=8, day=15, hour=0, minute=0, second=0)
    days_left_to_analyze = (now - max_datetime).days

    with repo_at(commit=None, force_delete=False, always_pull=True) as repo:
        last_of_days_commits = _find_last_commits_of_days(repo, now, days_left_to_analyze)

    for commit in last_of_days_commits:
        try:
            report = OcurrenceReport.objects.filter(commit=commit.hexsha, app_name=study_app).get()
        except OcurrenceReport.DoesNotExist:
            print(f"Moving repo and analyzing {commit.hexsha}")
            with repo_at(commit=commit.hexsha, force_delete=False, always_pull=True):
                hits = analyze_repo_with_pattern(pattern=f"apps.{study_app}.models", ignore_paths=ignore_paths)
                payload = dict(
                    app_name=study_app,
                    ocurrences=hits,
                    commit=commit.hexsha,
                    # NOTE(review): relies on ``Arrow.timestamp`` being a
                    # property (arrow < 1.0) - confirm the installed version.
                    commited_epoch=to_arrow(commit.committed_datetime).timestamp,
                    last_of_day=True,
                )
                OcurrenceReport.objects.create(**payload)
        else:
            report.last_of_day = True
            report.save()
            print(f"Found stored commit {commit.hexsha}")

                
## Study Catalog
# Builds the reports for the pattern "apps.catalog.models"; the listed app
# directories are passed through as ignore_paths to the analyzer.
report_historic_ocurrences(study_app="catalog", ignore_paths=["apps/catalog", "apps/promotions", "apps/stores"])

## Study Orders
# Same study for the "orders" app; currently disabled.
#report_historic_ocurrences(study_app="orders", ignore_paths=["apps/orders"])

added new study commit for 2020-10-21T21:42:52-03:00
added new study commit for 2020-10-20T21:51:24-03:00
added new study commit for 2020-10-19T21:01:03-03:00
added new study commit for 2020-10-18T22:34:03-03:00
added new study commit for 2020-10-17T22:59:32-03:00
added new study commit for 2020-10-16T23:18:17-03:00
added new study commit for 2020-10-15T23:59:24-03:00
added new study commit for 2020-10-14T23:15:23-03:00
added new study commit for 2020-10-13T23:50:56-03:00
added new study commit for 2020-10-12T21:54:01-03:00
added new study commit for 2020-10-11T14:35:31-03:00
added new study commit for 2020-10-10T20:26:11-03:00
added new study commit for 2020-10-09T21:38:30-03:00
added new study commit for 2020-10-08T23:41:37-03:00
added new study commit for 2020-10-07T23:06:39-03:00
added new study commit for 2020-10-06T23:26:42-03:00
added new study commit for 2020-10-05T21:42:01-03:00
added new study commit for 2020-10-04T10:27:35-03:00
added new study commit for 2020-10-03T13:27:49

In [23]:
import arrow
# End-of-day catalog reports, oldest first, as (_total, commited_epoch) pairs.
all_reports = (
    OcurrenceReport.objects
    .filter(last_of_day=True, app_name="catalog")
    .order_by('commited_epoch')
    .values_list('_total', 'commited_epoch')
)

# Print the column of dates first, then the column of totals.
report_dates = [arrow.get(epoch).format('YYYY-MM-DD') for _total, epoch in all_reports]
report_totals = [total for total, _epoch in all_reports]

for date_line in report_dates:
    print(date_line)
for total_line in report_totals:
    print(total_line)

2020-08-17
2020-08-18
2020-08-19
2020-08-20
2020-08-21
2020-08-22
2020-08-23
2020-08-24
2020-08-25
2020-08-26
2020-08-27
2020-08-28
2020-08-29
2020-08-31
2020-09-01
2020-09-02
2020-09-03
2020-09-04
2020-09-05
2020-09-07
2020-09-08
2020-09-09
2020-09-10
2020-09-11
2020-09-12
2020-09-13
2020-09-15
2020-09-16
2020-09-17
2020-09-18
2020-09-20
2020-09-21
2020-09-22
2020-09-23
2020-09-24
2020-09-25
2020-09-26
2020-09-27
2020-09-29
2020-09-30
2020-10-01
2020-10-02
2020-10-03
2020-10-04
2020-10-06
2020-10-07
2020-10-08
2020-10-09
2020-10-10
2020-10-11
2020-10-13
2020-10-14
2020-10-15
2020-10-16
2020-10-17
2020-10-18
2020-10-19
2020-10-20
2020-10-21
2020-10-22
372
374
374
374
377
377
377
377
378
377
378
377
378
379
376
376
376
377
384
382
381
372
384
372
371
371
363
359
356
356
356
356
353
347
346
347
348
348
347
338
337
336
336
336
334
334
315
315
317
317
317
312
305
312
309
308
309
305
301
307


In [22]:
from apps.patterns.models import OcurrenceReport
# Most recent end-of-day catalog report as a (_total, commited_epoch) tuple.
catalog_daily_reports = OcurrenceReport.objects.filter(last_of_day=True, app_name="catalog")
newest_first = catalog_daily_reports.order_by("-commited_epoch")
rep = newest_first.values_list('_total', 'commited_epoch').first()

# Show the raw tuple, then its date and its total on separate lines.
print(rep)
print(arrow.get(rep[1]).format('YYYY-MM-DD'))
print(rep[0])

(307, 1603327372)
2020-10-22
307


# Clean ocurrences

Sometimes more than one `OcurrenceReport` with `last_of_day=True` is created for the same day. We should keep only the latest one for each day.

In [21]:
from apps.patterns.models import OcurrenceReport


from collections import defaultdict
from operator import itemgetter

import arrow

# Days are defined in this timezone elsewhere in the notebook; grouping by the
# UTC date instead would merge evening commits of one local day with commits
# of the following local day and delete a legitimate report.
REPORT_TZ = "America/Santiago"

reports = OcurrenceReport.objects.filter(last_of_day=True, app_name="catalog")

# Local calendar date -> [(report id, commited_epoch), ...]
acc = defaultdict(list)
for report in reports:
    report_date = arrow.get(report.commited_epoch).to(REPORT_TZ).date()
    acc[report_date].append((report.id, report.commited_epoch))

# For every date keep the report with the highest epoch and drop the rest.
ids_to_delete = []
for metadata in acc.values():
    ordered_metadata = sorted(metadata, key=itemgetter(1), reverse=True)
    ids_to_delete.extend(report_id for report_id, _epoch in ordered_metadata[1:])

print(ids_to_delete)
OcurrenceReport.objects.filter(id__in=ids_to_delete).delete()

[824, 802, 809, 816, 829, 836, 843]


(7, {'patterns.OcurrenceReport': 7})