# New content in hawc

Given two datetimes, see how much content has been added between them.

In [None]:
import django
django.setup()

In [None]:
%matplotlib inline

from datetime import datetime, timedelta
from django.utils.timezone import make_aware

import numpy as np
import pandas as pd

from hawc.apps.myuser.models import HAWCUser
from hawc.apps.study.models import Study
from hawc.apps.lit.models import Reference, Search
from hawc.apps.assessment.models import Assessment
from hawc.apps.animal.models import Experiment, Endpoint
from hawc.apps.summary.models import Visual, DataPivotQuery

In [None]:
def changes(Model, start, end):
    """Print how many ``Model`` rows were added between ``start`` and ``end``.

    Counts the rows whose creation timestamp is at or before each bound and
    reports the total at ``end``, the difference, and the relative growth.

    Args:
        Model: class exposing ``objects.filter(...).count()`` and either a
            ``created`` or a ``date_joined`` attribute.
        start: datetime marking the beginning of the window.
        end: datetime marking the end of the window.

    Raises:
        ValueError: if ``Model`` has neither a ``created`` nor a
            ``date_joined`` attribute.
    """
    # find matching datetime attribute
    for field in ['created', 'date_joined']:
        if hasattr(Model, field):
            q = f'{field}__lte'
            break
    else:
        raise ValueError(
            f"{Model.__name__} has no 'created' or 'date_joined' attribute"
        )

    latest = Model.objects.filter(**{q: end}).count()
    earliest = Model.objects.filter(**{q: start}).count()
    added = latest - earliest
    # Relative growth is undefined when nothing existed at `start`.
    growth = f"{added / earliest:.1%}" if earliest else "n/a"
    # %m is the month (%M would be the minute); the window ends at `end`.
    print(
        f"{Model.__name__}: {latest} total, {added} ({growth}) new "
        f"between {start:%Y-%m-%d} and {end:%Y-%m-%d}"
    )
    
    
# Reporting window: the trailing 365 days, ending now.
start = make_aware(datetime.now() - timedelta(days=365))
end = make_aware(datetime.now())

# Print one summary line per model, in this order.
for model_cls in (
    HAWCUser,
    Reference,
    Search,
    Study,
    Assessment,
    Experiment,
    Endpoint,
    Visual,
    DataPivotQuery,
):
    changes(model_cls, start, end)

In [None]:
def changes2(Models, start, end, n_points=12):
    """Return cumulative row counts per model at evenly spaced dates.

    The window from ``start`` to ``end`` is sampled at ``n_points`` evenly
    spaced calendar days. Sampling goes through ``toordinal``/``fromordinal``,
    so the time-of-day of ``start`` and ``end`` is dropped and every sample
    falls on a date boundary before being passed to ``make_aware``.

    Args:
        Models: iterable of classes exposing ``objects.filter(...).count()``
            and either a ``created`` or a ``date_joined`` attribute.
        start: datetime marking the beginning of the window.
        end: datetime marking the end of the window.
        n_points: number of sample dates, endpoints included (default 12).

    Returns:
        pandas.DataFrame with columns ``model``, ``timestamp`` and ``count``,
        one row per (model, sample date) pair.

    Raises:
        ValueError: if a model has neither a ``created`` nor a
            ``date_joined`` attribute.
    """
    # NOTE(review): the last sample is the start of `end`'s day, so rows
    # created later that day are not counted -- confirm this is intended.
    timestamps = [
        make_aware(datetime.fromordinal(int(day))) for day in
        np.linspace(start.toordinal(), end.toordinal(), n_points).astype(int)
    ]
    data = []
    for Model in Models:
        # find matching datetime attribute
        for field in ['created', 'date_joined']:
            if hasattr(Model, field):
                q = f'{field}__lte'
                break
        else:
            # Without this, the lookup from the previous model would be
            # silently reused for a model that has neither attribute.
            raise ValueError(
                f"{Model.__name__} has no 'created' or 'date_joined' attribute"
            )

        data.extend(
            [
                Model.__name__,
                timestamp,
                Model.objects.filter(**{q: timestamp}).count(),
            ]
            for timestamp in timestamps
        )
    return pd.DataFrame(data=data, columns=["model", "timestamp", "count"])

# Models whose cumulative counts are collected into `df`.
models = [
    HAWCUser,
    Reference,
    Search,
    Study,
    Assessment,
    Experiment,
    Endpoint,
    Visual,
    DataPivotQuery,
]
df = changes2(models, start, end)

In [None]:
# Draw one chart per model: cumulative count against sample date.
for model_name in df["model"].unique():
    rows = df[df["model"] == model_name]
    # Leave 10% headroom above the largest count on the y-axis.
    y_top = rows["count"].max() * 1.1
    rows.plot(
        x="timestamp",
        y="count",
        title=model_name,
        figsize=(8, 4),
        ylim=(0, y_top),
    )