# Imports

In [None]:
import datetime as dt

import matplotlib.pyplot as plt
import pymongo

# MongoDB Connection

In [None]:
# Connection settings for the MongoDB node.
# NOTE(review): the credentials are hard-coded in the notebook; consider
# loading them from the environment before sharing this file.
from urllib.parse import quote_plus

user = 'user'
password = 'pass'
node_ip = '192.168.2.20'
node_port = '30017'
# Percent-escape the credentials so characters such as '@', ':' or '/' in the
# user name or password cannot corrupt the connection URI.
url = f'mongodb://{quote_plus(user)}:{quote_plus(password)}@{node_ip}:{node_port}/'
# Server selection is limited to 2000 ms so an unreachable node fails quickly.
client = pymongo.MongoClient(url, serverSelectionTimeoutMS=2000)
# Last expression of the cell: asks the server for its info, so connection
# problems are expected to surface here rather than in a later cell.
client.server_info()

In [None]:
# Quick look at what the server holds: the database names, the collections of
# the 'test' database, and a 100-character preview of one document.
database_names = client.list_database_names()
print('Databases:', database_names)

db = client['test']
collection_names = db.list_collection_names()
print('Collections:', collection_names)

collection = db['test']
first_document = collection.find_one()
print('First Document:', str(first_document)[:100])

# Get Data

In [None]:
# Query window: `hours` is the window length and `offset` moves the end of the
# window back from the current time (both expressed in hours).
hours = 0.1
offset = 0

# Naive UTC timestamp obtained without the deprecated datetime.utcnow().
# NOTE(review): kept naive on the assumption that the stored
# request_datetime_utc values are naive UTC as well — confirm.
datetime_utcnow = (
    dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
    - dt.timedelta(hours=offset)
)
# `offset` is already contained in datetime_utcnow; subtracting it a second
# time would stretch the window to `hours + offset` instead of `hours`.
datetime_start = datetime_utcnow - dt.timedelta(hours=hours)

In [None]:
# Select documents whose request time lies in the half-open window
# [datetime_start, datetime_utcnow).
time_window = {'$gte': datetime_start, '$lt': datetime_utcnow}
query = {'metadata.request_datetime_utc': time_window}

In [None]:
# Materialise the cursor in a single call instead of appending the documents
# one by one.
everything = list(collection.find(query))
# Last expression of the cell: number of documents that matched the query.
len(everything)

# Plot Data

In [None]:
# Split the request timestamps by outcome: a document whose 'result' is None
# goes to X_error, every other document goes to X_ok.
X_error, X_ok = [], []

for doc in everything:
    requested_at = doc['metadata']['request_datetime_utc']
    target = X_error if doc['result'] is None else X_ok
    target.append(requested_at)

In [None]:
# Among the documents that have a result, separate the requests that reported
# at least one invalid product from those that reported none.
X_has_invalid, X_all_valid = [], []

for doc in everything:
    requested_at = doc['metadata']['request_datetime_utc']
    result = doc['result']
    if result is None:
        continue
    # A missing 'products_invalid' entry counts as zero invalid products.
    invalid_count = result.get('products_invalid', 0)
    target = X_has_invalid if invalid_count > 0 else X_all_valid
    target.append(requested_at)

In [None]:
# Collect the timestamps of requests that found products and, independently,
# of requests that reported available products. Documents without a result
# are skipped.
X_has_found, X_has_available = [], []

for doc in everything:
    requested_at = doc['metadata']['request_datetime_utc']
    result = doc['result']
    if result is None:
        continue

    found_count = result['products_found']
    available_count = result['products_available']

    if found_count > 0:
        X_has_found.append(requested_at)
    if available_count > 0:
        X_has_available.append(requested_at)

In [None]:
# Three-row timeline over the query window:
#   y =  0  found (gray) / available (green)
#   y = -1  all valid (gray) / has invalid (orange)
#   y = -2  ok (gray) / error (red)
# Within each row the gray series is plotted before the coloured one.
_, ax = plt.subplots(figsize=(40, 0.5))

timeline_layers = [
    (X_has_found, 0, 'lightgray'),
    (X_has_available, 0, 'green'),
    (X_all_valid, -1, 'lightgray'),
    (X_has_invalid, -1, 'orange'),
    (X_ok, -2, 'lightgray'),
    (X_error, -2, 'red'),
]
for timestamps, row, colour in timeline_layers:
    ax.scatter(timestamps, [row] * len(timestamps), color=colour)

ax.set_ylim((-2.5, 0.5))
ax.set_xlim((datetime_start, datetime_utcnow))

plt.show()

In [None]:
# Build parallel series (timestamp, found, invalid, available) for every
# document that has a result.
X, Y_num_found, Y_num_invalid, Y_num_available = [], [], [], []

for doc in everything:
    requested_at = doc['metadata']['request_datetime_utc']
    result = doc['result']
    if result is None:
        continue

    found_count = result['products_found']
    # Results lacking the valid/invalid split are read as "all found are valid".
    valid_count = result.get('products_valid', found_count)
    invalid_count = result.get('products_invalid', 0)
    # Sanity check: valid and invalid products must add up to the found ones.
    assert valid_count + invalid_count == found_count
    available_count = result['products_available']

    X.append(requested_at)
    Y_num_found.append(found_count)
    Y_num_invalid.append(invalid_count)
    Y_num_available.append(available_count)

In [None]:
# Plot the three count series against the request time, one colour per series.
_, ax = plt.subplots(figsize=(40, 4))
count_series = (
    (Y_num_found, 'black'),
    (Y_num_invalid, 'orange'),
    (Y_num_available, 'green'),
)
for counts, colour in count_series:
    ax.scatter(X, counts, color=colour, marker='_')
plt.show()

# Check Error

In [None]:
# Scratch check: number of entries equal to True (False and None do not count).
[True, True, False, None].count(True)

In [None]:
# Match every document that carries a 'metadata.error_report' field; this
# filter has no time restriction.
error_report_filter = {'$exists': True}
query = {'metadata.error_report': error_report_filter}

In [None]:
# Fetch again with the current `query`, starting from an empty list.
everything = []
# NOTE: `x` stays bound to the last fetched document once the loop ends; keep
# the explicit loop unless no later code relies on that leftover binding.
for x in collection.find(query):
    everything.append(x)
# Last expression of the cell: number of documents that matched the query.
len(everything)

In [None]:
# Dump the first listing page of the last fetched document to page.html for
# inspection. The document is taken from `everything` rather than from a
# leftover loop variable, which would be stale or undefined when the query
# matched nothing.
if everything:
    last_document = everything[-1]
    # Explicit UTF-8 so the output does not depend on the platform's default
    # encoding.
    with open('page.html', 'w', encoding='utf-8') as f:
        f.write(last_document['metadata']['listing_pages'][0])
else:
    print('No documents matched the query; page.html was not written.')