# Import libraries 
matplotlib, cartopy, numpy, pandas, Pillow, requests and ffmpeg are used.

In [None]:
from datetime import datetime, timedelta
from cartopy.feature.nightshade import Nightshade
from matplotlib.image import imread
from PIL import Image 
import ffmpeg
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import pandas as pd
import requests
import json
import numpy as np
import os
import math

# Date configuration
Change these parameters to match the day covered by the logs and DB queries. SIZE is the output dimension ([width, height] in pixels).

In [None]:
# Calendar day to render; it must match the day covered by the log files
# and by the database exports loaded further down.
year, month, day = 2021, 1, 10

# Output frame size as [width, height]; used to resize the background
# images and as the scale of the final video.
SIZE = [1440, 720]

# Load geo-location from IP
Geolocation data is taken from https://geolocation-db.com/ and can be switched to a different source. The included IPS.txt contains the IPs needed for the "1-10-2021" map.

In [None]:
# Geolocation cache keyed by IP address, persisted as JSON in IPS.txt.
# getLocation() rewrites that file whenever it resolves a new address, so a
# missing file simply means "no cache yet" rather than an error.
try:
    with open('IPS.txt') as f:
        IPS = json.load(f)
except FileNotFoundError:
    IPS = {}

# Number of geolocation lookups served so far (cached or not).
TOTAL = 0

def isfloat(value):
    """Tell whether *value* is a number or a string that parses as a float.

    Returns True for ``float`` and ``int`` instances and for strings that
    ``float()`` accepts; returns False for every other value.
    """
    if isinstance(value, (float, int)):
        return True
    if not isinstance(value, str):
        return False
    try:
        float(value)
    except ValueError:
        return False
    return True


TOTAL = 0


def getLocation(ip):
    """Return the geolocation record for *ip*, querying the web service on a miss.

    Results are memoised in the module-level ``IPS`` dict. After every new
    lookup the whole cache is written back to ``IPS.txt`` so later runs can
    reuse it. ``TOTAL`` is incremented on every call, cached or not.

    Args:
        ip: IP address as a string; it is appended verbatim to the service URL.

    Returns:
        The decoded JSON response stored for *ip*.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails or
            times out.
        ValueError: if the response body is not valid JSON.
    """
    global TOTAL
    if ip not in IPS:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(
            "https://geolocation-db.com/json/" + ip + "&position=true",
            timeout=10,
        )
        IPS[ip] = response.json()
        with open('IPS.txt', 'w') as outfile:
            json.dump(IPS, outfile)
    TOTAL = TOTAL + 1
    return IPS[ip]
#len(IPS)

# Database queries
All txt files are in "MariaDB report format" (copy-pasted from the console): "|column1|column2|.....|". Other formats have to be loaded differently into pandas DataFrames.

### Sessions

In [None]:
#   SELECT sessnum, remoteip, walltime, start 
#   FROM narwhal.sessionlog 
#   WHERE start >= '2021-01-10 00:00:00' 
#   AND start <= '2021-01-10 23:59:59' 
#   ORDER by start DESC;

# Parse the pasted MariaDB report for sessions. Only lines that split into
# more than five "|"-separated fields and whose first column is a numeric
# sessnum are kept, which skips the header and border lines.
sessions = []
with open('sessions', 'r') as file1:  # closed automatically once parsed
    for line in file1:
        cols = line.split("|")
        if len(cols) > 5 and cols[1].strip().isdigit():
            sessions.append({
                "sessnum": cols[1].strip(),
                "remoteip": cols[2].strip(),
                "walltime": float(cols[3].strip()),
                "start": cols[4].strip(),
            })
sessions = pd.DataFrame(sessions)
sessions['start'] = pd.to_datetime(sessions['start'])
# A session ends `walltime` seconds after it starts.
sessions['end'] = sessions.apply(lambda x: x['start'] + timedelta(seconds=x['walltime']), axis=1)
sessions['geo'] = sessions['remoteip'].apply(getLocation)
# Coordinates that are not numeric (per isfloat) become NaN.
sessions['latitude'] = sessions['geo'].apply(lambda x: float(x['latitude']) if isfloat(x['latitude']) else math.nan)
sessions['longitude'] = sessions['geo'].apply(lambda x: float(x['longitude']) if isfloat(x['longitude']) else math.nan)
#sessions


### Jobs

In [None]:
#   SELECT sessnum, start, walltime 
#   FROM narwhal.joblog
#   WHERE start >= '2021-01-10 00:00:00' 
#   AND start <= '2021-01-10 23:59:59' 
#   ORDER by start DESC;

# Parse the pasted MariaDB report for jobs. Only lines that split into more
# than four "|"-separated fields and whose first column is a numeric sessnum
# are kept, which skips the header and border lines.
jobs = []
with open('jobs', 'r') as file1:  # closed automatically once parsed
    for line in file1:
        cols = line.split("|")
        if len(cols) > 4 and cols[1].strip().isdigit():
            jobs.append({
                "sessnum": cols[1].strip(),
                "start": cols[2].strip(),
                "walltime": float(cols[3].strip()),
            })
jobs = pd.DataFrame(jobs)
# Jobs carry no IP of their own; the inner join on sessnum borrows the
# coordinates of the owning session and drops jobs without a known session.
# Columns present on both sides get the "_x" (jobs) / "_y" (sessions) suffix.
jobs = pd.merge(jobs, sessions, how="inner", on="sessnum")
jobs['start'] = pd.to_datetime(jobs['start_x'])
# Replaces the session's 'end' column brought in by the merge with the job's own end.
jobs['end'] = jobs.apply(lambda x: x['start'] + timedelta(seconds=x['walltime_x']), axis=1)
#jobs

### Resources
It is not completely clear that this table (jos_media_tracking_detailed) is the most relevant source of information about resources.

In [None]:
#   SELECT ip_address, current_position_timestamp, farthest_position_timestamp 
#   FROM nanohub.jos_media_tracking_detailed 
#   WHERE current_position_timestamp >= '2021-01-10 00:00:00' 
#   AND current_position_timestamp <= '2021-01-10 23:59:59';

# Parse the pasted MariaDB report for resources. Only lines that split into
# more than four "|"-separated fields are kept, and the header row
# (first column "ip_address") is skipped.
resources = []
with open('resources', 'r') as file1:  # closed automatically once parsed
    for line in file1:
        cols = line.split("|")
        if len(cols) > 4 and cols[1].strip() != "ip_address":
            # NOTE(review): 'start' is read from the third column and 'end'
            # from the second (farthest/current position timestamps per the
            # query above) -- confirm this ordering is intended.
            resources.append({
                "remoteip": cols[1].strip(),
                "start": cols[3].strip(),
                "end": cols[2].strip(),
            })
resources = pd.DataFrame(resources)
resources['start'] = pd.to_datetime(resources['start'])
resources['end'] = pd.to_datetime(resources['end'])
resources['geo'] = resources['remoteip'].apply(getLocation)
# Coordinates that are not numeric (per isfloat) become NaN.
resources['latitude'] = resources['geo'].apply(lambda x: float(x['latitude']) if isfloat(x['latitude']) else math.nan)
resources['longitude'] = resources['geo'].apply(lambda x: float(x['longitude']) if isfloat(x['longitude']) else math.nan)

#resources

### Activity
Logs from apache web server, extracted from .gz archived log files

In [None]:
# Parse the Apache access log into one row per request.
# Assumes the log format puts the date and time in space-separated fields
# 0 and 1 and the client address in field 10 -- confirm against the server's
# LogFormat if the log source changes.
activity = []
with open('nanohub-access.log-20210110', 'r') as file1:
    # Iterate the file lazily instead of loading the whole log into memory.
    for line in file1:
        # Skip any line containing "bot" (crude crawler filter).
        if "bot" in line:
            continue
        cols = line.split(" ")
        # Blank or truncated lines do not have the fields read below.
        if len(cols) <= 10:
            continue
        activity.append({"start": cols[0].strip() + " " + cols[1].strip(), "remoteip": cols[10].strip()})
activity = pd.DataFrame(activity)
# Drop requests whose address field is the placeholder "-" (quotes included).
# .copy() makes the column assignments below act on an independent frame
# rather than on a slice of the unfiltered one.
activity = activity[activity['remoteip'] != "\"-\""].copy()
activity['start'] = pd.to_datetime(activity['start'])
activity['geo'] = activity['remoteip'].apply(getLocation)
# Coordinates that are not numeric (per isfloat) become NaN.
activity['latitude'] = activity['geo'].apply(lambda x: float(x['latitude']) if isfloat(x['latitude']) else math.nan)
activity['longitude'] = activity['geo'].apply(lambda x: float(x['longitude']) if isfloat(x['longitude']) else math.nan)
#activity

# Functions
Day-night algorithms modified from https://codegolf.stackexchange.com/questions/24326/is-it-dark-outside-draw-a-sun-map
Images from https://blue-marble.de/night/Earthlights_2002.png, https://neo.sci.gsfc.nasa.gov/view.php?datasetId=BlueMarbleNG, https://neo.sci.gsfc.nasa.gov/

In [None]:
def deg2rad(x):
    """Convert the angle *x* from degrees to radians."""
    return math.radians(x)

def rad2deg(x):
    """Convert the angle *x* from radians to degrees."""
    return math.degrees(x)

def sind(x):
    """Return the sine of *x*, where *x* is an angle in degrees."""
    radians = deg2rad(x)
    return math.sin(radians)

def cosd(x):
    """Return the cosine of *x*, where *x* is an angle in degrees."""
    radians = deg2rad(x)
    return math.cos(radians)

def asind(x):
    """Return the arcsine of *x* as an angle in degrees."""
    radians = math.asin(x)
    return rad2deg(radians)

def elevation(latitude, longitude, yday, hours, mins, secs):
    """Approximate the sun's elevation angle, in degrees, at a point on Earth.

    Callers treat a positive result as daylight and negative values as
    twilight or night.

    Args:
        latitude: Latitude of the point in degrees.
        longitude: Longitude of the point in degrees.
        yday: Day of the year.
        hours, mins, secs: Time of day. NOTE(review): the caller shifts its
            PST timestamps by +8 h before calling, so this presumably expects
            UTC -- confirm.

    Returns:
        The elevation angle in degrees, within [-90, 90].
    """
    # Fraction of the day elapsed, then position in the year as an angle.
    fd = (hours + (mins + (secs / 60.0)) / 60.0) / 24.0
    fyd = 360.0 * (yday + fd) / 366.0

    m = fyd - 3.943
    # Presumably an equation-of-time style correction to the hour angle.
    ta = -1.914 * sind(m) + 2.468 * sind(2 * m + 205.6)
    hourangle = (fd - 0.5) * 360.0 + longitude + ta
    decl = (0.396 - 22.913 * cosd(fyd) + 4.025 * sind(fyd)
            - 0.387 * cosd(2 * fyd) + 0.052 * sind(2 * fyd)
            - 0.155 * cosd(3 * fyd) + 0.085 * sind(3 * fyd))

    sin_elev = cosd(hourangle) * cosd(decl) * cosd(latitude) + sind(decl) * sind(latitude)
    # Rounding can push the value a hair outside [-1, 1], where asin raises
    # ValueError; clamping leaves every in-range value untouched.
    sin_elev = max(-1.0, min(1.0, sin_elev))
    return asind(sin_elev)


def createImage(yday, hourt, mint, sect):
    """Build the day/night background map for one moment in time.

    The day and night textures are resized to ``SIZE`` and blended pixel by
    pixel according to the sun elevation at that pixel: the day image where
    the sun is above the horizon, the night image at or below -8 degrees,
    and a linear mix of the two in between.

    Args:
        yday: Day of the year, forwarded to ``elevation``.
        hourt, mint, sect: Time of day, forwarded to ``elevation``.

    Returns:
        A new RGB ``PIL.Image`` of size ``SIZE``.
    """
    frame_size = SIZE
    # Alternative day textures used during development: 'BG/BM.jpg', 'BG/VI.jpg'.
    day_img = Image.open('BG/50-natural-earth-1-downsampled.png').resize(frame_size)
    night_img = Image.open('BG/night.png').resize(frame_size)
    blended = Image.new("RGB", frame_size)

    n_cols, n_rows = day_img.size
    # Each pixel centre is mapped linearly onto [-180, 180] x [90, -90] degrees.
    col_longitudes = [-180.0 + 360.0 * (col + 0.5) / n_cols for col in range(n_cols)]
    for row in range(n_rows):
        row_latitude = 90.0 - 180.0 * (row + 0.5) / n_rows
        for col, col_longitude in enumerate(col_longitudes):
            sun = elevation(row_latitude, col_longitude, yday, hourt, mint, sect)
            # Weight of the night texture: 0 in daylight, 1 in full night,
            # ramping linearly through the 8-degree twilight band.
            if sun > 0:
                night_w = 0.0
            elif sun > -8:
                night_w = 1-(8+sun)/8
            else:
                night_w = 1.0
            day_w = 1.0 - night_w
            day_px = day_img.getpixel((col, row))
            night_px = night_img.getpixel((col, row))
            mixed = tuple(
                math.floor(day_w * day_px[band] + night_w * night_px[band])
                for band in range(3)
            )
            blended.putpixel((col, row), mixed)
    return blended



### Main 
Modify the delta variable to change the time step (the default is 00:07:30). The folder 'out' is required; create it before running this cell (mkdir out).

In [None]:
%matplotlib auto
def _overlapping(frame, window_start, window_end):
    """Return rows of *frame* whose [start, end] span intersects [window_start, window_end)."""
    return frame[(frame['end'] >= window_start) & (frame['start'] < window_end)]


def _bubbles(frame, scale):
    """Count rows per (latitude, longitude); return (longitudes, latitudes, marker sizes).

    Marker sizes are the per-location row counts multiplied by *scale*.
    """
    counts = frame.groupby(['latitude', 'longitude']).count()
    index = list(counts.index)
    longs = [each[1] for each in index]
    lats = [each[0] for each in index]
    return longs, lats, counts['start'] * scale


# Frames are written to out/out<N>.png; make sure the folder exists.
os.makedirs("out", exist_ok=True)

image_cnt = 0
date_init = datetime(year, month, day)
date_stop = date_init + timedelta(days=1)
# Time covered by each frame.
delta = timedelta(hours=0, minutes=7, seconds=30)

while date_init < date_stop:
    date_start = date_init
    date_end = date_start + delta

    # Sessions, jobs and resources have a duration: plot those overlapping
    # the frame window. All three use the same half-open window so an item
    # starting exactly at date_end only shows up in the next frame.
    longs_0, lats_0, sizes_0 = _bubbles(_overlapping(sessions, date_start, date_end), 200)
    longs_1, lats_1, sizes_1 = _bubbles(_overlapping(jobs, date_start, date_end), 20)
    longs_2, lats_2, sizes_2 = _bubbles(_overlapping(resources, date_start, date_end), 50)

    # Web requests are instantaneous: keep those that fall inside the window.
    activity_filter = activity[(activity['start'] >= date_start) & (activity['start'] < date_end)]
    longs_3, lats_3, sizes_3 = _bubbles(activity_filter, 3)  # 10 for each 30 seconds

    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
    ax.set_title('nanoHUB.org usage {} PST'.format(date_start), y=1, pad=-24, fontsize=18, color="w")
    # Timestamps are labelled PST; the +8 h shift presumably converts them
    # to UTC for the sun-position computation.
    date_base = date_start + timedelta(hours=8)
    img = np.asarray(createImage(date_base.timetuple().tm_yday, date_base.hour, date_base.minute, date_base.second))
    ax.imshow(img, origin='upper', transform=ccrs.PlateCarree(), extent=[-180, 180, -90, 90])

    ax.scatter(longs_3, lats_3, s=sizes_3, alpha=0.5,
               transform=ccrs.PlateCarree(), color="g", label="Activity")
    ax.scatter(longs_0, lats_0, s=sizes_0, alpha=0.5,
               transform=ccrs.PlateCarree(), color="b", label="Session")
    ax.scatter(longs_1, lats_1, s=sizes_1, alpha=0.5,
               transform=ccrs.PlateCarree(), color="r", label="Job")
    ax.scatter(longs_2, lats_2, s=sizes_2, alpha=0.5,
               transform=ccrs.PlateCarree(), color="k", label="Resource")

    lgnd = ax.legend(loc="lower center", fontsize=18, ncol=2)
    # Newer Matplotlib exposes `legend_handles`; older releases only have
    # `legendHandles`, which was later deprecated and removed.
    handles = getattr(lgnd, "legend_handles", None)
    if handles is None:
        handles = lgnd.legendHandles
    # Give every legend marker the same fixed size, independent of the data.
    for handle in handles:
        handle.set_sizes([50])

    plt.margins(0, 0)
    plt.gca().xaxis.set_major_locator(plt.NullLocator())
    plt.gca().yaxis.set_major_locator(plt.NullLocator())
    plt.savefig("out/out" + str(image_cnt) + ".png", format="png", bbox_inches='tight',
                pad_inches=0, dpi=100)
    image_cnt = image_cnt + 1
    print(image_cnt)
    date_init = date_end
    # Release the figure so memory does not grow with every frame.
    plt.close(fig)

# Video
Create a video from all the images in the out folder. The ffmpeg library seems to produce some glitches in the final video; run the command in a console for better results (ffmpeg -f image2 -i out%d.png -pix_fmt yuv420p -vf scale=1440:720 video.mp4).

In [None]:
# Console equivalent:
#   ffmpeg -f image2 -i out%d.png -pix_fmt yuv420p -vf scale=1440:720 video.mp4
#
# The frames are named out0.png, out1.png, ..., out10.png without zero
# padding. A glob pattern feeds them in lexicographic order (out0, out1,
# out10, out100, ...), which scrambles the timeline, so a numeric sequence
# pattern is used instead. Input and output share one frame rate so that no
# frames are dropped or duplicated.
FRAME_RATE = 25
(
    ffmpeg
    .input('out/out%d.png',
           framerate=FRAME_RATE
    )
    .output('out/movie.mp4',
           pix_fmt="yuv420p",
           vf="scale="+str(SIZE[0])+":"+str(SIZE[1]),
           r=FRAME_RATE
    )
    .run(overwrite_output=True)
)