In [None]:
#!/usr/bin/env python
 
'''
Speed_plot.py: A program to plot the data produced by the Speedtestlog.py script
A. J. McCulloch, February 2020
'''

####################################################################################################
# Import modules
####################################################################################################

import datetime as dt # Required for timestamp manipulation
import pandas as pd # Required for dataframe manipulation
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import dateutil.parser
from pytz import timezone

####################################################################################################
# Define functions
####################################################################################################
# A function to aid in the combination of the raw data
def datafix(frame, first, last, cols):
    """
    Return rows ``first:last`` of ``frame`` (positional, end-exclusive) with
    the columns renamed to ``cols``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Source data; it is not modified.
    first, last : int
        Positional row bounds passed to ``frame.iloc[first:last]``.
    cols : list-like
        New column labels; must have one entry per column of ``frame``.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows with the new column labels.
    """
    # Copy explicitly: a bare iloc slice can alias the caller's data, so later
    # writes to the result could warn (SettingWithCopy) or leak into `frame`.
    df_cp = frame.iloc[first:last].copy()
    df_cp.columns = cols
    return df_cp


# A function to plot 1D data versus time (e.g. download vs time)
def speedplot(frame, series, start, end, savefig = False):
    """
    Plot one column of ``frame`` against the frame's index as a line.

    Tick handling is still somewhat manual: only a one-day span gets custom
    hour ticks, every other span falls back to matplotlib's default ticks.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data to plot. The index supplies the x values and is expected to be
        datetime-like (``strftime`` is called on its last entry when saving).
    series : str
        Name of the column to plot; also used as the y-axis label and as part
        of the output file name.
    start, end : datetime-like
        Only ``(end - start).days`` is used, to pick the tick layout. The
        x-limits are always taken from the index itself.
    savefig : bool, optional
        If true, save the figure as ``<YYYYmmdd_HHMM>-<series>.pdf`` in the
        working directory, stamped with the last index entry.
    """
    f, ax = plt.subplots(figsize=(14,9)) # Make a figure and an axis to draw on

    # Full time series
    t, o = frame.index, frame[series] # Time stamps and series values

    # Axes.plot_date is deprecated; Axes.plot handles datetime data directly and
    # picks the timezone up from the data rather than forcing the rcParams default
    ax.plot(t, o, 'C0-', linewidth=1, alpha=1.0) # Make it a line plot

    # Timezone of the index (None for a naive index), so ticks match the data
    tz = getattr(t, 'tz', None)

    # Format the ticks
    # Whole days between start and end; a day plot is a span of exactly one whole day
    if (end - start).days == 1:
        hours = mdates.HourLocator(interval=4, tz=tz) # To be used for major ticks
        midhours = mdates.HourLocator(interval=2, tz=tz) # To be used for minor ticks
        hoursFmt = mdates.DateFormatter('%H%M', tz=tz) # Format of the labels
        ax.xaxis.set_major_locator(hours) # Set the major ticks
        ax.xaxis.set_major_formatter(hoursFmt) # Set the format of the ticks
        ax.xaxis.set_minor_locator(midhours) # Set the minor ticks
    # Any other duration keeps matplotlib's automatic date ticks

    f.autofmt_xdate() # Rotate/align the date labels so they stay readable

    # Time range over which to plot
    ax.set_xlim(t.min(), t.max())

    # Label the plot
    ax.set_title('Speedtest.net results', fontsize=40)
    ax.set_xlabel('Time stamp', fontsize=30)
    ax.set_ylabel(series, fontsize=30)

    if savefig:
        # File name is stamped with the last index entry
        outputname = frame.index[-1].strftime('%Y%m%d_%H%M-')+series # Generate the file name
        f.savefig(outputname+'.pdf', bbox_inches='tight') # Save a .pdf

        
# A function to plot uploads and downloads both against time
def downupplot(frame, start, end, savefig = False):
    """
    Plot the 'download' and 'upload' columns of ``frame`` against its index,
    download on the left y-axis and upload on a twinned right y-axis.

    Tick handling is still somewhat manual: only a one-day span gets custom
    hour ticks, every other span falls back to matplotlib's default ticks.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain 'download' and 'upload' columns. The index supplies the
        x values and is expected to be datetime-like (``strftime`` is called
        on its last entry when saving).
    start, end : datetime-like
        Only ``(end - start).days`` is used, to pick the tick layout. The
        x-limits are always taken from the index itself.
    savefig : bool, optional
        If true, save the figure as ``<YYYYmmdd_HHMM>-d_u_plot.pdf`` in the
        working directory, stamped with the last index entry.
    """
    f, ax1 = plt.subplots(figsize=(14,9)) # Make a figure and the left-hand axis
    ax2 = ax1.twinx()  # Second axes sharing the same x-axis (right-hand y-axis)

    # Full time series
    t = frame.index # Time stamps
    d = frame['download'] # Download speeds
    u = frame['upload'] # Upload speeds

    # Axes.plot_date is deprecated; Axes.plot handles datetime data directly and
    # picks the timezone up from the data rather than forcing the rcParams default
    ax1.plot(t, d, 'C0-', linewidth=1, alpha=1.0) # Download as a line
    ax2.plot(t, u, 'C4-', linewidth=1, alpha=1.0) # Upload as a line

    # Timezone of the index (None for a naive index), so ticks match the data
    tz = getattr(t, 'tz', None)

    # Format the ticks
    # Whole days between start and end; a day plot is a span of exactly one whole day
    if (end - start).days == 1:
        hours = mdates.HourLocator(interval=4, tz=tz) # To be used for major ticks
        midhours = mdates.HourLocator(interval=2, tz=tz) # To be used for minor ticks
        hoursFmt = mdates.DateFormatter('%H%M', tz=tz) # Format of the labels
        # The original referenced an undefined `ax` here (NameError); the
        # x-axis of ax1 is the one that is drawn
        ax1.xaxis.set_major_locator(hours) # Set the major ticks
        ax1.xaxis.set_major_formatter(hoursFmt) # Set the format of the ticks
        ax1.xaxis.set_minor_locator(midhours) # Set the minor ticks
    # Any other duration keeps matplotlib's automatic date ticks

    f.autofmt_xdate() # Rotate/align the date labels so they stay readable

    # Time range over which to plot
    ax1.set_xlim(t.min(), t.max())

    # Label the plot
    ax1.set_title('Speedtest.net results', fontsize=40)
    ax1.set_xlabel('Time stamp', fontsize=30)
    ax1.set_ylabel('Download [Mbps]', fontsize=30)
    ax2.set_ylabel('Upload [Mbps]', fontsize=30)

    if savefig:
        # File name is stamped with the last index entry
        outputname = frame.index[-1].strftime('%Y%m%d_%H%M-')+'d_u_plot' # Generate the file name
        f.savefig(outputname+'.pdf', bbox_inches='tight') # Save a .pdf

        
def denplot(frame, series, start, end, Logplot = False, logscale = 1, savefig = False):
    """
    Plot a 2D density map of ``frame[series]`` versus hour of day.

    A Gaussian kernel density estimate is fitted to the (hour, value) pairs
    and evaluated on a regular 100 x 100 grid spanning the data extents.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data to plot. The index must expose ``.hour`` (i.e. be datetime-like).
    series : str
        Name of the column to plot; also used in the y-axis label and in the
        output file name.
    start, end
        Unused; accepted so the signature parallels the other plot functions.
    Logplot : bool, optional
        If true, colour the density on a logarithmic scale.
    logscale : float, optional
        Only used when ``Logplot`` is true: the lower colour limit is the
        minimum density multiplied by this factor.
    savefig : bool, optional
        If true, save the figure as ``<YYYYmmdd_HHMM>-<series>dplot.pdf`` in
        the working directory, stamped with the last index entry.
    """
    # Public import path; the scipy.stats.kde namespace is deprecated
    from scipy.stats import gaussian_kde
    from matplotlib.colors import LogNorm

    f, axes = plt.subplots(figsize=(14,9))

    x, y = frame.index.hour, frame[series]

    # Evaluate a gaussian kde on a regular grid of nbins x nbins over data extents
    nbins = 100
    k = gaussian_kde(np.vstack([x, y]))
    # A complex step in mgrid means "this many points, end inclusive"
    xi, yi = np.mgrid[x.min():x.max():nbins*1j, y.min():y.max():nbins*1j]
    zi = k(np.vstack([xi.ravel(), yi.ravel()])).reshape(xi.shape)

    # Make the plot
    if Logplot:
        # Raising the lower limit by `logscale` clips the faint tail of the density
        norm = LogNorm(vmin=zi.min()*logscale, vmax=zi.max())
        mesh = axes.pcolormesh(xi, yi, zi, norm=norm, cmap='PuBu_r')
    else:
        mesh = axes.pcolormesh(xi, yi, zi, cmap='PuBu_r')
    f.colorbar(mesh, ax=axes)

    axes.set_xlim([0,23])
    axes.set_ylim([0,frame[series].max()*1])
    axes.set_xlabel('Time of day', fontsize=30)
    axes.set_ylabel(series+' [Mbps]', fontsize=30)
    axes.set_title('Speedtest.net results', fontsize=40)

    if savefig:
        # File name is stamped with the last index entry
        outputname = frame.index[-1].strftime('%Y%m%d_%H%M-')+series+'dplot' # Generate the file name
        f.savefig(outputname+'.pdf', bbox_inches='tight') # Save a .pdf

# A function to make a polar plot of connection speed versus time of day
def hourplot(frame, series, day = False, savefig = True):
    """
    Polar scatter plot of ``frame[series]`` (radius) against time (angle).

    Zero angle is at the top and angles increase clockwise.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data to plot. The index must be datetime-like (``.hour``, ``.minute``
        and ``.weekday`` are read from it).
    series : str
        Name of the column to plot; also used in the output file name.
    day : bool, optional
        If true, one revolution is one day (24 hourly ticks) and points are
        coloured by weekday. Otherwise one revolution is one week starting on
        Monday, and points are coloured by days elapsed since the first entry.
    savefig : bool, optional
        If true (the default), save the figure as
        ``<YYYYmmdd_HHMM>-<series>week_plot.pdf`` in the working directory,
        stamped with the last index entry.
    """
    f = plt.figure(figsize=(14,9))
    ax = f.add_subplot(111, projection='polar')

    r = frame[series]
    stamps = frame.index

    # Fractional hour of the day, e.g. 13:30 -> 13.5
    hour_of_day = np.asarray(stamps.hour) + np.asarray(stamps.minute) / 60.0

    if day:
        # One revolution per day: scale hours to radians
        theta = hour_of_day * 2 * np.pi / 24

        # One label per hour, zero padded ('0000', '0100', ..., '2300')
        time_label = ['{:02d}00'.format(h) for h in range(24)]

        colour = stamps.weekday

    else:
        # One revolution per week. `.weekday` is Monday=0, matching the labels
        # below (strftime('%w') is Sunday=0 and would shift every day by one)
        hour_of_week = np.asarray(stamps.weekday) * 24 + hour_of_day
        theta = hour_of_week * 2 * np.pi / (7 * 24)

        # Two ticks per day; only the tick at the start of each day is labelled
        time_label = ['Monday', '', 'Tuesday', '', 'Wednesday', '', 'Thursday', '', 'Friday', '', 'Saturday', '', 'Sunday', '']

        # Colour by age of the measurement, using the frame passed in rather
        # than a notebook-level global
        colour = (stamps - stamps[0]).days

    # Set the circumference ticks, one per label
    ax.set_xticks(np.linspace(0, 2*np.pi, len(time_label), endpoint=False))

    ax.scatter(theta, r, c=colour, s=30, alpha=0.1, cmap='viridis')

    # Make the labels go clockwise
    ax.set_theta_direction(-1)

    # Place zero at the top
    ax.set_theta_offset(np.pi/2)

    # Radial range: zero to 10% above the largest value
    ax.set_ylim(0, frame[series].max()*1.1)

    ax.set_xticklabels(time_label)

    if savefig:
        # File name is stamped with the last index entry
        outputname = frame.index[-1].strftime('%Y%m%d_%H%M-')+series+'week_plot' # Generate the file name
        f.savefig(outputname+'.pdf', bbox_inches='tight') # Save a .pdf


In [None]:
####################################################################################################
####################################################################################################
# Code starts here
####################################################################################################
####################################################################################################

# Import the .csv containing the speedtest results
file = 'speedtestresults.csv' # File to import (a .csv created by speedtestlog.py)
dfraw = pd.read_csv(file) # Import the data into a dataframe; kept untouched below

# Processing parameters
location = ['Australia/Melbourne'] # Set location (hence timezone) to which the timestamp will be converted
fmt = '%Y-%m-%d %H:%M:%S %Z%z' # Set the time format
to_take = ['ping', 'upload', 'download'] # Data fields to keep

# Process the data
# Start from the raw frame (previously `df` was used before being defined, which
# fails on a fresh kernel); copy so the column assignment below cannot touch dfraw
df = dfraw.dropna(subset=['timestamp']).copy()
local_tz = timezone(*location) # Look the timezone up once rather than once per row
df['timestamp'] = df['timestamp'].apply(lambda x: dateutil.parser.parse(x).astimezone(local_tz)) # Interpret ISO timestamp and convert to local time
df = df.set_index('timestamp') # Set dataframe index to be the timestamp
df = df[to_take].dropna().astype(float) # Keep only the wanted data, drop NaN results, convert to floats
df['upload'] = df['upload'] / 1e6 # Convert from bits/s to Mbps
df['download'] = df['download'] / 1e6 # Convert from bits/s to Mbps

# First and last time stamps, passed to the plot functions below
first, last = df.index[0], df.index[-1]

Plot density maps of speed versus time of day

In [None]:
# Log-scaled density maps, saved to .pdf; each series uses its own colour-floor factor
denplot(df, 'download', first, last, Logplot=True, logscale=50, savefig=True)
denplot(df, 'upload', first, last, Logplot=True, logscale=25, savefig=True)

Plot download and upload speeds versus time

In [None]:
# Average the measurements over 12-hour bins, then plot both speeds and save the .pdf
dftp = df.resample('12H').mean()
downupplot(dftp, start=first, end=last, savefig=True)

In [None]:
# Weekly polar plots of each speed (hourplot saves a .pdf by default)
hourplot(df, series='download')
hourplot(df, series='upload')