# Progress Report Plots

## Libraries

In [1]:
import os
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from folium import plugins
import branca
from pprint import pprint

%matplotlib inline


## Preprocessing

In [2]:
dataset_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk("../results/") for f in filenames if os.path.splitext(f)[1] == '.csv']
website_files, network_files = [], []
for dataset_file in dataset_files:
    if "websites" in dataset_file:
        website_files.append(dataset_file)
    elif "network" in dataset_file:
        network_files.append(dataset_file)

website_df = pd.concat([pd.read_csv(f) for f in website_files], ignore_index=True)
network_df = pd.concat([pd.read_csv(f) for f in network_files], ignore_index=True)

In [3]:
website_df.head()

Unnamed: 0,destination,packet_transmit,packet_receive,packet_loss_count,packet_loss_rate,rtt_min,rtt_avg,rtt_max,rtt_mdev,packet_duplicate_count,packet_duplicate_rate,jitter,loss_rate,loss_count,location,latitude,longitude,wifi_name
0,google.com,40.0,40.0,0.0,0.0,9.547,12.937,52.278,7.213,0.0,0.0,3.751795,0.0,0.0,SchomburgA,40.91339,-73.13221,eduroam
1,youtube.com,40.0,39.0,1.0,2.5,10.032,11.614,13.654,0.744,0.0,0.0,0.978947,2.5,1.0,SchomburgA,40.91339,-73.13221,eduroam
2,tmall.com,40.0,40.0,0.0,0.0,247.545,310.718,370.803,34.253,0.0,0.0,38.846154,0.0,0.0,SchomburgA,40.91339,-73.13221,eduroam
3,facebook.com,40.0,39.0,1.0,2.5,11.671,13.271,17.899,1.166,0.0,0.0,1.123684,2.5,1.0,SchomburgA,40.91339,-73.13221,eduroam
4,qq.com,40.0,40.0,0.0,0.0,222.477,298.483,477.298,66.467,0.0,0.0,66.641026,0.0,0.0,SchomburgA,40.91339,-73.13221,eduroam


In [4]:
website_df.isnull().sum()

destination               11
packet_transmit           11
packet_receive            11
packet_loss_count         11
packet_loss_rate          11
rtt_min                   11
rtt_avg                   11
rtt_max                   11
rtt_mdev                  11
packet_duplicate_count    11
packet_duplicate_rate     11
jitter                     0
loss_rate                 11
loss_count                11
location                   0
latitude                   0
longitude                  0
wifi_name                  0
dtype: int64

Some points from Roth Cafe and Frey Hall returned NaN values, so we will drop this from the dataset

In [5]:
website_df.dropna(axis=0, how="any", inplace=True)

In [6]:
website_df.isnull().sum()

destination               0
packet_transmit           0
packet_receive            0
packet_loss_count         0
packet_loss_rate          0
rtt_min                   0
rtt_avg                   0
rtt_max                   0
rtt_mdev                  0
packet_duplicate_count    0
packet_duplicate_rate     0
jitter                    0
loss_rate                 0
loss_count                0
location                  0
latitude                  0
longitude                 0
wifi_name                 0
dtype: int64

In [7]:
print(f"Length of website_df: {len(website_df)}")

Length of website_df: 1249


In [8]:
network_df.head()

Unnamed: 0,ssid,signal_strength,download,upload,location,latitude,longitude,wifi_name
0,eduroam,-44,222595800.0,219285000.0,SchomburgA,40.91339,-73.13221,eduroam
1,WolfieNet-Secure,-48,254404400.0,141832700.0,SchomburgA,40.913137,-73.132169,WolfieNet-Secure
2,eduroam,-50,213533800.0,130961000.0,Humanities2ndFloorLounge,40.91411,-73.12103,eduroam
3,WolfieNet-Secure,-49,197428000.0,174538100.0,Humanities2ndFloorLounge,40.91411,-73.12103,WolfieNet-Secure
4,eduroam,-62,99560180.0,135713900.0,StudentUnion-F2,40.917092,-73.122437,eduroam


In [9]:
network_df.isnull().sum()

ssid               0
signal_strength    0
download           0
upload             0
location           0
latitude           0
longitude          0
wifi_name          0
dtype: int64

In [10]:
print(f"Length of network_df: {len(network_df)}")

Length of network_df: 36


## Analysis of WolfieNet-Secure vs. eduroam (Tables)

### Helper Function for Generating Maps

In [11]:
def map_points(df, lat_col='latitude', lon_col='longitude', zoom_start=11, \
                plot_points=False, pt_radius=15, \
                draw_heatmap=False, heat_map_weights_col=None, \
                heat_map_weights_normalize=True, heat_map_radius=15, plot_title="Title"):
    """Creates a map given a dataframe of points. Can also produce a heatmap overlay

    Stolen from: https://alysivji.github.io/getting-started-with-folium.html

    Arg:
        df: dataframe containing points to maps
        lat_col: Column containing latitude (string)
        lon_col: Column containing longitude (string)
        zoom_start: Integer representing the initial zoom of the map
        plot_points: Add points to map (boolean)
        pt_radius: Size of each point
        draw_heatmap: Add heatmap to map (boolean)
        heat_map_weights_col: Column containing heatmap weights
        heat_map_weights_normalize: Normalize heatmap weights (boolean)
        heat_map_radius: Size of heatmap point

    Returns:
        folium map object
    """

    ## center map in the middle of points center in
    middle_lat = df[lat_col].median()
    middle_lon = df[lon_col].median()

    title_html = '''
             <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(plot_title) 

    curr_map = folium.Map(location=[middle_lat, middle_lon],
                          zoom_start=zoom_start)

    curr_map.get_root().html.add_child(folium.Element(title_html))

    if heat_map_weights_col in ("download", "upload"):
        df[heat_map_weights_col] /= 1000000
    
    # Colormap and legend
    min_val = df[heat_map_weights_col].min()
    max_val = df[heat_map_weights_col].max()
    # print(min_val, max_val)
    colormap = None
    if heat_map_weights_col == "signal_strength":
        c = branca.colormap.linear.OrRd_09.colors
        c.reverse()
        colormap = branca.colormap.LinearColormap(colors=c).scale(-80, -42)
    else:
        colormap = branca.colormap.linear.OrRd_09.scale(min_val, max_val)
    # colormap = colormap.to_step(index=np.arange(min_val, max_val, (max_val - min_val) / len(df[heat_map_weights_col])))
    if heat_map_weights_col in ("download", "upload"):
        colormap.caption = f"Value of {heat_map_weights_col.capitalize()} (mbps)"
    else:
        colormap.caption = f"Value of {heat_map_weights_col.capitalize()}"

    # # Format ticks
    # my_js = """
    # [![enter image description here][1]][1]var div = document.getElementById('legend');
    # var ticks = document.getElementsByClassName('tick')
    # for(var i = 0; i < ticks.length; i++){
    # var values = ticks[i].textContent.replace(',','')
    # val = parseFloat(values).toExponential(2).toString()
    # if(parseFloat(ticks[i].textContent) == 0) val = 0.
    # div.innerHTML = div.innerHTML.replace(ticks[i].textContent,val);
    # }
    # """
    # e = branca.element.Element(my_js)
    # html = colormap.get_root()
    # html.script.get_root().render()
    # html.script._children[e.get_name()] = e

    colormap.add_to(curr_map)

    # Prepare gradient dictionary according to the example like {0.4: ‘blue’, 0.65: ‘lime’, 1: ‘red’}
    gradient_dict = {}

    # Get the index values and colors from the just created branca-colormap
    # NOTE: colors are RGBA-vectors, like "(0.9372549019607843, 0.9529411764705882, 1.0, 1.0)":
    for ind_val, c in zip(colormap.index, colormap.colors):
        # Create gradient dictionary for heatmap on the fly
        r, g, b, a = c
        gradient_dict[ind_val] = f"rgba({r},{g},{b},{a})"

    # pprint(gradient_dict)

    # add points to map
    if plot_points:
        for _, row in df.iterrows():
            folium.CircleMarker([row[lat_col], row[lon_col]],
                                radius=pt_radius,
                                popup=f"Location: {row['location']}\n{heat_map_weights_col.capitalize()} Value: {row[heat_map_weights_col]}",
                                fill=True,
                                fill_color=colormap(row[heat_map_weights_col]),
                               ).add_to(curr_map)

    # add heatmap
    if draw_heatmap:
        # convert to (n, 2) or (n, 3) matrix format
        if heat_map_weights_col is None:
            cols_to_pull = [lat_col, lon_col]
        else:
            # if we have to normalize
            if heat_map_weights_normalize:
                df[heat_map_weights_col] = \
                    df[heat_map_weights_col] / df[heat_map_weights_col].sum()

            cols_to_pull = [lat_col, lon_col, heat_map_weights_col]

        stations = df[cols_to_pull].to_numpy()
        curr_map.add_children(plugins.HeatMap(stations, radius=heat_map_radius, gradient=gradient_dict))

    return curr_map

### Signal Strength
- Measured using `iwconfig`
- Lower is better

In [12]:
wolfie_ss = network_df[network_df.wifi_name == "WolfieNet-Secure"][["location", "signal_strength"]].reset_index(drop=True)
eduroam_ss = network_df[network_df.wifi_name == "eduroam"][["location", "signal_strength"]].reset_index(drop=True)
ss_df = pd.DataFrame({
    "Location": wolfie_ss.location,
    "WolfieNet-Secure": wolfie_ss.signal_strength,
    "eduroam": eduroam_ss.signal_strength
})
ss_df["diff"] = ss_df["WolfieNet-Secure"] - ss_df["eduroam"]
ss_df

Unnamed: 0,Location,WolfieNet-Secure,eduroam,diff
0,SchomburgA,-48,-44,-4
1,Humanities2ndFloorLounge,-49,-50,1
2,StudentUnion-F2,-60,-62,2
3,EngineeringLounge,-42,-48,6
4,Engineering145GBM,-50,-53,3
5,WangLounge,-45,-45,0
6,FreyHall-Lobby,-64,-64,0
7,NorthReadingRoom-F2,-52,-60,8
8,SAC-Lobby,-56,-55,-1
9,NCS106,-48,-49,1


In [13]:
map_points(
    network_df[network_df.wifi_name == "WolfieNet-Secure"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='signal_strength',
    heat_map_radius=0,
    plot_title="Heatmap of Signal Strength on WolfieNet-Secure (Lower is Better)"
)



In [21]:
map_points(
    network_df[network_df.wifi_name == "eduroam"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='signal_strength',
    heat_map_radius=0,
    plot_title="Heatmap of Signal Strength on eduroam (Lower is better)"
)



### Bandwidth
- Measured using `speedtest-cli`
- Results are in bits, to convert to mbps / 1,000,000
- Higher is better

#### Downloads

In [15]:
wolfie_download = network_df[network_df.wifi_name == "WolfieNet-Secure"][["location", "download"]].reset_index(drop=True)
eduroam_download = network_df[network_df.wifi_name == "eduroam"][["location", "download"]].reset_index(drop=True)
download_df = pd.DataFrame({
    "Location": wolfie_download.location,
    "WolfieNet-Secure": wolfie_download.download / 1000000,
    "eduroam": eduroam_download.download / 1000000
})
download_df["diff"] = download_df["WolfieNet-Secure"] - download_df["eduroam"]
download_df

Unnamed: 0,Location,WolfieNet-Secure,eduroam,diff
0,SchomburgA,254.404421,222.595768,31.808653
1,Humanities2ndFloorLounge,197.427995,213.533758,-16.105763
2,StudentUnion-F2,113.702474,99.560185,14.142289
3,EngineeringLounge,203.401241,248.331222,-44.929981
4,Engineering145GBM,180.647292,214.483755,-33.836463
5,WangLounge,191.7224,198.953203,-7.230803
6,FreyHall-Lobby,8.345845,79.409176,-71.063331
7,NorthReadingRoom-F2,47.258784,41.563387,5.695397
8,SAC-Lobby,10.643223,67.05141,-56.408187
9,NCS106,246.577984,210.348198,36.229785


In [16]:
map_points(
    network_df[network_df.wifi_name == "WolfieNet-Secure"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='download',
    heat_map_radius=0,
    plot_title="Heatmap of Download Speed on WolfieNet-Secure (Higher is Better)"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [17]:
map_points(
    network_df[network_df.wifi_name == "eduroam"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='download',
    heat_map_radius=0,
    plot_title="Heatmap of Download Speed on eduroam (Higher is Better)"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


#### Uploads

In [18]:
wolfie_upload = network_df[network_df.wifi_name == "WolfieNet-Secure"][["location", "upload"]].reset_index(drop=True)
eduroam_upload = network_df[network_df.wifi_name == "eduroam"][["location", "upload"]].reset_index(drop=True)
upload_df = pd.DataFrame({
    "Location": wolfie_upload.location,
    "WolfieNet-Secure": wolfie_upload.upload / 1000000,
    "eduroam": eduroam_upload.upload / 1000000
})
upload_df["diff"] = upload_df["WolfieNet-Secure"] - upload_df["eduroam"]
upload_df

Unnamed: 0,Location,WolfieNet-Secure,eduroam,diff
0,SchomburgA,141.83272,219.284979,-77.45226
1,Humanities2ndFloorLounge,174.538138,130.960958,43.57718
2,StudentUnion-F2,136.557833,135.713866,0.843968
3,EngineeringLounge,168.60273,173.483326,-4.880596
4,Engineering145GBM,203.839668,220.830256,-16.990588
5,WangLounge,217.262406,202.395398,14.867008
6,FreyHall-Lobby,2.898055,82.817935,-79.91988
7,NorthReadingRoom-F2,61.632813,84.759453,-23.12664
8,SAC-Lobby,9.290205,102.163304,-92.873099
9,NCS106,235.306,202.984366,32.321634


In [19]:
map_points(
    network_df[network_df.wifi_name == "WolfieNet-Secure"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='upload',
    heat_map_radius=0,
    plot_title="Heatmap of Upload Speed on WolfieNet-Secure (Higher is Better)"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [20]:
map_points(
    network_df[network_df.wifi_name == "eduroam"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='upload',
    heat_map_radius=0,
    plot_title="Heatmap of Upload Speed on eduroam (Higher is Better)"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
