# Progress Report Plots

## Libraries

In [1]:
import os
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from folium import plugins
import branca
from pprint import pprint

%matplotlib inline




## Preprocessing

In [2]:
dataset_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk("../results/") for f in filenames if os.path.splitext(f)[1] == '.csv']
website_files, network_files = [], []
for dataset_file in dataset_files:
    if "websites" in dataset_file:
        website_files.append(dataset_file)
    elif "network" in dataset_file:
        network_files.append(dataset_file)

website_df = pd.concat([pd.read_csv(f) for f in website_files], ignore_index=True)
network_df = pd.concat([pd.read_csv(f) for f in network_files], ignore_index=True)

In [3]:
website_df.isnull().sum()

destination               11
packet_transmit           11
packet_receive            11
packet_loss_count         11
packet_loss_rate          11
rtt_min                   11
rtt_avg                   11
rtt_max                   11
rtt_mdev                  11
packet_duplicate_count    11
packet_duplicate_rate     11
jitter                     0
loss_rate                 11
loss_count                11
location                   0
latitude                   0
longitude                  0
wifi_name                  0
dtype: int64

Some points from Roth Cafe and Frey Hall returned NaN values, so we will drop this from the dataset

In [4]:
website_df.dropna(axis=0, how="any", inplace=True)

In [5]:
website_df.isnull().sum()

destination               0
packet_transmit           0
packet_receive            0
packet_loss_count         0
packet_loss_rate          0
rtt_min                   0
rtt_avg                   0
rtt_max                   0
rtt_mdev                  0
packet_duplicate_count    0
packet_duplicate_rate     0
jitter                    0
loss_rate                 0
loss_count                0
location                  0
latitude                  0
longitude                 0
wifi_name                 0
dtype: int64

In [6]:
print(f"Length of website_df: {len(website_df)}")

Length of website_df: 1284


In [7]:
website_df.head()

Unnamed: 0,destination,packet_transmit,packet_receive,packet_loss_count,packet_loss_rate,rtt_min,rtt_avg,rtt_max,rtt_mdev,packet_duplicate_count,packet_duplicate_rate,jitter,loss_rate,loss_count,location,latitude,longitude,wifi_name
0,google.com,40.0,40.0,0.0,0.0,15.352,23.674,54.95,6.034,0.0,0.0,4.584744,0.0,0.0,CentralReadingRoom,40.9153,-73.122491,eduroam
1,youtube.com,40.0,40.0,0.0,0.0,13.072,35.425,301.164,48.38,0.0,0.0,26.409359,0.0,0.0,CentralReadingRoom,40.9153,-73.122491,eduroam
2,tmall.com,40.0,39.0,1.0,2.5,253.354,326.708,489.106,51.194,0.0,0.0,40.591,2.5,1.0,CentralReadingRoom,40.9153,-73.122491,eduroam
3,facebook.com,40.0,40.0,0.0,0.0,17.666,34.434,233.888,42.317,0.0,0.0,13.417821,0.0,0.0,CentralReadingRoom,40.9153,-73.122491,eduroam
4,qq.com,40.0,32.0,8.0,20.0,275.351,338.606,462.851,52.197,0.0,0.0,48.369968,20.0,8.0,CentralReadingRoom,40.9153,-73.122491,eduroam


In [8]:
avg_website_df = website_df.groupby(['destination','wifi_name']).mean().sort_values(['wifi_name', 'destination'])
avg_website_df = avg_website_df.drop(columns=['packet_transmit', 'packet_receive', 'packet_loss_count', 'packet_loss_rate', 'rtt_min', 'rtt_max', 'rtt_mdev', 'packet_duplicate_count', 'packet_duplicate_rate', 'loss_rate', 'latitude', 'longitude'])
pd.set_option('display.max_rows', None)
print(avg_website_df)

                                               rtt_avg      jitter  loss_count
destination               wifi_name                                           
360.cn                    WolfieNet-Secure  345.860778   67.884823    1.444444
alipay.com                WolfieNet-Secure  337.003235   58.379850    1.823529
amazon.com                WolfieNet-Secure   36.057444   22.246946    0.055556
baidu.com                 WolfieNet-Secure  327.847526   78.508535    1.315789
bing.com                  WolfieNet-Secure   25.583667   17.088816    0.055556
blackboard.stonybrook.edu WolfieNet-Secure   24.480278   16.343138    0.000000
chegg.com                 WolfieNet-Secure   61.227947   39.701930    0.526316
classroom.google.com      WolfieNet-Secure   29.802833   17.961973    0.111111
coursehero.com            WolfieNet-Secure   35.226000   28.920498    0.631579
discord.com               WolfieNet-Secure   23.947053   15.518180    0.052632
facebook.com              WolfieNet-Secure   61.2873

In [9]:
print(avg_website_df.groupby('wifi_name').mean())

                     rtt_avg     jitter  loss_count
wifi_name                                          
WolfieNet-Secure  146.122947  44.923797    0.677163
eduroam           131.397341  35.022604    0.734921


In [10]:
network_df.head()

Unnamed: 0,signal_strength,ssid,download,upload,location,latitude,longitude,wifi_name
0,-60,eduroam,73878650.0,79902890.0,CentralReadingRoom,40.9153,-73.122491,eduroam
1,-59,WolfieNet-Secure,51009040.0,61287280.0,CentralReadingRoom,40.9153,-73.122491,WolfieNet-Secure
2,-53,eduroam,214483800.0,220830300.0,Engineering145GBM,40.91298,-73.1244,eduroam
3,-50,WolfieNet-Secure,180647300.0,203839700.0,Engineering145GBM,40.91298,-73.1244,WolfieNet-Secure
4,-48,eduroam,248331200.0,173483300.0,EngineeringLounge,40.91309,-73.12455,eduroam


In [11]:
network_df.isnull().sum()

signal_strength    0
ssid               0
download           0
upload             0
location           0
latitude           0
longitude          0
wifi_name          0
dtype: int64

In [12]:
print(f"Length of network_df: {len(network_df)}")

Length of network_df: 37


## Analysis of WolfieNet-Secure vs. eduroam (Tables)

### Helper Function for Generating Maps

In [13]:
def map_points(df, lat_col='latitude', lon_col='longitude', zoom_start=11, \
                plot_points=False, pt_radius=15, \
                draw_heatmap=False, heat_map_weights_col=None, \
                heat_map_weights_normalize=True, heat_map_radius=15, plot_title="Title"):
    """Creates a map given a dataframe of points. Can also produce a heatmap overlay

    Stolen from: https://alysivji.github.io/getting-started-with-folium.html

    Arg:
        df: dataframe containing points to maps
        lat_col: Column containing latitude (string)
        lon_col: Column containing longitude (string)
        zoom_start: Integer representing the initial zoom of the map
        plot_points: Add points to map (boolean)
        pt_radius: Size of each point
        draw_heatmap: Add heatmap to map (boolean)
        heat_map_weights_col: Column containing heatmap weights
        heat_map_weights_normalize: Normalize heatmap weights (boolean)
        heat_map_radius: Size of heatmap point

    Returns:
        folium map object
    """

    ## center map in the middle of points center in
    middle_lat = df[lat_col].median()
    middle_lon = df[lon_col].median()

    title_html = '''
             <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(plot_title) 

    curr_map = folium.Map(location=[middle_lat, middle_lon],
                          zoom_start=zoom_start)

    curr_map.get_root().html.add_child(folium.Element(title_html))

    if heat_map_weights_col in ("download", "upload"):
        df[heat_map_weights_col] /= 1000000
    
    # Colormap and legend
    min_val = df[heat_map_weights_col].min()
    max_val = df[heat_map_weights_col].max()
    # print(min_val, max_val)
    colormap = None
    if heat_map_weights_col == "signal_strength":
        c = branca.colormap.linear.OrRd_09.colors
        c.reverse()
        colormap = branca.colormap.LinearColormap(colors=c).scale(-80, -42)
    else:
        colormap = branca.colormap.linear.OrRd_09.scale(min_val, max_val)
    # colormap = colormap.to_step(index=np.arange(min_val, max_val, (max_val - min_val) / len(df[heat_map_weights_col])))
    if heat_map_weights_col in ("download", "upload"):
        colormap.caption = f"Value of {heat_map_weights_col.capitalize()} (mbps)"
    else:
        colormap.caption = f"Value of {heat_map_weights_col.capitalize()}"

    # # Format ticks
    # my_js = """
    # [![enter image description here][1]][1]var div = document.getElementById('legend');
    # var ticks = document.getElementsByClassName('tick')
    # for(var i = 0; i < ticks.length; i++){
    # var values = ticks[i].textContent.replace(',','')
    # val = parseFloat(values).toExponential(2).toString()
    # if(parseFloat(ticks[i].textContent) == 0) val = 0.
    # div.innerHTML = div.innerHTML.replace(ticks[i].textContent,val);
    # }
    # """
    # e = branca.element.Element(my_js)
    # html = colormap.get_root()
    # html.script.get_root().render()
    # html.script._children[e.get_name()] = e

    colormap.add_to(curr_map)

    # Prepare gradient dictionary according to the example like {0.4: ‘blue’, 0.65: ‘lime’, 1: ‘red’}
    gradient_dict = {}

    # Get the index values and colors from the just created branca-colormap
    # NOTE: colors are RGBA-vectors, like "(0.9372549019607843, 0.9529411764705882, 1.0, 1.0)":
    for ind_val, c in zip(colormap.index, colormap.colors):
        # Create gradient dictionary for heatmap on the fly
        r, g, b, a = c
        gradient_dict[ind_val] = f"rgba({r},{g},{b},{a})"

    # pprint(gradient_dict)

    # add points to map
    if plot_points:
        for _, row in df.iterrows():
            folium.CircleMarker([row[lat_col], row[lon_col]],
                                radius=pt_radius,
                                popup=f"Location: {row['location']}\n{heat_map_weights_col.capitalize()} Value: {row[heat_map_weights_col]}",
                                fill=True,
                                fill_color=colormap(row[heat_map_weights_col]),
                               ).add_to(curr_map)

    # add heatmap
    if draw_heatmap:
        # convert to (n, 2) or (n, 3) matrix format
        if heat_map_weights_col is None:
            cols_to_pull = [lat_col, lon_col]
        else:
            # if we have to normalize
            if heat_map_weights_normalize:
                df[heat_map_weights_col] = \
                    df[heat_map_weights_col] / df[heat_map_weights_col].sum()

            cols_to_pull = [lat_col, lon_col, heat_map_weights_col]

        stations = df[cols_to_pull].to_numpy()
        curr_map.add_children(plugins.HeatMap(stations, radius=heat_map_radius, gradient=gradient_dict))

    return curr_map

### Signal Strength
- Measured using `iwconfig`
- Lower is better

In [14]:
wolfie_ss = network_df[network_df.wifi_name == "WolfieNet-Secure"][["location", "signal_strength"]].reset_index(drop=True)
eduroam_ss = network_df[network_df.wifi_name == "eduroam"][["location", "signal_strength"]].reset_index(drop=True)
ss_df = pd.DataFrame({
    "Location": wolfie_ss.location,
    "WolfieNet-Secure": wolfie_ss.signal_strength,
    "eduroam": eduroam_ss.signal_strength
})
ss_df["diff"] = ss_df["WolfieNet-Secure"] - ss_df["eduroam"]
ss_df

Unnamed: 0,Location,WolfieNet-Secure,eduroam,diff
0,CentralReadingRoom,-59,-60.0,1.0
1,Engineering145GBM,-50,-53.0,3.0
2,EngineeringLounge,-42,-48.0,6.0
3,FreyHall-Lobby,-64,-64.0,0.0
4,Humanities2ndFloorLounge,-49,-50.0,1.0
5,HumanitiesGlassArea,-51,-52.0,1.0
6,Jasmine,-63,-67.0,4.0
7,NCS106,-48,-49.0,1.0
8,NCS120,-46,-49.0,3.0
9,NorthReadingRoom-F1,-58,-63.0,5.0


In [27]:
ss_df['eduroam'].mean()

-55.666666666666664

In [28]:
ss_df['WolfieNet-Secure'].mean()

-53.78947368421053

In [15]:
map_points(
    network_df[network_df.wifi_name == "WolfieNet-Secure"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='signal_strength',
    heat_map_radius=0,
    plot_title="Heatmap of Signal Strength on WolfieNet-Secure (Lower is Better)"
)

  curr_map.add_children(plugins.HeatMap(stations, radius=heat_map_radius, gradient=gradient_dict))


In [16]:
map_points(
    network_df[network_df.wifi_name == "eduroam"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='signal_strength',
    heat_map_radius=0,
    plot_title="Heatmap of Signal Strength on eduroam (Lower is better)"
)

  curr_map.add_children(plugins.HeatMap(stations, radius=heat_map_radius, gradient=gradient_dict))


### Bandwidth
- Measured using `speedtest-cli`
- Results are in bits, to convert to mbps / 1,000,000
- Higher is better

#### Downloads

In [17]:
wolfie_download = network_df[network_df.wifi_name == "WolfieNet-Secure"][["location", "download"]].reset_index(drop=True)
eduroam_download = network_df[network_df.wifi_name == "eduroam"][["location", "download"]].reset_index(drop=True)
download_df = pd.DataFrame({
    "Location": wolfie_download.location,
    "WolfieNet-Secure": wolfie_download.download / 1000000,
    "eduroam": eduroam_download.download / 1000000
})
download_df["diff"] = download_df["WolfieNet-Secure"] - download_df["eduroam"]
download_df

Unnamed: 0,Location,WolfieNet-Secure,eduroam,diff
0,CentralReadingRoom,51.009037,73.878649,-22.869612
1,Engineering145GBM,180.647292,214.483755,-33.836463
2,EngineeringLounge,203.401241,248.331222,-44.929981
3,FreyHall-Lobby,8.345845,79.409176,-71.063331
4,Humanities2ndFloorLounge,197.427995,213.533758,-16.105763
5,HumanitiesGlassArea,55.195317,63.399907,-8.20459
6,Jasmine,136.464276,73.461958,63.002318
7,NCS106,246.577984,210.348198,36.229785
8,NCS120,202.773587,208.59813,-5.824543
9,NorthReadingRoom-F1,5.898175,45.340249,-39.442074


In [25]:
download_df['eduroam'].mean()

125.57746196394325

In [26]:
download_df['WolfieNet-Secure'].mean()

128.3128506783433

In [19]:
map_points(
    network_df[network_df.wifi_name == "WolfieNet-Secure"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='download',
    heat_map_radius=0,
    plot_title="Heatmap of Download Speed on WolfieNet-Secure (Higher is Better)"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[heat_map_weights_col] /= 1000000
  curr_map.add_children(plugins.HeatMap(stations, radius=heat_map_radius, gradient=gradient_dict))


In [20]:
map_points(
    network_df[network_df.wifi_name == "eduroam"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='download',
    heat_map_radius=0,
    plot_title="Heatmap of Download Speed on eduroam (Higher is Better)"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[heat_map_weights_col] /= 1000000
  curr_map.add_children(plugins.HeatMap(stations, radius=heat_map_radius, gradient=gradient_dict))


#### Uploads

In [21]:
wolfie_upload = network_df[network_df.wifi_name == "WolfieNet-Secure"][["location", "upload"]].reset_index(drop=True)
eduroam_upload = network_df[network_df.wifi_name == "eduroam"][["location", "upload"]].reset_index(drop=True)
upload_df = pd.DataFrame({
    "Location": wolfie_upload.location,
    "WolfieNet-Secure": wolfie_upload.upload / 1000000,
    "eduroam": eduroam_upload.upload / 1000000
})
upload_df["diff"] = upload_df["WolfieNet-Secure"] - upload_df["eduroam"]
upload_df

Unnamed: 0,Location,WolfieNet-Secure,eduroam,diff
0,CentralReadingRoom,61.287276,79.902893,-18.615618
1,Engineering145GBM,203.839668,220.830256,-16.990588
2,EngineeringLounge,168.60273,173.483326,-4.880596
3,FreyHall-Lobby,2.898055,82.817935,-79.91988
4,Humanities2ndFloorLounge,174.538138,130.960958,43.57718
5,HumanitiesGlassArea,71.617969,87.818501,-16.200532
6,Jasmine,170.69424,148.567924,22.126316
7,NCS106,235.306,202.984366,32.321634
8,NCS120,169.889743,216.42629,-46.536547
9,NorthReadingRoom-F1,14.695338,54.894079,-40.198741


In [29]:
upload_df['eduroam'].mean()

125.28344766392723

In [30]:
upload_df['WolfieNet-Secure'].mean()

114.59135561280705

In [22]:
map_points(
    network_df[network_df.wifi_name == "WolfieNet-Secure"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='upload',
    heat_map_radius=0,
    plot_title="Heatmap of Upload Speed on WolfieNet-Secure (Higher is Better)"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[heat_map_weights_col] /= 1000000
  curr_map.add_children(plugins.HeatMap(stations, radius=heat_map_radius, gradient=gradient_dict))


In [23]:
map_points(
    network_df[network_df.wifi_name == "eduroam"], 
    lat_col="latitude",
    lon_col="longitude", 
    zoom_start=16,
    plot_points=True, 
    pt_radius=20, 
    draw_heatmap=True, 
    heat_map_weights_normalize=False,
    heat_map_weights_col='upload',
    heat_map_radius=0,
    plot_title="Heatmap of Upload Speed on eduroam (Higher is Better)"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[heat_map_weights_col] /= 1000000
  curr_map.add_children(plugins.HeatMap(stations, radius=heat_map_radius, gradient=gradient_dict))
