# Progress Report Plots

## Libraries

In [2]:
import os
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
import folium
from folium import plugins
import branca
from pprint import pprint

%matplotlib inline


## Preprocessing

In [3]:
# Recursively collect every CSV under ../results/. os.walk yields directory
# entries in arbitrary filesystem order, so the paths are sorted to make the
# row order of the concatenated frames identical on every run and machine.
dataset_files = sorted(
    os.path.join(dirpath, filename)
    for dirpath, _dirnames, filenames in os.walk("../results/")
    for filename in filenames
    if os.path.splitext(filename)[1] == '.csv'
)

# Route each file by the marker found in its path. "websites" is tested first,
# so a path containing both markers is treated as a website file.
website_files = [path for path in dataset_files if "websites" in path]
network_files = [
    path for path in dataset_files
    if "websites" not in path and "network" in path
]

# Fail early with a readable message; pd.concat on an empty list would
# otherwise raise an opaque "No objects to concatenate" error.
if not website_files or not network_files:
    raise FileNotFoundError(
        "Expected both 'websites' and 'network' CSV files under ../results/ "
        f"(found {len(website_files)} website and {len(network_files)} network files)"
    )

# Stack the per-file tables into one frame per measurement type, renumbering
# the rows from 0.
website_df = pd.concat([pd.read_csv(path) for path in website_files], ignore_index=True)
network_df = pd.concat([pd.read_csv(path) for path in network_files], ignore_index=True)

In [4]:
# Preview the first five rows of the website ping measurements.
website_df.head(n=5)

Unnamed: 0,destination,packet_transmit,packet_receive,packet_loss_count,packet_loss_rate,rtt_min,rtt_avg,rtt_max,rtt_mdev,packet_duplicate_count,packet_duplicate_rate,jitter,loss_rate,loss_count,location,latitude,longitude,wifi_name
0,google.com,40.0,40.0,0.0,0.0,9.547,12.937,52.278,7.213,0.0,0.0,3.751795,0.0,0.0,SchomburgA,40.91339,-73.13221,eduroam
1,youtube.com,40.0,39.0,1.0,2.5,10.032,11.614,13.654,0.744,0.0,0.0,0.978947,2.5,1.0,SchomburgA,40.91339,-73.13221,eduroam
2,tmall.com,40.0,40.0,0.0,0.0,247.545,310.718,370.803,34.253,0.0,0.0,38.846154,0.0,0.0,SchomburgA,40.91339,-73.13221,eduroam
3,facebook.com,40.0,39.0,1.0,2.5,11.671,13.271,17.899,1.166,0.0,0.0,1.123684,2.5,1.0,SchomburgA,40.91339,-73.13221,eduroam
4,qq.com,40.0,40.0,0.0,0.0,222.477,298.483,477.298,66.467,0.0,0.0,66.641026,0.0,0.0,SchomburgA,40.91339,-73.13221,eduroam


In [5]:
# Count missing values per column of the website measurements.
website_df.isna().sum()

destination               11
packet_transmit           11
packet_receive            11
packet_loss_count         11
packet_loss_rate          11
rtt_min                   11
rtt_avg                   11
rtt_max                   11
rtt_mdev                  11
packet_duplicate_count    11
packet_duplicate_rate     11
jitter                     0
loss_rate                 11
loss_count                11
location                   0
latitude                   0
longitude                  0
wifi_name                  0
dtype: int64

Some measurement points from Roth Cafe and Frey Hall returned NaN values, so we drop those rows from the dataset.

In [6]:
# Drop every row that has at least one missing value. Reassigning instead of
# using inplace=True keeps the cell a plain expression of its input and avoids
# mutating a frame that earlier cells have already displayed.
website_df = website_df.dropna(axis=0, how="any")

In [7]:
# Re-count missing values per column to confirm none remain.
website_df.isna().sum()

destination               0
packet_transmit           0
packet_receive            0
packet_loss_count         0
packet_loss_rate          0
rtt_min                   0
rtt_avg                   0
rtt_max                   0
rtt_mdev                  0
packet_duplicate_count    0
packet_duplicate_rate     0
jitter                    0
loss_rate                 0
loss_count                0
location                  0
latitude                  0
longitude                 0
wifi_name                 0
dtype: int64

In [8]:
# Report how many website measurement rows are in the frame.
print(f"Length of website_df: {website_df.shape[0]}")

Length of website_df: 1284


In [9]:
# Preview the first five rows of the network measurements.
network_df.head(n=5)

Unnamed: 0,ssid,signal_strength,download,upload,location,latitude,longitude,wifi_name
0,eduroam,-44,222595800.0,219285000.0,SchomburgA,40.91339,-73.13221,eduroam
1,WolfieNet-Secure,-43,190220000.0,120625700.0,SchomburgA,40.91339,-73.13221,WolfieNet-Secure
2,WolfieNet-Secure,-48,254404400.0,141832700.0,SchomburgA,40.913137,-73.132169,WolfieNet-Secure
3,eduroam,-50,213533800.0,130961000.0,Humanities2ndFloorLounge,40.91411,-73.12103,eduroam
4,WolfieNet-Secure,-49,197428000.0,174538100.0,Humanities2ndFloorLounge,40.91411,-73.12103,WolfieNet-Secure


In [10]:
# Count missing values per column of the network measurements.
network_df.isna().sum()

ssid               0
signal_strength    0
download           0
upload             0
location           0
latitude           0
longitude          0
wifi_name          0
dtype: int64

In [11]:
# Report how many network measurement rows are in the frame.
print(f"Length of network_df: {network_df.shape[0]}")

Length of network_df: 37


In [12]:
# Show the network measurements frame as this cell's output for visual inspection.
network_df

Unnamed: 0,ssid,signal_strength,download,upload,location,latitude,longitude,wifi_name
0,eduroam,-44,222595800.0,219285000.0,SchomburgA,40.91339,-73.13221,eduroam
1,WolfieNet-Secure,-43,190220000.0,120625700.0,SchomburgA,40.91339,-73.13221,WolfieNet-Secure
2,WolfieNet-Secure,-48,254404400.0,141832700.0,SchomburgA,40.913137,-73.132169,WolfieNet-Secure
3,eduroam,-50,213533800.0,130961000.0,Humanities2ndFloorLounge,40.91411,-73.12103,eduroam
4,WolfieNet-Secure,-49,197428000.0,174538100.0,Humanities2ndFloorLounge,40.91411,-73.12103,WolfieNet-Secure
5,eduroam,-62,99560180.0,135713900.0,StudentUnion-F2,40.917092,-73.122437,eduroam
6,WolfieNet-Secure,-60,113702500.0,136557800.0,StudentUnion-F2,40.917092,-73.122437,WolfieNet-Secure
7,eduroam,-48,248331200.0,173483300.0,EngineeringLounge,40.91309,-73.12455,eduroam
8,WolfieNet-Secure,-42,203401200.0,168602700.0,EngineeringLounge,40.91309,-73.12455,WolfieNet-Secure
9,eduroam,-53,214483800.0,220830300.0,Engineering145GBM,40.91298,-73.1244,eduroam


In [13]:
# Show the website measurements frame as this cell's output for visual inspection.
website_df

Unnamed: 0,destination,packet_transmit,packet_receive,packet_loss_count,packet_loss_rate,rtt_min,rtt_avg,rtt_max,rtt_mdev,packet_duplicate_count,packet_duplicate_rate,jitter,loss_rate,loss_count,location,latitude,longitude,wifi_name
0,google.com,40.0,40.0,0.0,0.0,9.547,12.937,52.278,7.213,0.0,0.0,3.751795,0.0,0.0,SchomburgA,40.913390,-73.132210,eduroam
1,youtube.com,40.0,39.0,1.0,2.5,10.032,11.614,13.654,0.744,0.0,0.0,0.978947,2.5,1.0,SchomburgA,40.913390,-73.132210,eduroam
2,tmall.com,40.0,40.0,0.0,0.0,247.545,310.718,370.803,34.253,0.0,0.0,38.846154,0.0,0.0,SchomburgA,40.913390,-73.132210,eduroam
3,facebook.com,40.0,39.0,1.0,2.5,11.671,13.271,17.899,1.166,0.0,0.0,1.123684,2.5,1.0,SchomburgA,40.913390,-73.132210,eduroam
4,qq.com,40.0,40.0,0.0,0.0,222.477,298.483,477.298,66.467,0.0,0.0,66.641026,0.0,0.0,SchomburgA,40.913390,-73.132210,eduroam
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1290,discord.com,40.0,40.0,0.0,0.0,6.849,77.831,342.538,92.057,0.0,0.0,48.602256,0.0,0.0,NorthReadingRoom-F1,40.916034,-73.122723,WolfieNet-Secure
1291,quizlet.com,40.0,40.0,0.0,0.0,9.545,76.555,306.264,63.958,0.0,0.0,50.058256,0.0,0.0,NorthReadingRoom-F1,40.916034,-73.122723,WolfieNet-Secure
1292,github.com,40.0,40.0,0.0,0.0,15.892,77.098,244.655,62.112,0.0,0.0,42.872538,0.0,0.0,NorthReadingRoom-F1,40.916034,-73.122723,WolfieNet-Secure
1293,psns.cc.stonybrook.edu,40.0,40.0,0.0,0.0,9.131,76.306,324.948,74.961,0.0,0.0,51.688077,0.0,0.0,NorthReadingRoom-F1,40.916034,-73.122723,WolfieNet-Secure


In [14]:
# Join each network measurement with the website ping rows taken at the same
# place on the same network. The key columns are the ones both frames share;
# naming them explicitly (instead of relying on pandas' implicit
# "all common columns" default) keeps the join stable if either CSV schema
# later gains another shared column.
# how='left' keeps every network row; one without a matching website row is
# retained with NaN in the website columns.
final_df = pd.merge(
    network_df,
    website_df,
    how='left',
    on=["location", "latitude", "longitude", "wifi_name"],
)
final_df

Unnamed: 0,ssid,signal_strength,download,upload,location,latitude,longitude,wifi_name,destination,packet_transmit,...,packet_loss_rate,rtt_min,rtt_avg,rtt_max,rtt_mdev,packet_duplicate_count,packet_duplicate_rate,jitter,loss_rate,loss_count
0,eduroam,-44,2.225958e+08,2.192850e+08,SchomburgA,40.913390,-73.132210,eduroam,google.com,40.0,...,0.0,9.547,12.937,52.278,7.213,0.0,0.0,3.751795,0.0,0.0
1,eduroam,-44,2.225958e+08,2.192850e+08,SchomburgA,40.913390,-73.132210,eduroam,youtube.com,40.0,...,2.5,10.032,11.614,13.654,0.744,0.0,0.0,0.978947,2.5,1.0
2,eduroam,-44,2.225958e+08,2.192850e+08,SchomburgA,40.913390,-73.132210,eduroam,tmall.com,40.0,...,0.0,247.545,310.718,370.803,34.253,0.0,0.0,38.846154,0.0,0.0
3,eduroam,-44,2.225958e+08,2.192850e+08,SchomburgA,40.913390,-73.132210,eduroam,facebook.com,40.0,...,2.5,11.671,13.271,17.899,1.166,0.0,0.0,1.123684,2.5,1.0
4,eduroam,-44,2.225958e+08,2.192850e+08,SchomburgA,40.913390,-73.132210,eduroam,qq.com,40.0,...,0.0,222.477,298.483,477.298,66.467,0.0,0.0,66.641026,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1279,WolfieNet-Secure,-58,5.898175e+06,1.469534e+07,NorthReadingRoom-F1,40.916034,-73.122723,WolfieNet-Secure,discord.com,40.0,...,0.0,6.849,77.831,342.538,92.057,0.0,0.0,48.602256,0.0,0.0
1280,WolfieNet-Secure,-58,5.898175e+06,1.469534e+07,NorthReadingRoom-F1,40.916034,-73.122723,WolfieNet-Secure,quizlet.com,40.0,...,0.0,9.545,76.555,306.264,63.958,0.0,0.0,50.058256,0.0,0.0
1281,WolfieNet-Secure,-58,5.898175e+06,1.469534e+07,NorthReadingRoom-F1,40.916034,-73.122723,WolfieNet-Secure,github.com,40.0,...,0.0,15.892,77.098,244.655,62.112,0.0,0.0,42.872538,0.0,0.0
1282,WolfieNet-Secure,-58,5.898175e+06,1.469534e+07,NorthReadingRoom-F1,40.916034,-73.122723,WolfieNet-Secure,psns.cc.stonybrook.edu,40.0,...,0.0,9.131,76.306,324.948,74.961,0.0,0.0,51.688077,0.0,0.0


In [16]:
# Persist the merged table next to the notebook (pandas defaults: the row
# index is written as the first, unnamed column).
final_df.to_csv(path_or_buf='integrated_data.csv')