In [1]:
import pandas as pd
import numpy as np
import pickle

from bokeh.plotting import figure, show
from bokeh.layouts import layout,column,row
from bokeh.models import ColumnDataSource,Slider


from bokeh.io import curdoc,curstate
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Select

import warnings
warnings.filterwarnings("ignore")

from bokeh.models.formatters import CategoricalTickFormatter
from bokeh.models.mappers import CategoricalColorMapper
from bokeh.models import (
    HoverTool,
    LinearColorMapper
)
import bokeh.palettes as pt

In [2]:
from bokeh.io import output_notebook, push_notebook
# Configure Bokeh so that subsequent show() calls render inline in this notebook.
output_notebook()

In [3]:
# Load the pickled mapping whose keys/values feed the first bar chart.
# NOTE(security): unpickling can execute arbitrary code - only load a
# 'suite_dict.pkl' that comes from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('suite_dict.pkl', 'rb') as suite_file:
    suite_dict = pickle.load(suite_file)
print(len(suite_dict))
source = ColumnDataSource(data=dict(x=list(suite_dict.keys()), y=list(suite_dict.values())))


81


In [27]:
def get_barchart(source,suite_dict,loc,x_label,y_label,title="Cipher suite distribution"):
    """Build a vertical bar chart with one colour-mapped bar per category.

    Parameters
    ----------
    source : ColumnDataSource
        Data source with an ``x`` column (category names) and a ``y`` column
        (bar heights); the bars and the hover tooltips read these columns.
    suite_dict : mapping
        Its keys, in iteration order, become the categorical x-axis range and
        the colour-mapper factors. Its values are not read here.
    loc : str
        Legend location, assigned to ``plot.legend.location``.
    x_label, y_label : str
        Axis labels.
    title : str, optional
        Plot title.

    Returns
    -------
    The configured figure created by ``figure(...)``.
    """
    categories = list(suite_dict.keys())

    # One "inferno" colour per category, selected through the bar's ``x`` value.
    palette = pt.inferno(len(categories))
    mapper = CategoricalColorMapper(palette=palette, factors=categories)

    plot = figure(x_range=categories, plot_height=550, plot_width=770,
                  toolbar_location='above', title=title,
                  x_axis_label=x_label, y_axis_label=y_label)

    plot.vbar(x='x', top='y', bottom=0, width=0.5, source=source,
              legend='Cipher suites',
              fill_color={'field': 'x', 'transform': mapper})
    plot.add_tools(HoverTool(tooltips=[("Cipher suite", "@x"), ("Count", "@y")],
                             show_arrow=False, point_policy='follow_mouse'))

    # Rotate the category tick labels by 90 degrees and render them in a small font.
    plot.xaxis.major_label_orientation = np.pi/2
    plot.xaxis.major_label_text_font_size = "7pt"
    plot.legend.orientation = "horizontal"
    plot.legend.location = loc

    return plot


In [44]:
from collections import OrderedDict

# Remove the 'TLSv1.2' entry when it exists. pop() with a default ignores a
# missing key without hiding unrelated errors the way a bare `except` would.
suite_dict.pop('TLSv1.2', None)

# sorted() yields (key, value) tuples ordered by descending value; OrderedDict
# turns them back into a mapping that keeps that order.
sorted_suite_dict = OrderedDict(sorted(suite_dict.items(), key=lambda item: -item[1]))
source2 = ColumnDataSource(data=dict(x=list(sorted_suite_dict.keys()),
                                     y=list(sorted_suite_dict.values())))
plot1 = get_barchart(source2, sorted_suite_dict, "top_right",
                     title="Cipher Suites vs Number of Websites",
                     x_label="Cipher Suites", y_label="Number of websites")
show(plot1)

In [42]:
# Load the crawl results; the CSV has no header row, so column names are supplied here.
cipher_suites_df = pd.read_csv('./crawl_cipher_suites.csv', header=None,
                               names=["Company", "CipherSuite", "CDN"])
# Rewrite CDN values of exactly 100 to 5.
# NOTE(review): 100 looks like a sentinel (e.g. a timeout marker) - confirm.
# `.loc` replaces the `.ix` indexer, which was deprecated and then removed in pandas 1.0.
cipher_suites_df.loc[cipher_suites_df['CDN'] == 100, 'CDN'] = 5
suites = np.unique(cipher_suites_df['CipherSuite'].values)
print(suites)
# Group by the column name itself (not a one-element list) so that groups are
# keyed by plain strings and get_group('<suite>') works on every pandas version.
grouped = cipher_suites_df.groupby('CipherSuite')


def get_group(suite):
    """Return the group of rows for ``suite`` from the module-level ``grouped`` object."""
    return grouped.get_group(suite)


# Aggregate only the numeric CDN column; averaging the whole frame would also
# try to average the string Company column, which recent pandas rejects.
mean_dict = grouped['CDN'].mean().to_dict()
# Order the suites by ascending mean so the bars rise from left to right.
sorted_mean_dict = OrderedDict(sorted(mean_dict.items(), key=lambda item: item[1]))
source3 = ColumnDataSource(data=dict(x=list(sorted_mean_dict.keys()),
                                     y=list(sorted_mean_dict.values())))
plot3 = get_barchart(source3, sorted_mean_dict, "top_left", x_label="CipherSuite",
                     y_label="Mean CDT(seconds)", title="CipherSuite Vs Mean CDT")
show(plot3)

['DH+3DES' 'DH+AES' 'DH+AES256' 'DH+AESGCM' 'DH+CAMELLIA' 'DH+HIGH'
 'ECDH+3DES' 'ECDH+AES128' 'ECDH+AES256' 'ECDH+AESGCM' 'ECDH+HIGH'
 'ECDHE+ECDSA+3DES' 'ECDHE+ECDSA+AES128' 'ECDHE+ECDSA+AES256' 'RSA+3DES'
 'RSA+AES' 'RSA+AESGCM' 'RSA+HIGH' 'http']


In [41]:
# Load the crawl results; the CSV has no header row, so column names are supplied here.
cipher_suites_df = pd.read_csv('./crawl_cipher_suites.csv', header=None,
                               names=["Company", "CipherSuite", "CDN"])
# Rewrite CDN values of exactly 100 to 5.
# NOTE(review): 100 looks like a sentinel (e.g. a timeout marker) - confirm.
# `.loc` replaces the `.ix` indexer, which was deprecated and then removed in pandas 1.0.
cipher_suites_df.loc[cipher_suites_df['CDN'] == 100, 'CDN'] = 5
suites = np.unique(cipher_suites_df['CipherSuite'].values)
print(suites)
# Group by the column name itself (not a one-element list) so that groups are
# keyed by plain strings and get_group('<suite>') works on every pandas version.
grouped = cipher_suites_df.groupby('CipherSuite')


def get_group(suite):
    """Return the group of rows for ``suite`` from the module-level ``grouped`` object."""
    return grouped.get_group(suite)


# Aggregate only the numeric CDN column; a whole-frame median would also try
# to process the string Company column, which recent pandas rejects.
median_dict = grouped['CDN'].median().to_dict()
# Order the suites by ascending median so the bars rise from left to right.
sorted_median_dict = OrderedDict(sorted(median_dict.items(), key=lambda item: item[1]))
source6 = ColumnDataSource(data=dict(x=list(sorted_median_dict.keys()),
                                     y=list(sorted_median_dict.values())))
plot6 = get_barchart(source6, sorted_median_dict, "top_left", x_label="CipherSuite",
                     y_label="Median CDT(seconds)", title="CipherSuite Vs Median CDT")
show(plot6)

['DH+3DES' 'DH+AES' 'DH+AES256' 'DH+AESGCM' 'DH+CAMELLIA' 'DH+HIGH'
 'ECDH+3DES' 'ECDH+AES128' 'ECDH+AES256' 'ECDH+AESGCM' 'ECDH+HIGH'
 'ECDHE+ECDSA+3DES' 'ECDHE+ECDSA+AES128' 'ECDHE+ECDSA+AES256' 'RSA+3DES'
 'RSA+AES' 'RSA+AESGCM' 'RSA+HIGH' 'http']


In [43]:
# Smallest CDN value per cipher suite. Only the CDN column is aggregated, so
# the string Company column is never compared just to be thrown away.
min_dict = grouped['CDN'].min().to_dict()
# Order the suites by ascending minimum so the bars rise from left to right.
sorted_min_dict = OrderedDict(sorted(min_dict.items(), key=lambda item: item[1]))
source4 = ColumnDataSource(data=dict(x=list(sorted_min_dict.keys()),
                                     y=list(sorted_min_dict.values())))
plot2 = get_barchart(source4, sorted_min_dict, "top_left", x_label="CipherSuite",
                     y_label="Min CDT(seconds)", title="CipherSuite Vs Min CDT")
show(plot2)

In [39]:
# Reload the crawl results; the CSV has no header row, so column names are supplied here.
cipher_suites_df = pd.read_csv('./crawl_cipher_suites.csv', header=None,
                               names=["Company", "CipherSuite", "CDN"])
# Rewrite CDN values of exactly 100 to 5.
# NOTE(review): 100 looks like a sentinel (e.g. a timeout marker) - confirm.
# `.loc` replaces the `.ix` indexer, which was deprecated and then removed in pandas 1.0.
cipher_suites_df.loc[cipher_suites_df['CDN'] == 100, 'CDN'] = 5
# Prefix every company name with 'www.' (vectorised string concatenation).
cipher_suites_df['Company'] = 'www.' + cipher_suites_df['Company']
companies = np.unique(cipher_suites_df['Company']).tolist()
print(len(companies))
# Group by the column name itself (not a one-element list) so that groups are
# keyed by plain strings and get_group('www.<name>') works on every pandas version.
grouped_company = cipher_suites_df.groupby('Company')
# Placeholders for the figure and the drop-down that update_plot reads;
# both are assigned further down in this cell.
scatter = None
select = None

def get_group(company_name):
    """Look up one company's rows.

    Delegates to ``get_group`` on the module-level ``grouped_company`` object
    and returns whatever that call returns for ``company_name``.
    """
    return grouped_company.get_group(company_name)

# Initial data for the scatter plot: the rows recorded for the default website.
# This rebinds the module-level name `source`, which an earlier cell bound to
# the data source built from suite_dict.
g_df = get_group('www.google.com')
ciphers = g_df['CipherSuite'].values
source = ColumnDataSource(data={
    'x': g_df['CipherSuite'].tolist(),
    'y': g_df['CDN'].tolist(),
})

def update_plot(attr,old,new):
    """Bokeh ``on_change`` callback: show the selected website in the scatter plot.

    Parameters
    ----------
    attr, old, new
        Standard Bokeh change-callback arguments; unused, because the current
        choice is read from the module-level ``select`` widget.

    Side effects
    ------------
    Updates the title of the module-level ``scatter`` figure and replaces the
    ``x``/``y`` columns of the module-level ``source``.
    """
    selected_website = select.value
    # Keep the separating space so the title reads "Access times for www.…",
    # matching the initial title set in get_scatter().
    scatter.title.text = 'Access times for ' + selected_website
    g_df = get_group(selected_website)
    # Assign both columns in a single step: replacing them one at a time leaves
    # the source briefly holding columns of different lengths and triggers two
    # separate change events.
    source.data = dict(x=g_df['CipherSuite'].tolist(), y=g_df['CDN'].tolist())
    # NOTE(review): the x-axis factors and colour mapper are fixed when
    # get_scatter() runs (from `ciphers`); a website using suites outside that
    # list may not have those points drawn - confirm.
    
     
# Drop-down offering every entry of `companies`; a change of its 'value'
# property invokes update_plot.
select = Select(
    title="Website",
    value="www.google.com",
    options=companies,
)
select.on_change('value', update_plot)

def get_scatter():
    """Build the scatter plot of access time per cipher suite.

    Reads two module-level names: ``ciphers`` (category names for the x-axis)
    and ``source`` (whose ``x``/``y`` columns are drawn as circles).

    Returns
    -------
    The configured figure, initially titled for www.google.com.
    """
    from collections import OrderedDict

    # A categorical range must not repeat a factor, so drop duplicates while
    # keeping first-seen order (identical to list(ciphers) when already unique).
    x_range_ = list(OrderedDict.fromkeys(ciphers))

    # One "inferno" colour per cipher suite, selected through the point's ``x`` value.
    palette = pt.inferno(len(x_range_))
    mapper = CategoricalColorMapper(palette=list(palette), factors=x_range_)

    plot = figure(x_range=x_range_, plot_height=350, plot_width=500,
                  toolbar_location='above', x_axis_label="CipherSuites",
                  y_axis_label="Access Times")

    plot.title.text = 'Access times for www.google.com'
    plot.circle(x='x', y='y', source=source, size=12,
                fill_color={'field': 'x', 'transform': mapper})

    plot.add_tools(HoverTool(tooltips=[("Cipher suite", "@x"), ("AccessTime", "@y")],
                             show_arrow=False, point_policy='follow_mouse'))

    # Rotate the category tick labels by 90 degrees.
    plot.xaxis.major_label_orientation = np.pi/2
    return plot
    
scatter = get_scatter()

# Page structure: the scatter plot beside its website selector on the first
# row, then the three bar charts stacked underneath.
# NOTE(review): binding the name `layout` here shadows the `layout` function
# imported from bokeh.layouts at the top of the notebook.
layout = column(
    row(scatter, widgetbox(select)),
    plot1,
    plot2,
    plot3,
)
curdoc().add_root(layout)

532
