# Initial data import 

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os.path
import pandas as pd
# Path of the preprocessed CSV this notebook reads.
CSV_PATH = 'processed_data/q6.csv'

# Run the preprocessing step only when the CSV is not already on disk;
# the project-local import is deferred so it is skipped otherwise.
csv_is_missing = not os.path.isfile(CSV_PATH)
if csv_is_missing:
    from preprocessing import q6
    q6.process()

# Load the data; the trailing bare name makes the notebook display it.
df = pd.read_csv(CSV_PATH)
df

# Pivoted Dataframe
We use the pandas `pivot` function to reshape our data into a c1 × c2 matrix. Note that the data is mirrored across the diagonal of missing (NaN) values. This is because the self-join query returned rows for both orderings of each pair, (c1, c2) and (c2, c1), e.g. (US, RU) and (RU, US). We will hide the duplicated half in the next step.

In [None]:
# Reshape the long-format rows into a matrix: one row per c1, one column
# per c2, with shared_percent as the cell value.
pv = df.pivot(index='c1', columns='c2', values='shared_percent')

# Display only (pv itself is unchanged): drop the index axis name and
# shrink the '.index_name' header cell to zero font size.
index_name_style = [{'selector': '.index_name', 'props': [('font-size', '0')]}]
pv.rename_axis(None).style.set_table_styles(index_name_style)

# Generate heatmap
Uses NumPy to build a boolean mask that hides the upper triangle of the matrix (including the diagonal) in the plot, so each pair is shown only once.

In [None]:
sns.set(style="ticks", font_scale=1.2)
%matplotlib inline
plt.figure(figsize=(12,8)) 


mask = np.zeros_like(pv, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True

ax = sns.heatmap(pv, cmap = sns.color_palette("YlGnBu", 10), 
                 annot=True, annot_kws = {'size': 'large'}, fmt='.2%',
                 mask = mask,
                 cbar = False
                )
# fix for mpl bug that cuts off top/bottom of seaborn viz
b, t = plt.ylim() # discover the values for bottom and top
b += 0.5 # Add 0.5 to the bottom
t -= 0.5 # Subtract 0.5 from the top
plt.ylim(b, t) # update the ylim(bottom, top) values

ax.xaxis.get_major_ticks()[-1].set_visible(False)
ax.yaxis.get_major_ticks()[0].set_visible(False)
ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
ax.set_xlabel('')
ax.set_ylabel('');