In [None]:
#Let's bring in a dataset and try different styles and different libraries
import seaborn as sns
import warnings                         #We do this to keep from seeing lots of info junk printing out
warnings.filterwarnings('ignore')
import plotly
# Load Seaborn's example 'taxis' dataset into a DataFrame.
df = sns.load_dataset('taxis')
# Leave the preview as the cell's last expression so the notebook renders it as a rich table.
df.head()

Week 2: set up style sheet to demonstrate fundamental principles

In [None]:
from matplotlib import pyplot as plt

We will mostly use the Seaborn library for visualization in Python. Seaborn is built on Matplotlib, so a lot of your code may use a combo of Seaborn (sns) and Matplotlib Pyplot (plt) functions. Matplotlib has a built-in style library (plt.style.use()), as does Seaborn (sns.set_style()).   

Here, we'll start out by manually defining a style. Once we have one we like, we can drop the settings into an .mplstyle file and save it where the other style files are, so we can load it in the same way (with plt.style.use()).

In [None]:
#Display the style parameters Seaborn is currently using, so we can see which settings exist.
sns.axes_style()

In [None]:
#To see visual examples of matplotlib styles, go to https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html
#To list the names of the built-in style sheets that can be passed to plt.style.use(), do:
plt.style.available

In [None]:
#Now, let's try some of those to see how the aesthetics change. First, let's look at the default plot.
fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize=(20,5))
#x and y are passed by keyword: recent Seaborn releases no longer accept them positionally.
sns.barplot(x = df['pickup_borough'], y = df['total'], ax = ax[0])
#Trailing semicolon keeps the Axes repr out of the cell output.
sns.histplot(x = df['total'], ax = ax[1]);

In [None]:
#Building blocks of my personal style. Each dictionary holds the settings for one
#rc group; my_rcParams() hands them to plt.rc(<group>, **<dict>).
#These settings could instead be saved as a style file in the matplotlib style folder,
#so that this notebook would only need a single plt.style.use(...) call.

#Axes: hide the left/right spines, draw a horizontal major grid, bold titles.
axes = {
    'spines.right': False,
    'spines.left': False,
    'grid': True,
    'grid.axis': 'y',
    'grid.which': 'major',
    'titlesize': 16,
    'titleweight': 'bold',
}

#Line markers
lines = dict(markerfacecolor='gray')

#Grid lines: black at 10% opacity
grid = dict(color='black', alpha=0.1)

#Figure size and figure-level title size
figure = dict(figsize=(10, 5), titlesize=25)

#Font settings. NOTE: unlike the dictionaries above, these keys are fully qualified
#rcParams names, so this dict fits plt.rcParams.update(font) rather than plt.rc('font', **font).
#my_rcParams() does not apply it.
font = {
    'font.family': 'serif',
    'font.style': 'italic',
    'font.weight': 800,
}

#Tick label sizes (my_rcParams() applies ytick only)
xtick = dict(labelsize=5)
ytick = dict(labelsize=10)

def my_rcParams():
    """Reset Matplotlib to its 'default' style, then apply the custom rc groups.

    Applies the module-level ``axes``, ``grid``, ``ytick``, ``figure`` and
    ``lines`` dictionaries through ``plt.rc``. The ``font`` and ``xtick``
    dictionaries are not applied here.
    """
    # Start from a known baseline so settings from earlier cells do not leak in.
    plt.style.use('default')
    for group, settings in (
        ('axes', axes),
        ('grid', grid),
        ('ytick', ytick),
        ('figure', figure),
        ('lines', lines),
    ):
        plt.rc(group, **settings)

#You could, in theory, turn this function into a library package, just like any other that you'd install / import.        
def my_styleXY(kind, x, y, nrows, ncols, index, xtext, ytext, suptitle, title, source, highlight):
    """Draw one gray, consistently styled subplot and optionally highlight its tallest bar.

    Args:
        kind: 'bar', 'scatter' or 'violin'. Any other value leaves the axes empty.
        x, y: data passed to the Seaborn plotting function as ``x`` and ``y``.
        nrows, ncols, index: subplot grid and position, forwarded to ``plt.subplot``.
        xtext, ytext: x-axis and y-axis labels.
        suptitle: figure-level title.
        title: subplot title (left-aligned).
        source: data citation, drawn in figure coordinates near the bottom-left corner.
        highlight: the string "TRUE" (or boolean True) colors the tallest bar
            dark red; any other value leaves every mark gray.

    Returns:
        None. The plot is drawn on the current figure.
    """
    my_rcParams()
    ax = plt.subplot(nrows, ncols, index)
    # Use the figure that owns these axes instead of depending on a global `fig`
    # that happens to be defined in the calling cell.
    fig = ax.figure

    if kind == 'bar':
        sns.barplot(x = x, y = y, color = 'gray', ax = ax)
    elif kind == 'scatter':
        sns.scatterplot(x = x, y = y, color = 'gray', ax = ax)
    elif kind == 'violin':
        sns.violinplot(x = x, y = y, color = 'gray', ax = ax)

    ax.xaxis.set_label_text(xtext)
    ax.yaxis.set_label_text(ytext)

    # Only patches that expose a height (e.g. bars) can be highlighted. Plots that
    # add no such patches leave this list empty, so guard before taking the maximum.
    bars = [p for p in ax.patches if hasattr(p, 'get_height')]
    if (highlight is True or highlight == "TRUE") and bars:
        highest = max(p.get_height() for p in bars)
        for p in bars:
            if p.get_height() == highest:
                p.set_color('darkred')

    fig.suptitle(suptitle, x = .15, fontsize = 16,
                 fontstyle = 'italic', font = 'times new roman',
                 transform = fig.transFigure, clip_on = False)
    ax.set_title(title, loc = 'left', fontsize = 12, fontstyle = 'italic', font = 'arial')
    # Source note is positioned in figure coordinates, not data coordinates.
    ax.text(.04, -.01, source, transform=fig.transFigure, fontsize = 10, alpha = .7)


In [None]:
#Once my style is developed, this is all I need to do to create consistently styled graphs

fig = plt.figure()

# my_styleXY arguments, in order:
#   kind (bar, scatter or violin), x, y,
#   nrows / ncols (subplot grid of the figure), index (position of this subplot),
#   xtext / ytext (axis labels), suptitle (overall figure title), title (subplot title),
#   source (where you got the data), highlight ("TRUE" recolors the tallest bar).
# NOTE(review): the titles mention tips while y is df['total'] -- confirm which is intended.
my_styleXY(
    kind = 'bar',
    x = df['pickup_borough'],
    y = df['total'],
    nrows = 1,
    ncols = 1,
    index = 1,
    xtext = "Pickup borough",
    ytext = "Total bill",
    suptitle = "Different plots of tips",
    title = 'Queens has the best tips',
    source = "Source: This dataset is on taxis from Seaborn",
    highlight = 'FALSE',
)

The graph above uses several principles we discussed as fundamental:

- start with gray
- readable axis labels
- main takeaway title / subtitle
- source is cited
- no lie factor
- minimal chart junk
- aligned objects (if we had stacked bars instead, we would have misaligned 'y' values)

Still, we could do better with this graph. It's a bit boring. For instance, how would you draw attention to the most notable element? How would you lead the viewer through the graph? How would you tell the story? What question does this answer?

In [None]:
fig = plt.figure()
# Same chart as before, but with highlight = "TRUE" so the tallest bar is recolored.
# my_styleXY arguments, in order:
#   kind (bar, scatter or violin), x, y,
#   nrows / ncols (subplot grid of the figure), index (position of this subplot),
#   xtext / ytext (axis labels), suptitle (overall figure title), title (subplot title),
#   source (where you got the data), highlight ("TRUE" recolors the tallest bar).
# NOTE(review): the titles mention tips while y is df['total'] -- confirm which is intended.
my_styleXY(
    kind = 'bar',
    x = df['pickup_borough'],
    y = df['total'],
    nrows = 1,
    ncols = 1,
    index = 1,
    xtext = "Pickup borough",
    ytext = "Total bill",
    suptitle = "Different plots of tips",
    title = 'Queens has the best tips',
    source = "Source: This dataset is on taxis from Seaborn",
    highlight = "TRUE",
)
plt.show()



In [None]:

# Stand-alone version of the highlighting step: draw the bars, then recolor the tallest one.
# The figure, axes and bar heights are created here so the cell runs on a fresh kernel.
fig, ax = plt.subplots()
sns.barplot(x = df['pickup_borough'], y = df['total'], color = 'gray', ax = ax)
heights = [p.get_height() for p in ax.patches]
if heights:                             # nothing to highlight if no bars were drawn
    highest = max(heights)
    for p in ax.patches:
        if p.get_height() == highest:
            p.set_color('darkred')
ax.set_title("tips")
plt.show()

In [None]:
#Options are below for plots using Seaborn. Examples in following blocks.

'''
(data, kind, x, y, style, col, size, ci, row, hue, palette, order, orient, height, aspect, markers, linestyles, split, inner, col_wrap)
viz = sns.relplot(                      #What category of plot (catplot, relplot, displot)
    data=data,                          #What is your dataset?
    data=data.query(use " " and/or),    #Want to slice out any part of it for this viz?     
    kind = "line",                      #Plot type: relplot (line, scatter); catplot (strip, box, violin, boxen, point, bar, count, swarm);
                                        #displot (hist, kde, ecdf). Separate functions: lmplot, regplot, residplot, heatmap, clustermap
    x = "flipper_length_mm",            #What is your X
    y = "bill_length_mm",               #What is your Y   
    style = ,                           #Similar to hue, set marker style based on category
    col = "species",                    #Want to break out into small multiples vertically?    
    size = ,                            #Automatically normalized into ranges, size of marker or line by value
    ci = "sd",                          #Confidence interval for line plots ["none", "sd"]    
    row = ,                             #Use row with or instead of col    
    hue = "sex",                        #Want to color by a 3rd feature? Lineplot shows multiple lines. Or,    
    palette = {" " : " ", " " : " "}    #Want to specify colors for each category?
    order = [" ", " "],                 #Order your categories explicitly, or data = data.sort_values("feature")
    orient = "h",                       #Want to swap x and y?     
    height = , aspect = ,               #Want to set the height and width?    
    markers = ["^", "o"],               #Want some cool markers?
    linestyles = ["--"]                 #Want some cool linestyles? Line plot shows multiple lines.
    split = True,                       #Use to split violin plot into two hues
    inner = "stick"                     #Use to add value markers inside violin plot
    col_wrap = 5                        #Wraps values in small multiples of 5 columns, across many values    
    )

viz.set_axis_labels("Flipper length", "Bill length")
viz.set(xscale = "log")                 #Use to convert x to log scale
viz.figure.autofmt_xdate()              #Use when plotting time series, to format date
viz2 = sns.type(args, ax = viz.ax)      #Use to add a second type of plot to the same axis
'''