# Matplotlib Basics

### Contents:
    1.Basics:
        1. Scatter Plot.
        2. Line Plot.
        3. Bar Chart.
        4. Subplot & Subplots.
        5. Histogram.
        6. Box and Whisker Plot.
        7. Heat Map.
        8. Animations.
        9. Interaction.
        10. Pie Chart.
    2.Real Life Examples.
       
    

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import (
    AutoLocator, AutoMinorLocator)
%matplotlib notebook

   ## Scatter plot  
    Zip function():
    1. Zip method creates a tuple from any number of given list. 
    2. Using asterisk (*) unpacks the tuple.

#### Example 1 : Using Zip function & Slicing, plot values into two seperate color and label them
   

In [3]:
import matplotlib.pyplot as plt
contents = zip([1,2,3,4,5,6,7,8,9,10],[10,20,30,40,50,60,70,80,90])
# (1, 10) (2, 20) (3, 30) (4, 40) (5, 50) (6, 60) (7, 70) (8, 80) (9, 90)

x,y = zip(*contents)
plt.scatter(x[4:], y[4:],s = 100, color = 'red', label = 'Elite')
plt.scatter(x[:4], y[:4], color = 'blue', label = 'Normal')
plt.legend(loc = 4)
plt.show()

<IPython.core.display.Javascript object>

## Line Plot

In [4]:
#inputs
x = np.array([1,2,3,4,])
y = np.array([6,11,14,16])

plt.figure()
#for plotting the inputs, adding color, label name, linestyle , width, marker size and style
plt.plot(x, y, label = '5x', color = 'lightslategrey', linestyle = '--', linewidth = 2, marker = 'o', markersize = 5)

#for adding title to the chart
plt.title('1st Chart', fontdict = {'Fontname':'MS Reference Sans Serif','Fontsize':15,'Color':'maroon'})

#for labeling the x and y axis
plt.xlabel('X axis', fontdict = {'Color':'green'})
plt.ylabel('Y axis', fontdict = {'Color':'g'})

#for showing the lagend(label name of lines)
plt.legend()

#for showing the chart
plt.show()

<IPython.core.display.Javascript object>

  plt.title('1st Chart', fontdict = {'Fontname':'MS Reference Sans Serif','Fontsize':15,'Color':'maroon'})
  plt.xlabel('X axis', fontdict = {'Color':'green'})
  plt.ylabel('Y axis', fontdict = {'Color':'g'})


##### Shorthand method
    format = '[color][marker][line]'

#### Example 2 :

In [5]:
#inputs
x = np.array([0,1,2,3,4,5,6,7])
y= x
y1 = x**2
y2 = x**3

#resize the graph
plt.figure(figsize = (6,4), dpi = 100)

#for plotting points into the canvas
plt.plot(x, y , 'g.-', label = 'line0')
plt.plot(x,y1, 'r.--', label = 'line1')
plt.plot(x,y2, 'b.-', label = 'line2')

#for adding a title in the graph
plt.title('2nd chart')

#for leveling x and y axis
plt.xlabel('X axis')
plt.ylabel('Y axis')

#to show the lebel of lines
plt.legend()

#to limit the x and y axis ticks (graph scaling)
plt.xticks([1,2,3,4,5,6,7,8,9])
plt.yticks([100,200,300,400])

#for saving the graph
plt.savefig('graph.png', dpi = 350)

# for showing the graph and close the graph (the image is closed and freed from memory)
plt.show()


<IPython.core.display.Javascript object>

#### Shading between lines

In [6]:
plt.figure()
plt.gca().fill_between(range(len(y)),
                      x,y1,
                      color = 'g', alpha = .25)
plt.gca().fill_between(range(len(y1)),
                        y1, y2,
                        color = 'blue', alpha = .25)

<IPython.core.display.Javascript object>

<matplotlib.collections.PolyCollection at 0xe0900d0>

## Bar charts

#### Example 1 

In [9]:
# inputs
plt.figure()
values = [10,20,30,40,50]
labels = ['A','B','C','D','E']

#for plotting into bar chart
bars = plt.bar(labels , values)

#for styiling the bars
bars[0].set_hatch('/')
bars[1].set_hatch('-')
bars[2].set_hatch('o')
bars[3].set_hatch('*')
bars[4].set_hatch('.')

#resizing hte figure
plt.figure(figsize = (6,4))

#show figure
plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#### Example 2 : Normal Bars 

In [10]:
linear_data = np.array([1,2,3,4,5,6,7,8])
expo_data = linear_data**2

plt.figure()
x_val = range(len(linear_data))
plt.bar(x_val, expo_data, color = 'lightblue')

<IPython.core.display.Javascript object>

<BarContainer object of 8 artists>

In [11]:
plt.figure()
new_xval = []

for item in x_val:
    item = item + 0.5
    new_xval.append(item)
plt.bar(new_xval, expo_data, color='lightgreen')

<IPython.core.display.Javascript object>

<BarContainer object of 8 artists>

#### Example 3 : Adding error in bar

In [12]:
plt.figure()
from random import randint
linear_err = [randint(0,15) for x in range(len(linear_data))] 

# This will plot a new set of bars with errorbars using the list of random error values
plt.bar(x_val, linear_data,  yerr=linear_err)

<IPython.core.display.Javascript object>

<BarContainer object of 8 artists>

#### Example 4 : Stacking on Bars - Bottom stacking

In [10]:
linear_data = np.array([1,2,3,4,5,6,7,8])
expo_data = linear_data**2

plt.figure()
x_val = range(len(linear_data))
plt.bar(x_val, linear_data, color = 'lightblue')
plt.bar(x_val, expo_data, color = 'green', bottom = linear_data)

<IPython.core.display.Javascript object>

<BarContainer object of 8 artists>

#### Example 5 : Horizontal Bar charts

In [11]:
linear_data = np.array([1,2,3,4,5,6,7,8])
expo_data = linear_data**2

plt.figure()
x_val = range(len(linear_data))
plt.barh(x_val, linear_data, color = 'lightblue')
plt.barh(x_val, expo_data, color = 'green', left = linear_data)

<IPython.core.display.Javascript object>

<BarContainer object of 8 artists>

## Subplot & Subplots

    1 .Format : plt.subplot(row, column, axis )
    2. storing axis data is a good idea.
    3. You can share x and y axis

In [3]:
# subplot with 1 row, 2 columns, and current axis is 1st subplot axes

plt.figure()
plt.subplot(1,2,1)

linear_data = np.array([1,2,3,4,5,6,7,8,9])
expo_data = linear_data ** 2
quad_data = linear_data ** 4
plt.plot(linear_data, label = 'Linear', marker = 'o')

plt.subplot(1,2,2)
plt.plot(expo_data, label = 'exponential', marker = 'o')
plt.legend()

plt.subplot(1,2,1)
plt.plot(quad_data, label = 'Quadratic', marker = '.')
plt.legend()

plt.show()

<IPython.core.display.Javascript object>

  plt.subplot(1,2,1)


    The previous Figure shows the problem that arise with different Y axis scaling, you can fix it with sharex & sharey parameter

In [4]:
plt.figure()
ax1 = plt.subplot(1,2,1)
plt.plot(linear_data, label = 'linear', marker = ".")
plt.legend()

ax2 = plt.subplot(1,2,2, sharey = ax1)
plt.plot(expo_data, label = 'Exponential', marker = 'o')
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1e9f575b850>

    Create a 3x3 grid of subplots using plt.subplots() , which is a plural form of plt.subplot()

In [14]:
fig, ((ax1,ax2,ax3),(ax4,ax5,ax6),(ax7,ax8,ax9)) = plt.subplots(3,3, sharex = True, sharey = True)

ax1.plot(linear_data)
ax5.plot(expo_data)
ax9.plot(quad_data)

plt.show()

<IPython.core.display.Javascript object>

In [15]:
# set inside tick labels to visible
for ax in plt.gcf().get_axes():
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_visible(True)

In [16]:
# necessary on some systems to update the plot
plt.gcf().canvas.draw()

## Histograms 

In [17]:
# 2 by 2 Grid
fig, ((ax1,ax2), (ax3,ax4)) = plt.subplots(2,2, sharex = True)
axes = [ax1, ax2, ax3, ax4]

#Iterating through the grid and plotting histograms
for n in range(len(axes)):
    sample_size = 10**(n+1)
    sample = np.random.normal(loc = 0.0, scale = 1.0, size = sample_size)
    axes[n].hist(sample, bins = 100) # try no bins , bins = 100 etc
    axes[n].set_title('n={}'.format(sample_size))


<IPython.core.display.Javascript object>

In [18]:
plt.figure()
Y = np.random.normal(loc = 0.0, scale = 1.0, size = 10000)
X = np.random.random(size = 10000)
plt.scatter(X,Y)
plt.show()

<IPython.core.display.Javascript object>

#### Gridspec
    GridSpec class is used to specify the geometry of the grid to place a subplot. Which means, gridspec can be used to create custom grid .

#### Example : 
    1.Make a custom grid (4 graphs in a 3*3 grid space)
    2.plot a scatter plot in top_right grid and two histograms in bottom_left and bottom_right grid space
    3.Clean the bottom_left & bottom_right grid space and plot normed histograms 

In [19]:
### part 1: Making the grid
import matplotlib.gridspec as gridspec
%matplotlib notebook
plt.figure(figsize = (6,4))

gspec = gridspec.GridSpec(3,3) #Starts with zero like list indexing

top_left = plt.subplot(gspec[0,0])
top_right = plt.subplot(gspec[0,1:])
bottom_left = plt.subplot(gspec[1:,0])

bottom_right = plt.subplot(gspec[1:,1:])

<IPython.core.display.Javascript object>

In [20]:
### part 2 : plotting into grid
Y = np.random.normal(loc = 0.0, scale = 1.0, size = 10000)
X = np.random.random(size = 10000)
bottom_right.scatter(X,Y)
top_right.hist(X, bins = 100)
bottom_left.hist(Y,bins= 100, orientation = 'horizontal')
plt.show()

In [21]:
### part 3 : clear the histograms of top_right and bottom_left axis and plot normed histograms.
top_right.clear()
top_right.hist(X, bins = 100, density = True)
bottom_left.clear()
bottom_left.hist(Y, bins = 100, orientation = 'horizontal', density = 'True')
bottom_left.invert_xaxis()
plt.show()

 ## Box and Whisker Plots

In [22]:
#Creating a dataframe
import pandas as pd
normal_sample = np.random.normal(loc = 0.0, scale = 1.0, size = 10000)
random_sample = np.random.random(size = 10000)
gamma_sample = np.random.gamma(2, size = 10000)

df = pd.DataFrame({'Normal':normal_sample,
                   'Random': random_sample,
                    'Gamma': gamma_sample})
df.describe()

Unnamed: 0,Normal,Random,Gamma
count,10000.0,10000.0,10000.0
mean,0.012082,0.498566,2.023898
std,0.997038,0.28756,1.422549
min,-3.547427,0.000164,0.014927
25%,-0.67078,0.251134,0.980896
50%,0.002341,0.497885,1.705425
75%,0.674015,0.74789,2.735975
max,3.393475,0.99984,11.671992


In [23]:
plt.figure()
plt.boxplot(df['Normal'], whis = 'range')
plt.show()

<IPython.core.display.Javascript object>

  plt.boxplot(df['Normal'], whis = 'range')


In [24]:
#clear the current figure and then plot three columns again
plt.clf()
values = [df['Normal'], df['Random'], df['Gamma']]
names = ['Normal', 'Random', 'Gamma']
plt.boxplot(values, labels = names, whis = 'range')
plt.show()

  plt.boxplot(values, labels = names, whis = 'range')


#### Overlaying one axis object on top of another 
    1.Import the mpl_toolkits.axes_grid1.inset_locator toolkit.
    2.From the toolkit, use the inset_axes() function which is an object of the toolkit.

In [25]:
import mpl_toolkits.axes_grid1.inset_locator as mpl_il
plt.figure()
plt.boxplot(values, labels = names, whis = 'range')
ax2 = mpl_il.inset_axes(plt.gca(), height = '40%', width = '60%', loc = 2) # gets the current axis object, 
                                                                           # and makes another axis object on loc = 2 (top left)
                                                                           # with height = 40% and width = 60%
ax2.hist(df['Gamma'], bins = 100, label = 'Gamma')
plt.legend()
ax2.yaxis.tick_right() # moves tick to the right side so that it looks good

<IPython.core.display.Javascript object>

  plt.boxplot(values, labels = names, whis = 'range')


##### if `whis` argument isn't passed, boxplot defaults to showing 1.5*interquartile (IQR) whiskers with outliers

In [26]:
plt.figure()
plt.boxplot(values, labels = names)
plt.show()

<IPython.core.display.Javascript object>

## Heatmaps (2-Dimensional Histograms)

In [27]:
plt.figure()
Y = np.random.normal(loc = 0.0, scale = 1.0, size = 10000)
X = np.random.random(size = 10000)
plt.hist2d(X,Y, bins = 50)
plt.colorbar() # works the same as legend
plt.show()

<IPython.core.display.Javascript object>

In [28]:
# same figure with a different bin number
plt.figure()
plt.hist2d(X,Y, bins = 200)
plt.colorbar() # works the same as legend
plt.show()

<IPython.core.display.Javascript object>

## Animations

In [4]:
import matplotlib.animation as animation
n = 100
x = np.random.randn(n)

In [7]:
def updater(cur_frame):
    if (cur_frame == n):
        a.event_source.stop()
    plt.cla()
    
    bins = np.arange(-4, 4, 0.5)
    plt.hist(x[:cur_frame], bins = bins)
    plt.axis([-4, 4, 0, 30])
    plt.gca().set_title('Simulation of Normal distribution')
    plt.gca().set_xlabel('Value')
    plt.gca().set_ylabel('Frequency')
    
    plt.annotate('n={}'.format(cur_frame), [3,27])

In [8]:
fig = plt.figure()
a = animation.FuncAnimation(fig, updater, interval = 50)

<IPython.core.display.Javascript object>

## Interactivity

#### Example 1: interactivity of Button Press Event.

In [17]:
plt.figure()
data = np.random.randn(10)
plt.plot(data)

def onclick(event):
    plt.cla()
    plt.plot(data)
    plt.gca().set_title('Clicked {}*{} th pixel\n at (x={}, y={}) point '.format(event.x, event.y, 
                                                                           event.xdata, event.ydata))

plt.gcf().canvas.mpl_connect('button_press_event', onclick)

<IPython.core.display.Javascript object>

9

#### Example 2 : Interactivity with Pick Event

In [15]:

from random import shuffle
import pandas as pd
origins = ['China', 'Brazil', 'India', 'USA', 'Canada', 'UK', 'Germany', 'Iraq', 'Chile', 'Mexico']

shuffle(origins)

df = pd.DataFrame({'height': np.random.rand(10),
                   'weight': np.random.rand(10),
                   'origin': origins})
plt.figure()
plt.scatter(df['height'], df['weight'], picker = 5)
plt.gca().set_ylabel('Height')
plt.gca().set_xlabel('Weight')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'Weight')

In [34]:
def onpick(event):
    origin = df.iloc[event.ind[0]]['origin']
    plt.gca().set_title('Selected item is from {} '.format(origin))

plt.gcf().canvas.mpl_connect('pick_event', onpick)

9

# Real Life Examples 

#### Example 1 : Line chart using Gas Price Dataset

In [40]:
gas_dataset = pd.read_csv('gas_prices.csv')
country_list = list(gas_dataset.columns[1:])
gas_dataset

plt.figure(figsize = (6,4), dpi = 100)
for country in country_list:
    plt.plot(gas_dataset['Year'], gas_dataset[country], label = country, linestyle = 'solid', marker = '.') 
plt.title('Year vs Price chart')
plt.xlabel('Year')
plt.ylabel('US Dollar')
plt.legend(loc='best', bbox_to_anchor=(1,1,0,0))
plt.xticks(gas_dataset['Year'][::3])
plt.yticks([1,2,3,4,5,6,7,8])

plt.savefig('Gas Chart.jpg', dpi = 500 , bbox_inches= 'tight')
plt.show()

<IPython.core.display.Javascript object>

### Histogram Example 1 ( using Fifa data )

In [41]:
plt.figure()
fifa_dataset = pd.read_csv('fifa_data.csv')
bins = [0,10,20,30,40,50,60,70,80,90,100]
plt.hist(fifa_dataset['Overall'], bins = bins)

plt.title('Histogram based on overall rating')
plt.xlabel('Overall rating')
plt.ylabel('Numbers of players')
plt.show()

<IPython.core.display.Javascript object>

### Pie Chart - Example 1

In [48]:
plt.figure()

weights = [14.44, 36.13, 8.63, 20.08, 2.52, 2.88, 15.32]
labels = ['Manufacturer of Wearing Apperal \n(Ready Made Garments, RMG)', 'Manufacturer of Textiles',
          'Manufacturer of \nNon Metalic Mineral Products','Manufacturer of Food Products',
          'Manufacture of fabricated Metal Products \n(except machinery and equipment)',
          'Manufacture of furniture','Others']
explode = [0.005,0.05, 0.05, 0.05, 0.2, 0.1, 0.1]
plt.pie(weights, labels = labels, autopct = '%.2f %%', pctdistance = 0.73, explode = explode)
plt.title('Industries in Bangladesh', fontsize = 20)

plt.savefig('newIndustries in Bangladesh.png', dpi = 400, transparent=True , bbox_inches= 'tight')
plt.show()

<IPython.core.display.Javascript object>

### Pie Chart - Example 2

In [16]:
plt.figure()
fifa_dataset = pd.read_csv('fifa_data.csv')
left = fifa_dataset[fifa_dataset['Preferred Foot'] == 'Left'].count()[0]
right = fifa_dataset[fifa_dataset['Preferred Foot'] == 'Right'].count()[0]

labels = ['left', 'right']
colors = ['red', 'Grey']
plt.pie([left, right], labels= labels, colors = colors ,autopct = '%.2f %%', pctdistance = 0.65)

plt.title('Foot preference of FIFA players')

plt.show()

<IPython.core.display.Javascript object>

### Pie Chart - Example 3
##### Instructions: 
    
    1. Convert the 'Weight' Column from Pound to Kilogram 
        i) strip the lbs from the data
        ii) convert them to integars 
        iii) multiply by 0.453592 to get KG values
        iv) apply conditions so that if the data has NaN value, it doesn't show the error (if they have NaN values, the value remains the same)
        
    2. Plot the following groups into the pie chart.
        i) light weight = under 60 kgs
        ii) medium weight = between 60-85 kgs
        iii) over weight = over 85 kgs
        
    3. Additional Work: do task 2 again, but this time consider the height to weight ratio for the groups.
        

In [43]:
plt.figure()
weights = fifa_dataset['Weight'].to_list()
new_weights = []
for i in range(0,len(weights)):
    if type(weights[i]) == str:
        new_digit = int(weights[i].strip('lbs')) * 0.453592  
                                                            
    else:
        new_digit = weights[i]                              
    new_weights.append(new_digit)
fifa_dataset['Weight'] = new_weights

light_weight = fifa_dataset[fifa_dataset['Weight'] <= 60].count()[0]
medium_weight = fifa_dataset[(fifa_dataset['Weight'] >= 60) & (fifa_dataset['Weight'] <= 85)].count()[0]
over_weight = fifa_dataset[fifa_dataset['Weight'] >= 85].count()[0]

weights = [light_weight,medium_weight,over_weight]
labels = ['light weight(under 60 kg)','medium weight(between 60-85 kgs)','over weight(over 85 kgs)']
colors = ['#c7e0eb', '#599695', '#67aac7']
explode = [0.1,0.5,0.5]
plt.pie(weights, labels = labels, colors = colors, autopct = '%.2f %%', pctdistance = 0.65, explode = explode)
plt.title('Category of FIFA players according to weight')
plt.savefig('Category of FIFA players according to weight.jpg', dpi = 400, bbox_inches= 'tight')
plt.show()

<IPython.core.display.Javascript object>

### Box-Whisker Chart - Example 1
    Compare Real Madrid, Fc Barcelona, Juventas, Paris Saint Germain and Liverpool FC strength according to Players overall rating

In [45]:

fifa_dataset = pd.read_csv('fifa_data.csv')

Barca = fifa_dataset[fifa_dataset['Club'] == 'FC Barcelona']['Overall']
Juv = fifa_dataset[fifa_dataset['Club'] == 'Juventus']['Overall']
Madrid = fifa_dataset[fifa_dataset['Club'] == 'Real Madrid']['Overall']
PSG = fifa_dataset[fifa_dataset['Club'] == 'Paris Saint-Germain']['Overall']
Liverpool = fifa_dataset[fifa_dataset['Club'] == 'Liverpool']['Overall']

clubs = [Madrid, Barca, Juv, PSG, Liverpool]
labels = ['Real Madrid', 'FC Barcelona', 'Juventas', 'Paris Saint Germain','Liverpool FC']
plt.figure(figsize = (14,5))
boxes = plt.boxplot(clubs, labels = labels, patch_artist = True, medianprops = {'linewidth' : 3, 'color':'black'})

for box in boxes['boxes']:
    ##edge color
    box.set(color = 'maroon', linewidth = 2)
    ##box color
    box.set(facecolor = 'lightgreen')
plt.title("Comparison Between Different Clubs")
plt.ylabel('Overall Rating')
plt.savefig('Comparison Between Different Clubs.jpg', dpi = 500, bbox_inches = 'tight')
plt.show()

<IPython.core.display.Javascript object>