# Pandas Plotting

In [None]:
# Pandas and numpy for data manipulation
import pandas as pd
import numpy as np

# Matplotlib for plotting
import matplotlib.pyplot as plt
%matplotlib inline

## Series.plot()

In [None]:
# 200 standard-normal draws, one per calendar day starting 2022-01-01.
random_series = np.random.randn(200)
daily_index = pd.date_range('1/1/2022', periods=200)
ts = pd.Series(random_series, index=daily_index)
ts.plot()

In [None]:
# Plot the running total of the series instead of the raw draws.
running_total = ts.cumsum()
running_total.plot()

## DataFrame.plot()

In [None]:
# Two independent 1000-point noise series sharing one daily index from 2018-01-01.
random_serie_1 = np.random.randn(1000)
random_serie_2 = np.random.randn(1000)

named_series = {'serie_1': random_serie_1, 'serie_2': random_serie_2}
df = pd.DataFrame(named_series, index=pd.date_range('1/1/2018', periods=1000))

# One line per column, each showing that column's running total.
cumulative = df.cumsum()
cumulative.plot()

The `plot()` function can be combined with matplotlib plotting functions.

In [None]:
# NOTE: `ts` is rebound here from the earlier Series to a 365-day DatetimeIndex;
# later cells in this notebook pass it as `index=` when building 365-row frames.
ts = pd.date_range('1/1/2005', periods=365)

# Four noise columns, accumulated row by row.
noise = np.random.randn(365, 4)
df = pd.DataFrame(noise, index=ts, columns=list('ABCD')).cumsum()

# pandas draws the lines; the title is then set through pyplot.
df.plot(figsize=(5, 5))
plt.title("Title")
df.head(5)

In [None]:
# Load the milsa dataset, using the 'Funcionario' column as the row index.
milsa_csv = "../datasets/milsa.csv"
df_milsa = pd.read_csv(milsa_csv, index_col='Funcionario')
df_milsa.head(10)

In [None]:
# Sort the rows by 'Anos' first, then draw 'Salario' against 'Anos' as a line.
milsa_by_years = df_milsa.sort_values(by=['Anos'])
milsa_by_years.plot.line(x='Anos', y='Salario')

## Plots - Part 1

Plotting methods allow for a handful of plot styles other than the default line plot. These methods can be provided as the kind keyword argument to plot(), and include:
* ‘bar’ or ‘barh’ for bar plots
* ‘hist’ for histogram
* ‘box’ for boxplot
* ‘kde’ or ‘density’ for density plots
* ‘area’ for area plots
* ‘scatter’ for scatter plots
* ‘hexbin’ for hexagonal bin plots
* ‘pie’ for pie plots


### Area Plot

In [None]:
# 10 rows of uniform samples in four columns, drawn as an area plot.
uniform_samples = np.random.rand(10, 4)
df = pd.DataFrame(uniform_samples, columns=list('abcd'))
df.plot(kind='area');

In [None]:
# Same frame, with stacking turned off.
df.plot(kind='area', stacked=False);

### Scatter Plot

In [None]:
# 50 rows of uniform samples; scatter column 'b' against column 'a'.
uniform_samples = np.random.rand(50, 4)
df = pd.DataFrame(uniform_samples, columns=list('abcd'))
df.plot(kind='scatter', x='a', y='b');

In [None]:
# The first call returns the axes it drew on; the second call is handed those
# axes through `ax=` so both column pairs share one chart.
ax = df.plot(kind='scatter', x='a', y='b', color='DarkBlue', label='Group 1')
df.plot(kind='scatter', x='c', y='d', color='DarkGreen', label='Group 2', ax=ax);  # multicolumn

In [None]:
# Fixed marker size; marker color is taken from column 'c'.
df.plot(kind='scatter', x='a', y='b', s=50, c='c');  # color

In [None]:
# Marker size scales with column 'c' (multiplied by 200).
marker_sizes = df['c'] * 200
df.plot(kind='scatter', x='a', y='b', s=marker_sizes);

In [None]:
# Column 'c' drives both the marker color and the marker size.
marker_sizes = df['c'] * 200
df.plot(kind='scatter', x='a', y='b', s=marker_sizes, c='c');

### Bars and Horizontal Bars

In [None]:
# 365x4 frame of noise, indexed by `ts` (defined in an earlier cell).
noise = np.random.randn(365, 4)
df = pd.DataFrame(noise, index=ts, columns=list('ABCD'))

# Row 5 as a bar chart, one bar per column.
df.iloc[5].plot.bar()

# Open a new figure before drawing row 6.
plt.figure()
df.iloc[6].plot(kind='bar');

In [None]:
# Bar chart of the first five rows.
first_rows = df.head(5)
first_rows.plot.bar();

In [None]:
# Set the title, draw row 5 as bars, then add a horizontal reference line at y=0.
plt.title("Data viz facens rocks!")
df.iloc[5].plot(kind='bar')
plt.axhline(0, color='blue')

In [None]:
# Fresh 10x4 frame of uniform samples, used by the bar-chart cells that follow.
column_names = ['a', 'b', 'c', 'd']
df = pd.DataFrame(np.random.rand(10, 4), columns=column_names)


In [None]:
# Show the first rows of the frame that the bar charts below are drawn from.
df.head()

In [None]:
# Four variants of the same data: horizontal, vertical, and the stacked form of each.
df.plot(kind='barh')
df.plot(kind='bar')
df.plot(kind='bar', stacked=True)
df.plot(kind='barh', stacked=True)

## Plots - Part 2

### Histograms

In [None]:
# Histogram of the 'Salario' column using 5 bins.
salaries = df_milsa['Salario']
salaries.plot(kind='hist', bins=5)

In [None]:
# The same kind of chart drawn with pyplot directly instead of through pandas.
s = np.random.randn(1000)
plt.hist(s, bins=50, color='blue', edgecolor='black')

In [None]:
# Uniform samples (np.random.rand), plotted with the default number of bins.
df = pd.DataFrame({'a': np.random.rand(1200)})
df.plot(kind='hist')

# Standard-normal samples (np.random.randn), plotted with 100 bins.
df = pd.DataFrame({'a': np.random.randn(1200)})
df.plot(kind='hist', bins=100)

In [None]:
# Column 'a' is the normal sample shifted by +1; column 'b' is left unshifted.
shifted = np.random.randn(1000) + 1
unshifted = np.random.randn(1000)
df4 = pd.DataFrame({'a': shifted, 'b': unshifted}, columns=['a', 'b'])

# alpha=0.5 makes the bars semi-transparent.
df4.plot(kind='hist', alpha=0.5)

In [None]:
# Histogram of column 'a' with horizontal bars.
df4['a'].plot(kind='hist', orientation='horizontal')

In [None]:
# Show the first rows of df4 (columns 'a' and 'b').
df4.head()

In [None]:
# Histogram of column 'a' through the .hist() method: 50 bins, black bars at 50% opacity.
df4['a'].hist(color='k', alpha=0.5, bins=50)
# Alternatives to try (uncomment one): column 'b' alone, or .hist() on the whole frame.
#df4['b'].hist(color='k', alpha=0.5, bins=50)
#df4.hist(color='k', alpha=0.5, bins=50)

### Density

In [None]:
# 1000 standard-normal draws, shown first as a plain histogram.
normal_draws = np.random.randn(1000)
ser = pd.Series(normal_draws)
ser.plot(kind='hist')

In [None]:
# Kernel density estimate of the same series.
ser.plot(kind='kde')

In [None]:
# New sample; histogram drawn with density=True, followed by the KDE curve in the same cell.
normal_draws = np.random.randn(1000)
ser = pd.Series(normal_draws)
ser.plot(kind='hist', density=True)
ser.plot(kind='kde')

In [None]:
# Same pairing as the previous cell, but with a 100-bin histogram.
normal_draws = np.random.randn(1000)
ser = pd.Series(normal_draws)
ser.plot(kind='hist', density=True, bins=100)
ser.plot(kind='kde')

### Box 

<img src="../../resources/images/simple.box.defs.gif" width="25%" />


In [None]:
# One box per column for a 10x5 frame of uniform samples.
uniform_samples = np.random.rand(10, 5)
df = pd.DataFrame(uniform_samples, columns=list('ABCDE'))
df.plot(kind='box')

In [None]:
# Reload the milsa dataset (row index: 'Funcionario') for the box-plot examples.
milsa_csv = "../datasets/milsa.csv"
df_milsa = pd.read_csv(milsa_csv, index_col="Funcionario")
df_milsa.head()

In [None]:
# Keep only the three columns to compare, then draw one box per column.
box_columns = ['Salario', 'Anos', 'Filhos']
df = pd.DataFrame(df_milsa[box_columns])
df.plot(kind='box')

In [None]:
# One color per box-plot component; sym='r*' sets the marker style passed through to the plot.
color = {
    'boxes': 'DarkGreen',
    'whiskers': 'DarkOrange',
    'medians': 'DarkBlue',
    'caps': 'Gray',
}
df.plot(kind='box', color=color, sym='r*')

In [None]:
# Same frame drawn through DataFrame.boxplot() rather than the .plot.box() accessor.
df.boxplot()

### Violin Plots

#### Comparison with Box Plot

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Side-by-side panels: violin plot on the left, box plot on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

# Four independent samples of 100 standard-normal draws each.
# `all_data` is reused by later cells in this notebook.
all_data = [np.random.randn(100) for _ in range(4)]


# plot box plot
axes[1].boxplot(all_data)
axes[1].set_title('box plot')

# plot violin plot (median marker shown, mean marker hidden)
axes[0].violinplot(all_data, showmeans=False, showmedians=True)
axes[0].set_title('violin plot')


# adding horizontal grid lines
for ax in axes:
    ax.yaxis.grid(True)

plt.show()

In [None]:
# Reload the milsa dataset (row index: 'Funcionario') for the violin-plot examples.
milsa_csv = "../datasets/milsa.csv"
df_milsa = pd.read_csv(milsa_csv, index_col='Funcionario')
df_milsa.head(10)

In [None]:
# Two violins of 'Salario' showing the median only:
#   1st figure: no extra options (library defaults);
#   2nd figure: explicit widths=0.5 and a KDE bandwidth of 0.1.
for extra_options in ({}, {'widths': 0.5, 'bw_method': 0.1}):
    plt.violinplot(df_milsa['Salario'], showmeans=False, showmedians=True, **extra_options)
    # Only the middle tick (x=1) carries a label.
    plt.xticks([0, 1, 2], ('', 'Salario', ''))
    plt.show()


In [None]:
# Two violins of 'Salario' showing both mean and median:
#   1st figure: no bandwidth given;
#   2nd figure: KDE bandwidth of 0.05.
for extra_options in ({}, {'bw_method': 0.05}):
    plt.violinplot(df_milsa['Salario'], showmeans=True, showmedians=True, **extra_options)
    # Only the middle tick (x=1) carries a label.
    plt.xticks([0, 1, 2], ('', 'Salario', ''))
    plt.show()

In [None]:
# Two violins in one figure ('Salario' and 'Anos'), with narrow widths (0.1)
# and a KDE bandwidth of 0.1 -- these are explicit, non-default settings.
compared_columns = [df_milsa['Salario'], df_milsa["Anos"]]
plt.violinplot(compared_columns, showmeans=True, showmedians=True, widths=0.1, bw_method=0.1)

# Tick 0 is left blank; ticks 1 and 2 name the two columns.
plt.xticks([0, 1, 2], ('', 'Salario', 'Anos'))
plt.show()

In [None]:
# Same data and bandwidth (0.1) in both figures; only the violin width changes (0.5, then 1).
# `all_data` comes from the violin-vs-box comparison cell earlier in the notebook.
for violin_width in (0.5, 1):
    plt.violinplot(all_data, showmeans=True, showmedians=False, widths=violin_width, bw_method=0.1)
    plt.show()

In [None]:
# One panel per bandwidth value, laid out in a single row.
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(20, 3))

# KDE bandwidth for each panel; the last entry (None) leaves the choice to matplotlib.
bw_method = [0.1, 0.2, 0.3, 0.4, None]

# Data and violin width (1) are the same in every panel; only the bandwidth varies.
# `all_data` comes from the violin-vs-box comparison cell earlier in the notebook.
for ax, bandwidth in zip(axes, bw_method):
    ax.violinplot(all_data, showmeans=True, showmedians=False, widths=1, bw_method=bandwidth)
    ax.set_title(f'estimator bandwidth = {bandwidth}')
    # horizontal grid lines
    ax.yaxis.grid(True)

plt.show()

## Plotting Features / Strategies

### Error Bars

In [None]:
# 365x4 frame of noise, indexed by `ts` (defined in an earlier cell).
noise = np.random.randn(365, 4)
df = pd.DataFrame(noise, index=ts, columns=list('ABCD'))

# Row 5 without error bars.
df.iloc[5].plot.bar()

# Row 6 in a new figure, with a constant error of 0.3 on every bar.
plt.figure()
df.iloc[6].plot(kind='bar', yerr=0.3);

In [None]:
# 15 uniform points, each with a constant vertical error bar of 0.06.
uniform_samples = np.random.rand(15, 4)
df = pd.DataFrame(uniform_samples, columns=list('abcd'))
df.plot(kind='scatter', x='a', y='b', yerr=0.06);

In [None]:
# 10 uniform points with constant error bars on both axes (0.02 horizontal, 0.06 vertical).
uniform_samples = np.random.rand(10, 4)
df = pd.DataFrame(uniform_samples, columns=list('abcd'))
df.plot(kind='scatter', x='a', y='b', xerr=0.02, yerr=0.06);

### Subplots

In [None]:
# Show the first rows of the frame used by the subplot examples below.
df.head()

In [None]:
# subplots=True gives every column its own axes instead of one shared chart.
df.plot.line(subplots=True, figsize=(6, 6));

In [None]:
# Per-column subplots arranged on a 2x2 grid, with x-axis sharing switched off.
df.plot.line(subplots=True, layout=(2, 2), figsize=(6, 6), sharex=False);