<a href="https://colab.research.google.com/github/rafasyafiq/pyda-online/blob/master/day2/005_matplotlib_seaborn_basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Basic Plots

In [0]:
# Sample 50 evenly spaced points on [0, 10] and evaluate the sine at each one.
# Both arrays are reused by the plotting cells that follow.
x = np.linspace(start=0, stop=10, num=50)
sines = np.sin(x)

# Default style: a single solid line through the sampled points.
plt.plot(x, sines)
plt.show()

In [0]:
# The third positional argument is a format string; "o" draws circle markers.
# The colour / marker abbreviations are listed in the plt.plot documentation
# (e.g. run help(plt.plot)).
marker_fmt = "o"
plt.plot(x, sines, marker_fmt)
plt.show()

In [0]:
# Rapid multiplot: plt.plot accepts several (x, y, format) triples in one call.
cosines = np.cos(x)
plt.plot(
    x, sines, "-b",    # sine: solid blue line
    x, sines, "ob",    # sine: blue circle markers
    x, cosines, "-r",  # cosine: solid red line
    x, cosines, "or",  # cosine: red circle markers
)

# Axis annotations and title for the combined figure.
plt.xlabel("This is X")
plt.ylabel("This is Y")
plt.title("Sin and Cosine plot")
plt.show()

In [0]:
# Step by step: one plt.plot call per curve, styled through keyword arguments.
# The `label` values are what plt.legend() displays.
plt.plot(
    x,
    sines,
    label='sinus',
    color='blue',
    linestyle='--',
    linewidth=2,
)
plt.plot(
    x,
    cosines,
    label='cosinus',
    color='red',
    linestyle='-',
    linewidth=2,
)
plt.legend()
plt.show()


# Scatter (2D) plots

In [0]:
# Location of the salary table CSV in the course repository.
url = 'https://raw.githubusercontent.com/rafasyafiq/pyda-online/master/Data/salary_table.csv'

# Read the CSV once; `df` is a second name for the same DataFrame object
# (an alias, not a copy).
df = salary = pd.read_csv(url)

## Simple scatter with colors

In [0]:
# Colour used for each education level. Both names refer to the same dict,
# as in the original cell.
colors = colors_edu = {"Bachelor": "r", 'Master': 'g', 'Ph.D': 'blue'}

# Draw one labelled series per education level so that the colour coding can
# be explained by a legend; a single scatter call with a per-point colour
# list gives matplotlib nothing to build legend entries from.
for edu_level, group in df.groupby('education'):
    plt.scatter(
        group['experience'],
        group['salary'],
        color=colors[edu_level],  # KeyError here means an unmapped education level
        s=100,
        label=edu_level,
    )

# Axis labels and legend make the figure readable on its own.
plt.xlabel('Experience')
plt.ylabel('Salary')
plt.legend(title='Education')
plt.show()

## Scatter plot with colors and symbols

In [0]:
# list(salary.groupby(['education', 'management']))

In [0]:
plt.figure(figsize=(6, 5))

# Visual encoding: marker shape for the management flag, colour for education.
symbols_manag = {"Y": "*", "N": "."}
colors_edu = {
    'Bachelor': 'r',
    'Master': 'g',
    'Ph.D': 'b',
}

# One scatter series per (education, management) combination found in the data.
for (edu, manager), subset in salary.groupby(['education', 'management']):
    plt.scatter(
        subset['experience'],
        subset['salary'],
        marker=symbols_manag[manager],
        color=colors_edu[edu],
        s=150,
        label="/".join((manager, edu)),
    )

# Axis labels and legend.
plt.xlabel('Experience')
plt.ylabel('Salary')
plt.legend(loc="lower right")
plt.show()

## Boxplot

Box plots are non-parametric: they display the variation in samples of a statistical population without making any assumptions about the underlying statistical distribution.


In [0]:
# Salary distribution per education level; `hue` splits each education box
# into one box per management value.
# Background on the meaning of `hue`:
# https://datascience.stackexchange.com/questions/46117/meaning-of-hue-in-seaborn-barplot
sns.boxplot(
    data=salary,
    x='education',
    y='salary',
    hue='management',
)
plt.show()

In [0]:
# Both layers share the same variable mapping: management on x, salary on y,
# education as hue.
shared_mapping = dict(x="management", y="salary", hue="education", data=salary)

# Box plot first, then the individual observations drawn on top of it.
sns.boxplot(**shared_mapping)
sns.stripplot(
    **shared_mapping,
    jitter=True,   # spread the points horizontally
    dodge=True,    # separate the points by hue level, like the boxes
    linewidth=1,
)
plt.show()

## Density Plots

In [0]:
# One density panel per education level, comparing managers with non-managers.
#
# Group by the column name rather than a one-element list: with a list,
# recent pandas versions yield 1-tuple keys such as ('Bachelor',), which
# would then be shown verbatim as the panel titles.
education_groups = list(salary.groupby('education'))

# Size the grid from the data instead of hard-coding three rows.
# squeeze=False always returns a 2-D grid, so the column slice below is a 1-D
# array of Axes even when there is only a single education level.
f, axes_grid = plt.subplots(
    len(education_groups), 1, figsize=(9, 9), sharex=True, squeeze=False
)
axes = axes_grid[:, 0]

for ax, (edu, d) in zip(axes, education_groups):
    # NOTE: sns.distplot is deprecated in newer seaborn releases in favour of
    # histplot / kdeplot / displot. It is kept here so the cell still runs on
    # the seaborn version this notebook was written for.
    sns.distplot(d.salary[d.management == 'Y'], color='b', bins=10, label="Manager", ax=ax)
    sns.distplot(d.salary[d.management == "N"], color='r', bins=10, label="Employee", ax=ax)
    ax.set_title(edu)
    ax.set_ylabel("Density")
    # Legend on every panel: a single plt.legend() call only annotates the
    # current axes, i.e. one panel of the figure.
    ax.legend()

plt.tight_layout(pad=2.0)
plt.show()

## Violin plot (distribution)

A violin plot combines a box plot with a kernel density estimate of the distribution.

In [0]:
# Violin plot of the whole salary column, drawn horizontally.
# Introduction to violin plots:
# https://www.geeksforgeeks.org/violin-plot-for-data-analysis
ax = sns.violinplot(data=salary, x="salary")

In [0]:
# Same plot with an explicit kernel bandwidth setting.
# NOTE(review): newer seaborn releases appear to rename `bw` to `bw_method`;
# confirm against the installed version before changing it.
ax = sns.violinplot(
    data=salary,
    x="salary",
    bw=0.15,
)

In [0]:
# Salary by management status, with one violin per education level (hue).
ax = sns.violinplot(
    data=salary,
    x="management",
    y="salary",
    hue="education",
)

In [0]:
# tips dataset

In [0]:
# Load seaborn's example "tips" dataset and preview its first five rows.
tips = sns.load_dataset("tips")
tips.head(n=5)

In [0]:
# Distribution of the bill totals, passing the column directly instead of
# using the data= / column-name form.
ax = sns.violinplot(
    x=tips["total_bill"],
    bw=0.2,
)

In [0]:
# One violin of total_bill per day, coloured with the "muted" palette.
ax = sns.violinplot(
    data=tips,
    x="day",
    y="total_bill",
    palette="muted",
)

In [0]:
# Group by day and colour by time (lunch vs dinner).
# split=True draws the two hue levels as the two halves of a single violin.
ax = sns.violinplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="time",
    palette="muted",
    split=True,
)


## Pairwise scatter plots

In [0]:
# Grid of pairwise plots for the salary table, coloured by management status.
g = sns.PairGrid(data=salary, hue='management')

# Histograms on the diagonal, scatter plots everywhere else.
g.map_diag(plt.hist)
g.map_offdiag(plt.scatter)

# Assigning the return value keeps the cell from echoing it as output.
ax = g.add_legend()