# Visualising Distribution Data

Here we will be using the ```seaborn``` library for data visualisation. Seaborn is a Python library built on top of ```matplotlib```. It creates much more attractive plots than ```matplotlib``` does by default, and it is often more concise than ```matplotlib``` when you want to customize your plots, add colors, grids, etc.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Pick one global seaborn theme for every figure in this notebook.
# Other built-in choices include "white", "dark" and "darkgrid".
sns.set_style(style="whitegrid")

In [None]:
# Load the market fact table that the distribution plots below read from.
df = pd.read_csv(filepath_or_buffer="./global_sales_data/market_fact.csv")

### Histograms and Density Plots

Histograms and density plots show the frequency of a numeric variable along the y-axis, and the value along the x-axis. By default, the ```sns.distplot()``` function draws a histogram with a density curve overlaid; pass ```hist=False``` to draw only the density curve.

In [None]:
# Histogram of shipping cost with distplot's default settings.
# NOTE(review): distplot is reported as deprecated in newer seaborn releases —
# confirm against the installed version before relying on it.
sns.distplot(a=df.Shipping_Cost)
plt.show()

In [None]:
# Same distribution with rug marks enabled, restricted to the first 200 rows.
sns.distplot(a=df.Shipping_Cost.head(200), rug=True)
plt.show()

In [None]:
# Density curve only: the histogram bars are switched off with hist=False.
sns.distplot(a=df.Sales, hist=False)
plt.show()

# Subplots

In [None]:
# Draw the distribution of four numeric columns on a 2 x 2 grid.
# Each entry is (column name, panel title); looping over them replaces four
# copy-pasted blocks and gives every panel a title.
panels = [
    ('Sales', 'Sales'),
    ('Profit', 'Profit'),
    ('Order_Quantity', 'Order Quantity'),
    ('Shipping_Cost', 'Shipping Cost'),
]

fig, axes = plt.subplots(2, 2)

# axes.flat walks the grid row by row, i.e. in the same order as
# plt.subplot(2, 2, 1) ... plt.subplot(2, 2, 4).
for ax, (column, title) in zip(axes.flat, panels):
    sns.distplot(df[column], ax=ax)
    ax.set_title(title)

# Add spacing between the rows so the lower titles do not collide with the
# x-axis labels of the upper panels.
fig.tight_layout()
plt.show()


### Boxplots


In [None]:
# Horizontal boxplot: the column is mapped to the x axis explicitly.
# Passing it as a bare positional argument is deprecated; newer seaborn
# releases treat the first positional argument as `data`, so the horizontal
# orientation is no longer guaranteed without x=.
sns.boxplot(x=df['Order_Quantity'])
plt.title('Order Quantity')

plt.show()

In [None]:
# Vertical boxplot: the column is mapped to the y axis.
# boxplot returns the Axes it drew on, so the title is set directly on it.
sns.boxplot(y=df.Order_Quantity).set_title('Order Quantity')
plt.show()

### Joint Plot

In [None]:
# Joint plot of Sales against Profit.
# x, y and data are passed by keyword: positional use is deprecated in
# seaborn 0.11 and rejected from 0.12 on, while keywords work on all versions.
sns.jointplot(x='Sales', y='Profit', data=df)
plt.show()

# same as sns.jointplot(x=df['Sales'], y=df['Profit'])

In [None]:
# Drop the extreme orders before plotting Sales against Profit again.
# The subset gets its own name instead of overwriting `df`, so re-running an
# earlier cell after this one still sees the complete dataset.
df_trimmed = df[(df.Profit < 10000) & (df.Sales < 20000)]

# Keyword arguments: seaborn 0.12+ no longer accepts x/y/data positionally.
sns.jointplot(x='Sales', y='Profit', data=df_trimmed)
plt.show()

In [None]:
# Hex-binned joint plot on a narrow window of small orders.
# The file is re-read so the filter below starts from the complete dataset,
# independent of any filtering done in earlier cells.
df = pd.read_csv("./global_sales_data/market_fact.csv")
df = df[(df.Profit < 100) & (df.Profit > -100) & (df.Sales < 200)]
# x/y/data are passed by keyword; seaborn 0.12+ rejects them positionally.
sns.jointplot(x='Sales', y='Profit', data=df, kind="hex", color="k")
plt.show()

# Crypto Currency Plotting

Something extra which will help you to plot multivariate data.

In [None]:
# Read one price history per coin; the targets on the left line up one-to-one
# with the file paths below.
btc, ether, ltc, monero, neo, quantum, ripple = map(
    pd.read_csv,
    (
        "crypto_data/bitcoin_price.csv",
        "crypto_data/ethereum_price.csv",
        "crypto_data/litecoin_price.csv",
        "crypto_data/monero_price.csv",
        "crypto_data/neo_price.csv",
        "crypto_data/qtum_price.csv",
        "crypto_data/ripple_price.csv",
    ),
)

In [None]:
# Tag every column with a per-coin suffix (Date_a, Close_b, ...) so the frames
# can be told apart once they are merged side by side.
# Columns that already end with the suffix are left untouched, so re-running
# this cell does not produce names such as "Date_a_a" and break the merge keys.
for frame, suffix in (
    (btc, '_a'),
    (ether, '_b'),
    (ltc, '_c'),
    (monero, '_d'),
    (neo, '_e'),
    (quantum, '_f'),
    (ripple, '_g'),
):
    frame.columns = [
        str(col) if str(col).endswith(suffix) else str(col) + suffix
        for col in frame.columns
    ]

In [None]:
# Preview the first five rows of the bitcoin frame.
btc.head(n=5)

In [None]:
# Objective is to understand dataframe merging since this was not covered previously.
# Each step inner-joins one more coin onto the running result, matching that
# coin's date column against bitcoin's Date_a.
m1 = btc.merge(ether, left_on="Date_a", right_on="Date_b", how="inner")
m2 = m1.merge(ltc, left_on="Date_a", right_on="Date_c", how="inner")
m3 = m2.merge(monero, left_on="Date_a", right_on="Date_d", how="inner")
m4 = m3.merge(neo, left_on="Date_a", right_on="Date_e", how="inner")
m5 = m4.merge(quantum, left_on="Date_a", right_on="Date_f", how="inner")
crypto = m5.merge(ripple, left_on="Date_a", right_on="Date_g", how="inner")

crypto.head(n=5)

In [None]:
# Keep only the closing-price columns that are compared below.
# NOTE(review): ripple (suffix _g) is merged into crypto but no Close_g column
# is selected here — confirm whether that omission is intentional.
curr = crypto.loc[:, ["Close_a", "Close_b", 'Close_c', "Close_d", "Close_e", "Close_f"]]
curr.head(n=5)

In [None]:
# Grid of pairwise plots across the selected closing-price columns.
sns.pairplot(data=curr)
plt.show()

In [None]:
# Pairwise correlation of the closing prices, rounded to three decimals.
cor = curr.corr().round(3)
cor

# Heatmap

In [None]:
# Annotated heatmap of the correlation matrix on a 10 x 8 figure.
plt.figure(figsize=(10, 8))
sns.heatmap(data=cor, annot=True, cmap="YlGnBu")
plt.show()