# Data Visualization Collection
- A collection of useful visualization libraries and tools, each illustrated with an example.

In [57]:
### General ###

import numpy as np
import pandas as pd
import calendar

### Visualization Tools ###

# matplotlib: the plotting library that seaborn (sns) is built on
import matplotlib.pyplot as plt

# Born on top of matplotlib, but more attractive
import seaborn as sns

# For interactive visualization, not good for too many datapoints
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go

**Sources**

https://www.kaggle.com/code/ekrembayar/store-sales-ts-forecasting-a-comprehensive-guide

https://www.kaggle.com/code/kashishrastogi/store-sales-analysis-time-serie?scriptVersionId=81112640


In [58]:
# Load the per-store transaction counts, ordered by store and then by date,
# and parse the date column into real datetimes.
transactions = (
    pd.read_csv("./test_data/transactions.csv")
    .sort_values(["store_nbr", "date"])
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
)

# Load the training sales data; parse dates and store sales as float32.
train = (
    pd.read_csv("./test_data/train.csv")
    .assign(
        date=lambda frame: pd.to_datetime(frame["date"]),
        sales=lambda frame: frame["sales"].astype("float32"),
    )
)

FileNotFoundError: [Errno 2] No such file or directory: './test_data/transactions.csv'

In [None]:
# Preview the first five rows of the transactions table.
transactions.head(n=5)

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

In [None]:
# Box plot of the transaction counts: one group per year on the x axis,
# one coloured box per month inside each group.
# Hovering over a box shows its summary statistics.
a = transactions.assign(
    year=transactions["date"].dt.year,
    month=transactions["date"].dt.month,
)
px.box(
    a,
    x="year",
    y="transactions",
    color="month",
    title="Transactions",
)

In [None]:
# Average the transaction counts per calendar month, then draw the monthly
# means as a line chart with one colour per year.
a = (
    transactions.set_index("date")["transactions"]
    .resample("M")
    .mean()
    .reset_index()
    .assign(year=lambda frame: frame["date"].dt.year)
)
px.line(
    a,
    x='date',
    y='transactions',
    color='year',
    title="Monthly Average Transactions",
)

In [None]:
# Total sales per (date, store), left-joined to that store's transaction count
# for the same day. The join keys are spelled out so the merge does not depend
# on whichever column names the two frames happen to share.
temp = pd.merge(
    train.groupby(["date", "store_nbr"]).sales.sum().reset_index(),
    transactions,
    on=["date", "store_nbr"],
    how="left",
)

# Correlate only the two columns of interest. Running DataFrame.corr() on the
# whole frame would also include the datetime `date` column, which recent
# pandas versions no longer drop automatically (numeric_only defaults to False).
print(
    "Spearman Correlation between Total Sales and Transactions: {:,.4f}".format(
        temp[["sales", "transactions"]].corr("spearman").loc["transactions", "sales"]
    )
)

# Scatter of sales against transactions with a red OLS trend line.
px.scatter(temp, x="transactions", y="sales", trendline="ols", trendline_color_override="red")

In [None]:
# Mean transaction count for each day of the week (1 = Monday ... 7 = Sunday),
# computed separately for every year and drawn as one line per year.
a = (
    transactions.assign(
        year=transactions["date"].dt.year,
        dayofweek=transactions["date"].dt.dayofweek + 1,
    )
    .groupby(["year", "dayofweek"])["transactions"]
    .mean()
    .reset_index()
)
px.line(
    a,
    x="dayofweek",
    y="transactions",
    color="year",
    title="Transactions",
)

In [None]:
# Heatmap of the pairwise correlation between the stores' sales series.
a = train[["store_nbr", "sales"]].copy()

# 1-based running position of each row within its store. It serves as the
# pivot index, so the n-th observation of every store lands on the same row.
a["ind"] = a.groupby("store_nbr").cumcount() + 1

# One column per store, then the store-by-store correlation matrix.
a = pd.pivot(a, index="ind", columns="store_nbr", values="sales").corr()

# Boolean mask hiding the upper triangle (diagonal included), because the
# matrix is symmetric. It is built from the matrix shape only, so it does not
# depend on the correlation values themselves.
mask = np.triu(np.ones_like(a, dtype=bool))

plt.figure(figsize=(20, 20))

sns.heatmap(a,
        annot=True,
        fmt='.1f',
        cmap='coolwarm',
        square=True,
        mask=mask,
        linewidths=1,
        cbar=False)

plt.title("Correlations among stores",fontsize = 20)
plt.show()

In [None]:
# Sum the sales of every store per calendar day and draw one line per store.
a = (
    train.set_index("date")
    .groupby("store_nbr")
    .resample("D")["sales"]
    .sum()
    .reset_index()
)
px.line(
    a,
    x="date",
    y="sales",
    color="store_nbr",
    title="Daily total sales of the stores",
)

In [None]:
# Mean sales per product family, largest first, shown as horizontal bars.
a = (
    train.groupby("family")["sales"]
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)
px.bar(
    a,
    y="family",
    x="sales",
    color="family",
    title="Which product family preferred more?",
)

In [None]:
# Dashboard of average sales: a horizontal bar chart per month, a donut chart
# per quarter, and an area/line chart per ISO week.

# data
# Work on a copy so the calendar helper columns are not added to `train`.
a = train.copy()
a['date'] = pd.to_datetime(a['date'])
a['year'] = a['date'].dt.year
a['month'] = a['date'].dt.month
a['week'] = a['date'].dt.isocalendar().week
a['quarter'] = a['date'].dt.quarter
a['day_of_week'] = a['date'].dt.day_name()

# Mean sales per month, plus the "<Mon> - <value>" label drawn on each bar.
df_m_sa = a.groupby('month').agg({"sales" : "mean"}).reset_index()
df_m_sa['sales'] = df_m_sa['sales'].round(2)
df_m_sa['month_text'] = df_m_sa['month'].apply(lambda x: calendar.month_abbr[x])
df_m_sa['text'] = df_m_sa['month_text'] + ' - ' + df_m_sa['sales'].astype(str)

# Mean sales per ISO week and per quarter.
df_w_sa = a.groupby('week').agg({"sales" : "mean"}).reset_index()
df_q_sa = a.groupby('quarter').agg({"sales" : "mean"}).reset_index()

# chart color
df_m_sa['color'] = '#496595'
df_w_sa['color'] = '#c6ccd8'

# chart
# Layout: bar (row 1, col 1), pie (row 1, col 2), weekly chart spanning row 2.
fig = make_subplots(rows=2, cols=2, vertical_spacing=0.08,
                    row_heights=[0.7, 0.3],
                    specs=[[{"type": "bar"}, {"type": "pie"}],
                           [{"colspan": 2}, None]],
                    column_widths=[0.7, 0.3],
                    subplot_titles=("Month wise Avg Sales Analysis", "Quarter wise Avg Sales Analysis",
                                    "Week wise Avg Sales Analysis"))

fig.add_trace(go.Bar(x=df_m_sa['sales'], y=df_m_sa['month'], marker=dict(color= df_m_sa['color']),
                     text=df_m_sa['text'],textposition='auto',
                     name='Month', orientation='h'),
                     row=1, col=1)
fig.add_trace(go.Pie(values=df_q_sa['sales'], labels=df_q_sa['quarter'], name='Quarter',
                     marker=dict(colors=['#334668','#496595','#6D83AA','#91A2BF','#C8D0DF']), hole=0.7,
                     hoverinfo='label+percent+value', textinfo='label+percent'),
                     row=1, col=2)
fig.add_trace(go.Scatter(x=df_w_sa['week'], y=df_w_sa['sales'], mode='lines+markers', fill='tozeroy', fillcolor='#c6ccd8',
                     marker=dict(color= '#496595'), name='Week'),
                     row=2, col=1)

# styling
# The bar chart carries its own text labels, so its axes are hidden.
fig.update_yaxes(visible=False, row=1, col=1)
fig.update_xaxes(visible=False, row=1, col=1)
# Tick labels are derived from the week numbers actually present, so the
# labels always match the tick positions one-to-one (an ISO year can contain
# a week 53, which a fixed 1..52 label list would not cover).
fig.update_xaxes(tickmode = 'array', tickvals=df_w_sa['week'],
                 ticktext=[str(week) for week in df_w_sa['week']],
                 row=2, col=1)
fig.update_yaxes(visible=False, row=2, col=1)
fig.update_layout(height=750, bargap=0.15,
                  margin=dict(b=0,r=20,l=20),
                  title_text="Average Sales Analysis",
                  template="plotly_white",
                  title_font=dict(size=25, color='#8a8d93', family="Lato, sans-serif"),
                  font=dict(color='#8a8d93'),
                  hoverlabel=dict(bgcolor="#f2f2f2", font_size=13, font_family="Lato, sans-serif"),
                  showlegend=False)
fig.show()