In [1]:
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine

import plotly.graph_objs as go
from plotly.subplots import make_subplots
import chart_studio.plotly as py
import cufflinks
import plotly.express as px
import plotly.figure_factory as ff
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from plotly.offline import iplot
cufflinks.go_offline()
cufflinks.set_config_file(world_readable=True, theme='pearl')

from matplotlib.pyplot import figure

import matplotlib.pyplot as plt

In [2]:
# Database connection.
# The URL is taken from the DATABASE_URL environment variable so that real credentials
# never have to be written into the notebook. The fallback below is a placeholder
# (username, password and DB name are changed); to run the whole notebook, set
# DATABASE_URL according to your settings.
import os

engine = create_engine(
    os.environ.get('DATABASE_URL', 'postgresql+psycopg2://user:password@localhost:5432/DB')
)

#### 1. Trends of net profit over fiscal quarters, year by year.

In [3]:
# Net profit per fiscal quarter plus the percentage change versus the previous quarter.
# - The named window `w` orders quarters by their description so lag() looks one quarter back.
# - NULLIF turns a zero previous-quarter profit into NULL instead of raising "division by zero".
# - The final ORDER BY makes the row order explicit: the chart built from this frame uses a
#   categorical x axis, so rows must arrive in chronological order.
profit_quarters = pd.read_sql_query("""WITH quarters AS (
SELECT 	t.fiscal_quarter_desc 				AS fiscal_quarter,
		t.fiscal_quarter_number, 
		sum(p.amount_sold - p.total_cost) 	AS total_net_profit
FROM sh.profits p 
INNER JOIN sh.times t 
ON p.time_id = t.time_id 
GROUP BY t.fiscal_quarter_desc, t.fiscal_quarter_number
)
SELECT 	q.fiscal_quarter_number,
		q.fiscal_quarter, 
		q.total_net_profit, 
		round((q.total_net_profit - lag(q.total_net_profit) OVER w) * 100 / 
				NULLIF(lag(q.total_net_profit) OVER w, 0), 2) AS pct_change
FROM quarters q
WINDOW w AS (ORDER BY q.fiscal_quarter)
ORDER BY q.fiscal_quarter""", engine)

In [4]:
# Display the frame returned by the quarterly net-profit query.
profit_quarters

Unnamed: 0,fiscal_quarter_number,fiscal_quarter,total_net_profit,pct_change
0,1,1998-01,1618498.71,
1,2,1998-02,1398357.06,-13.6
2,3,1998-03,1485636.12,6.24
3,4,1998-04,1271674.32,-14.4
4,1,1999-01,1416960.4,11.42
5,2,1999-02,993247.56,-29.9
6,3,1999-03,1099704.68,10.72
7,4,1999-04,1021576.7,-7.1
8,1,2000-01,1106566.86,8.32
9,2,2000-02,949470.03,-14.2


In [5]:
# Dual-axis chart: quarterly net profit as a line (axis y1) drawn over bars showing the
# percentage change of that profit (axis y2).
quarter_labels = profit_quarters['fiscal_quarter']
net_profit = profit_quarters['total_net_profit']
pct_change = profit_quarters['pct_change']

profit_trace = go.Scatter(
    name='profit',
    mode='markers+lines+text',
    x=quarter_labels,
    y=net_profit,
    yaxis='y1',
    text=net_profit,
    texttemplate='%{text:.3s}',
    textposition='top right',
    textfont={'family': 'sans serif', 'size': 10, 'color': 'blue'},
    # Both columns are passed as hover data; the template indexes them as
    # hovertext[0] (profit) and hovertext[1] (percentage change).
    hovertext=profit_quarters[['total_net_profit', 'pct_change']],
    hovertemplate="$ total: %{hovertext[0]}<br>% change: %{hovertext[1]:.3s}",
    marker={'sizemin': 10, 'color': 'green'},
)

pct_change_trace = go.Bar(
    name='% change',
    x=quarter_labels,
    y=pct_change,
    yaxis='y2',
    text=pct_change,
    texttemplate='%{text:.3s}',
    textposition='outside',
    textfont={'family': 'sans serif', 'size': 9, 'color': 'black'},
    # Bar colour follows fiscal_quarter_number along the 'ylgnbu' colour scale.
    marker={'colorscale': 'ylgnbu', 'color': profit_quarters['fiscal_quarter_number']},
)

data = [profit_trace, pct_change_trace]

profit_layout = go.Layout(
    title='Total Net Profit Trends Over Fiscal Quarters',
    xaxis={'title': 'Fiscal years and quarters', 'type': 'category', 'tickangle': 80},
    # The profit axis is overlaid on y2 so the line is drawn on top of the bars.
    yaxis={'title': 'Total net profit', 'overlaying': 'y2'},
    yaxis2={'title': 'Profit % change', 'side': 'right', 'range': [-100, 100]},
)

# NOTE: this assignment rebinds the name `figure` imported from matplotlib.pyplot in the first cell.
figure = go.Figure(data=data, layout=profit_layout)
figure

#### 2. Sales (net profit) of the product categories by year.

In [6]:
# Net profit per (fiscal year, product category) and each category's share of its year's total.
# - NULLIF keeps a year whose total profit is zero from raising "division by zero".
# - The explicit ORDER BY matters downstream: the bar chart assigns its colour sequence by the
#   order in which categories first appear, and the pie chart passes colours and pulls by position.
year_good_distr = pd.read_sql_query("""SELECT 	t.fiscal_year, 
		pr.prod_category, 
		sum(p.amount_sold - p.total_cost) 	AS total_net_profit, 
		sum(p.amount_sold - p.total_cost) * 100 / NULLIF(sum(sum(p.amount_sold - p.total_cost)) over(PARTITION BY t.fiscal_year), 0) AS good_cat_share
FROM sh.profits p 
INNER JOIN sh.times t 
ON p.time_id = t.time_id
INNER JOIN sh.products pr 
ON pr.prod_id = p.prod_id 
GROUP BY t.fiscal_year, pr.prod_category
ORDER BY t.fiscal_year, pr.prod_category;""", engine)

In [7]:
# Display the per-year, per-category net profit and share frame.
year_good_distr

Unnamed: 0,fiscal_year,prod_category,total_net_profit,good_cat_share
0,1998,Electronics,295468.54,5.117077
1,1998,Hardware,1715791.59,29.714967
2,1998,Peripherals and Accessories,2031745.66,35.186823
3,1998,Photo,1185959.28,20.539057
4,1998,Software/Other,545201.14,9.442076
5,1999,Electronics,440690.34,9.725066
6,1999,Hardware,827876.52,18.269413
7,1999,Peripherals and Accessories,1606041.95,35.441812
8,1999,Photo,979308.33,21.61118
9,1999,Software/Other,677572.2,14.952528


In [8]:
# Stacked bars: one bar per fiscal year, split by product category.
category_palette = ['lightsalmon', 'olive', 'seagreen', 'lemonchiffon', 'orange']

# Share of the year's profit, rounded to two decimals and rendered as e.g. "35.19%".
share_labels = year_good_distr['good_cat_share'].round(2).astype(str) + '%'

# Hide the columns already visible on the chart; add the formatted profit and the share.
hover_fields = {
    'fiscal_year': False,
    'prod_category': False,
    'total_net_profit': ':.3s',
    'Share': share_labels,
}

fig = px.bar(
    year_good_distr,
    x='fiscal_year',
    y='total_net_profit',
    color='prod_category',
    barmode='relative',
    color_discrete_sequence=category_palette,
    text='prod_category',
    hover_name="prod_category",
    hover_data=hover_fields,
)

fig.update_layout(
    title='The Sales of Goods Categories by Year',
    xaxis_title='Fiscal year',
    yaxis_title='Total net profit',
)

In [9]:
# Category shares over the whole operating period.
# year_good_distr has one row per (year, category), so every label occurs several times;
# per the Plotly documentation a pie trace sums the values of duplicated labels, which is
# why no explicit aggregation is performed here.
# NOTE(review): the five colours and pulls are positional -- presumably they line up with the
# first five rows of year_good_distr; verify if the row order of that frame changes.
share_pie = go.Pie(
    labels=year_good_distr['prod_category'],
    values=year_good_distr['total_net_profit'],
    marker_colors=['lightsalmon', 'olive', 'seagreen', 'lemonchiffon', 'orange'],
    textinfo='label+percent',
    textfont=dict(size=11),
    hole=0.4,
    pull=[0.2, 0, 0.2, 0, 0],
)

# Label placed in the middle of the donut hole.
period_note = dict(
    text='1998-2002',
    x=0.5,
    y=0.5,
    font_size=15,
    font_color='green',
    showarrow=False,
)

fig = go.Figure(data=[share_pie])
fig.update_layout(
    title_text="The Shares of Different Goods Groups Sales",
    annotations=[period_note],
)

In [11]:
# Showing only the most and the least profitable categories for each fiscal year.
# Categories are ranked twice per year (descending and ascending net profit); rows ranked
# first in either direction are kept and labelled 'most' / 'least'.
# The final ORDER BY makes the row order explicit (year, then 'least' before 'most'), because
# the grouped bar chart built from this frame derives bar and legend order from row order.
year_good = pd.read_sql_query("""WITH years_goods AS (
SELECT 	t.fiscal_year, 
		pr.prod_category, 
		sum(p.amount_sold - p.total_cost) 	AS total_net_profit,
		rank() OVER (PARTITION BY t.fiscal_year ORDER BY sum(p.amount_sold - p.total_cost) DESC) AS desc_rank,
		rank() OVER (PARTITION BY t.fiscal_year ORDER BY sum(p.amount_sold - p.total_cost)) AS asc_rank
FROM sh.profits p 
INNER JOIN sh.times t 
ON p.time_id = t.time_id
INNER JOIN sh.products pr 
ON pr.prod_id = p.prod_id 
GROUP BY t.fiscal_year, pr.prod_category 
)
SELECT 	yg.fiscal_year,
		CASE yg.desc_rank WHEN 1 THEN 'most' ELSE 'least' END AS "most / least_profitable", 
		yg.prod_category, 
		yg.total_net_profit
FROM years_goods yg
WHERE yg.desc_rank = 1 OR yg.asc_rank = 1
ORDER BY yg.fiscal_year, yg.asc_rank""", engine)

In [12]:
# Display the most / least profitable category per fiscal year.
year_good

Unnamed: 0,fiscal_year,most / least_profitable,prod_category,total_net_profit
0,1998,least,Electronics,295468.54
1,1998,most,Peripherals and Accessories,2031745.66
2,1999,least,Electronics,440690.34
3,1999,most,Peripherals and Accessories,1606041.95
4,2000,least,Software/Other,495324.33
5,2000,most,Peripherals and Accessories,1027796.34
6,2001,least,Electronics,838994.19
7,2001,most,Peripherals and Accessories,1751079.16
8,2002,least,Software/Other,3240.03
9,2002,most,Peripherals and Accessories,5154.03


In [13]:
# Grouped bars: the most and the least profitable category side by side for every fiscal year.
# Colours are pinned to the label values with color_discrete_map, so 'least' is always red and
# 'most' always green regardless of which label happens to appear first in year_good.
fig = px.bar(year_good, x='fiscal_year', y='total_net_profit', color='most / least_profitable', barmode='group', 
             color_discrete_map={'least': 'red', 'most': 'green'}, text='prod_category')

fig.update_layout(title='The Sales of Most and the Least Profitable Goods Categories by Year', 
                  xaxis_title='Fiscal year', yaxis_title='Total net profit')

#### 3. Total sales for every country and the top customer income group in each country.

In [14]:
# Per country: total amount sold, and the customer income level with the highest amount sold.
# The CTE aggregates sales by (country, income level), computes the country-wide total with a
# window SUM, and ranks the income levels inside each country by amount sold (descending).
# The outer query keeps only the top-ranked income level and sorts countries by total sales.
# NOTE: RANK() gives tied groups the same rank, so a tie for first place would yield
# more than one row for that country.
country_income = pd.read_sql_query("""WITH agg AS (
SELECT 	cntr.country_name, 
		c.cust_income_level, 
		SUM(s.amount_sold) 																	AS group_total_sold,
		SUM(SUM(s.amount_sold)) OVER(PARTITION BY country_name) 							AS country_total_sold,
		RANK () OVER (PARTITION BY cntr.country_iso_code ORDER BY SUM(s.amount_sold) DESC)	AS group_rank
FROM sh.sales s 
INNER JOIN sh.customers c 
ON c.cust_id = s.cust_id 
INNER JOIN sh.countries cntr
ON cntr.country_id = c.country_id 
GROUP BY cntr.country_iso_code, cntr.country_name, c.cust_income_level
)
SELECT 	country_name, 
		cust_income_level, 
		group_total_sold, 
		country_total_sold
FROM agg 
WHERE group_rank = 1
ORDER BY country_total_sold DESC""", engine)

In [15]:
# Display the per-country totals together with each country's top income group.
country_income

Unnamed: 0,country_name,cust_income_level,group_total_sold,country_total_sold
0,United States of America,"F: 110,000 - 129,999",9143098.25,52910773.15
1,Germany,"E: 90,000 - 109,999",1600505.02,9210129.22
2,Japan,"F: 110,000 - 129,999",1722527.46,7207880.09
3,United Kingdom,"F: 110,000 - 129,999",1492949.58,6393762.94
4,Italy,"F: 110,000 - 129,999",875522.95,4854505.28
5,Australia,"F: 110,000 - 129,999",1220749.0,3962293.15
6,France,"F: 110,000 - 129,999",798595.52,3776270.13
7,Singapore,"F: 110,000 - 129,999",853913.61,3063093.7
8,Canada,"F: 110,000 - 129,999",581639.9,2686510.09
9,Spain,"F: 110,000 - 129,999",392809.93,2090863.44


In [17]:
# World map coloured by each country's total amount sold; the hover text adds the
# top income group of the country and the amount sold within that group.
hover_text = (
    country_income['country_name'] + '<br>'
    + 'Key customer income group: ' + country_income['cust_income_level'] + '<br>'
    + 'Total sold in group: ' + country_income['group_total_sold'].astype('str')
)

sales_map = go.Choropleth(
    # Countries are matched by their full names rather than ISO codes.
    locations=country_income['country_name'],
    locationmode='country names',
    z=country_income['country_total_sold'],
    text=hover_text,
    colorscale='spectral',
    autocolorscale=False,
    reversescale=False,
    marker_line_color='white',
    marker_line_width=0.7,
    colorbar_tickprefix='$',
    colorbar_title='Total sold <br>amount, $',
)

map_layout = go.Layout(title='Total Sells Over the World with Key Customers Income Groups')

fig = go.Figure(data=sales_map, layout=map_layout)
fig.show()