In [None]:
import pandas as pd
import sys
# print(sys.path)
# Workaround: extend the module search path so that the numpy package can be found.
# NOTE(review): this is a hard-coded, machine-specific Python 3.7 location — confirm it is
# still required in the environment where this notebook runs.
sys.path.append('/usr/local/lib/python3.7/site-packages')
import numpy as np
import matplotlib.pyplot as plt
from random import randint
from collections import Counter
import seaborn as sns

In [None]:
''' 
Read the 4 files and assign names to the datasets
Retail Sales
Horeca Sales
Rent House Prices
Sales House Prices
'''

In [None]:
'The SARS outbreak escalated and peaked in H1 2003.'

In [None]:
# Load the cleaned retail sales workbook and preview its first rows.
retail_workbook = '../data/Retail Clean.xlsx'
retail = pd.read_excel(retail_workbook)
retail.head()

In [None]:
'List the distinct values of the Year column to check that every year is present and clean'
retail.loc[:, 'Year'].unique()

In [None]:
'Build a Date column by parsing "<Month>/<Year>" text assembled from the Month and Year columns'
month_text = retail['Month'].astype(str)
year_text = retail['Year'].astype(str)
retail['Date'] = pd.to_datetime(month_text + '/' + year_text)
retail.head()

In [None]:
'Eliminate 2001 and 2020 as they have partial data only'

# Years kept: 2002 through 2019, written as strings like the other Year
# comparisons in this notebook.
full_years = [str(year) for year in range(2002, 2020)]
# .copy() makes the filtered result an independent frame, so that adding a
# column to `retail` later on does not write to a slice of the loaded data.
retail = retail[retail['Year'].isin(full_years)].copy()

In [None]:
'Start with the yearly totals, so that month-to-month seasonality does not hide the trend'

# Default size for figures created from here on.
plt.rcParams['figure.figsize'] = [18.0, 6.0]
# Total of all retail outlets per year.
sum_year = retail.groupby('Year')[['All retail outlets']].sum()
sum_year.plot.bar()
plt.title("Cumulative Year Retail Sales in Hong Kong ('000 HK$) - 2002-2019")
plt.show()

In [None]:
'Interestingly, sales declined in 2003 vs 2002 and bounced back already in 2004.'

In [None]:
'Focus on the 2002 to 2005 period for a closer look'

# Note: despite the "01" in the variable name, the selection starts at 2002.
focus_years = ['2002', '2003', '2004', '2005']
in_focus_period = retail['Year'].isin(focus_years)
retail_01_to_05 = retail[in_focus_period]

In [None]:
'Look at the trend by month in the 2002 to 2005 range'

# x and y are passed by keyword: recent seaborn releases accept only `data`
# positionally, so the positional form fails there.
chart = sns.lineplot(data=retail_01_to_05, x='Date', y='All retail outlets')
# Put one tick on every plotted month.
chart.set(xticks=retail_01_to_05.Date.values)

plt.xticks(
    rotation=45,
    horizontalalignment='right',
    fontweight='light',
    fontsize='x-large'
)

plt.title("Sales of Retail Outlets in Hong Kong ('000 HK$) - 2002-2005")
plt.show()


In [None]:
# Split the data into one frame per year for 2002 through 2005.
retail_2002, retail_2003, retail_2004, retail_2005 = (
    retail[retail['Year'].isin([year])]
    for year in ('2002', '2003', '2004', '2005')
)

In [None]:
'Create three lists with the index values of the sales of 2003, 2004 and 2005 vs. 2002 (2002 = 100)'


def _index_vs_base(year_sales, base_sales):
    """Return each value of year_sales as an integer percentage of its paired base_sales value.

    Values are paired by position (pairing stops at the shorter input) and
    each percentage is truncated by int().
    """
    return [int(current / base * 100) for current, base in zip(year_sales, base_sales)]


index_03vs02 = _index_vs_base(retail_2003['All retail outlets'], retail_2002['All retail outlets'])
index_04vs02 = _index_vs_base(retail_2004['All retail outlets'], retail_2002['All retail outlets'])
index_05vs02 = _index_vs_base(retail_2005['All retail outlets'], retail_2002['All retail outlets'])



In [None]:
'Create a dataframe with the indexes'

Month = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
# Constant baseline of 100, one entry per month.
Flat = [100] * len(Month)
data = {
    'Month': Month,
    'Flat': Flat,
    '03 vs 02': index_03vs02,
    '04 vs 02': index_04vs02,
    '05 vs 02': index_05vs02,
}
df = pd.DataFrame(data)
df

In [None]:
'Using indexes vs 2002, show how steeply sales fell in 2003 Q1 and the recovery that started in Q2 and completed in Q3'

fig, ax = plt.subplots()

ax.step(df['Month'], df["03 vs 02"], label='03 vs 02', color="red")
ax.step(df['Month'], df["04 vs 02"], label='04 vs 02', color="green")
# Thin reference line at the constant value 100.
ax.step(df['Month'], df["Flat"], label='Flat', color="black", linewidth=0.25)

# 'lower right' is the same placement as the numeric code 4 used before;
# the named form avoids mistaking it for another corner.
ax.legend(loc='lower right')
ax.set_xlabel('Month', fontsize=18)
ax.set_ylabel('Index Month over Month 2002', fontsize=14)
ax.set_title('Retail Sales Index Month over Month 2002');



In [None]:
'Next: build a matrix of 2003 vs 2002 indexes for all categories to see how each one moved in 2003 and which recovered first'

In [None]:
'Select fresh food categories'

# Year, Date and the overall total are kept alongside the four food columns.
fresh_food_view = [
    'Year',
    'Date',
    'All retail outlets',
    'Fish, livestock and poultry, fresh or frozen',
    'Fruits and vegetables, fresh',
    'Bread, pastry, confectionery and biscuits',
    'Other food, not elsewhere classified',
]
retail_fresh = retail[fresh_food_view]

retail_fresh

In [None]:
# Fish, livestock and poultry sales against total retail sales, coloured by year.
sns.scatterplot(
    x='All retail outlets',
    y='Fish, livestock and poultry, fresh or frozen',
    hue="Year",
    data=retail_fresh,
)
plt.show()

In [None]:
'Eliminate the column with the sum of all sales to look at the split by category'

retail_not_all = retail.drop('All retail outlets', axis='columns')

In [None]:
'Aggregate fresh food'

# The sum is wrapped in parentheses so that it can continue across lines;
# a line that ends in a bare `+` is a syntax error.
retail['Fresh Food'] = (
    retail['Fish, livestock and poultry, fresh or frozen']
    + retail['Fruits and vegetables, fresh']
    + retail['Bread, pastry, confectionery and biscuits']
    + retail['Other food, not elsewhere classified']
)


In [None]:
# Category names: every column of retail_not_all except Year, Month and Date.
list_categories = list(retail_not_all.columns)
for non_category in ("Year", "Month", "Date"):
    list_categories.remove(non_category)
list_categories

In [None]:
'Create a pivot by year'

# Columns to total per year, in the order they should appear in the pivot.
category_columns = [
    'Fish, livestock and poultry, fresh or frozen',
    'Fruits and vegetables, fresh',
    'Bread, pastry, confectionery and biscuits',
    'Other food, not elsewhere classified',
    'Alcoholic drinks and tobacco',
    'Supermarkets',
    'Fuels',
    'Wearing apparel',
    'Footwear, allied products and other clothing accessories',
    'Motor vehicles and parts',
    'Furniture and fixtures',
    'Electrical goods and other consumer durable goods, not elsewhere classified',
    'Department stores',
    'Jewellery, watches and clocks, and valuable gifts',
    'Books, newspapers, stationery and gifts',
    'Chinese drugs and herbs',
    'Optical shops',
    'Medicines and cosmetics',
    'Other consumer goods, not elsewhere classified',
]
sum_year_not_all = retail_not_all.groupby('Year')[category_columns].sum()

sum_year_not_all

In [None]:
# Take the first row of the yearly pivot as the per-category totals for 2002.
# NOTE(review): this is a positional lookup and relies on 2002 being the first
# group in sum_year_not_all — confirm if the year filter ever changes.
sum_2002 = sum_year_not_all.iloc[0]
sum_2002

In [None]:
'Show the split by category for Year 2002'

fig1, ax1 = plt.subplots(figsize=(12,10))
# pie() returns the wedge patches first; keep them so the legend can be tied
# to the exact wedges it describes.
wedges = ax1.pie(sum_2002, autopct='%1.1f%%',
                 shadow=True, startangle=90, pctdistance=0.85)[0]

# Label the wedges from the index of the plotted series itself, so every name
# belongs to its wedge regardless of the column order of any other frame.
ax1.legend(wedges, list(sum_2002.index), loc='lower left', ncol=2, mode="expand", borderaxespad=0.)

# Draw a white circle over the centre to turn the pie into a donut.
centre_circle = plt.Circle((0,0),0.70,fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)


plt.show()