In [1]:
import numpy as np
import pandas as pd
import zipfile as zfile
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns
from tradeanalysis import TradeReport as trr
from tradefile import TradeFile as trf

In [None]:
data_2020_2021 = trf("../data/import_PC_2020-2020.zip", merge_file="../data/import_PC_2021-2021.zip")
print(data_2020_2021.data.describe())
print(data_2020_2021.data.sample(10))

In [None]:
data_2020_2021.save_to_file(path="../data/")

In [2]:
report = trr(source_file="../data/HS_2016-01_2021-04.csv")

TradeReport: the uploaded data start at date 2016-01-01 00:00:00 and end at date 2021-04-01 00:00:00.


In [3]:
report.trade_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,value
kind,date,country,code,unit,Unnamed: 5_level_1
HS,2016-01-01,103,11,JPY,1424000
HS,2016-01-01,103,11,KG,5003
HS,2016-01-01,103,19,JPY,2969000
HS,2016-01-01,103,19,KG,3744
HS,2016-01-01,103,99,JPY,12351184000


In [None]:
yoy_report = report.yoy_country_report(kind='PC', country=220)

In [None]:
yoy_report.head(10)

In [4]:
start = report.last_date - pd.DateOffset(months=23)

In [9]:
start

Timestamp('2019-03-01 00:00:00')

In [5]:
end = report.last_date

In [11]:
end

Timestamp('2021-02-01 00:00:00')

In [None]:
report.trade_df.head(10)

In [None]:
report.trade_df.loc['PC', :, 220, '0070101', 'JPY']

# Comparison report

In [None]:
drilled = report.trade_df.loc['PC', start:end, [210,220], '1010103', 'JPY']

In [None]:
date_ix = pd.date_range(start, end, freq=pd.offsets.MonthBegin(), closed=None)
date_ix

In [None]:
drilled.index.get_level_values(2).unique()

In [None]:
new_mux = pd.MultiIndex.from_product([drilled.index.get_level_values(2).unique(), date_ix], names=['country', 'date'])
trends = drilled.groupby(['country', 'date']).mean().reindex(new_mux, fill_value=0)
trends.reset_index().pivot(index='date', columns='country', values='value').plot(figsize=(10,7))
plt.show()

# Year-on-Year report

In [6]:
code_mask = report.trade_df.index.get_level_values('code').str.len() > 3
drilled_yoy = report.trade_df.loc['HS', start:end, 220, code_mask, :].copy()
print(drilled_yoy.head(5))
year_sums = drilled_yoy.droplevel(
    ['kind', 'country']).groupby(['code'] +
                                 [pd.Grouper(freq='12M', level='date', closed='left')] +
                                 ['unit']).sum()
year_compared = year_sums.reset_index().pivot(index='code', columns=['date', 'unit'], values='value')
year_compared = year_compared.sort_values(by=(year_compared.columns.get_level_values(0).max(), 'JPY'), ascending=False)
year_compared.head(20)

                                            value
kind date       country code      unit           
HS   2019-05-01 220     000000011 JPY     8393000
                                  KG        68342
                        000000019 JPY    13231000
                                  KG        10664
                        000000099 JPY   231215000


date,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,...,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30
unit,L,CM,CT,DZ,GR,JPY,KG,KL,MT,NO,...,GR,JPY,KG,KL,MT,NO,PR,SM,ST,TH
code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
240399200,0,0,0,0,0,191664430000,18281319,0,0,0,...,0,202789158000,17534036,0,0,0,0,0,0,0
870324000,0,0,0,0,0,61819722000,0,0,0,2124,...,0,49117749000,0,0,0,1598,0,0,0,0
300490029,0,0,0,0,0,45288312000,478483,0,0,0,...,0,35624048000,388014,0,0,0,0,0,0,0
420291000,0,0,0,71458,0,30698788000,715027,0,0,0,...,0,25554190000,568759,0,0,0,0,0,0,0
711319029,0,0,0,0,4050708,29324476000,0,0,0,0,...,3225012,23849261000,0,0,0,0,0,0,0,0
420231200,0,0,0,148784,0,26457919000,379089,0,0,0,...,0,23231725000,338910,0,0,0,0,0,0,0
300490024,0,0,0,0,0,28371583000,217881,0,0,0,...,0,18742652000,238257,0,0,0,0,0,0,0
420292000,0,0,0,61616,0,18970603000,472989,0,0,0,...,0,18328806000,439258,0,0,0,0,0,0,0
293359400,0,0,0,0,0,16263540000,63143,0,0,0,...,0,16633239000,83562,0,0,0,0,0,0,0
841191000,0,0,0,0,0,20707875000,164105,0,0,0,...,0,16137327000,117601,0,0,0,0,0,0,0


In [8]:
codes = pd.read_csv("../data/HS_codes.csv", sep=',', dtype={'HS': 'str'})

In [9]:
codes_ix = codes.set_index('HS')

codes_ix.columns = [[pd.to_datetime(0)], ['Description']]
codes_ix.head()

Unnamed: 0_level_0,1970-01-01
Unnamed: 0_level_1,Description
HS,Unnamed: 1_level_2
11,Aircrafts'Stores(Ex.Fuels Etc)F/Food Etc
19,"Aircrafts'Stores(Ex.Fuels Etc.), N.E.S."
10121100,"Live Horses, Pure-Bred (Certified)"
10121210,"Live Horses, Pure-Bred, L-Breed(Certified)"
10121290,"Live Horses, Pure-Bred, N.E.S."


In [10]:
country_report_1 = codes_ix.merge(year_compared, left_index=True, right_index=True)

country_report_1.head()

Unnamed: 0_level_0,1970-01-01,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,...,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30
Unnamed: 0_level_1,Description,L,CM,CT,DZ,GR,JPY,KG,KL,MT,...,GR,JPY,KG,KL,MT,NO,PR,SM,ST,TH
11,Aircrafts'Stores(Ex.Fuels Etc)F/Food Etc,0,0,0,0,0,49549000,388945,0,0,...,0,2263000,35489,0,0,0,0,0,0,0
19,"Aircrafts'Stores(Ex.Fuels Etc.), N.E.S.",0,0,0,0,0,467811000,75164,0,0,...,0,1434000,1803,0,0,0,0,0,0,0
10129290,"Live Horses, N.E.S.",0,0,0,0,0,17665000,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10619090,Live Mammals N.E.S.,0,0,0,0,0,0,0,0,0,...,0,215000,6,0,0,2,0,0,0,0
10620010,Live Turtles,0,0,0,0,0,2062000,8,0,0,...,0,1191000,10,0,0,42,0,0,0,0


In [19]:
country_report_1.sort_values(by=(year_compared.columns.get_level_values(0).max(), 'JPY'),
                             ascending=False).to_csv('../data/country_report_italy.csv')

In [16]:
country_report_1.head(5)

Unnamed: 0_level_0,1970-01-01,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,2020-04-30,...,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30,2021-04-30
Unnamed: 0_level_1,Description,L,CM,CT,DZ,GR,JPY,KG,KL,MT,...,GR,JPY,KG,KL,MT,NO,PR,SM,ST,TH
11,Aircrafts'Stores(Ex.Fuels Etc)F/Food Etc,0,0,0,0,0,49549000,388945,0,0,...,0,2263000,35489,0,0,0,0,0,0,0
19,"Aircrafts'Stores(Ex.Fuels Etc.), N.E.S.",0,0,0,0,0,467811000,75164,0,0,...,0,1434000,1803,0,0,0,0,0,0,0
10129290,"Live Horses, N.E.S.",0,0,0,0,0,17665000,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10619090,Live Mammals N.E.S.,0,0,0,0,0,0,0,0,0,...,0,215000,6,0,0,2,0,0,0,0
10620010,Live Turtles,0,0,0,0,0,2062000,8,0,0,...,0,1191000,10,0,0,42,0,0,0,0


In [90]:
fin_report = country_report_1.swaplevel(0, 1, axis=1).sort_index(axis=1)

In [70]:
fin_report.head()

Unnamed: 0_level_0,L,L,CM,CM,CT,CT,DZ,DZ,Description,GR,GR,GR,JPY,KG,KL,MT,NO,PR,SM,ST,TH
Unnamed: 0_level_1,2020-04-30 00:00:00,2021-04-30 00:00:00,2020-04-30 00:00:00,2021-04-30 00:00:00,2020-04-30 00:00:00,2021-04-30 00:00:00,2020-04-30 00:00:00,2021-04-30 00:00:00,1970-01-01 00:00:00,2020-04-30 00:00:00,...,variation,variation,variation,variation,variation,variation,variation,variation,variation,variation
11,0,0,0,0,0,0,0,0,Aircrafts'Stores(Ex.Fuels Etc)F/Food Etc,0,...,0,0,0,0,0,0,0,0,0,0
19,0,0,0,0,0,0,0,0,"Aircrafts'Stores(Ex.Fuels Etc.), N.E.S.",0,...,0,0,0,0,0,0,0,0,0,0
10129290,0,0,0,0,0,0,0,0,"Live Horses, N.E.S.",0,...,0,0,0,0,0,0,0,0,0,0
10619090,0,0,0,0,0,0,0,0,Live Mammals N.E.S.,0,...,0,0,0,0,0,0,0,0,0,0
10620010,0,0,0,0,0,0,0,0,Live Turtles,0,...,0,0,0,0,0,0,0,0,0,0


In [91]:
for col in fin_report.columns.levels[0]:
    if col != 'Description':
        fin_report[col, 'variation'] = ((1 + fin_report[col, year_compared.columns.get_level_values(0).max()]) / \
                                        (1 + fin_report[col, year_compared.columns.get_level_values(0).min()])) - 1

                            L                                      CM  \
          2020-04-30 00:00:00 2021-04-30 00:00:00 2020-04-30 00:00:00   
000000011                   0                   0                   0   
000000019                   0                   0                   0   
010129290                   0                   0                   0   
010619090                   0                   0                   0   
010620010                   0                   0                   0   

                                               CT                      \
          2021-04-30 00:00:00 2020-04-30 00:00:00 2021-04-30 00:00:00   
000000011                   0                   0                   0   
000000019                   0                   0                   0   
010129290                   0                   0                   0   
010619090                   0                   0                   0   
010620010                   0                   0 

In [92]:
fin_report = fin_report.sort_index(axis=1)

In [101]:
fin_report[['Description', 'JPY', 'NO']].sort_values(by=('JPY', year_compared.columns.get_level_values(0).max()),
                             ascending=False).drop_duplicates().head(30)

Unnamed: 0_level_0,Description,JPY,JPY,JPY,NO,NO,NO
Unnamed: 0_level_1,1970-01-01 00:00:00,2020-04-30 00:00:00,2021-04-30 00:00:00,variation,2020-04-30 00:00:00,2021-04-30 00:00:00,variation
240399200,"Manufactured Tobacco & Substitutes, N.E.S",191664430000,202789158000,0.058043,0,0,0.0
870324000,Motor Cars & Other Motor Vehicles,61819722000,49117749000,-0.205468,2124,1598,-0.247529
300490029,"Medicaments, N.E.S.",45288312000,35624048000,-0.213394,0,0,0.0
420291000,"Article Of Cases, Outer Surface Leather",30698788000,25554190000,-0.167583,0,0,0.0
711319029,"Art.Of Jewellery & P/T Of P-Metal, N.E.S.",29324476000,23849261000,-0.186711,0,0,0.0
420231200,"Cigarette-Case & Similar, Outer Leather",26457919000,23231725000,-0.121937,0,0,0.0
300490024,"Medicaments, N.E.S. (For Retail Sale)",28371583000,18742652000,-0.339386,0,0,0.0
420292000,Article Of Cases Pla.-Sheet & Textile,18970603000,18328806000,-0.033831,0,0,0.0
293359400,Compound Of Cont.Pyrimidine Ring.Etc(Inn,16263540000,16633239000,0.022732,0,0,0.0
293359400,Compounds Of Cont.Pyrimidine Ring.Etc,16263540000,16633239000,0.022732,0,0,0.0
