### UK TRQ Quality Assurance Notebook. 

#### This notebook is a quality assurance script for the UK TRQ analysis.

The notebook is split into the following parts:

**TRQ data:**

1. Data cleaning
2. Aggregate data and calculate utilisation rates
3. Compare outputs

**TRQ sector data:**


**Filled quotas:**

****

### **Part 1: data cleaning:**

Data uploads and clean column strings:

In [1]:
# upload data:

import pandas as pd
import numpy as np

!pip install openpyxl

# chnage from scientific notation 
pd.set_option('display.float_format', lambda x: '%.5f' % x)

uk_trqs = pd.read_excel('../rsp_uk_trq_qa/data/uk_trq_data.xlsx',dtype={'Quota Number': str}) 
uk_trqs2 = pd.read_excel('../rsp_uk_trq_qa/data/uk_license_trq_input.xlsx',dtype={'Quota number': str}) 
eu_trqs = pd.read_excel('../rsp_uk_trq_qa/data/eu_trq_data.xlsx',dtype={'Order_Number': str}) 


print(uk_trqs.shape, uk_trqs2.shape, eu_trqs.shape)

Looking in indexes: https://s3-eu-west-2.amazonaws.com/mirrors.notebook.uktrade.io/pypi/
Collecting openpyxl
  Downloading https://s3-eu-west-2.amazonaws.com/mirrors.notebook.uktrade.io/pypi/openpyxl/openpyxl-3.0.9-py2.py3-none-any.whl (242 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.2/242.2 KB[0m [31m183.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting et-xmlfile
  Downloading https://s3-eu-west-2.amazonaws.com/mirrors.notebook.uktrade.io/pypi/et-xmlfile/et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.9
You should consider upgrading via the '/opt/conda/bin/python3 -m pip install --upgrade pip' command.[0m[33m
[0m(1067, 14) (95, 17) (12445, 26)


In [2]:
# upload input files: each workbook is read with pandas' defaults and bound,
# in order, to the name listed on the left-hand side

(
    uk_trq_country_input,
    hs_dsc,
    country_names,
    eu_trq_data_input,
    eu_trq_sector_manual_input,
    trq_quota_unit,
) = map(
    pd.read_excel,
    (
        '../rsp_uk_trq_qa/data/uk_trq_country_input.xlsx',
        '../rsp_uk_trq_qa/data/hs_descriptions.xlsx',
        '../rsp_uk_trq_qa/data/country_names.xlsx',
        '../rsp_uk_trq_qa/data/eu_trq_data_input.xlsx',
        '../rsp_uk_trq_qa/data/eu_trq_sector_manual_input.xlsx',
        '../rsp_uk_trq_qa/data/trq_quota_unit.xlsx',
    ),
)

# one (rows, columns) tuple per input table, in load order
print(*(
    table.shape
    for table in (
        uk_trq_country_input,
        hs_dsc,
        country_names,
        eu_trq_data_input,
        eu_trq_sector_manual_input,
        trq_quota_unit,
    )
))

(70, 4) (98, 3) (225, 4) (114, 4) (249, 11) (6, 3)


In [3]:
def cleanCols(df):
    """Lower-case the column labels of ``df`` and swap spaces for underscores.

    The labels are rewritten on ``df`` itself, and that same frame is returned.
    """
    lowered = df.columns.str.lower()
    df.columns = lowered.str.replace(" ", "_")
    return df

In [4]:
# clean names of dataframes:
# cleanCols renames the columns on the frame it is given and returns that same
# frame, so every table must be passed to its own call.

uk_trqs = cleanCols(uk_trqs)
uk_trqs2 = cleanCols(uk_trqs2)
eu_trqs = cleanCols(eu_trqs)
uk_trq_country_input = cleanCols(uk_trq_country_input)
# fixed: this previously passed uk_trq_country_input, which rebound hs_dsc to the
# country table and discarded the HS descriptions table.
hs_dsc = cleanCols(hs_dsc)
country_names = cleanCols(country_names)
eu_trq_data_input = cleanCols(eu_trq_data_input)
eu_trq_sector_manual_input = cleanCols(eu_trq_sector_manual_input)
trq_quota_unit = cleanCols(trq_quota_unit)

In [5]:
# clean and combine UK TRQ data:
# start by listing the column names and dtypes of the raw UK table
print(uk_trqs.dtypes)

quota_number                  object
quota_product_description     object
year                           int64
quota_origin                  object
validity_start_date           object
validity_end_date             object
quota_volume                 float64
remaining_balance            float64
quota_fill_rate              float64
quota_unit                    object
commodity_codes               object
quota_status                  object
last_allocation_date          object
trq_type                      object
dtype: object


In [6]:
# select data for non-license quotas:

uk = uk_trqs.loc[:,["quota_number",
                   "quota_product_description",
                 "year",
                 "quota_origin",
                 "quota_volume",
                 "remaining_balance",
                 "quota_unit",
                 "commodity_codes"]
                ]

# rename (returns a new frame instead of renaming the selection in place)

uk = uk.rename(columns = {'quota_product_description':'quota_description', 'year':'quota_year'})

# filter for quota year = 2021

uk = uk.loc[uk["quota_year"] == 2021]

# match country/region names for combining with license data and final aggregated outputs:

uk = pd.merge(uk, uk_trq_country_input, left_on = "quota_origin", right_on = "country_name", how = "left")

# NaN df for reference: rows whose region is missing after the merge.
# This has to be taken BEFORE the filter below; taken afterwards it is always empty.

uknan = uk[uk['region'].isnull()]

# filter away NA (i.e. non match).
# .copy() gives later cells an independent frame to add columns to, rather than
# a filtered selection of the merged frame.

uk = uk[uk['region'].notnull()].copy()

uk.head()

Unnamed: 0,quota_number,quota_description,quota_year,quota_origin,quota_volume,remaining_balance,quota_unit,commodity_codes,iso,country_name,grouping,region
67,50146,Live bovine animals,2021,Switzerland,247.0,247.0,Number of items (p/st),0102294100 ; 0102294900 ; 0102295100 ; 0102295...,CH,Switzerland,Switzerland,Wider Europe & Middle East
72,50155,"Meat and edible offal, of duch, fresh or forzen",2021,Israel,76000.0,76000.0,Kilogram (kg),0207423000 ; 0207428000 ; 0207441000 ; 0207442...,IL,Israel,Israel,Wider Europe & Middle East
78,50202,"Potatoes, fresh or chilled, other than seed",2021,Turkey,417000.0,322220.0,Kilogram (kg),0701900000,TR,Turkey,Turkey,Eastern Europe
79,50204,Fresh table grapes,2021,Turkey,58000.0,58000.0,Kilogram (kg),0806101090,TR,Turkey,Turkey,Eastern Europe
80,50204,Fresh table grapes,2021,Turkey,58000.0,0.0,Kilogram (kg),0806101090,TR,Turkey,Turkey,Eastern Europe


In [7]:
# Quota units must be made consistent before aggregating, e.g. Tonnes expressed
# in Kilograms and Hectolitres in Litres.
# First list the distinct units present in the UK data.
print(uk["quota_unit"].unique())

['Number of items (p/st)' 'Kilogram (kg)' 'Litre (l)' 'Hectolitre (hl)'
 'Tonne (1,000 kg)' 'Square metre (m2)' 'Number of pairs (pa)'
 'Litre pure (100%) alcohol (l alc. 100%)']


In [8]:
# Flag rows whose unit mentions "Tonne", then relabel those rows as kilograms.
# na=False makes a missing unit count as "not tonnes": without it str.contains
# returns NaN for such rows, and np.where treats NaN as true.
uk["tonne_flag"] = uk["quota_unit"].str.contains("Tonne", na=False)
uk["quota_unit"] = np.where(uk["tonne_flag"], "Kilogram (kg)", uk["quota_unit"])

In [9]:
# scale the amounts on tonne-flagged rows by 1,000 (tonnes -> kilograms);
# all other rows keep their value
uk["quota_volume"] = uk["quota_volume"].mask(
    uk["tonne_flag"].eq(True), uk["quota_volume"] * 1000
)
uk["remaining_balance"] = uk["remaining_balance"].mask(
    uk["tonne_flag"].eq(True), uk["remaining_balance"] * 1000
)

In [10]:
# convert HL to L:
# Flag rows whose unit mentions "Hecto", relabel them as litres and scale both
# amounts by 100 (1 hectolitre = 100 litres).
# na=False makes a missing unit count as "not hectolitres": without it
# str.contains returns NaN for such rows, and np.where treats NaN as true.
uk["hl_flag"] = uk["quota_unit"].str.contains("Hecto", na=False)
uk["quota_unit"] = np.where(uk["hl_flag"], "Litre (l)", uk["quota_unit"])
uk["quota_volume"] = np.where(uk["hl_flag"], uk["quota_volume"]*100, uk["quota_volume"])
uk["remaining_balance"] = np.where(uk["hl_flag"], uk["remaining_balance"]*100, uk["remaining_balance"])


In [11]:
# Attach the common unit name (quota_unit_final) from the unit lookup table, so
# UK and EU rows can later be grouped under the same unit labels.

uk = uk.merge(
    trq_quota_unit[["uk_quota_unit", "quota_unit_final"]],
    how="left",
    left_on="quota_unit",
    right_on="uk_quota_unit",
)
uk.head()

Unnamed: 0,quota_number,quota_description,quota_year,quota_origin,quota_volume,remaining_balance,quota_unit,commodity_codes,iso,country_name,grouping,region,tonne_flag,hl_flag,uk_quota_unit,quota_unit_final
0,50146,Live bovine animals,2021,Switzerland,247.0,247.0,Number of items (p/st),0102294100 ; 0102294900 ; 0102295100 ; 0102295...,CH,Switzerland,Switzerland,Wider Europe & Middle East,False,False,Number of items (p/st),Number of items
1,50155,"Meat and edible offal, of duch, fresh or forzen",2021,Israel,76000.0,76000.0,Kilogram (kg),0207423000 ; 0207428000 ; 0207441000 ; 0207442...,IL,Israel,Israel,Wider Europe & Middle East,False,False,Kilogram (kg),Kilograms
2,50202,"Potatoes, fresh or chilled, other than seed",2021,Turkey,417000.0,322220.0,Kilogram (kg),0701900000,TR,Turkey,Turkey,Eastern Europe,False,False,Kilogram (kg),Kilograms
3,50204,Fresh table grapes,2021,Turkey,58000.0,58000.0,Kilogram (kg),0806101090,TR,Turkey,Turkey,Eastern Europe,False,False,Kilogram (kg),Kilograms
4,50204,Fresh table grapes,2021,Turkey,58000.0,0.0,Kilogram (kg),0806101090,TR,Turkey,Turkey,Eastern Europe,False,False,Kilogram (kg),Kilograms


In [12]:
# remove columns: the two conversion flags and the lookup key are no longer needed.
# `columns=` replaces the positional axis argument (drop(labels, 1)), which is
# deprecated since pandas 1.3 and no longer accepted from pandas 2.0.
uk = uk.drop(columns=["tonne_flag","hl_flag","uk_quota_unit"])

In [13]:
# filter licence quota data: keep only the rows for year 2021

uk2 = uk_trqs2[uk_trqs2["year"].eq(2021)]
uk2.shape

(95, 17)

In [14]:
# discard licence rows that have no rsp_region value, then list the dtypes
uk2 = uk2.dropna(subset=["rsp_region"])
uk2.dtypes

quota_number                object
quota_description           object
year                         int64
quota_origin                object
rsp_region                  object
quota_period                object
quota_unit                  object
commodity_codes             object
trq_type                    object
2021_quantity_available      int64
2021_quantity_remaining    float64
2021_usage                 float64
2021_fill_rate             float64
unnamed:_13                float64
unnamed:_14                float64
unnamed:_15                float64
unnamed:_16                float64
dtype: object

In [15]:
# match country input file for consistent naming across dfs
uk2 = uk2.merge(
    uk_trq_country_input,
    how="left",
    left_on="quota_origin",
    right_on="country_name",
)
uk2.dtypes

quota_number                object
quota_description           object
year                         int64
quota_origin                object
rsp_region                  object
quota_period                object
quota_unit                  object
commodity_codes             object
trq_type                    object
2021_quantity_available      int64
2021_quantity_remaining    float64
2021_usage                 float64
2021_fill_rate             float64
unnamed:_13                float64
unnamed:_14                float64
unnamed:_15                float64
unnamed:_16                float64
iso                         object
country_name                object
grouping                    object
region                      object
dtype: object

In [16]:
# join quota unit input for consistent quota unit names across dfs
uk2 = uk2.merge(
    trq_quota_unit[["uk_quota_unit", "quota_unit_final"]],
    how="left",
    left_on="quota_unit",
    right_on="uk_quota_unit",
)

In [17]:
# confirm the two lookup columns (uk_quota_unit, quota_unit_final) were added by the merge
uk2.dtypes

quota_number                object
quota_description           object
year                         int64
quota_origin                object
rsp_region                  object
quota_period                object
quota_unit                  object
commodity_codes             object
trq_type                    object
2021_quantity_available      int64
2021_quantity_remaining    float64
2021_usage                 float64
2021_fill_rate             float64
unnamed:_13                float64
unnamed:_14                float64
unnamed:_15                float64
unnamed:_16                float64
iso                         object
country_name                object
grouping                    object
region                      object
uk_quota_unit               object
quota_unit_final            object
dtype: object

In [18]:
# Keep only the licence columns that have a counterpart in the non-licence
# frame, and give them the same names so the two frames can be stacked.
uk2 = uk2[
    [
        "quota_number",
        "quota_description",
        "year",
        "quota_origin",
        "2021_quantity_available",
        "2021_quantity_remaining",
        "quota_unit",
        "commodity_codes",
        "iso",
        "country_name",
        "grouping",
        "region",
        "quota_unit_final",
    ]
].rename(
    columns={
        "2021_quantity_available": "quota_volume",
        "2021_quantity_remaining": "remaining_balance",
        'year': 'quota_year',
    }
)

In [19]:
# combine uk dfs together: the licence rows (uk2) are stacked beneath the
# non-licence rows (uk); original row labels are kept

uk3 = pd.concat(objs=[uk, uk2], axis=0)
uk3.shape

(810, 13)

In [20]:
# eu data - match country input for consistent grouping/region names and filter.
# Steps, in order: keep year 2020, left-join the origin lookup, drop rows with
# no region match, then rename order_number / year to the UK column names.
# NOTE(review): the EU rows are taken from 2020 whereas the UK cells filter on
# 2021 - confirm this difference is intended.

eu = (
    eu_trqs[eu_trqs["year"] == 2020]
    .merge(eu_trq_data_input, on="origin", how="left")
    .dropna(subset=["region"])
    .rename(columns={'order_number': 'quota_number', 'year': 'quota_year'})
)

In [21]:
# match quota unit for consistency across dfs
eu = eu.merge(
    trq_quota_unit[["eu_quota_unit", "quota_unit_final"]],
    how="left",
    left_on="unit",
    right_on="eu_quota_unit",
)

In [22]:
# column dtypes of the EU frame after the origin-lookup and quota-unit merges
eu.dtypes

quota_number                         object
origin                               object
description                          object
quota_year                            int64
application_period_starts    datetime64[ns]
application_period_ends      datetime64[ns]
volume                              float64
eu_usage                            float64
unit                                 object
last_import_date             datetime64[ns]
report_date                  datetime64[ns]
live                                float64
adj_ap_start                 datetime64[ns]
adj_ap_end                   datetime64[ns]
source                               object
cn8_codes                            object
cn10_codes                           object
inquota_rate                         object
regulations                          object
quota_type                           object
eu_usage_source                      object
origin_code_x                        object
country_exc                     

In [23]:
# display the EU origin lookup table that was merged onto the EU data by "origin"
eu_trq_data_input

Unnamed: 0,origin,origin_code,grouping,region
0,ACP,"AO, AG, BB, BZ, BJ, BW, BF, BI, CM, CV, CF, TD...",ACP,
1,ACP-Cariforum,"AO, AG, BB, BZ, BJ, BW, BF, BI, CM, CV, CF, TD...",ACP-Cariforum,
2,Albania,AL,Albania,Eastern Europe
3,Algeria,DZ,Algeria,
4,Argentina,AR,Argentina,
...,...,...,...,...
109,WTO countries,E.O.,ERGA OMNES,
110,"XK, MK, RS, ME, XW, AL, HR, BA","XK, MK, RS, ME, XW, AL, HR, BA","XK, MK, RS, ME, XW, AL, HR, BA",
111,"XK, XV, RS, ME, BA","XK, XV, RS, ME, BA","XK, XV, RS, ME, BA",
112,"XN, 2300","AR, BD, BR, SV, GT, HN, IN, ID, LA, PK, LK, TH","XN, 2300",


### **2. Data aggregations and utilisation calculations**

Both UK and EU data need to be grouped by quota origin, grouping and region to produce 3 separate outputs. 

Each grouping is combined with the quota unit, because TRQs must be reported separately for each unit: quantities in different units (e.g. kilograms and litres) are different measurements and cannot be added together. 

Once the grouped quota allocation has been calculated, the usage of the full allocation and its utilisation rate are calculated. 

**UK data aggregation**

In [24]:
# Per origin and unit: total quota volume, total remaining balance and the
# number of quota rows in the group.
uk_agg_origin = uk3.groupby(["quota_origin","quota_unit_final"], as_index = False).agg({"quota_volume":"sum", 
                                                                                        "remaining_balance":"sum",
                                                                                       "quota_number":"count"})
uk_agg_origin["source"] = "UK" 

# calculate quota allocation usage and utilisation rate
uk_agg_origin["usage"] = uk_agg_origin["quota_volume"] - uk_agg_origin["remaining_balance"]
uk_agg_origin["fill_rate"] = uk_agg_origin["usage"] / uk_agg_origin["quota_volume"]

# remove remaining balance as not needed for outputs.
# `columns=` replaces the positional axis argument (drop(label, 1)), which is
# deprecated since pandas 1.3 and no longer accepted from pandas 2.0.

uk_agg_origin = uk_agg_origin.drop(columns="remaining_balance")

# small df so can more easily name column order:

uk_agg_origin = uk_agg_origin[["quota_origin","quota_unit_final","quota_volume","usage","fill_rate","quota_number","source"]]

uk_agg_origin

Unnamed: 0,quota_origin,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source
0,Albania,Kilograms,384000.00000,0.00000,0.00000,7,UK
1,Albania,Litres,163400.00000,0.00000,0.00000,3,UK
2,Canada,Kilograms,149769000.00000,5861431.21000,0.03914,44,UK
3,Canada,Number of items,3152000.00000,55707.00000,0.01767,20,UK
4,Canada,Square metre (m2),13222000.00000,0.00000,0.00000,8,UK
...,...,...,...,...,...,...,...
61,Tunisia,Kilograms,9615400.00000,45300.00000,0.00471,11,UK
62,Tunisia,Litres,639700.00000,0.00000,0.00000,2,UK
63,Turkey,Kilograms,446451000.00000,280871448.63300,0.62912,64,UK
64,Ukraine,Kilograms,417412000.00000,65218829.00000,0.15625,46,UK


In [25]:
# Per country grouping and unit: total quota volume, total remaining balance
# and the number of quota rows in the group.
uk_agg_grouping = uk3.groupby(["grouping","quota_unit_final"], as_index = False).agg({"quota_volume":"sum", 
                                                                                      "remaining_balance":"sum",
                                                                                     "quota_number":"count"})
uk_agg_grouping["source"] = "UK" 

# calculate quota allocation usage and utilisation rate
uk_agg_grouping["usage"] = uk_agg_grouping["quota_volume"] - uk_agg_grouping["remaining_balance"]
uk_agg_grouping["fill_rate"] = uk_agg_grouping["usage"] / uk_agg_grouping["quota_volume"]

# remove remaining balance as not needed for outputs.
# `columns=` replaces the positional axis argument (drop(label, 1)), which is
# deprecated since pandas 1.3 and no longer accepted from pandas 2.0.

uk_agg_grouping = uk_agg_grouping.drop(columns="remaining_balance")

# small df so can more easily name column order:

uk_agg_grouping = uk_agg_grouping[["grouping","quota_unit_final","quota_volume","usage","fill_rate","quota_number","source"]]

uk_agg_grouping.head()

Unnamed: 0,grouping,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source
0,Albania,Kilograms,384000.0,0.0,0.0,7,UK
1,Albania,Litres,163400.0,0.0,0.0,3,UK
2,Andean countries,Kilograms,101918000.0,2602643.312,0.02554,112,UK
3,Andean countries,Litre pure (100%) alcohol,45100.0,0.0,0.0,2,UK
4,Andean countries,Litres,1700.0,0.0,0.0,1,UK


In [26]:
# Per region and unit: total quota volume, total remaining balance and the
# number of quota rows in the group.
uk_agg_region = uk3.groupby(["region","quota_unit_final"], as_index = False).agg({"quota_volume":"sum", 
                                                                                  "remaining_balance":"sum",
                                                                                 "quota_number":"count"})
uk_agg_region["source"] = "UK" 

# calculate quota allocation usage and utilisation rate
uk_agg_region["usage"] = uk_agg_region["quota_volume"] - uk_agg_region["remaining_balance"]
uk_agg_region["fill_rate"] = uk_agg_region["usage"] / uk_agg_region["quota_volume"]

# remove remaining balance as not needed for outputs.
# `columns=` replaces the positional axis argument (drop(label, 1)), which is
# deprecated since pandas 1.3 and no longer accepted from pandas 2.0.

uk_agg_region = uk_agg_region.drop(columns="remaining_balance")

# small df so can more easily name column order:

uk_agg_region = uk_agg_region[["region","quota_unit_final","quota_volume","usage","fill_rate","quota_number","source"]]

uk_agg_region

Unnamed: 0,region,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source
0,Africa,Kilograms,314629400.0,185740879.04,0.59035,60,UK
1,Africa,Litres,72122600.0,62438545.176,0.86573,4,UK
2,Asia Pacific,Kilograms,237435000.0,110682361.0,0.46616,46,UK
3,Asia Pacific,Square metre (m2),1068320.0,0.0,0.0,1,UK
4,Eastern Europe,Kilograms,904875906.0,346090277.633,0.38247,138,UK
5,Eastern Europe,Litres,8085726.3,234283.8,0.02897,11,UK
6,The Americas,Kilograms,538510273.0,71164672.665,0.13215,237,UK
7,The Americas,Litre pure (100%) alcohol,196400.0,136676.0,0.69591,4,UK
8,The Americas,Litres,1700.0,0.0,0.0,1,UK
9,The Americas,Number of items,23468686.0,55707.0,0.00237,139,UK


In [27]:
# (rows, columns) of the three UK aggregates: origin, grouping, region
print(*(agg.shape for agg in (uk_agg_origin, uk_agg_grouping, uk_agg_region)))

(66, 7) (46, 7) (15, 7)


**EU data aggregation**

In [28]:
# Per origin and unit EU totals. Named aggregation creates the volume and usage
# columns directly under the names used by the UK tables; only the group key
# still needs renaming.
eu_agg_origin = (
    eu.groupby(["origin", "quota_unit_final"], as_index=False)
    .agg(
        quota_volume=("volume", "sum"),
        usage=("eu_usage", "sum"),
        quota_number=("quota_number", "count"),
    )
    .rename(columns={'origin': 'quota_origin'})
    .assign(source="EU")
)

# calculate utilisation (fill rate)
eu_agg_origin["fill_rate"] = eu_agg_origin["usage"].div(eu_agg_origin["quota_volume"])

# move fill_rate from the last position to position 4, directly after usage
col = eu_agg_origin.pop("fill_rate")
eu_agg_origin.insert(4, "fill_rate", col)

eu_agg_origin

Unnamed: 0,quota_origin,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source
0,Albania,Kilograms,2810000.00000,1673986.00000,0.59572,7,EU
1,Albania,Litres,700000.00000,4038.00000,0.00577,2,EU
2,Canada,Kilograms,370141000.00000,7833365.20700,0.02116,41,EU
3,Canada,Number of items,3203875.50000,199500.40000,0.06227,20,EU
4,Canada,Square metre (m2),16191000.00000,10274.63900,0.00063,9,EU
...,...,...,...,...,...,...,...
57,Tunisia,Kilograms,126299000.00000,68455753.00000,0.54201,11,EU
58,Tunisia,Litres,23520000.00000,15818.00000,0.00067,2,EU
59,Turkey,Kilograms,4045301876.00400,1799497888.22100,0.44484,68,EU
60,Ukraine,Kilograms,6716435736.75000,3154710277.55700,0.46970,77,EU


In [29]:
# NOTE(review): this cell rebuilds eu_agg_origin exactly as the preceding cell
# does; it looks like an accidental duplicate - confirm before removing it.
eu_agg_origin = (
    eu.groupby(["origin", "quota_unit_final"], as_index=False)
    .agg({"volume": "sum", "eu_usage": "sum", "quota_number": "count"})
    .assign(source="EU")
    # rename origin, usage and volume to combine with UK data
    .rename(columns={'origin': 'quota_origin', 'eu_usage': 'usage', "volume": "quota_volume"})
)

# calculate utilisation (fill rate)
eu_agg_origin["fill_rate"] = eu_agg_origin["usage"] / eu_agg_origin["quota_volume"]
col = eu_agg_origin["fill_rate"]

# put fill_rate directly after usage by listing the columns in their final order
eu_agg_origin = eu_agg_origin[
    ["quota_origin", "quota_unit_final", "quota_volume", "usage", "fill_rate", "quota_number", "source"]
]

eu_agg_origin

Unnamed: 0,quota_origin,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source
0,Albania,Kilograms,2810000.00000,1673986.00000,0.59572,7,EU
1,Albania,Litres,700000.00000,4038.00000,0.00577,2,EU
2,Canada,Kilograms,370141000.00000,7833365.20700,0.02116,41,EU
3,Canada,Number of items,3203875.50000,199500.40000,0.06227,20,EU
4,Canada,Square metre (m2),16191000.00000,10274.63900,0.00063,9,EU
...,...,...,...,...,...,...,...
57,Tunisia,Kilograms,126299000.00000,68455753.00000,0.54201,11,EU
58,Tunisia,Litres,23520000.00000,15818.00000,0.00067,2,EU
59,Turkey,Kilograms,4045301876.00400,1799497888.22100,0.44484,68,EU
60,Ukraine,Kilograms,6716435736.75000,3154710277.55700,0.46970,77,EU


In [30]:
# Per grouping and unit EU totals, with the volume and usage columns created
# directly under the names used by the UK tables.
eu_agg_grouping = (
    eu.groupby(["grouping", "quota_unit_final"], as_index=False)
    .agg(
        quota_volume=("volume", "sum"),
        usage=("eu_usage", "sum"),
        quota_number=("quota_number", "count"),
    )
    .assign(source="EU")
)

# calculate utilisation (fill rate)
eu_agg_grouping["fill_rate"] = eu_agg_grouping["usage"].div(eu_agg_grouping["quota_volume"])

# move fill_rate from the last position to position 4, directly after usage
col = eu_agg_grouping.pop("fill_rate")
eu_agg_grouping.insert(4, "fill_rate", col)

eu_agg_grouping.head()

Unnamed: 0,grouping,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source
0,Albania,Kilograms,2810000.0,1673986.0,0.59572,7,EU
1,Albania,Litres,700000.0,4038.0,0.00577,2,EU
2,Andean countries,Kilograms,707581000.0,84464432.67,0.11937,77,EU
3,Andean countries,Litre pure (100%) alcohol,390000.0,0.0,0.0,2,EU
4,Andean countries,Litres,28000.0,0.0,0.0,1,EU


In [31]:
# Per region and unit EU totals, with the volume and usage columns created
# directly under the names used by the UK tables.
eu_agg_region = (
    eu.groupby(["region", "quota_unit_final"], as_index=False)
    .agg(
        quota_volume=("volume", "sum"),
        usage=("eu_usage", "sum"),
        quota_number=("quota_number", "count"),
    )
    .assign(source="EU")
)

# calculate utilisation (fill rate)
eu_agg_region["fill_rate"] = eu_agg_region["usage"].div(eu_agg_region["quota_volume"])

# move fill_rate from the last position to position 4, directly after usage
col = eu_agg_region.pop("fill_rate")
eu_agg_region.insert(4, "fill_rate", col)

eu_agg_region.head()

Unnamed: 0,region,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source
0,Africa,Kilograms,1250952313.0,701944589.512,0.56113,58,EU
1,Africa,Litres,137756000.0,96679497.625,0.70182,4,EU
2,Asia Pacific,Kilograms,4049103800.095,1283041327.801,0.31687,83,EU
3,Asia Pacific,Square metre (m2),17805290.0,5876166.197,0.33002,1,EU
4,Eastern Europe,Kilograms,12285437042.754,5599447746.738,0.45578,190,EU


**combine aggregated data outputs:**

In [32]:
# stack the UK and EU origin-level tables (UK rows first)
trq_agg_origin = pd.concat(objs=[uk_agg_origin, eu_agg_origin])
print(trq_agg_origin.shape)

# re-order: rows for the same origin and unit end up next to each other
# (ascending order is the sort_values default)
trq_agg_origin = trq_agg_origin.sort_values(["quota_origin", "quota_unit_final", "source"])
trq_agg_origin.head()

(128, 7)


Unnamed: 0,quota_origin,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source
0,Albania,Kilograms,2810000.0,1673986.0,0.59572,7,EU
0,Albania,Kilograms,384000.0,0.0,0.0,7,UK
1,Albania,Litres,700000.0,4038.0,0.00577,2,EU
1,Albania,Litres,163400.0,0.0,0.0,3,UK
2,Canada,Kilograms,370141000.0,7833365.207,0.02116,41,EU


In [33]:
# stack the UK and EU grouping-level tables (UK rows first)
trq_agg_grouping = pd.concat(objs=[uk_agg_grouping, eu_agg_grouping])
print(trq_agg_grouping.shape)

# re-order: rows for the same grouping and unit end up next to each other
# (ascending order is the sort_values default)
trq_agg_grouping = trq_agg_grouping.sort_values(["grouping", "quota_unit_final", "source"])
trq_agg_grouping

(91, 7)


Unnamed: 0,grouping,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source
0,Albania,Kilograms,2810000.00000,1673986.00000,0.59572,7,EU
0,Albania,Kilograms,384000.00000,0.00000,0.00000,7,UK
1,Albania,Litres,700000.00000,4038.00000,0.00577,2,EU
1,Albania,Litres,163400.00000,0.00000,0.00000,3,UK
2,Andean countries,Kilograms,707581000.00000,84464432.67000,0.11937,77,EU
...,...,...,...,...,...,...,...
43,Turkey,Kilograms,446451000.00000,280871448.63300,0.62912,64,UK
43,Ukraine,Kilograms,6716435736.75000,3154710277.55700,0.46970,77,EU
44,Ukraine,Kilograms,417412000.00000,65218829.00000,0.15625,46,UK
44,Vietnam,Kilograms,130914973.00000,39727582.00000,0.30346,17,EU


In [34]:
# stack the UK and EU region-level tables (UK rows first)
trq_agg_region = pd.concat(objs=[uk_agg_region, eu_agg_region])
print(trq_agg_region.shape)

# re-order: rows for the same region and unit end up next to each other
# (ascending order is the sort_values default)
trq_agg_region = trq_agg_region.sort_values(["region", "quota_unit_final", "source"])
trq_agg_region

(30, 7)


Unnamed: 0,region,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source
0,Africa,Kilograms,1250952313.0,701944589.512,0.56113,58,EU
0,Africa,Kilograms,314629400.0,185740879.04,0.59035,60,UK
1,Africa,Litres,137756000.0,96679497.625,0.70182,4,EU
1,Africa,Litres,72122600.0,62438545.176,0.86573,4,UK
2,Asia Pacific,Kilograms,4049103800.095,1283041327.801,0.31687,83,EU
2,Asia Pacific,Kilograms,237435000.0,110682361.0,0.46616,46,UK
3,Asia Pacific,Square metre (m2),17805290.0,5876166.197,0.33002,1,EU
3,Asia Pacific,Square metre (m2),1068320.0,0.0,0.0,1,UK
4,Eastern Europe,Kilograms,12285437042.754,5599447746.738,0.45578,190,EU
4,Eastern Europe,Kilograms,904875906.0,346090277.633,0.38247,138,UK


### 2b. UK quota aggregations using function:

Writing out the groupby aggregation each time duplicates code. This can be avoided by defining the aggregation once as a function and calling it for each grouping level. 

In [35]:
def uk_aggFunc(data, group_type,sector_group = None):
    """Aggregate quota rows by group and unit, and derive usage and fill rate.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the grouping column(s) named below plus
        "quota_unit_final", "quota_volume", "remaining_balance" and
        "quota_number".
    group_type : str
        Name of the column to group by.
    sector_group : str, optional
        Name of a second column to group by. When omitted, rows are grouped by
        ``group_type`` and "quota_unit_final" only.

    Returns
    -------
    pandas.DataFrame
        One row per group with the grouping column(s), "quota_unit_final",
        "quota_volume" (sum), "usage" (summed volume minus summed remaining
        balance), "fill_rate" (usage / volume), "quota_number" (row count) and
        "source" (always "UK").
    """
    # `is None` is the identity test for the "no sector column" default
    group_cols = [group_type] if sector_group is None else [group_type, sector_group]
    cols = group_cols + ["quota_unit_final"]
    final_cols = cols + ["quota_volume","usage","fill_rate","quota_number","source"]

    df_agg = data.groupby(cols, as_index = False).agg({"quota_volume":"sum", "remaining_balance":"sum","quota_number":"count"})
    df_agg["source"] = "UK"
    df_agg["usage"] = df_agg["quota_volume"] - df_agg["remaining_balance"]
    df_agg["fill_rate"] = df_agg["usage"] / df_agg["quota_volume"]
    # selecting final_cols orders the output and leaves remaining_balance out
    return df_agg[final_cols]


In [36]:
# the same three UK aggregations, this time produced through uk_aggFunc
df_region = uk_aggFunc(data=uk3, group_type="region")
df_grouping = uk_aggFunc(data=uk3, group_type="grouping")
df_origin = uk_aggFunc(data=uk3, group_type="quota_origin")

# (rows, columns) of each result
print(*(agg.shape for agg in (df_region, df_grouping, df_origin)))

(15, 7) (46, 7) (66, 7)


Writing and using a function is preferable to repeating similar code. 

### 3. Compare outputs:

Compare the R-generated TRQ grouping outputs to the Python outputs as a quality assurance check:

In [37]:
# load the reference outputs: one worksheet of the same workbook per aggregation level
trq_agg_region2 = pd.read_excel(
    '../rsp_uk_trq_qa/outputs/trq_data_output11.xlsx', sheet_name="region_level"
)
trq_agg_grouping2 = pd.read_excel(
    '../rsp_uk_trq_qa/outputs/trq_data_output11.xlsx', sheet_name="grouping_level"
)
trq_agg_origin2 = pd.read_excel(
    '../rsp_uk_trq_qa/outputs/trq_data_output11.xlsx', sheet_name="country_level"
)
print(*(sheet.shape for sheet in (trq_agg_origin2, trq_agg_grouping2, trq_agg_region2)))

(127, 10) (90, 9) (30, 7)


In [38]:
# compare df shapes: the notebook's own tables, in the same order as the
# reference sheets printed in the previous cell
print(*(agg.shape for agg in (trq_agg_origin, trq_agg_grouping, trq_agg_region)))

(128, 7) (91, 7) (30, 7)


In [39]:
# match dfs together. Both tables first get the same join key: the region,
# unit and source strings concatenated in that order.

trq_agg_region["joinID"] = (
    trq_agg_region["region"]
    .add(trq_agg_region["quota_unit_final"])
    .add(trq_agg_region["source"])
)
trq_agg_region2["joinID"] = (
    trq_agg_region2["region"]
    .add(trq_agg_region2["quota_unit_final"])
    .add(trq_agg_region2["source"])
)

# join: every row of the notebook's table is kept; the reference totals are
# attached where the key matches
trq_agg_region_qa = trq_agg_region.merge(
    trq_agg_region2[
        ["total_quota_volume", "total_quota_usage", "total_allocation_fill_rate", "joinID"]
    ],
    how="left",
    on="joinID",
)
trq_agg_region_qa.shape

(30, 11)

In [40]:
# compare values: notebook figure minus reference figure, so 0 means the two agree

trq_agg_region_qa["volume_check"] = trq_agg_region_qa["quota_volume"].sub(
    trq_agg_region_qa["total_quota_volume"]
)
trq_agg_region_qa["usage_check"] = trq_agg_region_qa["usage"].sub(
    trq_agg_region_qa["total_quota_usage"]
)
trq_agg_region_qa["fill_rate_check"] = trq_agg_region_qa["fill_rate"].sub(
    trq_agg_region_qa["total_allocation_fill_rate"]
)

trq_agg_region_qa

Unnamed: 0,region,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source,joinID,total_quota_volume,total_quota_usage,total_allocation_fill_rate,volume_check,usage_check,fill_rate_check
0,Africa,Kilograms,1250952313.0,701944589.512,0.56113,58,EU,AfricaKilogramsEU,1250952313.0,701944589.512,0.56113,0.0,0.0,0.0
1,Africa,Kilograms,314629400.0,185740879.04,0.59035,60,UK,AfricaKilogramsUK,314629400.0,186775277.74,0.59364,0.0,-1034398.7,-0.00329
2,Africa,Litres,137756000.0,96679497.625,0.70182,4,EU,AfricaLitresEU,137756000.0,96679497.625,0.70182,0.0,0.0,-0.0
3,Africa,Litres,72122600.0,62438545.176,0.86573,4,UK,AfricaLitresUK,72122600.0,62438545.176,0.86573,0.0,0.0,0.0
4,Asia Pacific,Kilograms,4049103800.095,1283041327.801,0.31687,83,EU,Asia PacificKilogramsEU,4049103800.095,1283041327.801,0.31687,0.0,0.0,-0.0
5,Asia Pacific,Kilograms,237435000.0,110682361.0,0.46616,46,UK,Asia PacificKilogramsUK,221394000.0,110682361.0,0.49993,16041000.0,0.0,-0.03378
6,Asia Pacific,Square metre (m2),17805290.0,5876166.197,0.33002,1,EU,Asia PacificSquare metre (m2)EU,17805290.0,5876166.197,0.33002,0.0,0.0,-0.0
7,Asia Pacific,Square metre (m2),1068320.0,0.0,0.0,1,UK,Asia PacificSquare metre (m2)UK,1068320.0,0.0,0.0,0.0,0.0,0.0
8,Eastern Europe,Kilograms,12285437042.754,5599447746.738,0.45578,190,EU,Eastern EuropeKilogramsEU,12285437042.754,5599447746.738,0.45578,0.0,0.0,0.0
9,Eastern Europe,Kilograms,904875906.0,346090277.633,0.38247,138,UK,Eastern EuropeKilogramsUK,759913906.0,314154947.633,0.41341,144962000.0,31935330.0,-0.03094


In [41]:
# list the dtypes of the joined table, including the three *_check difference columns
trq_agg_region_qa.dtypes


region                         object
quota_unit_final               object
quota_volume                  float64
usage                         float64
fill_rate                     float64
quota_number                    int64
source                         object
joinID                         object
total_quota_volume            float64
total_quota_usage             float64
total_allocation_fill_rate    float64
volume_check                  float64
usage_check                   float64
fill_rate_check               float64
dtype: object

In [42]:
# country groupings QA: both tables get the same join key, made of the
# grouping, unit and source strings concatenated in that order.

trq_agg_grouping["joinID"] = (
    trq_agg_grouping["grouping"]
    .add(trq_agg_grouping["quota_unit_final"])
    .add(trq_agg_grouping["source"])
)
trq_agg_grouping2["joinID"] = (
    trq_agg_grouping2["grouping"]
    .add(trq_agg_grouping2["quota_unit_final"])
    .add(trq_agg_grouping2["source"])
)

# join: every row of the notebook's table is kept; the reference totals are
# attached where the key matches
trq_agg_grouping_qa = trq_agg_grouping.merge(
    trq_agg_grouping2[
        ["total_quota_volume", "total_quota_usage", "total_allocation_fill_rate", "joinID"]
    ],
    how="left",
    on="joinID",
)
trq_agg_grouping_qa.shape

(91, 11)

In [157]:
# compare values: notebook figure minus reference figure, so 0 means the two
# agree; rows with no key match come out as NaN

trq_agg_grouping_qa["volume_check"] = trq_agg_grouping_qa["quota_volume"].sub(
    trq_agg_grouping_qa["total_quota_volume"]
)
trq_agg_grouping_qa["usage_check"] = trq_agg_grouping_qa["usage"].sub(
    trq_agg_grouping_qa["total_quota_usage"]
)
trq_agg_grouping_qa["fill_rate_check"] = trq_agg_grouping_qa["fill_rate"].sub(
    trq_agg_grouping_qa["total_allocation_fill_rate"]
)

trq_agg_grouping_qa.tail(50)

Unnamed: 0,grouping,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source,joinID,total_quota_volume,total_quota_usage,total_allocation_fill_rate,volume_check,usage_check,fill_rate_check
41,Israel,Litres,121600.0,121600.0,1.0,1,UK,IsraelLitresUK,121600.0,121600.0,1.0,0.0,0.0,0.0
42,Israel,Number of items,129920.0,0.0,0.0,1,EU,IsraelNumber of itemsEU,129920.0,0.0,0.0,0.0,0.0,0.0
43,Israel,Number of items,17695.0,0.0,0.0,1,UK,IsraelNumber of itemsUK,17695.0,0.0,0.0,0.0,0.0,0.0
44,Japan,Kilograms,79779789.93,16879143.641,0.21157,11,EU,JapanKilogramsEU,79779789.93,16879143.641,0.21157,-0.0,0.0,0.0
45,Japan,Kilograms,2958000.0,0.0,0.0,2,UK,JapanKilogramsUK,,,,,,
46,Jordan,Kilograms,24000000.0,7568.0,0.00032,2,EU,JordanKilogramsEU,24000000.0,7568.0,0.00032,0.0,0.0,0.0
47,Jordan,Kilograms,2180000.0,6534.0,0.003,2,UK,JordanKilogramsUK,2180000.0,6534.0,0.003,0.0,0.0,-0.0
48,Kosovo,Kilograms,35000.0,0.0,0.0,2,EU,KosovoKilogramsEU,35000.0,0.0,0.0,0.0,0.0,0.0
49,Kosovo,Kilograms,70000.0,0.0,0.0,3,UK,KosovoKilogramsUK,70000.0,0.0,0.0,0.0,0.0,0.0
50,Kosovo,Litres,5000000.0,3398564.5,0.67971,3,EU,KosovoLitresEU,5000000.0,3398564.5,0.67971,0.0,0.0,0.0


In [44]:
# country origin QA: both tables get the same join key, made of the origin,
# unit and source strings concatenated in that order.

trq_agg_origin["joinID"] = (
    trq_agg_origin["quota_origin"]
    .add(trq_agg_origin["quota_unit_final"])
    .add(trq_agg_origin["source"])
)
trq_agg_origin2["joinID"] = (
    trq_agg_origin2["quota_origin"]
    .add(trq_agg_origin2["quota_unit_final"])
    .add(trq_agg_origin2["source"])
)

# join: every row of the notebook's table is kept; the reference totals are
# attached where the key matches
trq_agg_origin_qa = trq_agg_origin.merge(
    trq_agg_origin2[
        ["total_quota_volume", "total_quota_usage", "total_allocation_fill_rate", "joinID"]
    ],
    how="left",
    on="joinID",
)
trq_agg_origin_qa.shape

(128, 11)

In [45]:
# compare values: notebook figure minus reference figure, so 0 means the two agree

trq_agg_origin_qa["volume_check"] = trq_agg_origin_qa["quota_volume"].sub(
    trq_agg_origin_qa["total_quota_volume"]
)
trq_agg_origin_qa["usage_check"] = trq_agg_origin_qa["usage"].sub(
    trq_agg_origin_qa["total_quota_usage"]
)
trq_agg_origin_qa["fill_rate_check"] = trq_agg_origin_qa["fill_rate"].sub(
    trq_agg_origin_qa["total_allocation_fill_rate"]
)

trq_agg_origin_qa.head()

Unnamed: 0,quota_origin,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source,joinID,total_quota_volume,total_quota_usage,total_allocation_fill_rate,volume_check,usage_check,fill_rate_check
0,Albania,Kilograms,2810000.0,1673986.0,0.59572,7,EU,AlbaniaKilogramsEU,2810000.0,1673986.0,0.59572,0.0,0.0,0.0
1,Albania,Kilograms,384000.0,0.0,0.0,7,UK,AlbaniaKilogramsUK,384000.0,0.0,0.0,0.0,0.0,0.0
2,Albania,Litres,700000.0,4038.0,0.00577,2,EU,AlbaniaLitresEU,700000.0,4038.0,0.00577,0.0,0.0,-0.0
3,Albania,Litres,163400.0,0.0,0.0,3,UK,AlbaniaLitresUK,163400.0,0.0,0.0,0.0,0.0,0.0
4,Canada,Kilograms,370141000.0,7833365.207,0.02116,41,EU,CanadaKilogramsEU,370141000.0,7833365.207,0.02116,0.0,0.0,-0.0


### 3b. Create function to upload and QA output files in notebook:

In [153]:
def uk_trq_data_check(sheetName, group):
    """Load one sheet of the TRQ output workbook and compare it with the QA aggregation.

    Parameters
    ----------
    sheetName : str
        One of "country", "grouping" or "region". The sheet that is read is
        ``sheetName + "_level"``; the QA frame it is compared against is
        ``trq_agg_origin``, ``trq_agg_grouping`` or ``trq_agg_region`` respectively.
    group : str
        Name of the column (required in both frames) that is concatenated with
        "quota_unit_final" and "source" to form the join key.

    Returns
    -------
    pandas.DataFrame
        The QA frame left-joined to the output sheet's total_* columns, with
        three extra columns (volume_check, usage_check, fill_rate_check) holding
        QA value minus output value after rounding. Rows with no matching key
        in the output sheet have NaN in the joined and *_check columns.

    Raises
    ------
    ValueError
        If ``sheetName`` is not one of the three recognised levels.

    Notes
    -----
    Side effects: prints the shapes of the output sheet and the QA frame, and
    writes/overwrites a "joinID" column on the module-level QA frame itself
    (the frame is not copied).
    """
    # Fail fast with a clear message; without this guard an unknown level only
    # surfaced further down as an UnboundLocalError on df2, after the file read.
    known_levels = ("country", "grouping", "region")
    if sheetName not in known_levels:
        raise ValueError(
            f"sheetName must be one of {known_levels}, got {sheetName!r}"
        )

    # upload data
    sheet_name = sheetName + "_level"
    file_path = '../rsp_uk_trq_qa/outputs/trq_data_output11.xlsx'
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    df["joinID"] = df[group] + df["quota_unit_final"] + df["source"]

    # determine QA df and merge:
    if sheetName == "country":
        df2 = trq_agg_origin
    elif sheetName == "grouping":
        df2 = trq_agg_grouping
    else:  # "region" - guaranteed by the guard above
        df2 = trq_agg_region

    # The key is rebuilt on every call, so it always reflects the current
    # contents of the group column (this mutates the shared QA frame).
    df2["joinID"] = df2[group] + df2["quota_unit_final"] + df2["source"]
    df3 = pd.merge(
        df2,
        df[["total_quota_volume", "total_quota_usage", "total_allocation_fill_rate", "joinID"]],
        on="joinID",
        how="left",
    )

    # check df values:
    # volumes/usage are compared to the nearest unit, fill rates to 5 decimals,
    # so that float noise does not show up as a difference.
    df3["volume_check"] = round(df3["quota_volume"], 0) - round(df3["total_quota_volume"], 0)
    df3["usage_check"] = round(df3["usage"], 0) - round(df3["total_quota_usage"], 0)
    df3["fill_rate_check"] = round(df3["fill_rate"], 5) - round(df3["total_allocation_fill_rate"], 5)

    print(df.shape, df2.shape)
    return df3
    

In [147]:
# Region-level QA: prints the (output sheet, QA frame) shapes and displays the merged comparison.
uk_trq_data_check("region","region")

(30, 8) (30, 8)


Unnamed: 0,region,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source,joinID,total_quota_volume,total_quota_usage,total_allocation_fill_rate,volume_check,usage_check,fill_rate_check
0,Africa,Kilograms,1250952313.0,701944589.512,0.56113,58,EU,AfricaKilogramsEU,1250952313.0,701944589.512,0.56113,0.0,0.0,0.0
1,Africa,Kilograms,314629400.0,185740879.04,0.59035,60,UK,AfricaKilogramsUK,314629400.0,186775277.74,0.59364,0.0,-1034399.0,-0.0033
2,Africa,Litres,137756000.0,96679497.625,0.70182,4,EU,AfricaLitresEU,137756000.0,96679497.625,0.70182,0.0,0.0,0.0
3,Africa,Litres,72122600.0,62438545.176,0.86573,4,UK,AfricaLitresUK,72122600.0,62438545.176,0.86573,0.0,0.0,0.0
4,Asia Pacific,Kilograms,4049103800.095,1283041327.801,0.31687,83,EU,Asia PacificKilogramsEU,4049103800.095,1283041327.801,0.31687,0.0,0.0,0.0
5,Asia Pacific,Kilograms,237435000.0,110682361.0,0.46616,46,UK,Asia PacificKilogramsUK,221394000.0,110682361.0,0.49993,16041000.0,0.0,-0.0337
6,Asia Pacific,Square metre (m2),17805290.0,5876166.197,0.33002,1,EU,Asia PacificSquare metre (m2)EU,17805290.0,5876166.197,0.33002,0.0,0.0,0.0
7,Asia Pacific,Square metre (m2),1068320.0,0.0,0.0,1,UK,Asia PacificSquare metre (m2)UK,1068320.0,0.0,0.0,0.0,0.0,0.0
8,Eastern Europe,Kilograms,12285437042.754,5599447746.738,0.45578,190,EU,Eastern EuropeKilogramsEU,12285437042.754,5599447746.738,0.45578,0.0,0.0,0.0
9,Eastern Europe,Kilograms,904875906.0,346090277.633,0.38247,138,UK,Eastern EuropeKilogramsUK,759913906.0,314154947.633,0.41341,144962000.0,31935330.0,-0.0309


Wrap the QA function in a second, smaller function (so that no single function becomes excessively large) that returns only the rows where the outputs and the QA code disagree:

In [154]:
def return_diff_outputs(group):
    """Return only the rows where the output file and the QA aggregation disagree.

    Parameters
    ----------
    group : str
        Passed to ``uk_trq_data_check`` as both the sheet level and the
        grouping column name.

    Returns
    -------
    pandas.DataFrame
        The rows of the ``uk_trq_data_check`` result for which at least one of
        volume_check, usage_check or fill_rate_check is not 0. A NaN check
        value (for example when the key has no match in the output sheet)
        compares unequal to 0 and is therefore reported as a mismatch.

    Also prints the number of mismatching rows.
    """
    df = uk_trq_data_check(group, group)
    # Every term is an explicit "!= 0" comparison. The fill-rate term used to be
    # the raw float column, which relied on implicit float -> bool coercion.
    df = df.loc[
        (df["volume_check"] != 0)
        | (df["usage_check"] != 0)
        | (df["fill_rate_check"] != 0)
    ]
    print(len(df), " mismatches between outputs and QA code for ",group, " type")
    return df
    

In [156]:
# Grouping-level QA: show only the rows where output and QA figures differ.
return_diff_outputs("grouping")

(90, 10) (91, 8)
12  mismatches between outputs and QA code for  grouping  type


Unnamed: 0,grouping,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source,joinID,total_quota_volume,total_quota_usage,total_allocation_fill_rate,volume_check,usage_check,fill_rate_check
29,EEA,Kilograms,35401383.0,6762040.859,0.19101,58,UK,EEAKilogramsUK,17419383.0,6757540.859,0.38793,17982000.0,4500.0,-0.19692
45,Japan,Kilograms,2958000.0,0.0,0.0,2,UK,JapanKilogramsUK,,,,,,
55,Mexico,Kilograms,193965274.0,5219659.4,0.02691,49,UK,MexicoKilogramsUK,193965274.0,5264659.4,0.02714,0.0,-45000.0,-0.00023
59,Morocco,Kilograms,137583000.0,100372342.84,0.72954,14,UK,MoroccoKilogramsUK,137583000.0,101406741.54,0.73706,0.0,-1034399.0,-0.00752
67,Serbia,Litres,567526.3,0.3,0.0,2,UK,SerbiaLitresUK,56.75263,3e-05,0.0,567469.0,0.0,0.0
70,South Africa,Kilograms,297779500.0,189836708.45,0.63751,13,EU,South Africa KilogramsEU,,,,,,
71,South Africa,Kilograms,118793000.0,73539850.8,0.61906,14,UK,South Africa KilogramsUK,,,,,,
72,South Africa,Litres,114236000.0,96663679.625,0.84618,2,EU,South Africa LitresEU,,,,,,
73,South Africa,Litres,71482900.0,62438545.176,0.87348,2,UK,South Africa LitresUK,,,,,,
75,South Korea,Kilograms,190403000.0,97511142.0,0.51213,25,UK,South KoreaKilogramsUK,177320000.0,97511142.0,0.54992,13083000.0,0.0,-0.03779


In [171]:
# Inspect the unmatched rows: the filter value deliberately ends with a space,
# because that is how the label is stored in the grouping column at this point.
trq_agg_grouping.loc[trq_agg_grouping["grouping"] == "South Africa "]

Unnamed: 0,grouping,quota_unit_final,quota_volume,usage,fill_rate,quota_number,source,joinID
35,South Africa,Kilograms,297779500.0,189836708.45,0.63751,13,EU,South Africa KilogramsEU
35,South Africa,Kilograms,118793000.0,73539850.8,0.61906,14,UK,South Africa KilogramsUK
36,South Africa,Litres,114236000.0,96663679.625,0.84618,2,EU,South Africa LitresEU
36,South Africa,Litres,71482900.0,62438545.176,0.87348,2,UK,South Africa LitresUK


In [186]:
# Remove leading/trailing whitespace from the grouping labels so the join key matches the output file.
# NOTE: an existing joinID column keeps the unstripped text until uk_trq_data_check("grouping", ...) rebuilds it.
trq_agg_grouping["grouping"] = trq_agg_grouping["grouping"].str.strip()

In [187]:
# List the distinct grouping labels to confirm none carries stray whitespace any more.
pd.unique(trq_agg_grouping["grouping"])

array(['Albania', 'Andean countries', 'Canada', 'Central America',
       'Chile', 'EEA', 'Egypt', 'Faroe Islands', 'Georgia', 'Israel',
       'Japan', 'Jordan', 'Kosovo', 'Lebanon', 'Mexico', 'Moldova',
       'Morocco', 'North Macedonia', 'Serbia', 'Singapore',
       'South Africa', 'South Korea', 'Switzerland', 'Tunisia', 'Turkey',
       'Ukraine', 'Vietnam'], dtype=object)

In [164]:
# Display up to the first 50 rows of trq_agg_origin2 for a visual check against the QA figures.
trq_agg_origin2.head(50)

Unnamed: 0,region,grouping,quota_origin,source,quota_unit_final,quota_count,total_quota_volume,total_quota_usage,total_allocation_fill_rate,iso
0,Africa,Egypt,Egypt,UK,Kilograms,21,48638000.0,11783385.4,0.24227,EG
1,Africa,Egypt,Egypt,EU,Kilograms,18,266715414.0,47602338.832,0.17848,EG
2,Africa,Morocco,Morocco,UK,Kilograms,14,137583000.0,101406741.54,0.73706,MA
3,Africa,Morocco,Morocco,EU,Kilograms,16,560158399.0,396049789.23,0.70703,MA
4,Africa,South Africa,Namibia,UK,Kilograms,1,254000.0,0.0,0.0,
5,Africa,South Africa,South Africa,UK,Kilograms,13,118539000.0,73539850.8,0.62039,ZA
6,Africa,South Africa,South Africa,EU,Kilograms,13,297779500.0,189836708.45,0.63751,ZA
7,Africa,South Africa,South Africa,UK,Litres,2,71482900.0,62438545.176,0.87348,ZA
8,Africa,South Africa,South Africa,EU,Litres,2,114236000.0,96663679.625,0.84618,ZA
9,Africa,Tunisia,Tunisia,UK,Kilograms,11,9615400.0,45300.0,0.00471,TN
