In [2]:
import numpy as np
import pandas as pd
from datetime import datetime

In [67]:
def _nz_part_paths(stem, n_parts=8):
    """Return the numbered part files for one NZ series, e.g. '<stem>_01.txt' ... '<stem>_08.txt'."""
    return ['Data/NZ/{}_{:02d}.txt'.format(stem, part) for part in range(1, n_parts + 1)]


# One entry per NZ trade series. Keys:
#   filename         - first part file (only referenced by a commented-out read in the loading cell)
#   filenames        - all part files; each part holds a different block of month columns
#   partner_column   - name of the partner column in the raw file
#   column_name      - name given to the measure column after reshaping
#   flow             - 'EXPORT' or 'IMPORT'
#   left_date_text   - text in front of the date in a month column header
#   right_date_text  - text behind the date in a month column header
#   date_format      - strptime format of the date that remains
#
# The loading cell passes left_date_text / right_date_text to str.lstrip / str.rstrip,
# which treat them as a SET of characters rather than a literal prefix / suffix.
# The first entry previously carried '%Y-M%m-Exported quantity, Kilograms' as its
# suffix (a pasted date-format fragment); it is now the literal suffix, consistent
# with the import entry. The stripped result is the same either way.
#
# NOTE(review): the quantity headers say "Kilograms" while column_name says
# 'Quantity in tonnes' - confirm whether a unit conversion happens elsewhere.
nz_files = [
    {
        'filename': 'Data/NZ/nz_exports_quantities_01.txt',
        'filenames': _nz_part_paths('nz_exports_quantities'),
        'partner_column': 'Importers',
        'column_name': 'Quantity in tonnes',
        'flow': 'EXPORT',
        'left_date_text': '',
        'right_date_text': '-Exported quantity, Kilograms',
        'date_format': '%Y-M%m'
    },
    {
        'filename': 'Data/NZ/nz_exports_values_01.txt',
        'filenames': _nz_part_paths('nz_exports_values'),
        'partner_column': 'Importers',
        'column_name': 'Value in thousand euro',
        'flow': 'EXPORT',
        'left_date_text': 'Exported value in ',
        'right_date_text': '',
        'date_format': '%Y-M%m'
    },
    {
        'filename': 'Data/NZ/nz_imports_quantities_01.txt',
        'filenames': _nz_part_paths('nz_imports_quantities'),
        'partner_column': 'Exporters',
        'column_name': 'Quantity in tonnes',
        'flow': 'IMPORT',
        'left_date_text': '',
        'right_date_text': '-Imported quantity, Kilograms',
        'date_format': '%Y-M%m'
    },
    {
        'filename': 'Data/NZ/nz_imports_values_01.txt',
        'filenames': _nz_part_paths('nz_imports_values'),
        'partner_column': 'Exporters',
        'column_name': 'Value in thousand euro',
        'flow': 'IMPORT',
        'left_date_text': 'Imported value in ',
        'right_date_text': '',
        'date_format': '%Y-M%m'
    }
]

In [68]:
# Spot-check the configuration: sixth part file of the first series.
_sample_path = nz_files[0]['filenames'][5]
print(_sample_path)

Data/NZ/nz_exports_quantities_06.txt


In [73]:
# Common name for the trading-partner column ('Importers' / 'Exporters' in the raw files).
partner_column_name = 'Partner'

# Long-format frames collected per measure; concatenated once after the loop.
value_frames = []
quantity_frames = []

for spec in nz_files:
    # ------------------------------------------------------------------
    # 1. Stitch the part files into one wide table:
    #    one row per partner, one column per month.
    # ------------------------------------------------------------------
    wide = None
    for path in spec['filenames']:
        part = pd.read_csv(path, sep='\t', lineterminator='\r')

        # Rename the 'Importers' / 'Exporters' column to 'Partner', in line with the EU data
        part = part.rename(columns={spec['partner_column']: partner_column_name})

        # Clean every part as it is read (previously this only happened after a
        # merge, so a series consisting of a single file was never cleaned):
        #  - drop the header-less 'Unnamed: N' column(s)
        #  - drop the blank row whose partner cell is just the left-over '\n'
        part = part.drop(columns=part.filter(regex='^Unnamed').columns)
        part = part[part[partner_column_name] != '\n']

        if wide is None:
            wide = part
            continue

        # Only bring in columns not seen yet; a month present in two parts keeps
        # the value from the earlier part.
        # NOTE(review): how='left' keeps only the partners listed in the FIRST
        # part file; a partner that appears only in a later part is dropped.
        # Confirm this is intended - how='outer' would keep them.
        new_columns = [col for col in part.columns if col not in wide.columns]
        wide = wide.merge(part[[partner_column_name] + new_columns],
                          on=partner_column_name,
                          how='left')

    if wide is None:
        # Without this guard an empty file list would silently reuse stale data.
        raise ValueError("No part files configured for series '{}' ({})".format(
            spec['column_name'], spec['flow']))

    # ------------------------------------------------------------------
    # 2. Tidy the partner names and drop the 'World' total row.
    # ------------------------------------------------------------------
    # Raw partner cells look like '\n"Germany"': strip the leading newline/quote
    # and the trailing quote. assign() builds a new frame, so nothing is written
    # into a filtered slice (no SettingWithCopyWarning), and the .str accessor
    # tolerates missing values.
    partner_clean = wide[partner_column_name].str.lstrip('\n"').str.rstrip('"')
    wide = wide.assign(**{partner_column_name: partner_clean})
    wide = wide[wide[partner_column_name] != 'World']

    # ------------------------------------------------------------------
    # 3. Reshape to long format: one row per partner and month.
    # ------------------------------------------------------------------
    tidy = wide.melt(id_vars=[partner_column_name],
                     var_name='Month Date',
                     value_name=spec['column_name'])

    # Add a 'Flow' column to indicate IMPORT or EXPORT
    tidy['Flow'] = spec['flow']

    # Convert the month column headers to datetimes. Each distinct header is
    # parsed once and then mapped onto the rows. lstrip / rstrip remove any
    # leading / trailing characters contained in the given text (a character
    # set, not a literal prefix / suffix); stripping stops at the first digit
    # because the configured texts contain none.
    left_text = spec['left_date_text']
    right_text = spec['right_date_text']
    month_by_label = {
        label: datetime.strptime(label.lstrip(left_text).rstrip(right_text), spec['date_format'])
        for label in wide.columns
        if label != partner_column_name
    }
    tidy['Month Date'] = tidy['Month Date'].map(month_by_label)

    # NOTE(review): quantity headers read "Kilograms" but the column is named
    # 'Quantity in tonnes' and no conversion is applied here - confirm the unit.
    if spec['column_name'] == 'Value in thousand euro':
        value_frames.append(tidy)
    else:
        quantity_frames.append(tidy)

df_values = pd.concat(value_frames, ignore_index=True)
df_quantities = pd.concat(quantity_frames, ignore_index=True)

# Attach the quantity to every value observation (same month, partner and flow).
df_nz = pd.merge(df_values,
                 df_quantities[['Month Date','Partner','Flow','Quantity in tonnes']],
                 on=['Month Date','Partner','Flow'], 
                 how='left')
df_nz

Unnamed: 0,Partner,Month Date,Value in thousand euro,Flow,Quantity in tonnes
0,Denmark,2010-01-01,8271.0,EXPORT,4332245.0
1,Russian Federation,2010-01-01,5504.0,EXPORT,2008407.0
2,Morocco,2010-01-01,4296.0,EXPORT,2334257.0
3,China,2010-01-01,3211.0,EXPORT,1578730.0
4,Australia,2010-01-01,4608.0,EXPORT,2219114.0
...,...,...,...,...,...
16978,Fiji,2022-09-01,0.0,IMPORT,0.0
16979,France,2022-09-01,0.0,IMPORT,0.0
16980,New Zealand,2022-09-01,544.0,IMPORT,98542.0
16981,India,2022-09-01,4.0,IMPORT,750.0


In [None]:
# Inspect the combined value observations (all entries whose column_name is 'Value in thousand euro').
df_values

In [None]:
# Inspect the combined quantity observations (all entries stored under 'Quantity in tonnes').
df_quantities

In [70]:
# Attach the matching quantity to every value row. A left join keeps exactly the
# rows of df_values; the alternative that was tried is an outer join on the same keys:
#   pd.merge(df_values, df_quantities, on=['Month Date','Partner','Flow'], how='outer')
merge_keys = ['Month Date', 'Partner', 'Flow']
df_nz = df_values.merge(
    df_quantities[merge_keys + ['Quantity in tonnes']],
    on=merge_keys,
    how='left',
)
df_nz

Unnamed: 0,Partner,Month Date,Value in thousand euro,Flow,Quantity in tonnes
0,Denmark,2010-01-01,8271.0,EXPORT,4332245.0
1,Russian Federation,2010-01-01,5504.0,EXPORT,2008407.0
2,Morocco,2010-01-01,4296.0,EXPORT,2334257.0
3,China,2010-01-01,3211.0,EXPORT,1578730.0
4,Australia,2010-01-01,4608.0,EXPORT,2219114.0
...,...,...,...,...,...
16978,Fiji,2022-09-01,0.0,IMPORT,0.0
16979,France,2022-09-01,0.0,IMPORT,0.0
16980,New Zealand,2022-09-01,544.0,IMPORT,98542.0
16981,India,2022-09-01,4.0,IMPORT,750.0


In [71]:
# Spot-check a single partner: all rows where the partner is China.
df_nz.loc[df_nz['Partner'] == 'China']

Unnamed: 0,Partner,Month Date,Value in thousand euro,Flow,Quantity in tonnes
3,China,2010-01-01,3211.0,EXPORT,1578730.0
107,China,2010-02-01,2754.0,EXPORT,1111927.0
211,China,2010-03-01,2067.0,EXPORT,769918.0
315,China,2010-04-01,2652.0,EXPORT,996467.0
419,China,2010-05-01,2374.0,EXPORT,847020.0
...,...,...,...,...,...
15395,China,2022-05-01,22715.0,EXPORT,3717707.0
15499,China,2022-06-01,40882.0,EXPORT,6998525.0
15603,China,2022-07-01,43534.0,EXPORT,7202670.0
15707,China,2022-08-01,22747.0,EXPORT,3343814.0


In [158]:
# Show every column when displaying frames, so irregular month headers are visible.
pd.set_option('display.max_columns', None)

# Load part 8 of the NL export quantities (tab-separated, rows terminated by '\r').
df = pd.read_csv(
    'Data/NL/nl_exports_quantities_08.txt',
    sep='\t',
    lineterminator='\r',
)
df

Unnamed: 0,Importers,"2021-M02-Exported quantity, Kilograms","2021-M03-Exported quantity, Kilograms","2021-M04-Exported quantity, Kilograms","2021-M05-Exported quantity, Kilograms","2021-M06-Exported quantity, Kilograms","2021-M07-Exported quantity, Kilograms","2021-M08-Exported quantity, Kilograms",2021-M09-Exported quantity,2021-M09-Unit,"2021-M10-Exported quantity, Kilograms","2021-M11-Exported quantity, Kilograms","2021-M12-Exported quantity, Kilograms","2022-M01-Exported quantity, Kilograms","2022-M02-Exported quantity, Kilograms","2022-M03-Exported quantity, Kilograms","2022-M04-Exported quantity, Kilograms","2022-M05-Exported quantity, Kilograms","2022-M06-Exported quantity, Kilograms","2022-M07-Exported quantity, Kilograms","2022-M08-Exported quantity, Kilograms","2022-M09-Exported quantity, Kilograms",Unnamed: 22
0,"\n""World""",17764321.0,23579199.0,24127786.0,18799970.0,19245556.0,19457211.0,21396645.0,22227690.0,Kilograms,24410568.0,30803136.0,33145905.0,16889387.0,22735529.0,22434397.0,22798328.0,22277683.0,22887401.0,18861441.0,19854574.0,22490541.0,
1,"\n""Germany""",5491608.0,10331687.0,11597521.0,6895939.0,7489531.0,7399693.0,6694765.0,7276884.0,Kilograms,9109317.0,12764681.0,17059502.0,4759191.0,10400347.0,7769803.0,8671633.0,8924087.0,8755033.0,5864519.0,8472461.0,10462072.0,
2,"\n""France""",4605594.0,5147730.0,5198267.0,5301655.0,5379087.0,5469687.0,6126581.0,6504677.0,Kilograms,6343295.0,6923926.0,6548859.0,4845296.0,4346613.0,5150724.0,5798929.0,5341501.0,7101355.0,6032435.0,4796995.0,5011387.0,
3,"\n""Belgium""",3438456.0,2730974.0,3006674.0,2233599.0,2386015.0,2601895.0,3232927.0,3296082.0,Kilograms,3479292.0,3448565.0,3715253.0,2699012.0,3004588.0,3829293.0,3358847.0,2784620.0,2340921.0,2390801.0,2061619.0,2506434.0,
4,"\n""Italy""",593433.0,464486.0,286949.0,143481.0,401789.0,275215.0,493070.0,616577.0,Kilograms,855392.0,1242165.0,369174.0,522021.0,419128.0,591235.0,635466.0,453086.0,366431.0,571396.0,360623.0,924735.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,"\n""Tokelau""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,25000.0,0.0,0.0,0.0,0.0,0.0,0.0,
118,"\n""Special categories""",1748.0,25128.0,22353.0,786.0,678.0,2229.0,2045.0,5124.0,Kilograms,6523.0,7323.0,4177.0,134.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
119,"\n""Burkina Faso""",0.0,0.0,0.0,0.0,1016.0,600.0,2150.0,480.0,Kilograms,0.0,0.0,0.0,0.0,4158.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
120,"\n""Uzbekistan""",0.0,20000.0,0.0,20000.0,0.0,0.0,0.0,0.0,,20000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,


In [159]:
# Give the September 2021 column the same header pattern as the other months.
df = df.rename(
    columns={'2021-M09-Exported quantity': '2021-M09-Exported quantity, Kilograms'}
)

In [160]:
# Remove the separate unit column that only exists for September 2021.
df = df.drop(columns=['2021-M09-Unit'])

In [161]:
# Re-display the frame to check the September 2021 header after the rename and drop cells.
df

Unnamed: 0,Importers,"2021-M02-Exported quantity, Kilograms","2021-M03-Exported quantity, Kilograms","2021-M04-Exported quantity, Kilograms","2021-M05-Exported quantity, Kilograms","2021-M06-Exported quantity, Kilograms","2021-M07-Exported quantity, Kilograms","2021-M08-Exported quantity, Kilograms","2021-M09-Exported quantity, Kilograms","2021-M10-Exported quantity, Kilograms","2021-M11-Exported quantity, Kilograms","2021-M12-Exported quantity, Kilograms","2022-M01-Exported quantity, Kilograms","2022-M02-Exported quantity, Kilograms","2022-M03-Exported quantity, Kilograms","2022-M04-Exported quantity, Kilograms","2022-M05-Exported quantity, Kilograms","2022-M06-Exported quantity, Kilograms","2022-M07-Exported quantity, Kilograms","2022-M08-Exported quantity, Kilograms","2022-M09-Exported quantity, Kilograms",Unnamed: 22
0,"\n""World""",17764321.0,23579199.0,24127786.0,18799970.0,19245556.0,19457211.0,21396645.0,22227690.0,24410568.0,30803136.0,33145905.0,16889387.0,22735529.0,22434397.0,22798328.0,22277683.0,22887401.0,18861441.0,19854574.0,22490541.0,
1,"\n""Germany""",5491608.0,10331687.0,11597521.0,6895939.0,7489531.0,7399693.0,6694765.0,7276884.0,9109317.0,12764681.0,17059502.0,4759191.0,10400347.0,7769803.0,8671633.0,8924087.0,8755033.0,5864519.0,8472461.0,10462072.0,
2,"\n""France""",4605594.0,5147730.0,5198267.0,5301655.0,5379087.0,5469687.0,6126581.0,6504677.0,6343295.0,6923926.0,6548859.0,4845296.0,4346613.0,5150724.0,5798929.0,5341501.0,7101355.0,6032435.0,4796995.0,5011387.0,
3,"\n""Belgium""",3438456.0,2730974.0,3006674.0,2233599.0,2386015.0,2601895.0,3232927.0,3296082.0,3479292.0,3448565.0,3715253.0,2699012.0,3004588.0,3829293.0,3358847.0,2784620.0,2340921.0,2390801.0,2061619.0,2506434.0,
4,"\n""Italy""",593433.0,464486.0,286949.0,143481.0,401789.0,275215.0,493070.0,616577.0,855392.0,1242165.0,369174.0,522021.0,419128.0,591235.0,635466.0,453086.0,366431.0,571396.0,360623.0,924735.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,"\n""Tokelau""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25000.0,0.0,0.0,0.0,0.0,0.0,0.0,
118,"\n""Special categories""",1748.0,25128.0,22353.0,786.0,678.0,2229.0,2045.0,5124.0,6523.0,7323.0,4177.0,134.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
119,"\n""Burkina Faso""",0.0,0.0,0.0,0.0,1016.0,600.0,2150.0,480.0,0.0,0.0,0.0,0.0,4158.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
120,"\n""Uzbekistan""",0.0,20000.0,0.0,20000.0,0.0,0.0,0.0,0.0,20000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,


In [162]:
# Write the corrected table to a new file in the same layout as the input
# (tab-separated, '\r' row terminator, no index column).
# pandas 1.5 renamed to_csv's `line_terminator` to `lineterminator` and pandas 2.0
# removed the old spelling, so the new keyword is used here (requires pandas >= 1.5).
df.to_csv('Data/NL/nl_exports_quantities_08xx.txt', sep='\t', lineterminator='\r', index=False)