### Test for determinants of buyer diversification
#### Scatterplots related to the Table 3 regressions of [Kramarz 2016](https://www.princeton.edu/~ies/Spring17/KramarzPaper.pdf)
#### disaggregated by broad export sections


In [1]:
#Import modules

import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

import dask.dataframe as dd
from dask.diagnostics import ProgressBar

from numpy import arange, log10

from functions import chunk, agg, finalize
# Custom dask groupby aggregations assembled from the chunk / agg / finalize
# callables imported from the local `functions` module.
# NOTE(review): both objects wrap the very same three callables and differ
# only in their name -- confirm that `first` is meant to behave like `tunique`.
tunique = dd.Aggregation(name='tunique', chunk=chunk, agg=agg, finalize=finalize)
first = dd.Aggregation(name='first', chunk=chunk, agg=agg, finalize=finalize)

### Load the data

In [None]:
# Directory holding the yearly DP1610_MAASTRICHT1_<year>.txt files.
drive_path = './../../../../../media/miglesia/Elements/export_france/data/type1/DP1610_MAASTRICHT1_1997_2013'

# Name of every field of the raw files, in file order (the files are read
# with header=None, so names have to be supplied here).
colnames = [u'YEAR', u'MONTH', u'FLUX', u'ID', u'DEPT', u'CN ID 8', u'CPA6',
            u'PYOD', u'PAYP', u'VAT', u'PRIFAC', u'DEVFAC', u'VFTE', u'VART',
            u'D_MASSE', u'MASSE', u'USUP', u'USUP_MT']

# Field name -> zero-based position in the raw file.
colname_no = {name: position for position, name in enumerate(colnames)}

# Fields that are actually loaded.
columns = [u'YEAR', u'FLUX', u'ID', u'CN ID 8', u'VAT', u'PYOD', u'VART']

# pandas ignores the order of `usecols` and always returns the selected
# fields in file order.  PYOD (position 7) precedes VAT (position 9) in the
# file, so labelling the result with `columns` as written would swap the two.
# The labels are therefore applied in file order.
columns_in_file_order = sorted(columns, key=colname_no.get)

# A concrete list (not a lazy `map` object) keeps this working on Python 3.
selected_positions = [colname_no[name] for name in columns]

df_list = []

# Only the 2012 and 2013 files are read (the upper bound is exclusive).
for y in range(2012, 2014):
    yearly = dd.read_table(
        drive_path + '/DP1610_MAASTRICHT1_' + str(y) + '.txt',
        usecols=selected_positions,
        delimiter=';',
        header=None,
        # Keep the VAT field (position 9) as raw objects instead of letting
        # the parser infer a numeric type.
        dtype={colname_no[u'VAT']: 'object'},
    ).fillna('')
    df_list.append(yearly)

data = dd.concat(df_list)
data.columns = columns_in_file_order

# Parity of the FLUX code.
# NOTE(review): presumably odd FLUX codes denote imports -- TODO confirm.
data['IMPORT'] = data['FLUX'] % 2

grouped = data.groupby(['ID', 'YEAR', 'IMPORT'])


In [None]:
# Per (ID, YEAR, IMPORT) group: sum VART and apply the custom `tunique`
# aggregation to the VAT, CN ID 8 and PYOD columns.
agg_spec = {'VART': 'sum', u'VAT': tunique, 'CN ID 8': tunique, u'PYOD': tunique}
lazy_result = grouped.agg(agg_spec)

# Building the task graph above is lazy; the work happens in compute(),
# which is the only step the progress bar needs to wrap.
with ProgressBar():
    df = lazy_result.compute()

# Persist the aggregated frame so the computation need not be repeated.
df.to_csv('dets_of_diversification.csv')

[                                        ] | 1% Completed |  1min  5.3s

### Plot. Determinants of diversification

In [None]:
# NOTE(review): this cell uses `aggregate_vars` and `CN_full`, and expects
# `df` to carry 'FLUX' and 'CN ID 1' columns; none of these are defined in
# the visible part of the notebook -- verify they exist before running.


def _overlay_scatter(ax, background, highlight, x, y, log_x=False):
    """Scatter `highlight` (red) on top of `background` (grey) on `ax`.

    Both frames are plotted as marker-only series of column `y` against
    column `x`.  The y axis is always logarithmic and labelled `y`; when
    `log_x` is true the x axis is labelled `x` and made logarithmic as well.
    """
    shared = dict(x=x, y=y, marker='o', linewidth=0, mec='None', ax=ax)
    background.plot(alpha=.05, color='.5', **shared)
    highlight.plot(alpha=.4, color='r', **shared)
    if log_x:
        ax.set_xlabel(x)
        ax.set_xscale('log')
    ax.set_ylabel(y)
    ax.set_yscale('log')


# (x column, y column, logarithmic x axis?) for the three panels, left to right.
panels = [
    ('log_EUR_UE_exp', 'ct_Buyers', False),
    ('log_EUR_UE_exp', 'ct_Prods', False),
    ('ct_Prods', 'ct_Buyers', True),
]

# Rows with FLUX == 4; filtered once here instead of on every loop iteration.
df_exp = df.loc[df.FLUX == 4]
df_exp_fl = df_exp.groupby('ID')
# Note: this rebinds the module-level name `data`.
data = aggregate_vars(df_exp_fl)

# NOTE(review): the last four values returned by unique() are skipped; the
# reason is not visible here -- confirm which sections are being dropped.
for sec in df['CN ID 1'].unique()[:-4]:

    # Build the mask from the already-filtered frame so the selection does
    # not depend on aligning a mask indexed like the full `df`.
    df_exp_cn = df_exp.loc[df_exp['CN ID 1'] == sec]
    df_exp_cn_fl = df_exp_cn.groupby('ID')

    data_cn = aggregate_vars(df_exp_cn_fl)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(CN_full.loc[CN_full['CN ID 1'] == sec]['CN label 1'].values[0])

    fig, axs = plt.subplots(1, 3, figsize=(20, 5))

    for ax, (x_col, y_col, log_x) in zip(axs, panels):
        _overlay_scatter(ax, data, data_cn, x_col, y_col, log_x=log_x)

    plt.show()

## Firm sizes