In [150]:
import numpy as np
import pandas as pd

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 500)

In [151]:
public_df = pd.read_csv('../data/inn_info_public.csv', dtype={'okved2': 'str', 'region': 'str'})
private_df = pd.read_csv('../data/inn_info_private.csv', dtype={'okved2': 'str'})
pays_df = pd.read_csv('../data/pays.csv')

In [152]:
public_df.head(2)

Unnamed: 0,hash_inn,okved2,region,is_public
0,61058,34,86,True
1,8311,18,86,True


In [153]:
pays_df.head(2)

Unnamed: 0,hash_inn_kt,hash_inn_dt,week,count,sum
0,0,0,1,4,38399.6
1,0,0,5,2,399.6


In [154]:
unknown_targets = public_df[public_df['is_public']==False][['hash_inn', 'region']]

In [155]:
unknown_targets = pd.merge(unknown_targets, 
                           private_df, 
                           on = 'hash_inn', 
                           how = 'left')

In [156]:
categotical_df = pd.concat([public_df[public_df['is_public']==True][['hash_inn', 'okved2', 'region']], 
                           unknown_targets[['hash_inn', 'okved2', 'region']]
                           ], axis = 0)

In [157]:
def create_pivot(pays_df, categotical_df, 
                 direction: str,
                 groupby_col: str,
                 agg_col: str,
                 cnt_not_null_cols = 1):
    """
    :direction - направление платежа: kt - входящий, dt - исходящий
    :groupby_col - категорий, по которой группируем (okved2 или region)
    :agg_col - колонка для группировки 
    :cnt_not_null_cols = 0 - количество колонок с ненулевым результатом после пивота
    """
            
    if direction == 'to':
        inn = 'hash_inn_dt'
        postfix = '_kt'
    elif direction == 'from':
        inn = 'hash_inn_kt'
        postfix = '_dt'
        
    pays_df = pd.merge(pays_df, categotical_df,
                       left_on = inn,
                       right_on = 'hash_inn',
                       how = 'left')

    pays_df = pd.pivot_table(pays_df, 
                             values=agg_col, 
                             index='hash_inn'+postfix,
                             columns=[groupby_col],
                             aggfunc='sum')
    
    cols_list = [col for col in pays_df.columns if col not in ['total', 'cnt_not_null']]
    
    pays_df['cnt_not_null'] = pays_df[cols_list].count(axis=1)
    pays_df = pays_df[pays_df['cnt_not_null']>cnt_not_null_cols]
    print('len of data: {}'.format(pays_df.shape[0]))
    pays_df['total'] = pays_df[cols_list].sum(axis=1)

    for col in pays_df[cols_list]:
        pays_df['share_' + col + postfix] = pays_df[col] / pays_df['total']
        pays_df.rename(columns={col: col+'_'+agg_col+postfix}, inplace=True)
        
    for col in pays_df.columns:
        if '_kt' not in col: 
            pays_df.rename(columns={col: col+postfix}, inplace=True)

    pays_df = pays_df \
                .reset_index(drop=False) \
                .rename(columns={'hash_inn'+postfix: 'hash_inn'})
    return pays_df

In [158]:
%%time
df_to = create_pivot(pays_df, categotical_df, 
                 direction='to',
                 agg_col='sum',
                 groupby_col = 'okved2',
                 cnt_not_null_cols=0)

len of data: 152160
CPU times: user 13.9 s, sys: 10.5 s, total: 24.4 s
Wall time: 24.5 s


In [159]:
df_to.head(2)

okved2,hash_inn,0_sum_kt,1_sum_kt,10_sum_kt,11_sum_kt,12_sum_kt,13_sum_kt,14_sum_kt,15_sum_kt,16_sum_kt,17_sum_kt,18_sum_kt,19_sum_kt,2_sum_kt,20_sum_kt,21_sum_kt,22_sum_kt,23_sum_kt,24_sum_kt,25_sum_kt,26_sum_kt,27_sum_kt,28_sum_kt,29_sum_kt,3_sum_kt,30_sum_kt,31_sum_kt,32_sum_kt,33_sum_kt,34_sum_kt,35_sum_kt,36_sum_kt,37_sum_kt,38_sum_kt,39_sum_kt,4_sum_kt,40_sum_kt,41_sum_kt,42_sum_kt,43_sum_kt,44_sum_kt,45_sum_kt,46_sum_kt,47_sum_kt,48_sum_kt,49_sum_kt,5_sum_kt,50_sum_kt,51_sum_kt,52_sum_kt,53_sum_kt,54_sum_kt,55_sum_kt,56_sum_kt,57_sum_kt,58_sum_kt,59_sum_kt,6_sum_kt,60_sum_kt,61_sum_kt,62_sum_kt,63_sum_kt,64_sum_kt,65_sum_kt,66_sum_kt,67_sum_kt,68_sum_kt,69_sum_kt,7_sum_kt,70_sum_kt,71_sum_kt,72_sum_kt,73_sum_kt,74_sum_kt,75_sum_kt,76_sum_kt,77_sum_kt,78_sum_kt,79_sum_kt,8_sum_kt,9_sum_kt,cnt_not_null_kt,total_kt,share_0_kt,share_1_kt,share_10_kt,share_11_kt,share_12_kt,share_13_kt,share_14_kt,share_15_kt,share_16_kt,share_17_kt,share_18_kt,share_19_kt,share_2_kt,share_20_kt,share_21_kt,share_22_kt,share_23_kt,share_24_kt,share_25_kt,share_26_kt,share_27_kt,share_28_kt,share_29_kt,share_3_kt,share_30_kt,share_31_kt,share_32_kt,share_33_kt,share_34_kt,share_35_kt,share_36_kt,share_37_kt,share_38_kt,share_39_kt,share_4_kt,share_40_kt,share_41_kt,share_42_kt,share_43_kt,share_44_kt,share_45_kt,share_46_kt,share_47_kt,share_48_kt,share_49_kt,share_5_kt,share_50_kt,share_51_kt,share_52_kt,share_53_kt,share_54_kt,share_55_kt,share_56_kt,share_57_kt,share_58_kt,share_59_kt,share_6_kt,share_60_kt,share_61_kt,share_62_kt,share_63_kt,share_64_kt,share_65_kt,share_66_kt,share_67_kt,share_68_kt,share_69_kt,share_7_kt,share_70_kt,share_71_kt,share_72_kt,share_73_kt,share_74_kt,share_75_kt,share_76_kt,share_77_kt,share_78_kt,share_79_kt,share_8_kt,share_9_kt
0,0,,,,,4087.124,,4198.916,,3.6,,,,,,,412.472,,,,,,,,2.0,,,,,188.24,,6988.164,,,,29.6,,,,,,,1144.96,,,,,,,160465.024,1.2,,10681.992,,,,4796.136,,,299.692,17684.24,,,,,,,,,,,,,,,,,,,42537.228,,16,253520.588,,,,,0.016121,,0.016562,,1.4e-05,,,,,,,0.001627,,,,,,,,8e-06,,,,,0.000743,,0.027564,,,,0.000117,,,,,,,0.004516,,,,,,,0.632947,5e-06,,0.042135,,,,0.018918,,,0.001182,0.069755,,,,,,,,,,,,,,,,,,,0.167786,
1,1,,,,,138.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,138.0,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [160]:
%%time
df_from = create_pivot(pays_df, categotical_df, 
                 direction='from',
                 agg_col='sum',
                 groupby_col = 'okved2',
                 cnt_not_null_cols=0)

len of data: 189772
CPU times: user 31.5 s, sys: 23.7 s, total: 55.2 s
Wall time: 55.3 s


In [161]:
df_from.head(2)

okved2,hash_inn,0_sum_dt_dt,1_sum_dt_dt,10_sum_dt_dt,11_sum_dt_dt,12_sum_dt_dt,13_sum_dt_dt,14_sum_dt_dt,15_sum_dt_dt,16_sum_dt_dt,17_sum_dt_dt,18_sum_dt_dt,19_sum_dt_dt,2_sum_dt_dt,20_sum_dt_dt,21_sum_dt_dt,22_sum_dt_dt,23_sum_dt_dt,24_sum_dt_dt,25_sum_dt_dt,26_sum_dt_dt,27_sum_dt_dt,28_sum_dt_dt,29_sum_dt_dt,3_sum_dt_dt,30_sum_dt_dt,31_sum_dt_dt,32_sum_dt_dt,33_sum_dt_dt,34_sum_dt_dt,35_sum_dt_dt,36_sum_dt_dt,37_sum_dt_dt,38_sum_dt_dt,39_sum_dt_dt,4_sum_dt_dt,40_sum_dt_dt,41_sum_dt_dt,42_sum_dt_dt,43_sum_dt_dt,44_sum_dt_dt,45_sum_dt_dt,46_sum_dt_dt,47_sum_dt_dt,48_sum_dt_dt,49_sum_dt_dt,5_sum_dt_dt,50_sum_dt_dt,51_sum_dt_dt,52_sum_dt_dt,53_sum_dt_dt,54_sum_dt_dt,55_sum_dt_dt,56_sum_dt_dt,57_sum_dt_dt,58_sum_dt_dt,59_sum_dt_dt,6_sum_dt_dt,60_sum_dt_dt,61_sum_dt_dt,62_sum_dt_dt,63_sum_dt_dt,64_sum_dt_dt,65_sum_dt_dt,66_sum_dt_dt,67_sum_dt_dt,68_sum_dt_dt,69_sum_dt_dt,7_sum_dt_dt,70_sum_dt_dt,71_sum_dt_dt,72_sum_dt_dt,73_sum_dt_dt,74_sum_dt_dt,75_sum_dt_dt,76_sum_dt_dt,77_sum_dt_dt,78_sum_dt_dt,79_sum_dt_dt,8_sum_dt_dt,9_sum_dt_dt,cnt_not_null_dt,total_dt,share_0_dt_dt,share_1_dt_dt,share_10_dt_dt,share_11_dt_dt,share_12_dt_dt,share_13_dt_dt,share_14_dt_dt,share_15_dt_dt,share_16_dt_dt,share_17_dt_dt,share_18_dt_dt,share_19_dt_dt,share_2_dt_dt,share_20_dt_dt,share_21_dt_dt,share_22_dt_dt,share_23_dt_dt,share_24_dt_dt,share_25_dt_dt,share_26_dt_dt,share_27_dt_dt,share_28_dt_dt,share_29_dt_dt,share_3_dt_dt,share_30_dt_dt,share_31_dt_dt,share_32_dt_dt,share_33_dt_dt,share_34_dt_dt,share_35_dt_dt,share_36_dt_dt,share_37_dt_dt,share_38_dt_dt,share_39_dt_dt,share_4_dt_dt,share_40_dt_dt,share_41_dt_dt,share_42_dt_dt,share_43_dt_dt,share_44_dt_dt,share_45_dt_dt,share_46_dt_dt,share_47_dt_dt,share_48_dt_dt,share_49_dt_dt,share_5_dt_dt,share_50_dt_dt,share_51_dt_dt,share_52_dt_dt,share_53_dt_dt,share_54_dt_dt,share_55_dt_dt,share_56_dt_dt,share_57_dt_dt,share_58_dt_dt,share_59_dt_dt,share_6_dt_dt,share_60_dt_dt,share_61_dt_dt,share_62_dt_dt,share_63_dt_dt,share_64_dt_dt,share_65_dt_dt,share_66_dt_dt,share_67_dt_dt,share_68_dt_dt,share_69_dt_dt,share_7_dt_dt,share_70_dt_dt,share_71_dt_dt,share_72_dt_dt,share_73_dt_dt,share_74_dt_dt,share_75_dt_dt,share_76_dt_dt,share_77_dt_dt,share_78_dt_dt,share_79_dt_dt,share_8_dt_dt,share_9_dt_dt
0,0,,,,78.68,8136.612,,145.18,,477.976,,8.6,1843.6,,172.392,,76394.424,,,,97.288,,,102.72,1.6,,,52.58,,171.472,,,,,,16725.764,,,534.928,998.336,156.34,,,1.744,,,0.44,,,65.9,74.44,-1.42,389.0,572.584,,25.168,,,,1255.364,4990.016,,,,,,,,53.992,12.216,104.672,91.876,,103.556,0.424,10989.04,,2.72,12.688,46246.784,936.412,38,172026.108,,,,0.000457,0.047299,,0.000844,,0.002779,,5e-05,0.010717,,0.001002,,0.444086,,,,0.000566,,,0.000597,9e-06,,,0.000306,,0.000997,,,,,,0.097228,,,0.00311,0.005803,0.000909,,,1e-05,,,3e-06,,,0.000383,0.000433,-8e-06,0.002261,0.003328,,0.000146,,,,0.007298,0.029007,,,,,,,,0.000314,7.1e-05,0.000608,0.000534,,0.000602,2e-06,0.06388,,1.6e-05,7.4e-05,0.268836,0.005443
1,2,,,,,427.484,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,427.484,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [162]:
%%time
result_df = pd.merge(categotical_df, df_to,
                       on='hash_inn',
                       how='left')

result_df = pd.merge(result_df, df_from,
                       on='hash_inn',
                       how='left')

CPU times: user 1.6 s, sys: 863 ms, total: 2.46 s
Wall time: 2.47 s


In [163]:
result_df.head(2)

Unnamed: 0,hash_inn,okved2,region,0_sum_kt,1_sum_kt,10_sum_kt,11_sum_kt,12_sum_kt,13_sum_kt,14_sum_kt,15_sum_kt,16_sum_kt,17_sum_kt,18_sum_kt,19_sum_kt,2_sum_kt,20_sum_kt,21_sum_kt,22_sum_kt,23_sum_kt,24_sum_kt,25_sum_kt,26_sum_kt,27_sum_kt,28_sum_kt,29_sum_kt,3_sum_kt,30_sum_kt,31_sum_kt,32_sum_kt,33_sum_kt,34_sum_kt,35_sum_kt,36_sum_kt,37_sum_kt,38_sum_kt,39_sum_kt,4_sum_kt,40_sum_kt,41_sum_kt,42_sum_kt,43_sum_kt,44_sum_kt,45_sum_kt,46_sum_kt,47_sum_kt,48_sum_kt,49_sum_kt,5_sum_kt,50_sum_kt,51_sum_kt,52_sum_kt,53_sum_kt,54_sum_kt,55_sum_kt,56_sum_kt,57_sum_kt,58_sum_kt,59_sum_kt,6_sum_kt,60_sum_kt,61_sum_kt,62_sum_kt,63_sum_kt,64_sum_kt,65_sum_kt,66_sum_kt,67_sum_kt,68_sum_kt,69_sum_kt,7_sum_kt,70_sum_kt,71_sum_kt,72_sum_kt,73_sum_kt,74_sum_kt,75_sum_kt,76_sum_kt,77_sum_kt,78_sum_kt,79_sum_kt,8_sum_kt,9_sum_kt,cnt_not_null_kt,total_kt,share_0_kt,share_1_kt,share_10_kt,share_11_kt,share_12_kt,share_13_kt,share_14_kt,share_15_kt,share_16_kt,share_17_kt,share_18_kt,share_19_kt,share_2_kt,share_20_kt,share_21_kt,share_22_kt,share_23_kt,share_24_kt,share_25_kt,share_26_kt,share_27_kt,share_28_kt,share_29_kt,share_3_kt,share_30_kt,share_31_kt,share_32_kt,share_33_kt,share_34_kt,share_35_kt,share_36_kt,share_37_kt,share_38_kt,share_39_kt,share_4_kt,share_40_kt,share_41_kt,share_42_kt,share_43_kt,share_44_kt,share_45_kt,share_46_kt,share_47_kt,share_48_kt,share_49_kt,share_5_kt,share_50_kt,share_51_kt,share_52_kt,share_53_kt,share_54_kt,share_55_kt,share_56_kt,share_57_kt,share_58_kt,share_59_kt,share_6_kt,share_60_kt,share_61_kt,share_62_kt,share_63_kt,share_64_kt,share_65_kt,share_66_kt,share_67_kt,share_68_kt,share_69_kt,share_7_kt,share_70_kt,share_71_kt,share_72_kt,share_73_kt,share_74_kt,share_75_kt,share_76_kt,share_77_kt,share_78_kt,share_79_kt,share_8_kt,share_9_kt,0_sum_dt_dt,1_sum_dt_dt,10_sum_dt_dt,11_sum_dt_dt,12_sum_dt_dt,13_sum_dt_dt,14_sum_dt_dt,15_sum_dt_dt,16_sum_dt_dt,17_sum_dt_dt,18_sum_dt_dt,19_sum_dt_dt,2_sum_dt_dt,20_sum_dt_dt,21_sum_dt_dt,22_sum_dt_dt,23_sum_dt_dt,24_sum_dt_dt,25_sum_dt_dt,26_sum_dt_dt,27_sum_dt_dt,28_sum_dt_dt,29_sum_dt_dt,3_sum_dt_dt,30_sum_dt_dt,31_sum_dt_dt,32_sum_dt_dt,33_sum_dt_dt,34_sum_dt_dt,35_sum_dt_dt,36_sum_dt_dt,37_sum_dt_dt,38_sum_dt_dt,39_sum_dt_dt,4_sum_dt_dt,40_sum_dt_dt,41_sum_dt_dt,42_sum_dt_dt,43_sum_dt_dt,44_sum_dt_dt,45_sum_dt_dt,46_sum_dt_dt,47_sum_dt_dt,48_sum_dt_dt,49_sum_dt_dt,5_sum_dt_dt,50_sum_dt_dt,51_sum_dt_dt,52_sum_dt_dt,53_sum_dt_dt,54_sum_dt_dt,55_sum_dt_dt,56_sum_dt_dt,57_sum_dt_dt,58_sum_dt_dt,59_sum_dt_dt,6_sum_dt_dt,60_sum_dt_dt,61_sum_dt_dt,62_sum_dt_dt,63_sum_dt_dt,64_sum_dt_dt,65_sum_dt_dt,66_sum_dt_dt,67_sum_dt_dt,68_sum_dt_dt,69_sum_dt_dt,7_sum_dt_dt,70_sum_dt_dt,71_sum_dt_dt,72_sum_dt_dt,73_sum_dt_dt,74_sum_dt_dt,75_sum_dt_dt,76_sum_dt_dt,77_sum_dt_dt,78_sum_dt_dt,79_sum_dt_dt,8_sum_dt_dt,9_sum_dt_dt,cnt_not_null_dt,total_dt,share_0_dt_dt,share_1_dt_dt,share_10_dt_dt,share_11_dt_dt,share_12_dt_dt,share_13_dt_dt,share_14_dt_dt,share_15_dt_dt,share_16_dt_dt,share_17_dt_dt,share_18_dt_dt,share_19_dt_dt,share_2_dt_dt,share_20_dt_dt,share_21_dt_dt,share_22_dt_dt,share_23_dt_dt,share_24_dt_dt,share_25_dt_dt,share_26_dt_dt,share_27_dt_dt,share_28_dt_dt,share_29_dt_dt,share_3_dt_dt,share_30_dt_dt,share_31_dt_dt,share_32_dt_dt,share_33_dt_dt,share_34_dt_dt,share_35_dt_dt,share_36_dt_dt,share_37_dt_dt,share_38_dt_dt,share_39_dt_dt,share_4_dt_dt,share_40_dt_dt,share_41_dt_dt,share_42_dt_dt,share_43_dt_dt,share_44_dt_dt,share_45_dt_dt,share_46_dt_dt,share_47_dt_dt,share_48_dt_dt,share_49_dt_dt,share_5_dt_dt,share_50_dt_dt,share_51_dt_dt,share_52_dt_dt,share_53_dt_dt,share_54_dt_dt,share_55_dt_dt,share_56_dt_dt,share_57_dt_dt,share_58_dt_dt,share_59_dt_dt,share_6_dt_dt,share_60_dt_dt,share_61_dt_dt,share_62_dt_dt,share_63_dt_dt,share_64_dt_dt,share_65_dt_dt,share_66_dt_dt,share_67_dt_dt,share_68_dt_dt,share_69_dt_dt,share_7_dt_dt,share_70_dt_dt,share_71_dt_dt,share_72_dt_dt,share_73_dt_dt,share_74_dt_dt,share_75_dt_dt,share_76_dt_dt,share_77_dt_dt,share_78_dt_dt,share_79_dt_dt,share_8_dt_dt,share_9_dt_dt
0,61058,34,86,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,89.456,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,89.456,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,8311,18,86,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,39.96,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,39.96,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [164]:
result_df.shape

(240069, 327)

In [165]:
result_df.to_csv('okved_sum_with_private.csv', index=False, header=True)

In [148]:
result_df[(result_df['cnt_not_null_kt'].isna()) & (result_df['cnt_not_null_dt'].isna())].shape

(0, 327)

## Хлам (не используется)

In [23]:
okveds = public_df[['hash_inn', 'okved2']]
okveds['hash_inn_kt'] = okveds['hash_inn']
okveds['okved_kt'] = okveds['okved2']
okveds = okveds.rename(columns={'hash_inn': 'hash_inn_dt', 'okved2': 'okved_dt'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [24]:
pays_df = pd.merge(pays_df, okveds[['hash_inn_kt', 'okved_kt']], how='left', on='hash_inn_kt')
pays_df = pd.merge(pays_df, okveds[['hash_inn_dt', 'okved_dt']], how='left', on='hash_inn_dt')
pays_df = pays_df.dropna()
pays_df['okved_dt'] = pays_df['okved_dt'].astype('int32')
pays_df['hash_inn_kt'] = pays_df['hash_inn_kt'].astype(str)
pays_df['hash_inn_dt'] = pays_df['hash_inn_dt'].astype(str)
pays_df.head(2)

Unnamed: 0,hash_inn_kt,hash_inn_dt,week,count,sum,okved_kt,okved_dt
0,0,0,1,4,38399.6,-1,-1
1,0,0,5,2,399.6,-1,-1


In [25]:
kt_df = pays_df \
    .groupby(['hash_inn_kt', 'okved_dt']) \
    .agg({'count': 'sum'}) \
    .reset_index(drop=False) \
    .rename(columns={'hash_inn_kt': 'inn', 'okved_dt': 'okved'})
kt_df = kt_df[kt_df.okved > 0]
kt_df.head(2)

Unnamed: 0,inn,okved,count
1,0,8,2
2,0,12,7


In [26]:
kt_df.shape

(416970, 3)

In [29]:
kt_df.to_csv('../data/okved_kt_cnt.csv', index=False, header=True)