# Create the input for the model

In [1]:
import pandas as pd
import json
import itertools
import math
import numpy as np

In [2]:
import create_data

In [21]:
reload(create_data)

<module 'create_data' from 'create_data.py'>

## 1. Load and prepare the actionshistory json

In [3]:
# Load the actionshistory
data = pd.read_json('/nfshome/llayer/data/actionshistory.json', orient='index')

# Reset index
data_index_reset = data.reset_index()
data_index_reset = data_index_reset.rename(columns={'index': 'task_name'})

In [4]:
# Get the unique exit codes and sites
good_sites_exit_codes, bad_sites_exit_codes = create_data.get_exit_codes(data_index_reset)
good_sites_names, bad_sites_names = create_data.get_sites(data_index_reset)
unique_sites = list(set(good_sites_names + bad_sites_names)) 
unique_codes = list(set(good_sites_exit_codes + bad_sites_exit_codes))

In [5]:
# Set the labels
create_data.set_labels(data_index_reset)

In [6]:
data_index_reset.head()

Unnamed: 0,task_name,errors,parameters,splitting,splitting_encoded,xrootd,xrootd_encoded,memory,memory_encoded,action,action_encoded,action_split,action_split_encoded,action_binary,action_binary_encoded
0,/amaltaro_Run2016D-v2-DoubleMuonLowMass-07Aug1...,"{u'good_sites': {}, u'bad_sites': {u'-1': {u'T...","{u'action': u'acdc', u'sites': [u'T1_US_FNAL']...",1x,0,,2,,3,acdc,0,acdc_1x,2,acdc,0
1,/amaltaro_Run2016D-v2-DoubleMuonLowMass-07Aug1...,"{u'good_sites': {}, u'bad_sites': {u'-1': {u'T...","{u'action': u'acdc', u'sites': [u'T1_US_FNAL']...",1x,0,,2,,3,acdc,0,acdc_1x,2,acdc,0
2,/amaltaro_Run2016D-v2-DoubleMuonLowMass-07Aug1...,"{u'good_sites': {}, u'bad_sites': {u'-1': {u'T...","{u'action': u'acdc', u'sites': [u'T1_US_FNAL']...",1x,0,,2,,3,acdc,0,acdc_1x,2,acdc,0
3,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"{u'good_sites': {u'85': {u'T1_UK_RAL': 1}}, u'...","{u'action': u'acdc', u'cores': u'', u'xrootd':...",1x,0,enabled,0,,3,acdc,0,acdc_1x,2,acdc,0
4,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"{u'good_sites': {u'-1': {u'T2_DE_RWTH': 1, u'T...","{u'action': u'acdc', u'cores': u'', u'sites': ...",1x,0,,2,,3,acdc,0,acdc_1x,2,acdc,0


In [7]:
splitting_categories = sorted(list(set(data_index_reset['action_encoded'])))
print(splitting_categories)

[0, 1, 2]


## 2. Standard input

In [156]:
template_df = pd.DataFrame(columns=unique_sites, index=unique_codes).fillna(value=0).sort_index()

In [157]:
# Set up the matrix

def build_table(df, sparse_df, site_name, exit_code):
    
    #error_site = pd.DataFrame(columns=site_name, index=exit_code).fillna(value=0).sort_index()
    error_site = sparse_df * 0 
    
    if len(df.keys()) == 0 or len(df.values()) == 0:
        return error_site
    
    else:
        for exit_code, site_dict in zip(df.keys(), df.values()):
            for site, count in site_dict.items():
                error_site.at[exit_code, site] = 0 if math.isnan(count) else count
    
        return error_site


def build_table_flatten(x):
    
    d_outer = []
    
    for column in x: # 60 columns (i.e. sites)
        #d_outer.append([item for item in x[column]]) # 43 items
        for item in x[column]:
            d_outer.append(item)
    
    return d_outer

def combine_features(x, feature1, feature2):
    return x[feature1]+x[feature2]

In [154]:
#data_index_reset['tables_good_sites'] = data_index_reset['errors'].apply(lambda x: build_table(x,'good_sites'))

In [158]:
# Build good/bad site features
%timeit data_index_reset['tables_good_sites'] = data_index_reset['errors'].apply(lambda x: build_table(x['good_sites'], template_df, good_sites_names,good_sites_exit_codes))

"""
data_index_reset['tables_bad_sites'] = data_index_reset['errors'].apply(lambda x:
                                                                       build_table(x['bad_sites'],
                                                                                   bad_sites_names,
                                                                                   bad_sites_exit_codes))
"""

1 loop, best of 3: 19.2 s per loop


"\ndata_index_reset['tables_bad_sites'] = data_index_reset['errors'].apply(lambda x:\n                                                                       build_table(x['bad_sites'],\n                                                                                   bad_sites_names,\n                                                                                   bad_sites_exit_codes))\n"

In [12]:
print len(good_sites_names),  len(good_sites_exit_codes) 
print len(bad_sites_names),  len(bad_sites_exit_codes) 

69 58
147 56


In [10]:
# Flatten good/bad site features
data_index_reset['tables_good_sites_flatten'] = data_index_reset['tables_good_sites'].apply(lambda x: 
                                                                                          build_table_flatten(x))


data_index_reset['tables_bad_sites_flatten'] = data_index_reset['tables_bad_sites'].apply(lambda x: 
                                                                                        build_table_flatten(x))

# Combined flatten good/bad site features
data_index_reset['tables_combined_sites_flatten'] =  data_index_reset.apply(lambda x:
                                                                           combine_features(x,
                                                                                            'tables_good_sites_flatten',
                                                                                            'tables_bad_sites_flatten'),
                                                                           axis=1)

In [76]:
data_index_reset.head()

Unnamed: 0,task_name,errors,parameters,splitting,splitting_encoded,xrootd,xrootd_encoded,memory,memory_encoded,action,action_encoded,action_split,action_split_encoded,action_binary,action_binary_encoded,tables_good_sites,tables_bad_sites,tables_good_sites_flatten,tables_bad_sites_flatten,tables_combined_sites_flatten
0,/amaltaro_Run2016D-v2-DoubleMuonLowMass-07Aug1...,"{u'good_sites': {}, u'bad_sites': {u'-1': {u'T...","{u'action': u'acdc', u'sites': [u'T1_US_FNAL']...",1x,0,,2,,3,acdc,,acdc_1x,2,acdc,0,T0_CH_CERN T0_CH_CSCS_HPC T1_DE_KIT ...,NoReportedSite T0_CH_CERN T0_CH_CERN_...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,/amaltaro_Run2016D-v2-DoubleMuonLowMass-07Aug1...,"{u'good_sites': {}, u'bad_sites': {u'-1': {u'T...","{u'action': u'acdc', u'sites': [u'T1_US_FNAL']...",1x,0,,2,,3,acdc,,acdc_1x,2,acdc,0,T0_CH_CERN T0_CH_CSCS_HPC T1_DE_KIT ...,NoReportedSite T0_CH_CERN T0_CH_CERN_...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,/amaltaro_Run2016D-v2-DoubleMuonLowMass-07Aug1...,"{u'good_sites': {}, u'bad_sites': {u'-1': {u'T...","{u'action': u'acdc', u'sites': [u'T1_US_FNAL']...",1x,0,,2,,3,acdc,,acdc_1x,2,acdc,0,T0_CH_CERN T0_CH_CSCS_HPC T1_DE_KIT ...,NoReportedSite T0_CH_CERN T0_CH_CERN_...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"{u'good_sites': {u'85': {u'T1_UK_RAL': 1}}, u'...","{u'action': u'acdc', u'cores': u'', u'xrootd':...",1x,0,enabled,0,,3,acdc,,acdc_1x,2,acdc,0,T0_CH_CERN T0_CH_CSCS_HPC T1_DE_KIT ...,NoReportedSite T0_CH_CERN T0_CH_CERN_...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"{u'good_sites': {u'-1': {u'T2_DE_RWTH': 1, u'T...","{u'action': u'acdc', u'cores': u'', u'sites': ...",1x,0,,2,,3,acdc,,acdc_1x,2,acdc,0,T0_CH_CERN T0_CH_CSCS_HPC T1_DE_KIT ...,NoReportedSite T0_CH_CERN T0_CH_CERN_...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [13]:
data_out = data_index_reset.drop(['task_name', 'errors', 'parameters', 'tables_good_sites', 'tables_bad_sites'], 1)

In [37]:
data_out.head()

Unnamed: 0,splitting,splitting_encoded,xrootd,xrootd_encoded,memory,memory_encoded,action,action_encoded,action_split,action_split_encoded,action_binary,action_binary_encoded,tables_combined_sites_flatten
0,1x,0,,2,,3,acdc,,acdc_1x,2,acdc,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,1x,0,,2,,3,acdc,,acdc_1x,2,acdc,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,1x,0,,2,,3,acdc,,acdc_1x,2,acdc,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,1x,0,enabled,0,,3,acdc,,acdc_1x,2,acdc,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1x,0,,2,,3,acdc,,acdc_1x,2,acdc,0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [14]:
data_out.to_hdf('/bigdata/shared/AIErrorHandling/baseline.h5', 'frame')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->['splitting', 'xrootd', 'memory', 'action', 'action_split', 'action_binary', 'tables_good_sites_flatten', 'tables_bad_sites_flatten', 'tables_combined_sites_flatten']]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)


## 3. Load and prepare the w2v file

In [22]:
w2v = pd.read_csv('/nfshome/llayer/data/df_word2vec_exitcodes.csv')

In [23]:
# Convert the word vectors from string back to float
def str_to_float(row):
    log_msg = row['w2v']
    msg = list(np.float_(log_msg.replace('[','').replace(']', '').split(',')))
    return msg
w2v['w2v'] = w2v.apply(str_to_float, axis=1)

In [24]:
# Create lists with the error, site, message per taskname
w2v_list = w2v.groupby(['task_name'], 
                  as_index=False)['error', 'site', 'w2v'].agg(lambda x: list(x))

In [25]:
w2v_list.head()

Unnamed: 0,task_name,error,site,w2v
0,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,[85],[T1_UK_RAL],"[[-0.838090360204, 0.457981392001, 0.254296811..."
1,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,[50664],[T2_DE_RWTH],"[[-0.182677972648, 0.899555946735, 0.567435338..."
2,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,[50664],[T2_DE_RWTH],"[[-0.182677972648, 0.899555946735, 0.567435338..."
3,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"[99400, 50664]","[NoReportedSite, T2_DE_RWTH]","[[-0.484312460839, -0.207195970489, 0.34691437..."
4,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,[50664],[T2_DE_RWTH],"[[-0.182677972648, 0.899555946735, 0.567435338..."


## 4. Merge the two frames and build the matrices

In [26]:
df = pd.merge( data_index_reset, w2v_list, on = ['task_name'], how='left')

In [27]:
print len(df)

25090


In [46]:
test = df[:100]

In [47]:
dim_w2v = len(w2v['w2v'][0])

In [48]:
template_df = pd.DataFrame(columns=unique_sites, index=unique_codes).fillna(value=0).sort_index()

In [88]:
# Build the site-error matrix table
def build_table(row):
    
    errors = row['errors']
    sites_good = errors['good_sites'] 
    sites_bad = errors['bad_sites']
    log_sites = row['site']
    log_errors = row['error']
    log_msg = row['w2v']
    
    # Create df
    sparse_df = template_df * 0.0 
    #pd.DataFrame(columns=unique_sites, index=unique_codes).fillna(value=0).sort_index()
    sparse_df = sparse_df.astype(object)

    # Add exit code
    # Good sites
    for exit_code, site_dict in zip(sites_good.keys(), sites_good.values()):
        for site, count in site_dict.items():
            #print site, exit_code, count
            #print site, count
            sparse_df.at[exit_code.encode('utf-8'),site.encode('utf-8')] = 0 if math.isnan(count) else count
    # Bad sites
    for exit_code, site_dict in zip(sites_bad.keys(), sites_bad.values()):
        for site, count in site_dict.items():
            #print site, exit_code, count
            #print site, count
            sparse_df.at[exit_code.encode('utf-8'), site.encode('utf-8')] = 0 if math.isnan(count) else count
    
    # Add word vectors
    if isinstance(log_sites, (list,)):
    #if not math.isnan(log_sites):

        for i in range(len(log_sites)):
            #print i, log_errors[i], log_sites[i]
            #msg = list(np.float_(log_msg[i].replace('[','').replace(']', '').split(',')))
            #msg = [1,2,3]
            #print msg
            if log_sites[i] == 'NoReportedSite':
                continue
            count = sparse_df.at[str(log_errors[i]), str(log_sites[i])] 
            if count != 0:
                #print log_msg[i]
                count_msg = [count] + log_msg[i]
                sparse_df.at[str(log_errors[i]), str(log_sites[i])] = count_msg
    
    return sparse_df

In [89]:
test['table'] = test.apply(build_table, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [109]:
pd.set_option('display.max_columns', 500)
test['table'].iloc[5].shape

(64, 151)

## 5. Flatten the matrices

In [91]:
zero_dummy = [0] * (dim_w2v)

In [133]:
def flat_row(row):

    flat_row = []
    for field in row:
        if isinstance(field, (list,)) == False:
            v = [field] + zero_dummy
        flat_row = flat_row + v
    return flat_row

In [134]:
def flatten(x):
    

    
    #x_T = x.T 
    flat_matrix = x.apply(lambda row: flat_row(row), axis = 1)
    
    #print len(flat_matrix[0])
    
    """
    flat_matrix = []

    for column in x: # 60 columns (i.e. sites)
        flat_site = []
        for item in x[column]:

            if isinstance(item, (list,)) == False:
                dummy = [item] + [0] * (dim_w2v)
                #if item != 0:
                #    print dummy
            flat_site = flat_site + dummy
        flat_matrix.append(flat_site)
    """
    return list(flat_matrix)

In [128]:
# Flatten good/bad site features
test['table_flattened'] = test['table'].apply(lambda x: flatten(x))

1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661
1661


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [123]:
vec = test.iloc[0]

In [130]:
print len(vec['table_flattened'][0])

1661


In [131]:
test.head()

Unnamed: 0,task_name,errors,parameters,splitting,splitting_encoded,xrootd,xrootd_encoded,memory,memory_encoded,action,action_encoded,action_split,action_split_encoded,action_binary,action_binary_encoded,tables_good_sites,error,site,w2v,table,table_flattened
0,/amaltaro_Run2016D-v2-DoubleMuonLowMass-07Aug1...,"{u'good_sites': {}, u'bad_sites': {u'-1': {u'T...","{u'action': u'acdc', u'sites': [u'T1_US_FNAL']...",1x,0,,2,,3,acdc,0,acdc_1x,2,acdc,0,Unknown T3_US_PuertoRico T2_FI_HIP T...,,,,Unknown T3_US_PuertoRico T2_FI_HIP T2_UK...,"[[0.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0, 0..."
1,/amaltaro_Run2016D-v2-DoubleMuonLowMass-07Aug1...,"{u'good_sites': {}, u'bad_sites': {u'-1': {u'T...","{u'action': u'acdc', u'sites': [u'T1_US_FNAL']...",1x,0,,2,,3,acdc,0,acdc_1x,2,acdc,0,Unknown T3_US_PuertoRico T2_FI_HIP T...,,,,Unknown T3_US_PuertoRico T2_FI_HIP T2_UK...,"[[0.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0, 0..."
2,/amaltaro_Run2016D-v2-DoubleMuonLowMass-07Aug1...,"{u'good_sites': {}, u'bad_sites': {u'-1': {u'T...","{u'action': u'acdc', u'sites': [u'T1_US_FNAL']...",1x,0,,2,,3,acdc,0,acdc_1x,2,acdc,0,Unknown T3_US_PuertoRico T2_FI_HIP T...,,,,Unknown T3_US_PuertoRico T2_FI_HIP T2_UK...,"[[0.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0, 0..."
3,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"{u'good_sites': {u'85': {u'T1_UK_RAL': 1}}, u'...","{u'action': u'acdc', u'cores': u'', u'xrootd':...",1x,0,enabled,0,,3,acdc,0,acdc_1x,2,acdc,0,Unknown T3_US_PuertoRico T2_FI_HIP T...,[85],[T1_UK_RAL],"[[-0.838090360204, 0.457981392001, 0.254296811...",Unknown T3_US_PuertoRico T2_FI_HIP T2_UK...,"[[0.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0, 0..."
4,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"{u'good_sites': {u'-1': {u'T2_DE_RWTH': 1, u'T...","{u'action': u'acdc', u'cores': u'', u'sites': ...",1x,0,,2,,3,acdc,0,acdc_1x,2,acdc,0,Unknown T3_US_PuertoRico T2_FI_HIP T...,[50664],[T2_DE_RWTH],"[[-0.182677972648, 0.899555946735, 0.567435338...",Unknown T3_US_PuertoRico T2_FI_HIP T2_UK...,"[[0.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0, 0..."


In [36]:
input_ml = df.drop(['task_name', 'errors', 'parameters', 'table'], 1)

In [82]:
input_ml.head()

Unnamed: 0,task_name,table_flattened,splitting,splitting_encoded,action,action_encoded,target_label,target_encoded,action_label
0,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1x,2,acdc,0,acdc_1x,2,acdc
1,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1x,2,acdc,0,acdc_1x,2,acdc
2,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1x,2,acdc,0,acdc_1x,2,acdc
3,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1x,2,acdc,0,acdc_1x,2,acdc
4,/amaltaro_Run2018A-v1-DoubleMuon-17Sep2018_102...,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1x,2,acdc,0,acdc_1x,2,acdc


## Store the input in chunks

In [137]:
def chunker(seq, size):
    return (seq[pos:pos + size] for pos in xrange(0, len(seq), size))

for counter, chunk in enumerate(chunker(df,1000)):

    print 'Start with chunk', counter
    chunk['table'] = chunk.apply(build_table, axis=1)
    print 'Created matrix'
    chunk['table_flattened'] = chunk['table'].apply(lambda x: flatten(x))
    print 'Flattened matrix'
    data_out = chunk.drop(['task_name', 'errors', 'parameters', 'table'], 1)
    data_out.to_hdf('/bigdata/shared/AIErrorHandling/w2v_matrix' + str(counter) + '.h5', 'test')
    print 'Stored output'

Start with chunk 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


Created matrix


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Flattened matrix
Stored output
Start with chunk 1
Created matrix
Flattened matrix
Stored output
Start with chunk 2
Created matrix
Flattened matrix
Stored output
Start with chunk 3
Created matrix
Flattened matrix
Stored output
Start with chunk 4
Created matrix
Flattened matrix
Stored output
Start with chunk 5
Created matrix
Flattened matrix
Stored output
Start with chunk 6
Created matrix
Flattened matrix
Stored output
Start with chunk 7
Created matrix
Flattened matrix
Stored output
Start with chunk 8
Created matrix
Flattened matrix
Stored output
Start with chunk 9
Created matrix
Flattened matrix
Stored output
Start with chunk 10
Created matrix
Flattened matrix
Stored output
Start with chunk 11
Created matrix
Flattened matrix
Stored output
Start with chunk 12
Created matrix
Flattened matrix
Stored output
Start with chunk 13
Created matrix
Flattened matrix
Stored output
Start with chunk 14
Created matrix
Flattened matrix
Stored output
Start with chunk 15
Created matrix
Flattened matrix
St

In [None]:
from multiprocessing import Pool

nprocs = 4

pool = Pool(nprocs)

for chunk in chunker(df, nprocs):
    data = pool.map(myfunction, chunk)
    data.domorestuff()