In [1]:
from sklearn import model_selection, preprocessing, naive_bayes, metrics
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn import decomposition, ensemble
from nltk.stem import PorterStemmer
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
import nltk
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')
from multiprocessing import Pool

from imblearn.metrics import geometric_mean_score as gmean
from imblearn.metrics import make_index_balanced_accuracy as iba
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()

### Read in and prepare data.

In [15]:
train_file_path='../../dataset/df_ntee_universal/train/'
file_list=os.listdir(train_file_path)
df_train=pd.DataFrame()
for file in file_list:
    df_train=pd.concat([df_train, pd.read_pickle(train_file_path+file, compression='gzip')])
len(df_train)

154424

In [16]:
# Code as 10 broad categories.
broad_cat_dict={'I': ['A'],
                'II': ['B'],
                'III': ['C', 'D'],
                'IV': ['E', 'F', 'G', 'H'],
                'V': ['I', 'J', 'K', 'L', 'M', 'N', 'O', 'P'],
                'VI': ['Q'],
                'VII': ['R', 'S', 'T', 'U', 'V', 'W'],
                'VIII': ['X'],
                'IX': ['Y'],
                'X': ['Z'],
               }
def ntee2cat(string):
    global broad_cat_dict
    return [s for s in broad_cat_dict.keys() if string in broad_cat_dict[s]][0]

df_train['mission_prgrm']=df_train['TAXPAYER_NAME']+' '+df_train['mission']+' '+df_train['prgrm_dsc']
df_train['mission_prgrm_spellchk']=df_train['TAXPAYER_NAME']+' '+df_train['mission_spellchk']+' '+df_train['prgrm_dsc_spellchk'] # Using spell-checked.
df_train['broad_cat']=df_train['NTEE1'].apply(ntee2cat)
len(df_train['mission_prgrm_spellchk']), len(df_train['NTEE1'].drop_duplicates()), len(df_train['broad_cat'].drop_duplicates())

(154424, 25, 9)

In [4]:
# # Check if the sampling criteria can be satisfied.
# small_num=0
# while small_num<500: # Make sure each category in training dataset has at least 500 records.
#     trainDF, valDF = model_selection.train_test_split(df_train, test_size=.2)
#     small_num=trainDF.groupby('broad_cat').count().sort_values('EIN').iloc[0]['EIN']

trainDF, testDF = model_selection.train_test_split(df_train, test_size=.2)
# See the composition by broad category.
print(trainDF.groupby('broad_cat').count()['EIN'], '\n'*2, testDF.groupby('broad_cat').count()['EIN'])

broad_cat
I       13568
II      20593
III      6051
IV      13527
IX       5321
V       37404
VI       1577
VII     21872
VIII     3626
Name: EIN, dtype: int64 

 broad_cat
I       3442
II      5234
III     1511
IV      3309
IX      1319
V       9330
VI       410
VII     5390
VIII     940
Name: EIN, dtype: int64


### Prepare classifier function

In [22]:
# Redefine vectorizer.
def text_vectorizer(tokenizer_type=None, vectorizer_type=None):
    ########################################################
    ######### Define and choose tokenizers #################
    def porter_tokenizer(str_input): # '''Pay attention to the input: this is string input, not token!'''
        tokens = word_tokenize(str_input)
        return [PorterStemmer().stem(token) for token in tokens]
    # Lemmatize using POS tags, assume improving accuracy.
    # Ref: 
    #   - https://stackoverflow.com/questions/15586721/wordnet-lemmatization-and-pos-tagging-in-python
    #   - https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
    def get_wordnet_pos(treebank_tag):
        if treebank_tag.startswith('J'):
            return wordnet.ADJ
        elif treebank_tag.startswith('V'):
            return wordnet.VERB
        elif treebank_tag.startswith('N'):
            return wordnet.NOUN
        elif treebank_tag.startswith('R'):
            return wordnet.ADV
        else:
            return wordnet.NOUN
    def lemma_tokenizer(str_input): # '''Pay attention to the input: this is string input, not token!'''
        tokens=word_tokenize(str_input)
        return [WordNetLemmatizer().lemmatize(word=word, pos=get_wordnet_pos(pos)) for word, pos in nltk.pos_tag(tokens)]
    # Choose tokenizer using parameter passed.
    if tokenizer_type=='lemma':
        tokenizer=lemma_tokenizer
    elif tokenizer_type=='porter':
        tokenizer=porter_tokenizer
    ########################################################
    ######### Define and choose vectorizer #################
    # 1. Use word level, character level does not make sense for current situation.
    # 2. Use count (freq) and tf-idf vectorizer. see: 
    # Bengfort, B., Bilbro, R., & Ojeda, T. (2018). Applied Text Analysis with Python: Enabling Language-Aware Data Products with Machine Learning (1 edition). Beijing Boston Farnham Sebastopol Tokyo: O’Reilly Media.
    # Page: 67.
    if vectorizer_type=='count':
        ##### Token counts #####
        vectorizer = CountVectorizer(stop_words='english', 
                                     tokenizer=tokenizer, 
                                     analyzer='word'
                                    )
        return vectorizer
    elif vectorizer_type=='tfidf':
        ##### TF-IDF #####
        vectorizer = TfidfVectorizer(stop_words='english', 
                                     tokenizer=tokenizer, 
                                     analyzer='word'
                                    )
        return vectorizer
    ########################################################

# Define resample strategy.
def func_resample(method, sampling_strategy, x_train_vect, y_train, categorical_features):
    if method=='ADASYN':
        from imblearn.over_sampling import ADASYN
        resample = ADASYN(sampling_strategy=sampling_strategy, random_state=42)
    elif method=='RandomOverSampler':
        from imblearn.over_sampling import RandomOverSampler
        resample = RandomOverSampler(sampling_strategy=sampling_strategy, random_state=42)
    elif method=='SMOTE':
        from imblearn.over_sampling import SMOTE
        resample = SMOTE(sampling_strategy=sampling_strategy, random_state=42)
    # SMOTENC not used.
    # --> 947         X_continuous = check_array(X_continuous, accept_sparse=['csr', 'csc'])
    # ...
    # ValueError: Found array with 0 feature(s) (shape=(123539, 0)) while a minimum of 1 is required.
    # Must have continuous feature, but all features (columns) in x_train_vect are specified as categorical.
#     elif method=='SMOTENC':
#         from imblearn.over_sampling import SMOTENC
#         resample = SMOTENC(sampling_strategy=sampling_strategy, random_state=42, categorical_features=categorical_features)
    elif method=='SMOTEENN':
        from imblearn.combine import SMOTEENN
        resample = SMOTEENN(sampling_strategy=sampling_strategy, random_state=42)
    elif method=='SMOTETomek':
        from imblearn.combine import SMOTETomek
        resample = SMOTETomek(sampling_strategy=sampling_strategy, random_state=42)
    x_train_vect_res, y_train_res = resample.fit_resample(x_train_vect, y_train)
    return [x_train_vect_res, y_train_res]

# Compile the workflow as a function.
def func_classifier(param_list):
    global gmean, iba, lb
    # Pass parameters.
    x_train, y_train, x_test, y_test, resample_method, sampling_strategy, classifier, tokenizer, vect_type = param_list
    # Encode text input.
    vectorizer=text_vectorizer(tokenizer_type=tokenizer, vectorizer_type=vect_type)
    vectorizer.fit(x_train.append(x_test)) # Fit on all texts.
    x_train_vect=vectorizer.transform(x_train)
    x_test_vect=vectorizer.transform(x_test)
    # Encode class labels -- Not necessary for NB/RF algorithms: https://stats.stackexchange.com/questions/288095/what-algorithms-require-one-hot-encoding
    # See test results below.
#     y_train_vect=le.fit_transform(y_train)
#     y_test_vect=le.fit_transform(y_test)
    # Resample imbalanced dataset.
    categorical_features=list(range(0, x_train_vect.shape[1])) # All indices are categorical.
    resample_x_y=func_resample(method=resample_method, sampling_strategy=sampling_strategy,
                               x_train_vect=x_train_vect, y_train=y_train, categorical_features=categorical_features)
    classifier.fit(resample_x_y[0], resample_x_y[1])
    predictions = classifier.predict(x_test_vect)
    gmean = iba(alpha=0.1, squared=True)(gmean)

    return {'resample_method':resample_method,
            'sampling_strategy':sampling_strategy,
            'classifier':str(classifier), 
            'tokenizer':tokenizer, 
            'vect_type':vect_type, 
            'weighted_acc': gmean(y_true=y_test, y_pred=predictions, average='weighted')
           }

In [6]:
param_llist=[]
x_train, y_train, x_test, y_test = [trainDF['mission_prgrm_spellchk'], trainDF['broad_cat'],
                                    testDF['mission_prgrm_spellchk'], testDF['broad_cat']]
for resample_method in ['ADASYN', 'RandomOverSampler', 'SMOTE', 'SMOTEENN', 'SMOTETomek']:
    for sampling_strategy in ['minority','not minority','not majority','all']:
        for classifier in [naive_bayes.MultinomialNB(), naive_bayes.ComplementNB(), ensemble.RandomForestClassifier()]:
            for tokenizer in ['lemma', 'porter']:
                for vect_type in ['count', 'tfidf']:
                    param_llist+=[[x_train, y_train, x_test, y_test, resample_method, sampling_strategy, classifier, tokenizer, vect_type]]
print('total parameter combinations:', len(param_llist))

total parameter combinations: 240


In [7]:
param_llist_1, param_llist_2, param_llist_3, param_llist_4, param_llist_5 = [param_llist[0:48], param_llist[48:48*2], param_llist[48*2:48*3], 
                                                                             param_llist[48*3:48*4], param_llist[48*4:48*5],
                                                                            ]

In [8]:
p=Pool(48)
result_dicts=p.map(func_classifier, param_llist_1)

In [9]:
pd.DataFrame(result_dicts).to_pickle('../../output/df_result_dicts_broad_cat_1.pkl')
pd.DataFrame(result_dicts)

Unnamed: 0,classifier,resample_method,sampling_strategy,tokenizer,vect_type,weighted_acc
0,"MultinomialNB(alpha=1.0, class_prior=None, fit...",ADASYN,minority,lemma,count,0.742963
1,"MultinomialNB(alpha=1.0, class_prior=None, fit...",ADASYN,minority,lemma,tfidf,0.530346
2,"MultinomialNB(alpha=1.0, class_prior=None, fit...",ADASYN,minority,porter,count,0.739831
3,"MultinomialNB(alpha=1.0, class_prior=None, fit...",ADASYN,minority,porter,tfidf,0.519062
4,"ComplementNB(alpha=1.0, class_prior=None, fit_...",ADASYN,minority,lemma,count,0.751159
5,"ComplementNB(alpha=1.0, class_prior=None, fit_...",ADASYN,minority,lemma,tfidf,0.717964
6,"ComplementNB(alpha=1.0, class_prior=None, fit_...",ADASYN,minority,porter,count,0.746316
7,"ComplementNB(alpha=1.0, class_prior=None, fit_...",ADASYN,minority,porter,tfidf,0.716104
8,"RandomForestClassifier(bootstrap=True, class_w...",ADASYN,minority,lemma,count,0.627893
9,"RandomForestClassifier(bootstrap=True, class_w...",ADASYN,minority,lemma,tfidf,0.650016


```Python
## Try encoding/not encoding label, almost identical.
# Label not encoded experiment 1.
In : func_classifier(param_list=param_llist[3])
Out: {'resample_method': 'ADASYN',
      'sampling_strategy': 'minority',
      'classifier': 'MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)',
      'tokenizer': 'porter',
      'vect_type': 'tfidf',
      'weighted_acc': 0.49227231236613467}
    
# Label not encoded experiment 2.
In : func_classifier(param_list=param_llist[3])
Out: {'resample_method': 'ADASYN',
      'sampling_strategy': 'minority',
      'classifier': 'MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)',
      'tokenizer': 'porter',
      'vect_type': 'tfidf',
      'weighted_acc': 0.5016141725625118}

# Label encoded experiment 1.
In : func_classifier(param_list=param_llist[3])
Out: {'resample_method': 'ADASYN',
      'sampling_strategy': 'minority',
      'classifier': 'MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)',
      'tokenizer': 'porter',
      'vect_type': 'tfidf',
      'weighted_acc': 0.5070028997209108}
    
# Label encoded experiment 2.
In : func_classifier(param_list=param_llist[3])
Out: {'resample_method': 'ADASYN',
      'sampling_strategy': 'minority',
      'classifier': 'MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)',
      'tokenizer': 'porter',
      'vect_type': 'tfidf',
      'weighted_acc': 0.5015052917827343}
```

### Analyze best parameters.

In [49]:
folder_path='../../output/result_dicts/'
result_file_list=[s for s in os.listdir(folder_path) if 'broad' in s]

df_results=pd.concat([pd.read_pickle(folder_path+file_name) for file_name in result_file_list], ignore_index=True)
df_results.sort_values('weighted_acc', ascending=False)[0:5]

Unnamed: 0,classifier,resample_method,sampling_strategy,tokenizer,vect_type,weighted_acc
100,"ComplementNB(alpha=1.0, class_prior=None, fit_...",ADASYN,minority,lemma,count,0.751159
148,"ComplementNB(alpha=1.0, class_prior=None, fit_...",SMOTETomek,minority,lemma,count,0.750759
4,"ComplementNB(alpha=1.0, class_prior=None, fit_...",SMOTE,minority,lemma,count,0.750036
102,"ComplementNB(alpha=1.0, class_prior=None, fit_...",ADASYN,minority,porter,count,0.746316
150,"ComplementNB(alpha=1.0, class_prior=None, fit_...",SMOTETomek,minority,porter,count,0.746314


### Retrain model using 100% UCF-Training and test on UCF-Testing.

In [18]:
train_file_path='../../dataset/df_ntee_universal/train/'
file_list=os.listdir(train_file_path)
df_train=pd.DataFrame()
for file in file_list:
    df_train=pd.concat([df_train, pd.read_pickle(train_file_path+file, compression='gzip')])

test_file_path='../../dataset/df_ntee_universal/test/'
file_list=os.listdir(test_file_path)
df_test=pd.DataFrame()
for file in file_list:
    df_test=pd.concat([df_test, pd.read_pickle(test_file_path+file, compression='gzip')])
    
len(df_train), len(df_test)

(154424, 38607)

In [19]:
# Code as 10 broad categories.
broad_cat_dict={'I': ['A'],
                'II': ['B'],
                'III': ['C', 'D'],
                'IV': ['E', 'F', 'G', 'H'],
                'V': ['I', 'J', 'K', 'L', 'M', 'N', 'O', 'P'],
                'VI': ['Q'],
                'VII': ['R', 'S', 'T', 'U', 'V', 'W'],
                'VIII': ['X'],
                'IX': ['Y'],
                'X': ['Z'],
               }
def ntee2cat(string):
    global broad_cat_dict
    return [s for s in broad_cat_dict.keys() if string in broad_cat_dict[s]][0]

df_train['mission_prgrm']=df_train['TAXPAYER_NAME']+' '+df_train['mission']+' '+df_train['prgrm_dsc']
df_train['mission_prgrm_spellchk']=df_train['TAXPAYER_NAME']+' '+df_train['mission_spellchk']+' '+df_train['prgrm_dsc_spellchk'] # Using spell-checked.
df_train['broad_cat']=df_train['NTEE1'].apply(ntee2cat)
print(len(df_train['mission_prgrm_spellchk']), len(df_train['NTEE1'].drop_duplicates()), len(df_train['broad_cat'].drop_duplicates()))

df_test['mission_prgrm']=df_test['TAXPAYER_NAME']+' '+df_test['mission']+' '+df_test['prgrm_dsc']
df_test['mission_prgrm_spellchk']=df_test['TAXPAYER_NAME']+' '+df_test['mission_spellchk']+' '+df_test['prgrm_dsc_spellchk'] # Using spell-checked.
df_test['broad_cat']=df_test['NTEE1'].apply(ntee2cat)
print(len(df_test['mission_prgrm_spellchk']), len(df_test['NTEE1'].drop_duplicates()), len(df_test['broad_cat'].drop_duplicates()))

154424 25 9
38607 25 9


In [21]:
x_train=df_train['mission_prgrm_spellchk']
y_train=df_train['broad_cat']
x_test=df_test['mission_prgrm_spellchk']
y_test=df_test['broad_cat']
resample_method='ADASYN'
sampling_strategy='minority'
classifier=naive_bayes.ComplementNB()
tokenizer='lemma'
vect_type='count'
param_list=[x_train, y_train, x_test, y_test, resample_method, sampling_strategy, classifier, tokenizer, vect_type]

In [23]:
func_classifier(param_list)

{'resample_method': 'ADASYN',
 'sampling_strategy': 'minority',
 'classifier': 'ComplementNB(alpha=1.0, class_prior=None, fit_prior=True, norm=False)',
 'tokenizer': 'lemma',
 'vect_type': 'count',
 'weighted_acc': 0.7529699033595111}

In [26]:
vectorizer=text_vectorizer(tokenizer_type='lemma', vectorizer_type='count')
vectorizer.fit(x_train.append(x_test)) # Fit on all texts.
x_test_vect=vectorizer.transform(x_test)

In [27]:
predictions = classifier.predict(x_test_vect)

In [28]:
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_true=y_test, y_pred=predictions))

                   pre       rec       spe        f1       geo       iba       sup

          I       0.82      0.86      0.98      0.84      0.92      0.83      4291
         II       0.84      0.82      0.97      0.83      0.89      0.78      6419
        III       0.77      0.86      0.99      0.81      0.92      0.84      1861
         IV       0.76      0.81      0.97      0.78      0.88      0.77      4329
         IX       0.86      0.83      0.99      0.84      0.91      0.81      1701
          V       0.83      0.81      0.92      0.82      0.87      0.74     11723
         VI       0.25      0.76      0.97      0.38      0.86      0.73       436
        VII       0.83      0.73      0.97      0.78      0.84      0.69      6749
       VIII       0.73      0.55      0.99      0.63      0.74      0.52      1098

avg / total       0.81      0.80      0.96      0.80      0.87      0.75     38607



### Verify numbers

In [46]:
df_test_result=pd.DataFrame([predictions, y_test]).T.rename(columns={0:'pred', 1:'true'})
df_test_result.loc[df_test_result[df_test_result.pred==df_test_result.true].index, 'equal']=1
df_test_result['count']=1

In [47]:
df_test_result.groupby('true').count()
# equal/count=recall (true positive)

Unnamed: 0_level_0,pred,equal,count
true,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
I,4291,3687,4291
II,6419,5245,6419
III,1861,1597,1861
IV,4329,3498,4329
IX,1701,1408,1701
V,11723,9528,11723
VI,436,333,436
VII,6749,4903,6749
VIII,1098,606,1098


In [48]:
df_test_result.groupby('pred').count()
# equal/count=precision (True predicted positive)

Unnamed: 0_level_0,true,equal,count
pred,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
I,4482,3687,4482
II,6222,5245,6222
III,2063,1597,2063
IV,4598,3498,4598
IX,1633,1408,1633
V,11548,9528,11548
VI,1330,333,1330
VII,5902,4903,5902
VIII,829,606,829


In [50]:
df_test_result

Unnamed: 0,pred,true,equal,count
0,V,V,1.0,1
1,I,I,1.0,1
2,V,V,1.0,1
3,I,I,1.0,1
4,IX,IX,1.0,1
5,I,I,1.0,1
6,IV,IV,1.0,1
7,IV,IV,1.0,1
8,III,III,1.0,1
9,VII,VII,1.0,1


### Draft code

```Python
# Manually check if acc is correct.
In :func_naive_bayes(2)
    df_performance['accuracy']
Out:0    0.340417
    Name: accuracy, dtype: float64
In :t=pd.DataFrame([classifier.predict(x_valid_vect), y_valid]).T.rename(columns={0:'a', 1:'b'})
    len(t[t.a==t.b])/len(t)
Out:0.34041666666666665
''' Looks correct, scale the computing '''
```

In [30]:
from imblearn.metrics import geometric_mean_score as gmean
from imblearn.metrics import make_index_balanced_accuracy as iba
gmean = iba(alpha=0.1, squared=True)(gmean)
gmean(y_true=y_valid, y_pred=predictions, average='weighted')

0.5659455417103673

```Python
print(classification_report_imbalanced(y_true=y_valid, y_pred=predictions)) # SMOTEENN(sampling_strategy='auto', random_state=42)
```
                       pre       rec       spe        f1       geo       iba       sup

              I       0.74      0.83      0.96      0.78      0.89      0.79      4291
             II       0.79      0.70      0.96      0.74      0.82      0.66      6419
            III       0.44      0.93      0.94      0.60      0.94      0.88      1861
             IV       0.54      0.83      0.91      0.65      0.87      0.75      4329
             IX       0.37      0.91      0.93      0.52      0.92      0.84      1701
              V       0.94      0.31      0.99      0.47      0.56      0.29     11723
             VI       0.13      0.78      0.94      0.23      0.86      0.72       436
            VII       0.81      0.54      0.97      0.65      0.73      0.50      6749
           VIII       0.37      0.79      0.96      0.50      0.87      0.75      1098

    avg / total       0.75      0.61      0.96      0.61      0.75      0.57     38607

```Python
print(classification_report_imbalanced(y_true=y_valid, y_pred=predictions)) # strategy='minority'
```
                       pre       rec       spe        f1       geo       iba       sup

              I       0.83      0.81      0.98      0.82      0.89      0.78      4291
             II       0.82      0.75      0.97      0.78      0.85      0.71      6419
            III       0.82      0.79      0.99      0.81      0.89      0.77      1861
             IV       0.81      0.66      0.98      0.73      0.80      0.63      4329
             IX       0.82      0.69      0.99      0.75      0.83      0.67      1701
              V       0.77      0.81      0.90      0.79      0.85      0.72     11723
             VI       0.11      0.88      0.92      0.19      0.90      0.80       436
            VII       0.79      0.63      0.96      0.70      0.78      0.59      6749
           VIII       0.75      0.33      1.00      0.46      0.58      0.31      1098

    avg / total       0.79      0.73      0.95      0.75      0.83      0.68     38607

```Python
print(classification_report_imbalanced(y_true=y_valid, y_pred=predictions)) # strategy='auto', = 'not majority'
```
                       pre       rec       spe        f1       geo       iba       sup

              I       0.77      0.83      0.97      0.80      0.90      0.80      4291
             II       0.82      0.71      0.97      0.76      0.83      0.67      6419
            III       0.47      0.94      0.95      0.63      0.94      0.89      1861
             IV       0.62      0.82      0.94      0.71      0.87      0.76      4329
             IX       0.44      0.92      0.95      0.59      0.93      0.86      1701
              V       0.88      0.53      0.97      0.66      0.72      0.49     11723
             VI       0.20      0.75      0.97      0.32      0.85      0.71       436
            VII       0.81      0.56      0.97      0.66      0.74      0.52      6749
           VIII       0.40      0.79      0.97      0.53      0.87      0.75      1098

    avg / total       0.76      0.68      0.96      0.69      0.80      0.64     38607

```Python
print(classification_report_imbalanced(y_true=y_valid, y_pred=predictions)) # No resampling.
```

                       pre       rec       spe        f1       geo       iba       sup

              I       0.81      0.84      0.98      0.83      0.91      0.81      4291
             II       0.79      0.79      0.96      0.79      0.87      0.75      6419
            III       0.80      0.82      0.99      0.81      0.90      0.80      1861
             IV       0.77      0.78      0.97      0.77      0.87      0.74      4329
             IX       0.82      0.68      0.99      0.74      0.82      0.65      1701
              V       0.78      0.84      0.89      0.81      0.87      0.74     11723
             VI       0.41      0.09      1.00      0.15      0.30      0.08       436
            VII       0.76      0.70      0.95      0.73      0.82      0.65      6749
           VIII       0.65      0.62      0.99      0.64      0.78      0.59      1098

    avg / total       0.78      0.78      0.95      0.77      0.85      0.72     38607