In [1]:
# Imports
import HSToken as tokenizer
import HSGoogleHelper as googleAPI
import HSUtilCSV as csvHelper

import unicodedata     
import nltk

import pandas as pd

In [2]:
def extract_features(image):
    
    #Call Google API
    google_detection= googleAPI.get_text_from_google(image)
    text_areas = googleAPI.collect_text_areas(google_detection)
    count_low, count_med, count_high = googleAPI.count_text_areas_by_size(text_areas)
    
    #Tokenize and turn into Features
    text = google_detection.text
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore')
    
    tokens, numbers = tokenizer.tokenize(text)
    total_len = len(tokens)
    tokens, count_prod, count_invoice = tokenizer.recognize_my_items(tokens)
    stemmed_tokens, tagged_tokens = tokenizer.stem_and_tag(tokens)

    nouns, verbs, adjectives, conjunctions, numbers = tokenizer.count_types(tagged_tokens)
    
    features = {}
    features["product_ids"]=count_prod
    features['numbers'] = numbers
    features['invoice_ids']=count_invoice
    features['nouns']=nouns
    features['verbs']=verbs
    features['adjectives']= adjectives
    features['conjunctions']= conjunctions
    features['small_blocks']=count_low
    features['med_blocks']=count_med
    features['large_blocks']=count_high
    features['total_words']=total_len
    return features

In [3]:
#features = extract_features("../RPA/invoice.jpg")
features = extract_features("./emails/email02.png")
features

{'adjectives': 37,
 'conjunctions': 1,
 'invoice_ids': 0,
 'large_blocks': 7,
 'med_blocks': 6,
 'nouns': 137,
 'numbers': 8,
 'product_ids': 0,
 'small_blocks': 6,
 'total_words': 220,
 'verbs': 23}

In [4]:
dataframe = csvHelper.create_feature_df()
dataframe = csvHelper.append_to_dataframe(dataframe, features, "invoice")
dataframe

Unnamed: 0,label,product_ids,numbers,invoice_ids,nouns,verbs,adjectives,conjunctions,small_blocks,med_blocks,large_blocks,total_words
0,invoice,0,8,0,137,23,37,1,6,6,7,220


In [5]:
csvHelper.save_to_file(dataframe, 'out.csv')

In [6]:
dataframe = csvHelper.create_feature_df()

#score all the invoices
features = extract_features("./invoices/inv01.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "invoice")
features = extract_features("./invoices/inv02.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "invoice")
features = extract_features("./invoices/inv03.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "invoice")
features = extract_features("./invoices/inv04.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "invoice")
features = extract_features("./invoices/inv05.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "invoice")
features = extract_features("./invoices/inv06.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "invoice")

#score all of the emails
features = extract_features("./emails/email01.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "email")
features = extract_features("./emails/email02.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "email")
features = extract_features("./emails/email03.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "email")
features = extract_features("./emails/email04.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "email")
features = extract_features("./emails/email05.png")
dataframe = csvHelper.append_to_dataframe(dataframe, features, "email")

#save 
csvHelper.save_to_file(dataframe, 'out.csv')

In [23]:
chosen = dataframe.iloc[:1]
chosen

random = csvHelper.generate_random(chosen, 0.3, 0)
random


Unnamed: 0,label,product_ids,numbers,invoice_ids,nouns,verbs,adjectives,conjunctions,small_blocks,med_blocks,large_blocks,total_words
0,invoice,0,14.1159,0,27.0557,4.90347,5.23559,0,9.847,15.7227,1.95708,52.2826


In [24]:
dataframe = csvHelper.fill_random(dataframe, 1000, 0.3)
dataframe

   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  6.80969           0  50.2545  11.9505    12.1023   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0     2.3468      5.54775     89.7396  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  17.1854           0  34.5276  3.15694    7.32595   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      18.9944    7.62338      2.93855     45.2726  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  5.67826           0  31.5727  7.19375    12.5227   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      16.4498    5.31017      3.62488     57.7125  
   label product_ids numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  9.5092           0  65.4334  6.84493    7.93357   

  

     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  9.20126           0  25.4874  8.81172    8.04612   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      11.9548    7.75034      3.03142     63.5356  
   label product_ids numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  2.2181           0  79.2167  9.45211    26.4956   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0     0.870619    3.79885      14.2221     163.799  
     label product_ids  numbers invoice_ids   nouns    verbs adjectives  \
0  invoice           0  21.5463           0  46.552  11.8308    12.0591   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      20.9925    4.40358      5.05492     79.8249  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  11.8576           0  18.8243  4.82192    8.80126   



     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  7.75185           0  32.8361  5.81451    11.1237   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      15.0286     6.2196       4.7307     64.2306  
   label product_ids  numbers invoice_ids   nouns    verbs adjectives  \
0  email           0  1.78118           0  99.508  16.2108    36.5648   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      1.12881    4.37301      10.9236      183.13  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  5.42006           0  71.2026  18.6128    11.9394   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      4.45991    8.15231      7.33018     125.415  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  11.4654           0  18.5603  2.83035     5.9367   

  

     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  11.4137           0  41.9794  3.10189    7.75915   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      19.6985    6.34858      2.24716     55.8692  
     label product_ids  numbers invoice_ids  nouns    verbs adjectives  \
0  invoice           0  6.49416           0  36.28  8.37765    8.55415   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      11.3448    6.94519       4.9114     58.1739  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  7.30995           0  33.4134  10.3697    16.0379   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    3.81624       6.2351     67.1238  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  9.95577           0  34.1133  6.81141    12.7277   



     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  8.63076           0  30.2053  2.66334    5.88168   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      21.8854     8.4971      2.30914     51.5204  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  9.02733           0  34.4948  5.68136     8.8582   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      10.8481    7.38586      4.47241     45.8769  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  15.4995           0  26.1724  7.25032    5.15544   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      14.3143    14.1583      2.19759     56.6887  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  15.9272           0  36.0641  10.3695    14.3

0            0            0    2.56983      7.58548     61.5247  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  9.43596           0  21.7807  3.20875    4.98689   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      17.4321    8.25516      2.38178     40.7391  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  2.11655           0  58.1512  10.3497    28.8245   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0     0.884461    3.90267      10.5959     182.604  
   label product_ids  numbers invoice_ids   nouns    verbs adjectives  \
0  email           0  7.27729           0  70.272  6.29795    11.8797   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    4.38212      5.04892      84.964  
     label product_ids numbers invoice_ids    nouns    verbs adjectives  \
0  invoice    

0            0      21.1184    4.27935      5.77638     89.3565  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  9.87806           0  30.4341  7.06168    13.0816   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      15.4634    6.87221      4.06467     75.5414  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  17.1144           0  41.1618  8.02807    13.4911   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      27.3914    6.45131      6.08078     121.221  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  22.8776           0  64.7271  8.18157    18.4904   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      26.6992    7.71479      5.60221     95.5899  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  

     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  11.9623           0  20.2236  4.51912    5.20461   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      8.58637    12.2774      1.75658     46.4992  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  14.6389           0  57.9258  11.1209    15.6737   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      26.3187    4.87473      4.46396     121.357  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  9.86888           0  47.5266  8.36905    8.66677   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    3.19806      5.06863     87.2639  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  6.64932           0  136.114  29.2646    44.4756   



     label product_ids  numbers invoice_ids   nouns    verbs adjectives  \
0  invoice           0  14.1164           0  45.244  5.02566    12.4847   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      15.9431    10.1487      2.83303     77.5495  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  17.3604           0  19.7113  5.40433    6.57036   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      15.0084    9.61017      1.96908     63.1395  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  5.68521           0  47.5934  13.2577    13.6686   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    2.14434      5.90222      86.393  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  2.09486           0  96.4942  16.5427    26.5514   

  

0            0      3.84869    7.57627      7.00332     148.525  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  8.83145           0  119.771  17.6548    27.2635   

  conjunctions small_blocks med_blocks large_blocks total_words  
0      1.06332      6.15793    6.58155      6.64629     213.617  
     label product_ids  numbers invoice_ids    nouns   verbs adjectives  \
0  invoice           0  14.7034           0  38.9497  5.0487    10.8638   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      20.7897    6.18115      3.71098     61.1992  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  13.1564           0  37.9137  10.5345     13.257   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      21.3507    6.01262      6.45875     90.5032  
     label product_ids  numbers invoice_ids    nouns   verbs adjectives  \
0  invoice

     label product_ids numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  16.282           0  34.0254  4.38705    10.0464   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0       17.917    9.07087      2.93148      72.999  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  6.13546           0  34.1045  5.88956    10.9444   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      14.6802    5.87545      4.05203      72.729  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  14.3118           0  27.7467  2.09472    7.81992   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      10.9235    4.85205      3.40763     35.5277  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  10.9591           0  25.5372  1.71789    11.629

     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  14.8803           0  25.8791  1.54508    8.55611   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      12.3791    5.83415      5.15282     58.0045  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  5.83683           0  49.1686  11.2539    14.8351   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    3.18939      6.51051     72.1577  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  11.2537           0  15.4334  7.44692    7.48887   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      15.3748    12.8705        2.436     46.7464  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  6.49286           0  38.0398  11.1883    12.3656   



0            0      11.4783    13.1179      1.66073     41.4885  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  15.0039           0  25.4764  3.86636    12.1377   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      23.0002    6.72425      2.35553     58.5048  
     label product_ids numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  18.584           0  39.6781  12.2094    13.9084   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      16.2769    4.43989      6.20983     80.4219  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  9.35202           0  118.536  25.6389    44.2053   

  conjunctions small_blocks med_blocks large_blocks total_words  
0       1.0194      6.90008    4.64617      6.03746     271.857  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoic

   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  2.19127           0  63.0437  13.0941    44.0891   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0     0.971177    4.56244      11.1573     106.801  
     label product_ids numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  13.951           0  32.8388  1.49455    10.9831   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      15.0774    4.36358      4.07589     45.3239  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  5.17275           0  66.1235  20.7707    11.9808   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0       4.2675     7.4295      8.27325     138.064  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  14.9277           0  39.3296  4.71166    7.84396   

  

   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  8.02574           0  103.589  29.5931    27.8519   

  conjunctions small_blocks med_blocks large_blocks total_words  
0      1.28934      6.75588    7.56054      5.87462     273.415  
   label product_ids  numbers invoice_ids    nouns   verbs adjectives  \
0  email           0  7.21427           0  84.6548  28.684    11.0716   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      4.70743    7.08288      7.18707     142.951  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  19.0211           0  46.5394  13.3565    18.4454   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      24.3381    4.55193      5.00124     71.1727  
   label product_ids  numbers invoice_ids    nouns  verbs adjectives  \
0  email           0  2.42511           0  72.9324  9.401    35.0647   

  conjunct

     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  7.21739           0  31.9138  3.27737    7.44411   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      22.4273    9.18025      1.51456      54.042  
   label product_ids  numbers invoice_ids   nouns    verbs adjectives  \
0  email           0  1.74014           0  60.331  13.4781    32.6861   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0     0.760343    5.18576      10.8199      172.81  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  5.65935           0  157.883  23.2748    26.3799   

  conjunctions small_blocks med_blocks large_blocks total_words  
0     0.945884      7.38857    6.03655      6.76305     187.021  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  15.3337           0  21.3115  1.57148    10.1479   

  

   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  4.37985           0  32.2507  13.9382     12.595   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    2.99622      5.74555     84.1476  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  8.86843           0  18.6564  3.54294    6.66776   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      16.8459     11.188      2.48524     48.4079  
     label product_ids  numbers invoice_ids   nouns    verbs adjectives  \
0  invoice           0  14.2336           0  24.305  5.97921    6.92685   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      14.4625     15.163      1.80363      47.234  
   label product_ids  numbers invoice_ids    nouns   verbs adjectives  \
0  email           0  11.4308           0  45.8918  6.6828    13.1962   

  co

   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  11.8248           0  62.7985  5.83371    11.1062   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    4.42614      7.48053     83.4218  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  16.5584           0  24.3481  7.28485    8.57956   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      12.0296    9.80929      2.59313     41.0646  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  6.38502           0  97.9902  23.1629    31.7823   

  conjunctions small_blocks med_blocks large_blocks total_words  
0     0.718085       6.8667    6.70071      5.53903     206.895  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  6.45842           0  46.8137  10.2592    15.8836   

  co

   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  8.79143           0  47.9004  8.41821    10.2155   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    5.04055       7.0035     69.0713  
     label product_ids  numbers invoice_ids    nouns  verbs adjectives  \
0  invoice           0  10.1423           0  40.8632  3.765    12.0184   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      16.8455    10.2783      3.76214     59.8533  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  6.92408           0  84.3459  23.2018    10.7792   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      5.17188    11.3188       12.361     93.5194  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  14.5553           0  30.5625  4.94639    12.3309   

  co

   label product_ids  numbers invoice_ids    nouns  verbs adjectives  \
0  email           0  7.19658           0  32.5948  9.744    12.7196   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    2.32986      5.05113     72.3004  
     label product_ids  numbers invoice_ids    nouns   verbs adjectives  \
0  invoice           0  8.52597           0  28.1466  2.6842    6.73589   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      15.0086    10.9553      1.79808     46.6149  
     label product_ids  numbers invoice_ids    nouns   verbs adjectives  \
0  invoice           0  12.9277           0  15.6786  6.9234    8.80474   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      12.4528    11.8889      1.65311      46.789  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  17.1989           0  36.5473  11.8188    18.3675   

  co

0     0.764926      5.55555    6.42285       7.4202     196.425  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  2.07759           0  81.5234  15.5004     36.176   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0     0.980341    4.43925      10.8011     129.001  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  5.63986           0  176.222  28.5377    45.2636   

  conjunctions small_blocks med_blocks large_blocks total_words  
0     0.764331      5.93527    7.21542      7.87463     236.261  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  10.5815           0  30.3262  3.03754    11.2499   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      20.4321    7.84303      3.18795     54.0778  
   label product_ids  numbers invoice_ids    nouns   verbs adjectives  \
0  email      

   label product_ids numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  1.6722           0  81.1907  12.4668     35.884   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      1.29177    4.81262      8.88201     131.329  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  10.1759           0  23.7414  2.23613    7.55764   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      16.1313    5.24604      4.12782     54.2146  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  4.34427           0  92.5571  27.5633    15.5647   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      3.71353    11.6862      9.49879     158.601  
   label product_ids numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  6.7771           0  43.7536  15.1059    11.9204   

  conjun

   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  8.25151           0  55.3819  6.44601    12.7897   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    3.54018      7.49748     83.2653  
     label product_ids  numbers invoice_ids   nouns    verbs adjectives  \
0  invoice           0  9.89153           0  29.739  2.89511    7.54764   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      15.9799     8.6043      1.76023     33.6729  
   label product_ids  numbers invoice_ids    nouns   verbs adjectives  \
0  email           0  5.65535           0  64.1435  25.223    10.7678   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      3.91276    9.11161       11.117     128.862  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  7.65157           0  54.4623  8.70517    11.2564   

  conjun

     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  15.6069           0  31.7274  3.83939    10.1027   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      16.9998    9.32804      2.78303      58.995  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  7.18893           0  165.692  22.0144    32.5942   

  conjunctions small_blocks med_blocks large_blocks total_words  
0     0.782304      7.58919    7.04362      7.58067     191.416  
   label product_ids  numbers invoice_ids    nouns   verbs adjectives  \
0  email           0  5.99788           0  147.678  22.608    39.0617   

  conjunctions small_blocks med_blocks large_blocks total_words  
0     0.881957      6.49267     7.3614      7.93464      243.38  
   label product_ids  numbers invoice_ids    nouns   verbs adjectives  \
0  email           0  7.72163           0  39.6162  11.061    10.0714   

  conjun

     label product_ids numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  9.9092           0  24.5812  1.87929    9.27132   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      16.9023    4.37431      4.19188     52.9625  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  6.23834           0  29.9117  8.68359    10.2973   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      13.4858    4.41429      3.98998      75.987  
   label product_ids  numbers invoice_ids   nouns    verbs adjectives  \
0  email           0  2.58901           0  94.728  14.7119    42.1338   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0     0.913954    4.12662      11.8149     125.858  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  16.2458           0  54.1941  9.90344    16.2905   



     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  13.6559           0  46.6031  4.77734    8.50792   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      25.1633    6.94813      3.27488     68.4238  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  6.94293           0  55.2934  13.7696    14.2374   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0            0    2.75002      6.70199     57.1618  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  11.5191           0  21.4028  2.17116    6.85485   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      24.7216    6.78431      2.39709     58.8853  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  20.8203           0  43.8579  9.70183     14.278 

     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  11.0917           0  18.6336  3.08477    7.90788   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      24.6914    10.5414      1.76562     44.3107  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  20.4823           0  61.7014  11.5627      10.81   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      20.3691    5.79889      5.15222     80.2924  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  9.94021           0  97.3817  24.1503    32.2193   

  conjunctions small_blocks med_blocks large_blocks total_words  
0      1.04308       4.3202    4.72264      8.19954     262.177  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  7.00769           0  56.6708  16.3389    13.3542   



   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  5.41885           0  75.4366  18.0015    15.4753   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      4.61931    11.2549      12.1609     96.7552  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  17.6755           0  16.4475  7.19136    8.89303   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      13.3272    15.3376      2.00045     51.5881  
     label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  invoice           0  18.8018           0  26.5058  6.79537    5.00596   

  conjunctions small_blocks med_blocks large_blocks total_words  
0            0      13.2578    14.4285      2.30904     57.3976  
   label product_ids  numbers invoice_ids    nouns    verbs adjectives  \
0  email           0  6.14348           0  100.139  26.6335    38.7255   



Unnamed: 0,label,product_ids,numbers,invoice_ids,nouns,verbs,adjectives,conjunctions,small_blocks,med_blocks,large_blocks,total_words
0,invoice,0,15,0,21,6,7,0,12,13,2,50
1,invoice,0,12,0,26,2,9,0,14,5,4,50
2,invoice,0,18,0,51,11,15,0,23,6,5,100
3,invoice,0,9,0,26,3,7,0,21,9,2,47
4,invoice,0,8,0,33,7,11,0,15,6,4,59
5,invoice,0,14,0,36,4,10,0,21,8,3,64
6,email,0,2,0,82,13,35,0,1,4,12,142
7,email,0,8,0,137,23,37,1,6,6,7,220
8,email,0,6,0,44,12,13,0,0,3,6,77
9,email,0,10,0,59,8,11,0,0,4,6,92


In [25]:
#shuffle
dataframe = dataframe.sample(frac=1).reset_index(drop=True)
#save 
csvHelper.save_to_file(dataframe, 'out.csv')