## Notebook Content

In this notebook, topic modeling for the scanned dataset that comes from PDF files then starts and multiple iterations of removing the domain-specific words, then finally labeling the data to start supervised learning procedures.

## Import Libraries

In [1]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from nltk.stem import WordNetLemmatizer
import pandas as pd
from collections import Counter
import re
import pickle
import string
from sklearn.decomposition import NMF
import spacy
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB
import numpy as np
from corextopic import corextopic as ct

### Methods to Use Amoung File

In [2]:
def lemmatize_verbs(words):
    """Lemmatize verbs in list of tokenized words"""
    lemmatizer = WordNetLemmatizer()
    lemmas = []
    for word in words:
        lemma = lemmatizer.lemmatize(word, pos='v')
        lemmas.append(lemma)
    return ' '.join(lemmas)

In [3]:
def only_word(list_):
    """
    This method is to return list of words only in the passed list.
    """
    list_to_return = []
    for ele in list_:
        if len(ele) >= 3:
            list_to_return.append(ele)
    return ' '.join(list_to_return)

In [4]:
def clean_method(text):
    """This method depends on lemmatize_verbs, remove_stopwords and clean_text to apply them on passed parameter"""
    if len(text) <= 100:
        return 'd'
    stopwords = nltk.corpus.stopwords.words('english')
#     text = text.lower() 
    text = text.replace('\\n', ' ').replace('\\r', ' ').replace('\n', ' ').replace('\r', ' ').replace('\xef', ' ').replace('\\xef', ' ')
    text = re.sub('http\S+\s*', ' ', text)
    text = re.sub(r'(.)\1+', r'\1', text)
    text = re.sub('[^a-zA-Z]+', ' ', text)
    text = only_word(text.split())
    text = lemmatize_verbs(text.split())
    text = [word for word in text.split() if word not in stopwords]
    return ' '.join(text)

In [5]:
def display_topics(model, feature_names, no_top_words, topic_names=None):
    """...."""
    for ix, topic in enumerate(model.components_):
        if not topic_names or not topic_names[ix]:
            print("\nTopic ", ix)
        else:
            print("\nTopic: '",topic_names[ix],"'")
        print(", ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))

In [6]:
def get_freq_word(TEXT_list):
    """This method get count the frequency of each word in the passed parameter"""
    Vocab_list = ' '.join(TEXT_list)
    temp_dict = Counter(Vocab_list.split()).items()
    freq_df = pd.DataFrame(temp_dict)
    freq_df = freq_df.sort_values(1, ascending = False)
    return freq_df

In [7]:
def write_pickle(file_name, df_to_write):
    """This method is to save the passed dataframe as a pickle file with the passed name"""
    with open(file_name, 'wb') as handle:
        pickle.dump(df_to_write, handle, protocol = pickle.HIGHEST_PROTOCOL)

In [8]:
def read_pickle(file_name):
    """...."""
    with open(file_name , 'rb') as handle:
        return pickle.load(handle)

In [9]:
def remove_domain_stop_words(stop_words, df_edit):
    """Remove domain specific stop words from the passed parameter"""
    df_after = df_edit.copy()
    for i in range(len(df_after)):
        for word in stop_words:
            pattern = r'\b' + word + r'\b'
            new_text = re.sub(pattern,'', df_after.TEXT[i])
            df_after.TEXT[i] = new_text
    return df_after

### Read Data from pickle

In [10]:
data = read_pickle('df_1_after_cleaning.pickle')

### Explore Data

In [11]:
## this step is to display all text in each row
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)

  pd.set_option('max_colwidth', -1)


In [12]:
data.sample(2, random_state = 1)

Unnamed: 0,TEXT
60,resume althab husain althabhusainm contact abu dubai contact mohamed ibnul erasaliyapuram kadayanalur tirunelveli dist tamilnadu dob age sex male nationality pasportno marital maried visa visa visa exp know proficient english hindi mil carier objective chalenging devote fulfilment competitive era wiling hard eager learn technologies profile computer hardware network computer science enginer computer science enginering sns technology coimbatore pasing hsc darusalam higher secondary pasing technical program visual basic operate system windows package ofice word excel powerpoint outlok multimedia tols html adobe photoshop database ace oracle sql computer proficiency hardware network verse windows proficiency ofice understand internet tols employers tech profile hardware software maintenance avenue property management profile management curent organization avenue contract property management abu dhabi designation incharge duration march responsibilites implement maintain monitor necesary hardware network service implement maintain network router switch printer finger print machine troubleshoting hardware software problems intain log require repair maintenance organize overal operations ensure stable secure operation infrastructure network acounts paswords require instalation configuration setup hardwar data backup area technician security ctv system asembling instalation computer operator comitment sincerity punctuality desire inovation declaration hereby declare information give true corect belief althab husain
1055,jones marshal david musicbrothers kulasekara alwar contact marai malai nagar tamilnadu objective develop background music musicianship wide range educational vocational music component institution desigination loyola cbse band trainer vadamelpakam john dvanchery band trainer drum teacher mary matriculation higher secondary gudvanchery band trainer christ ing girls matriculation tambaram band trainer litle flower matriculation higher secondary kundrathur band trainer stephen matriculation band trainer drum palavaram teacher lord int ernational band trainer drum chenai teacher matriculation higher secondary band trainer chrompet holy quen matriculation band trainer chrom pet violet matriculation band trainer chitlapakam boaz public sembakam band trainer vimala matriculation higher secondary trichy band trainer ans matriculation mangadu band trainer extracuricular cultural cordinator organize many cultural program eastern band trop active trainer cult ural events visit htp youtube watch gtnzwxnq educational qualification institution percentage state josephs matriculation higher secondary state marai malai nagar electronics ana arm enginering electrical enginering technology profile jones marshal david father andrews priya kumar gend male birth nationality hobies play drum saxophone information true sincerely jones marshal david


In [13]:
data.head(2)

Unnamed: 0,TEXT
0,retnawati pasir ris retna objective obtain exposure chalenging oportunity establish organization require tea player self motivate enthusiastic able continuous positive contribution organization degre comerce wolongong australia finance atained distinctions acounting organisations principles comerce investment analysis finance significant simulation socialy inovative enterprise analyse cause efects busines decisions graduate distinction august diploma busines information technology nanyang polytechnics apr apr achiev overal cumulativ gpa final create social media platform java program langua busines service centre student club nanyang polytechnics bartley secondary apointed vice president ibrary club bartley secondary organise events outdor indor xperience truston management pte fund acountant asist reconciliation fund fund adminis trator client nvestor service prepar fund transfer instructions upload updat hedge fund database comunicat clients busines party resolve open ide ntify exceptions problems fecting acounting recor price comunicating management asisting resolution trade setlement broker calculate net value clients fund eview data iregularities fun activity price securities acordance aplicable valuation policy require payment fund expense aplicable system acord ance organization control cordinate resources ensure caried sla meet basi cash projection ensure suficient fund wekly newsleter investors come main contact person finance operations citco fund service operation analyst october trade cash reconciliation compile nav pack client monitor analyse price resolve discrepancies reconciliation items break betwen citco broker clients comunicated client broker cals resolve open junior hire partic ipated pre onth end counter otc master agrements confirmations liais departments within citco hoc data conectivity system contact person fun reconciliations analyse profit los fund nav pack suport conversion clients exist migrations suported busines units peak periods ibm sal internship reduce number non respond oportunities caled aproached every client solve problems help solve query data entries require coleague compet encies computers databases aplications paladyne mysql ace blomberg windows axi investor aexeo ofice word powerpoint excel ace speak writen english excelent bahasa indonesia excelent bahasa melayu excele teochew excelent mandarin basic nationality atributes notice period indonesian strong analytical leader player interpersonal omunication decision problem solvin presure systematic highly orient month
1,curiculam vitae hirok jyoti borah instrumentation enginering mail borahirokjyoti objective loking prestigious previous capabilities put eficient pose efective organiz ational ability supervise develop final output father prodip borah permanent aders vil bamun pukhuri pachauni gaon bamun pukhuri pin dist jorhat state asam birt religion hindu nationalities marit unmaried sex male language know asamese english hindi curent employe techno canada inc surface client oil designation daq perator swt equipments operate presure monitor temperature monitor writen field read shet field analysis colecting liquid sample data header analysis analysis salinity analysis reflect meter liquid flow rate dep stick api analysis instrumentation conection rig swt equipments rig swt equipments load load inventory swt equipments spare maintain progres previous employe dreser rand pvt client oil natural gas corporation nazira asam operation maintenance gas compresor designation instrumentation technician period calibration instrument like presure gaug presure transmiter presure switch pneumatic controler main tain repair logic control system gas compresor check stop type pump acording case emergency conection field instru ments haz ardous area control panel maintain progres aprenticeship aprenticeship oil corporation qualification examination grade pasing diploma instrumentation scte hrh prince wales enginering technology hslc seba bamun pukhuri hslc science ahsec strength optimistic energetic build liaison leadership enjoy responsibility like acept chalenges declaration declare statements aplication true corect jorhat regard hirok jyoti borah


In [14]:
data.tail(2)

Unnamed: 0,TEXT
1532,rice iversity houst texas master science statistics cumulative gpa pected coursew ork advance statistical method financial series data analysis machine learn gre quan titati verbal linois rbana champaign urban linois bachel ience tural resource environ mental ciences gpa coursework aplied statistical methods natural resource economics gi natural resource management sit math write verbal experien rice houston fal quantitative financial analytics extensive research analysis financial statements available sec file devise compare selection strategies fundamental analysis historical data wrds strategy outperform benchmark percent cagr rice iversity houst sumer research asistant alyzed pulation census data sas excel poison gresio alyses evalence rate hepatitis respe age hnicity liver cancer inciden rate hypothesis heavy metal polution data gather houston ship chanel find city houston environmental council mini stry environmen satel lite vironment center beijin sumer gi data analy utilize arcgis excel analyze geographical data create individual map grind census data satelite image data past nature reserve maintain natural nature reserve database information improve acuracy eficiency sql excel asisted trans lation state nat ions cosystems chinese contain word champaign sumer campus mbasador overs campa ign result ver upload hero udy latform donation boks africa hero drive movement leverage viral market social media facilitate significant increase active user trafic initiate establish partnership campus organization profesors earn hero hon rol mon october outstanding performan highes number oks donate ils act ivities technic arcgis blomberg jmp matlab python sas sql language fluent mandarin glish swimin cokin video game poker
1533,mohamed fahim sulthan mechanical enginer contact mfahim skype mfahim hot mail dubai uae objective obtain mechanical enginer chalenging environment utilize suports organization growth profile mechanical enginer mechanical system design execution hvac system pose depth proposal hvac design ashrae cod hvac load calculation ventilation calculation ducting pip layout create draw isuing boq supervision quantity survey biling verification cordination field enginering poja aircons daikin dealer chenai hvac enginer march months enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units ducted air units fan coil units pmi enginering export pvt chenai enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units cei ling mount ducted units ceiling mount casete units wal mount split units sundaramorthy residency ranipet enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units ceiling mount ducted units ceiling mount casete units wal mount split units bajaj finance raheja tower chenai enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units duct rout air units fan coil units pmi solutions chenai role data shets specification material requisition hvac system vrv ahu package units ducts pump lves exhaust fan hvac input electrical civil instrumentation cordination design initial period change design duct rot way customer requirements vendor draw eply vendor queri folow vendors weks lok ahead months material enginering propose earn value wekly basis finalize upcoming identify forthcoming mising requirements manpower material design performance dev iations performance performance supervision quantify task duration depend productivity requirement quantity survey biling verification inspect progres produce contractor sub contractor initiate rfi inspect client erection client aproved itp inspection oversee quality control safety package hyd pipe rks internship operator traine january months valuthur gas power station ramnad role gas turbines boiler pump heat exchangers safety monitor operation water treatment pump firefighting pump periodical inspections pump check valves gate valves safety valves pipe line sumps presure read wekly asistant enginer area hvac fire fight manufacture academic qualification degre tech mechanical enginering abdur rahman chenai diploma diploma mechanical enginering dme mohamed sathak polytechnic kilakarai dote technical software mech autocad hap mcquay duct sizer creo ansys cad utility software microsoft ofice word excel powerpoint moviemaker adobe photoshop achievements ishrae student membership membership achievement automobile enginering cicp english language asesment cambridge academic strength improvement tig dresed mild stel design fabrication pneumatic backhoe design fabrication auto cradle strength lingual english tamil dedication highly comunicative interpersonal information ity shahul ham birth marital single permanent pasport visa athiyilai stret kilakarai ramnad visit visa hereby declare mention information corect sincerely mohamed fahim sulthan


### Add SPACY_TEXT Colum

In [15]:
nlp = spacy.load('en_core_web_sm')
data['SPACY_TEXT'] = list(nlp.pipe(data.TEXT))

In [16]:
data.head(2)

Unnamed: 0,TEXT,SPACY_TEXT
0,retnawati pasir ris retna objective obtain exposure chalenging oportunity establish organization require tea player self motivate enthusiastic able continuous positive contribution organization degre comerce wolongong australia finance atained distinctions acounting organisations principles comerce investment analysis finance significant simulation socialy inovative enterprise analyse cause efects busines decisions graduate distinction august diploma busines information technology nanyang polytechnics apr apr achiev overal cumulativ gpa final create social media platform java program langua busines service centre student club nanyang polytechnics bartley secondary apointed vice president ibrary club bartley secondary organise events outdor indor xperience truston management pte fund acountant asist reconciliation fund fund adminis trator client nvestor service prepar fund transfer instructions upload updat hedge fund database comunicat clients busines party resolve open ide ntify exceptions problems fecting acounting recor price comunicating management asisting resolution trade setlement broker calculate net value clients fund eview data iregularities fun activity price securities acordance aplicable valuation policy require payment fund expense aplicable system acord ance organization control cordinate resources ensure caried sla meet basi cash projection ensure suficient fund wekly newsleter investors come main contact person finance operations citco fund service operation analyst october trade cash reconciliation compile nav pack client monitor analyse price resolve discrepancies reconciliation items break betwen citco broker clients comunicated client broker cals resolve open junior hire partic ipated pre onth end counter otc master agrements confirmations liais departments within citco hoc data conectivity system contact person fun reconciliations analyse profit los fund nav pack suport conversion clients exist migrations suported busines units peak periods ibm sal internship reduce number non respond oportunities caled aproached every client solve problems help solve query data entries require coleague compet encies computers databases aplications paladyne mysql ace blomberg windows axi investor aexeo ofice word powerpoint excel ace speak writen english excelent bahasa indonesia excelent bahasa melayu excele teochew excelent mandarin basic nationality atributes notice period indonesian strong analytical leader player interpersonal omunication decision problem solvin presure systematic highly orient month,"(retnawati, pasir, ris, , retna, , objective, obtain, exposure, chalenging, , oportunity, , establish, organization, require, tea, player, self, motivate, enthusiastic, able, , continuous, positive, contribution, organization, , degre, comerce, , wolongong, australia, , finance, atained, , distinctions, acounting, organisations, principles, , comerce, investment, analysis, , finance, significant, , simulation, socialy, inovative, enterprise, analyse, cause, efects, busines, decisions, graduate, distinction, august, diploma, busines, information, technology, nanyang, polytechnics, , apr, apr, achiev, overal, cumulativ, gpa, final, , create, social, media, platform, , java, program, langua, , busines, service, centre, , student, club, nanyang, polytechnics, bartley, secondary, , apointed, vice, president, ibrary, ...)"
1,curiculam vitae hirok jyoti borah instrumentation enginering mail borahirokjyoti objective loking prestigious previous capabilities put eficient pose efective organiz ational ability supervise develop final output father prodip borah permanent aders vil bamun pukhuri pachauni gaon bamun pukhuri pin dist jorhat state asam birt religion hindu nationalities marit unmaried sex male language know asamese english hindi curent employe techno canada inc surface client oil designation daq perator swt equipments operate presure monitor temperature monitor writen field read shet field analysis colecting liquid sample data header analysis analysis salinity analysis reflect meter liquid flow rate dep stick api analysis instrumentation conection rig swt equipments rig swt equipments load load inventory swt equipments spare maintain progres previous employe dreser rand pvt client oil natural gas corporation nazira asam operation maintenance gas compresor designation instrumentation technician period calibration instrument like presure gaug presure transmiter presure switch pneumatic controler main tain repair logic control system gas compresor check stop type pump acording case emergency conection field instru ments haz ardous area control panel maintain progres aprenticeship aprenticeship oil corporation qualification examination grade pasing diploma instrumentation scte hrh prince wales enginering technology hslc seba bamun pukhuri hslc science ahsec strength optimistic energetic build liaison leadership enjoy responsibility like acept chalenges declaration declare statements aplication true corect jorhat regard hirok jyoti borah,"(curiculam, vitae, hirok, jyoti, borah, instrumentation, enginering, mail, borahirokjyoti, , objective, loking, , prestigious, , previous, , capabilities, put, eficient, , pose, efective, organiz, ational, , ability, supervise, develop, final, output, , father, , prodip, borah, permanent, aders, vil, bamun, pukhuri, pachauni, gaon, bamun, pukhuri, pin, dist, jorhat, state, asam, , birt, religion, hindu, nationalities, , marit, , unmaried, sex, male, language, know, asamese, english, hindi, , curent, employe, techno, canada, inc, , surface, , client, oil, , designation, daq, perator, , swt, equipments, operate, presure, monitor, temperature, monitor, writen, field, read, shet, field, analysis, colecting, liquid, sample, data, header, ...)"


In [17]:
data.tail(2)

Unnamed: 0,TEXT,SPACY_TEXT
1532,rice iversity houst texas master science statistics cumulative gpa pected coursew ork advance statistical method financial series data analysis machine learn gre quan titati verbal linois rbana champaign urban linois bachel ience tural resource environ mental ciences gpa coursework aplied statistical methods natural resource economics gi natural resource management sit math write verbal experien rice houston fal quantitative financial analytics extensive research analysis financial statements available sec file devise compare selection strategies fundamental analysis historical data wrds strategy outperform benchmark percent cagr rice iversity houst sumer research asistant alyzed pulation census data sas excel poison gresio alyses evalence rate hepatitis respe age hnicity liver cancer inciden rate hypothesis heavy metal polution data gather houston ship chanel find city houston environmental council mini stry environmen satel lite vironment center beijin sumer gi data analy utilize arcgis excel analyze geographical data create individual map grind census data satelite image data past nature reserve maintain natural nature reserve database information improve acuracy eficiency sql excel asisted trans lation state nat ions cosystems chinese contain word champaign sumer campus mbasador overs campa ign result ver upload hero udy latform donation boks africa hero drive movement leverage viral market social media facilitate significant increase active user trafic initiate establish partnership campus organization profesors earn hero hon rol mon october outstanding performan highes number oks donate ils act ivities technic arcgis blomberg jmp matlab python sas sql language fluent mandarin glish swimin cokin video game poker,"( , rice, iversity, houst, texas, master, science, statistics, cumulative, gpa, pected, coursew, ork, advance, statistical, method, financial, , series, data, analysis, machine, learn, gre, quan, titati, verbal, , linois, rbana, champaign, urban, linois, , bachel, ience, tural, resource, environ, mental, ciences, , gpa, coursework, aplied, statistical, methods, natural, resource, economics, gi, natural, resource, management, sit, math, write, verbal, experien, , rice, , houston, fal, quantitative, financial, analytics, , extensive, research, analysis, financial, statements, available, sec, file, devise, compare, , selection, strategies, fundamental, analysis, , historical, data, wrds, , strategy, outperform, benchmark, percent, , cagr, rice, iversity, houst, sumer, research, asistant, ...)"
1533,mohamed fahim sulthan mechanical enginer contact mfahim skype mfahim hot mail dubai uae objective obtain mechanical enginer chalenging environment utilize suports organization growth profile mechanical enginer mechanical system design execution hvac system pose depth proposal hvac design ashrae cod hvac load calculation ventilation calculation ducting pip layout create draw isuing boq supervision quantity survey biling verification cordination field enginering poja aircons daikin dealer chenai hvac enginer march months enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units ducted air units fan coil units pmi enginering export pvt chenai enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units cei ling mount ducted units ceiling mount casete units wal mount split units sundaramorthy residency ranipet enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units ceiling mount ducted units ceiling mount casete units wal mount split units bajaj finance raheja tower chenai enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units duct rout air units fan coil units pmi solutions chenai role data shets specification material requisition hvac system vrv ahu package units ducts pump lves exhaust fan hvac input electrical civil instrumentation cordination design initial period change design duct rot way customer requirements vendor draw eply vendor queri folow vendors weks lok ahead months material enginering propose earn value wekly basis finalize upcoming identify forthcoming mising requirements manpower material design performance dev iations performance performance supervision quantify task duration depend productivity requirement quantity survey biling verification inspect progres produce contractor sub contractor initiate rfi inspect client erection client aproved itp inspection oversee quality control safety package hyd pipe rks internship operator traine january months valuthur gas power station ramnad role gas turbines boiler pump heat exchangers safety monitor operation water treatment pump firefighting pump periodical inspections pump check valves gate valves safety valves pipe line sumps presure read wekly asistant enginer area hvac fire fight manufacture academic qualification degre tech mechanical enginering abdur rahman chenai diploma diploma mechanical enginering dme mohamed sathak polytechnic kilakarai dote technical software mech autocad hap mcquay duct sizer creo ansys cad utility software microsoft ofice word excel powerpoint moviemaker adobe photoshop achievements ishrae student membership membership achievement automobile enginering cicp english language asesment cambridge academic strength improvement tig dresed mild stel design fabrication pneumatic backhoe design fabrication auto cradle strength lingual english tamil dedication highly comunicative interpersonal information ity shahul ham birth marital single permanent pasport visa athiyilai stret kilakarai ramnad visit visa hereby declare mention information corect sincerely mohamed fahim sulthan,"(mohamed, fahim, sulthan, mechanical, enginer, contact, , mfahim, , skype, mfahim, hot, mail, , dubai, uae, , objective, obtain, , mechanical, enginer, chalenging, environment, utilize, , suports, organization, growth, profile, mechanical, enginer, , mechanical, system, , design, execution, hvac, system, pose, depth, , proposal, , hvac, design, , ashrae, cod, hvac, load, calculation, ventilation, calculation, ducting, pip, layout, create, draw, isuing, boq, , supervision, quantity, survey, biling, verification, cordination, field, enginering, , poja, aircons, daikin, dealer, chenai, , hvac, enginer, march, months, , enginering, technical, asistance, procurement, design, aprox, tons, refrigeration, capacity, consist, vrv, units, ducted, air, , units, fan, ...)"


In [18]:
data.sample(2)

Unnamed: 0,TEXT,SPACY_TEXT
654,curiculum vitae mailarapa ulal main road cras gnyanajyothi nagar bangalore mail mailarapa cal objective seking chalenging organization would enab enhance curent comitment problem solve qualities augment abilities comunication interpersonal critical ustomer infrastructure also contribute suces organization around nine electrical electronics products active filter pasive filter static switch stabilizer power quality audit also facilities deportment breakers transformers diesel generators hvac heat ventilation air condition like ahu split unit package units vrf variable refrigerant flow amf auto mains failure panel fire alarm panel public adresing systems lift server hubs roters repeaters switch fire hydrants sprinkler syst ems reverse osmosis wtp water treatment technical ave kva systems hitachi hirel power electronics chloride emerson network power neowat consul bori products solar inverters grid solar inverter grid solar inverters power quality audit thermograph check harmonics power factor eficiency electrical systems like chilers panel active pasive filter facility instal comisioned static swit ches power factor imprudent filter harmonics controle panel diagnose repair component pcb capacitors igbt power components kind bateries like smf vrla plante nicad acid tubular bateries industrial software service provider like oil gas power plastic sugar cement stel softw like hewlet packard ibm busines management tech mahindra goldman sachs juniper anz tata motor ftp net macafe dusters total solutions service pvt facility enginer sea gate technology pvt devarabesanahali bangalore til units casete units precision air condition water novac systems fas systems systems fire hydrants sprinklers monitor build manag ire alarm system schedule maintain history card wekly energy consumption reakdown rca dedicate achieve ims information management service standard guidelines enginering technicians plumb carpentry electrical tec hnicians vendor management control critical non resources conservation schedule round table mom clients basis resolve operation previous jones lang lasale ifm property consultancy pvt facility enginer hsbc ban erghata road bang alore operation maintenance facility equipment transformers diesel nerators daikin split unit package units casete units precision air condition systems fas systems systems fire hydrants sprinklers apfc capacitor system schedule maintain history card wekly energy consumption reakdo rca ovsc view service control portal incident dedicate achieve ims independent manag ement service standard guidelines enginering technicians plumb carpentry electrical technicians bms operators locker management vendor management control critical non schedule protective safeties fire protection systems ems natural resources conservation schedul round table mom clients basis resolve operation tracker like open ovsc tracker open incident tracker invoice tracker tracker break tracker nrgp rgp gate pas track materials inventory tracker energy comparison tracker monitor electrical hvac systems plumb carpentry ordinate anagement run operation smothly ordinate management busines ontinuity bcp declare flexibility hours track bil cordinating vendor suplying spar consumables timely human resource ability motivate subor dinate management interpersonal previous consul neowat power solutions pvt enginer responsbility field failure analysis solar inverte static switch active filter stabilizers service operations instalations comisioning break preventive maintenance cals power quality audit ensure preventive maintena nce cals maintain proper spar consumptions inventory execute previous emerson networ power pvt enginer responsbility power operation service enginer cover entire south region efectively trouble shoot instal comisioning systems active pasive filter static switch solar inverters grid grid several batery replacements comisioning part rvice power quality audit emerson visit type customers like intel ibm goldman sack honeywel juniper acenture jsw stel get several apreciation customers give technical input product service previous hitachi hirel electronics enginer description service enginer trouble shoot instalation comisioning systems active pasive filter static switch bms kva clients like tata comunication bahrati airtel sugar cement stel iocl mrpl mcfl fact power part service power quality audit hirel previous apolo power systems pvavite bangalore period enginering facility maintenance designation maintenance shift supervisor description maintenance supervisor honeywel adarshya tech park devarabhisanahali outer ring road marathahali bangalore acordingly wil give respect technician carpenter wil close within stipulate manager aring kva capacity load auto synchronize load share maintain operate air circuit breaker chiler air unit hot water boilers recycle water liquid oxygen vacum compresor split air condition duct split operational maintenance operational maintenance apc kva kva kva kva kva kva kva electrical motor starters epab ericson telephone line operational maintenance rmu operational maintenance water suply maintenance plumb maintenance lift operational maintenance apfc capacitors standard power factor maintenance panel maintenance rmu operational maintenance water pump house water suply recycle water operational maintenance system educatinal qualification bms cole bangalore pursue diploma electrical electronics eng govt polytechnic raichur iti electrician govt iti lingasugur govt kavithal technical certification fire safety first aid loto lock tag health safety induction ben chloride product computer operate systems windows vista software package ofice basapa birth marital single abilities speak kanada english hindi telugu tamil hereby declare information true faithfuly mailarapa lace angalore,"(curiculum, vitae, mailarapa, ulal, main, road, cras, gnyanajyothi, nagar, bangalore, mail, mailarapa, , cal, objective, seking, chalenging, , organization, would, enab, enhance, curent, , comitment, , problem, solve, , qualities, augment, abilities, , comunication, interpersonal, , critical, ustomer, infrastructure, also, contribute, suces, organization, , around, nine, , electrical, electronics, products, active, filter, pasive, filter, static, switch, stabilizer, power, quality, audit, also, , facilities, deportment, , breakers, transformers, diesel, generators, hvac, heat, ventilation, air, condition, like, ahu, split, unit, package, units, vrf, variable, refrigerant, flow, amf, auto, mains, failure, panel, fire, alarm, panel, public, adresing, systems, lift, server, hubs, roters, repeaters, ...)"
1353,heng pei lin stacia age birth gender female race hinese marital maried number taciaheng blk bukit purmei singapo nationality highest degre diploma acounting acounting yrs admin cum acount asistant yrs etail asi stant months employment emplo yed curent latest acount asistant industry fcl management service curent last salary anum months mths expect salary remuneration package negotiable notice period month notice objectives widen venture outside world hard diligence sumary compesation acountancy academy ongoing cat certify acountancy technician institut delta lci ertificate completion lci diploma sekolah menengah kebangsan pangkor sijil pelajaran malaysia spm gce qualification pitman qualificatio pas acounting lci pas busines statistics fcl management service pte acount asistant ful acounts curent ful acounts acounts payables acount receivable maintain debtor age liaise mal personel lawyers chase long outstanding debts deposit payments tenant purchaser reconciliation transactions post suplier invoice payment promptly within credit term suplier vendor ensure proper documentation invoice delivery service order purchase order payments reconcile suplier vendor statement acounts advise supervisor discrepancies folow ction gst return pety cash claim manage fix asets month end acrual audit schedule invoice bilings reit non reit mals achievement long service academic profile knight frank estate management pte acounts asistant ful acounts mar ful acounts fix asets close journal entries reconciliation gst reconciliation sumary submision gst quarterly pety cash claim expense reimbursement cash cheque receipt yearly projection yearly audit liaise ofice stafs external auditor tax agent generate biling statement acount reminder folow demand leter overdue acounts verify income expe nses asisting manager require ataining cals enquiries residents ensure timely acurate month end end close maintain relationship clients achievement testimonial asistant acount manager robinson pte acounts asistant mar john litle outlets outlets sale sumary taly colection mode slip net credit card setlement slip voucher sales vouchers management folowing outlets sales cash colection cash discrepancies isue reconciliation credit card cash bok net exces shortages cash net liaise setlement isue ataining cals enquiries outlets reconcile sub ledger debit note promotion partn rental schedule submision turnover landlord journal entries post acounting system payrol journal entries achievement atendance cid think service think hours active gnc acounts asistant retail wholesale inventory control general function store sale sumary taly colection mode slip net credit card setlement slip vouchers folowing outlets sales cash colection cash discrepancies journal entries post acounting system monthl reconciliation receipt entries cash bok credit card debit note partner import mail order export sales invoice module acounting system extract retail sales staf purchase ata export text file import module acounting system ensure invoice enter module generate wholesale customer mustafa qaf brunei clear outlet colection module transfer acounts inter debit credit note interco confirmation reconciliation reminder overdue invoice initiate claim via smal claim court necesary tie age control acounts month end revaluation print cash bok colection ofice wholesale staf purchase timely statement acounts asist sales dept monitor due compute product promotion bsidy mail order sales extract inventory adjustment entries check match average cost alocate acounting entries check purchase sales inventory adjustment freze data acounting software check negati stock month end close adjustment module acounting software inventory adjustments enter physical count variance acounting software pety cash claim col ect staf purchase reconcile invoice upload sales management monitor transaction livewel card reconciliation mas expres pte admin cum acounts asistant manage operations remitances curency exchange rate cash management form sales acount payable function isuance cheque customer service customer file administration pertain remitance telephone operator inward outward ship documentation permit declaration delivery order quotation customer fol advice customer shipment singapo crocodile pte retail asistant customer service explanation customer wekly administrative acounts promote sales manage stock inventory atributes able motivate towards achieve organizational objectives able presure tight deadlines manage chalenges able cohesion directi active teamwork quick learner self motivate wiling learn able fast pace environment language speak english mandarin malay writen english chinese malay information technology microsoft word excel power point biztrack actech money changer system ubs myob wms pos system emas oracle sap qualificatio microsoft word excel power point ubs computeris acounting stock payrol microsoft ofice specialist ofice excel expert,"(heng, pei, lin, stacia, age, , birth, , gender, female, race, hinese, marital, , maried, , number, , taciaheng, , blk, bukit, purmei, singapo, nationality, , highest, degre, diploma, , acounting, , acounting, yrs, admin, cum, acount, asistant, yrs, etail, asi, stant, months, employment, , emplo, yed, curent, latest, , acount, asistant, , industry, , fcl, management, service, curent, last, salary, anum, months, mths, expect, salary, , remuneration, package, negotiable, notice, period, month, notice, , objectives, widen, , venture, outside, , world, hard, , diligence, , sumary, compesation, , acountancy, academy, ongoing, cat, certify, acountancy, technician, institut, delta, lci, ertificate, ...)"


In [19]:
# Delete any Entities 
for i in range(0, len(data)):
    ents = data.SPACY_TEXT[i]
    j = 0
    for ent in ents.ents:
        pattern = r'\b' + ent.text + r'\b'
        new_text = re.sub(pattern,' ', data.TEXT[j])
        data.TEXT[j] = new_text
        j += 1

In [20]:
data.head(2)

Unnamed: 0,TEXT,SPACY_TEXT
0,ris retna objective obtain exposure chalenging oportunity establish organization require tea player self motivate enthusiastic able continuous positive contribution organization degre comerce wolongong finance atained distinctions acounting organisations principles comerce investment analysis finance significant simulation socialy inovative enterprise analyse cause efects busines decisions graduate distinction diploma busines information technology polytechnics apr apr achiev overal cumulativ final create social media platform java program langua busines service centre student club polytechnics bartley secondary apointed vice president ibrary club bartley secondary organise events outdor indor xperience truston management pte fund acountant asist reconciliation fund fund adminis trator client nvestor service prepar fund transfer instructions upload updat hedge fund database comunicat clients busines party resolve open ide ntify exceptions problems fecting acounting recor price comunicating management asisting resolution trade setlement broker calculate net value clients fund eview data iregularities fun activity price securities acordance aplicable valuation policy require payment fund expense aplicable system acord ance organization control cordinate resources ensure caried sla meet basi cash projection ensure suficient fund wekly newsleter investors come main contact person finance operations citco fund service operation analyst trade cash reconciliation compile nav pack client monitor analyse price resolve discrepancies reconciliation items break betwen citco broker clients comunicated client broker cals resolve open junior hire partic ipated pre onth end counter otc master agrements confirmations liais departments within citco hoc data conectivity system contact person fun reconciliations analyse profit fund nav pack suport conversion clients exist migrations suported busines units peak periods ibm sal internship reduce number non respond oportunities caled aproached every client solve problems help solve query data entries require coleague compet encies computers databases aplications paladyne mysql ace windows axi investor aexeo word powerpoint excel ace speak writen excelent bahasa excelent bahasa melayu excele teochew excelent mandarin basic nationality atributes notice period strong analytical leader player interpersonal omunication decision problem solvin presure systematic highly orient,"(retnawati, pasir, ris, , retna, , objective, obtain, exposure, chalenging, , oportunity, , establish, organization, require, tea, player, self, motivate, enthusiastic, able, , continuous, positive, contribution, organization, , degre, comerce, , wolongong, australia, , finance, atained, , distinctions, acounting, organisations, principles, , comerce, investment, analysis, , finance, significant, , simulation, socialy, inovative, enterprise, analyse, cause, efects, busines, decisions, graduate, distinction, august, diploma, busines, information, technology, nanyang, polytechnics, , apr, apr, achiev, overal, cumulativ, gpa, final, , create, social, media, platform, , java, program, langua, , busines, service, centre, , student, club, nanyang, polytechnics, bartley, secondary, , apointed, vice, president, ibrary, ...)"
1,curiculam vitae hirok instrumentation enginering mail borahirokjyoti objective loking prestigious previous capabilities put eficient pose efective organiz ational ability supervise develop final output father prodip borah permanent aders vil bamun pukhuri pachauni gaon bamun pukhuri pin dist jorhat state asam birt religion hindu nationalities marit unmaried sex male language know asamese hindi curent employe techno inc surface client oil designation daq perator swt equipments operate presure monitor temperature monitor writen field read shet field analysis colecting liquid sample data header analysis analysis salinity analysis reflect meter liquid flow rate dep stick api analysis instrumentation conection rig swt equipments rig swt equipments load load inventory swt equipments spare maintain progres previous employe dreser rand pvt client oil natural gas corporation nazira asam operation maintenance gas compresor designation instrumentation technician period calibration instrument like presure gaug presure transmiter presure switch pneumatic controler main tain repair logic control system gas compresor check stop type pump acording case emergency conection field instru ments haz ardous area control panel maintain progres aprenticeship aprenticeship oil corporation qualification examination grade pasing diploma instrumentation scte hrh prince wales enginering technology hslc seba bamun pukhuri hslc science ahsec strength optimistic energetic build leadership enjoy responsibility like acept chalenges declaration declare statements aplication true corect jorhat regard hirok,"(curiculam, vitae, hirok, jyoti, borah, instrumentation, enginering, mail, borahirokjyoti, , objective, loking, , prestigious, , previous, , capabilities, put, eficient, , pose, efective, organiz, ational, , ability, supervise, develop, final, output, , father, , prodip, borah, permanent, aders, vil, bamun, pukhuri, pachauni, gaon, bamun, pukhuri, pin, dist, jorhat, state, asam, , birt, religion, hindu, nationalities, , marit, , unmaried, sex, male, language, know, asamese, english, hindi, , curent, employe, techno, canada, inc, , surface, , client, oil, , designation, daq, perator, , swt, equipments, operate, presure, monitor, temperature, monitor, writen, field, read, shet, field, analysis, colecting, liquid, sample, data, header, ...)"


In [21]:
vectorizer = CountVectorizer(max_features=20000,
                             stop_words='english', token_pattern="\\b[a-z][a-z]+\\b",
                             binary=True)

doc_word = vectorizer.fit_transform(data.TEXT)
words = list(np.asarray(vectorizer.get_feature_names()))



In [22]:
topic_model = ct.Corex(n_hidden=6, words=words, seed=1)
topic_model.fit(doc_word, words=words, docs=data.TEXT)

<corextopic.corextopic.Corex at 0x7fe7317465b0>

In [23]:
# Print all topics from the CorEx topic model
topics = topic_model.get_topics()
for n,topic in enumerate(topics):
    topic_words,_,_ = zip(*topic)
    print('{}: '.format(n) + ','.join(topic_words))


0: celebrities,remix,cloutieremix,cienega,cloutier,angeles,vogue,magazine,avenue,editorial
1: busines,financial,fund,finance,investment,management,acounting,market,audit,mandarin
2: enginer,equipment,instalation,electrical,enginering,comisioning,safety,power,systems,quality
3: roduct,apostolic,mutena,leadsrus,masterfile,roxas,turkish,mandaluyong,jaleco,aque
4: society,patients,medicine,profesor,publications,felowship,children,sciences,hospital,conf
5: setup,primary,newly,online,potential,custom,acurately,product,creation,promote


In [24]:
tfidf = TfidfVectorizer(stop_words='english')
doc_words = tfidf.fit_transform(data.TEXT)
tfidf_df = pd.DataFrame(doc_words.toarray(),columns=tfidf.get_feature_names())

In [25]:
topic_nums = list(np.arange(3, 11, 1))
for num in topic_nums:
    nmf = NMF(num)
    nmf.fit_transform(doc_words)
    display_topics(nmf, tfidf.get_feature_names(), 100)
    print("------------------------------------------")




Topic  0
fund, audit, financial, management, investment, busines, acounting, service, finance, market, clients, client, trade, analysis, equity, tax, acounts, risk, ensure, compliance, research, manage, cash, manager, valuation, sales, control, acountant, credit, statements, chinese, industry, performance, asociate, pte, hedge, analyst, liaise, deal, asisted, ofice, acount, asist, securities, administration, operations, develop, data, monitor, estate, suport, real, regulatory, charter, diligence, reconciliation, asistant, managers, budget, intern, mandarin, external, maintain, fluent, stock, advisory, investments, firm, blomberg, identify, development, english, price, comite, end, economics, structure, excel, australia, microsoft, timely, cfa, malaysia, return, proficient, exchange, public, ing, investors, statutory, products, asurance, transactions, broker, quarterly, month, legal, acros, acountants, secretarial

Topic  1
philipines, mandaluyong, terminals, request, client, city, bic




Topic  0
audit, financial, acounting, fund, acounts, management, tax, service, control, ensure, compliance, acountant, statements, clients, busines, finance, pte, secretarial, statutory, cash, asistant, malaysia, liaise, client, manager, manage, asist, acountants, acount, chinese, asurance, duties, budget, ofice, risk, charter, file, ful, reconciliation, month, maters, engagements, microsoft, external, certify, asociate, administration, quarterly, regulatory, acountancy, general, cpa, end, asisted, aca, engagement, auditors, charge, standards, australia, function, maintain, public, english, timely, return, entities, salary, monitor, consolidation, word, statement, staf, proficient, invoice, excel, payment, ernst, mas, suport, acordance, curent, executive, ing, performance, secretary, payrol, young, mandarin, administrative, powerpoint, language, hoc, expense, overseas, local, fluent, members, operations, able

Topic  1
philipines, mandaluyong, terminals, request, client, city, jaleco,




Topic  0
audit, financial, acounting, fund, acounts, management, tax, service, control, ensure, compliance, acountant, statements, busines, clients, finance, pte, secretarial, statutory, asistant, malaysia, cash, liaise, client, manager, acountants, manage, asist, acount, chinese, asurance, duties, budget, risk, ofice, charter, file, ful, month, reconciliation, engagements, maters, microsoft, certify, asociate, external, acountancy, quarterly, administration, regulatory, general, cpa, engagement, aca, asisted, end, standards, charge, australia, auditors, public, english, maintain, function, return, salary, entities, timely, consolidation, word, statement, monitor, staf, invoice, proficient, excel, ernst, payment, mas, executive, curent, acordance, secretary, ing, payrol, young, suport, mandarin, performance, powerpoint, language, administrative, overseas, expense, hoc, local, members, fluent, able, paper

Topic  1
philipines, mandaluyong, terminals, request, client, city, bicutan, apo




Topic  0
audit, financial, acounting, tax, management, acounts, control, busines, service, compliance, finance, statutory, malaysia, statements, secretarial, asurance, acountant, pte, charter, budget, engagements, clients, ensure, asistant, chinese, risk, acountants, asociate, engagement, acountancy, certify, manager, manage, cpa, ful, consolidation, standards, public, aca, asisted, microsoft, charge, ernst, liaise, australia, entities, general, acount, duties, ofice, english, young, quarterly, regulatory, estate, maters, file, return, month, asist, salary, word, deloite, proficient, kpmg, comite, statement, local, nanyang, cash, external, real, mas, paper, firm, overseas, secretary, sap, client, industry, analysis, industries, payrol, powerpoint, excel, identify, forecast, function, taxation, ing, staf, curent, draft, property, auditor, language, fluent, mandarin, ifrs, executive

Topic  1
philipines, mandaluyong, terminals, request, client, city, bicutan, pasay, apostolic, jaleco, p




Topic  0
audit, financial, acounting, tax, control, management, asurance, acounts, statements, engagements, finance, compliance, acountant, risk, service, engagement, asociate, malaysia, busines, acountants, certify, budget, charter, acountancy, ernst, standards, cpa, consolidation, aca, young, estate, deloite, charge, australia, real, kpmg, entities, analysis, clients, ensure, asisted, statutory, manager, nanyang, chinese, mas, industries, regulatory, public, industry, ifrs, microsoft, firm, quarterly, pte, identify, auditor, pricewaterhousecopers, gap, manufacture, statement, property, local, client, manage, intern, fund, touche, proficient, investment, pwc, fluent, forecast, paper, overseas, sap, english, asistant, frs, taxation, analytical, members, consolidate, acordance, cost, junior, efectivenes, partner, performance, sgx, comite, word, return, mandarin, ful, proceses, powerpoint, advance, club, excel

Topic  1
philipines, mandaluyong, terminals, request, client, city, apostoli




Topic  0
audit, financial, acounting, tax, control, management, asurance, acounts, statements, finance, engagements, compliance, acountant, risk, service, engagement, asociate, busines, malaysia, acountants, certify, budget, charter, acountancy, ernst, cpa, standards, aca, consolidation, young, estate, deloite, charge, australia, real, kpmg, asisted, analysis, entities, clients, ensure, manager, statutory, chinese, nanyang, mas, industries, regulatory, public, microsoft, industry, ifrs, firm, pte, quarterly, identify, auditor, pricewaterhousecopers, statement, gap, property, intern, local, fund, manage, client, proficient, touche, manufacture, investment, fluent, pwc, forecast, asistant, overseas, paper, english, sap, taxation, frs, analytical, members, consolidate, acordance, comite, junior, cost, efectivenes, partner, word, performance, mandarin, sgx, return, ful, powerpoint, proceses, advance, student, club

Topic  1
philipines, mandaluyong, terminals, request, client, city, jaleco




Topic  0
audit, financial, acounting, tax, control, management, asurance, acounts, statements, finance, engagements, compliance, acountant, risk, service, engagement, busines, asociate, malaysia, acountants, certify, budget, charter, acountancy, ernst, cpa, standards, aca, consolidation, young, estate, deloite, charge, australia, real, kpmg, asisted, analysis, entities, ensure, clients, manager, statutory, chinese, nanyang, industries, mas, regulatory, public, microsoft, industry, ifrs, firm, pte, quarterly, identify, auditor, pricewaterhousecopers, statement, gap, property, intern, client, local, manage, fund, proficient, manufacture, touche, investment, fluent, pwc, forecast, asistant, overseas, sap, paper, english, taxation, frs, analytical, members, consolidate, acordance, cost, comite, junior, efectivenes, partner, performance, word, mandarin, sgx, return, ful, powerpoint, proceses, advance, student, club

Topic  1
philipines, mandaluyong, terminals, request, client, city, aposto



> As we can see in the previous result some topics contains domain specific stop words so, we will remove some of them. 

### Fisrt Itration of Removing Domain Specific Word

In [26]:
stop_words = ['format', 'want', 'contact', 'adres', 'relevant', 'avoid', 'aplying', 
              'demonstrate', 'examples', 'employer', 'refer', 'reference', 'phone', 
              'start', 'colege', 'posible', 'sure', 'important', 'use', 'number', 'verbs', 
              'style', 'cover', 'include', 'action', 'clas', 'type', 'leter', 'kep', 
              'separate', 'font', 'draft', 'person', 'aply', 'acomplishments', 'thre', 'speak', 
              'way', 'read', 'ned', 'degre', 'employers', 'volunter', 'begin', 'past',
              'language', 'aditional', 'submit', 'guide', 'candidate',
              'clases', 'highlight', 'make', 'copy', 'oportunity',
              'think', 'aid', 'title', 'lok', 'location', 'organize', 'serve', 'print', 
              'supervise', 'atention', 'contribute', 'expect', 'seking', 'chronological',
              'ask', 'reverse', 'compile', 'white', 'explain', 'coursework',  'easy',
              'note', 'honors', 'brief', 'emphasize', 'erors', 'short', 'construct', 'writen', 
              'author', 'place', 'abroad', 'consider', 'gain', 'choose', 'advise', 
              'know', 'speling', 'internship', 'recent','asociation', 'research', 
              'curiculum', 'program', 'public', 'council', 
              'publications', 'comunity',  'awards',  
              'science', 'practice', 'asociate', 
              'presentations', 'academy', 'evaluation',  'social', 'chair',  'foundation', 
              'studies', 'activities',  'asesment', 'special', 
              'workshop', 'study', 'grant', 'human', 'work', 'world', 
              'felow', 'year', 'honors', 
              'undergraduate', 'life',  'chapter', 
              'seminar', 'non', 'private', 'policy', 'cordinator', 'curent',   
              'future', 'clas',  'conference', 
              'united', 'county', 'home', 'family', 'pres', 'symposium', 'forum', 'john',
              'isues', 'bok', 'case', 'series', 'scholarship', 'history', 
              'graduate', 'editor', 'outstanding', 'suport', 'learning', 'regional',
              'administrative',  'young', 'second', 'thre', 'court', 'initiative'
              'available', 'request', 'personal', 'english', 'references', 'template',
              'example', 'loking', 'info', 'question', 'read', 'qualifications', 'welcome', 
              'prior', 'websites',   'permision', 'download', 'distribute',
              'link', 'dayjob', 'copyright', 'able', 'ccopyright', 'areas', 'duties', 'key', 
              'expertise',  'competencies', 'hons', 'coventry',  'sumary', 
              'details',  'highly', 'nationality',  'jobsekers', 'profesional',
              'driving', 'birmingham', 'license', 'british', 'environment', 'central',
              'work', 'yes', 'suitable', 'nuneaton',  
              'senior', 'working', 'history',  
              'deliver',  'ambitious',  'having',
              'creative', 'providing', 'atention', 'people', 'coleagues', 'carer', 'drive',
              'extensive', 'particular', 'road', 'pose', 'contact', 'sekers', 'run',
              'dob', 'right', 'individual', 'prove', 
              'confident', 'diploma', 'asisting', 'motivate', 'big' ,'interpreted', 
              'arbitrated', 'shaped', 'invented', 'delegated', 'explained', 'surveyed', 
              'wrote', 'inspected',  'checked', 'calculated', 'compared', 
              'stimulated', 'repaired', 'weight', 'fabricated', 'scheduled', 'specialized', 
              'programed', 'contracted', 'adresed', 'educated', 'advertised', 'atained', 
              'modified', 'reinforced', 'examined', 'synthesized', 'acomplished', 'estimated',
              'criteria', 'solved', 'operated', 'directed', 'asembled',
              'influenced' , 'executed', 'persuaded', 'verified', 
              'adapted', 'computed', 'determined', 'fod', 'john', 'aranged', 'entertained', 'established', 
              'instituted', 'revised', 'separate', 'fel', 'furnished', 'originated', 'body',
              'status', 'conceived', 'aplicants', 'published', 'thank', 'unified', 'facilitated', 
              'guidelines', 'transmited', 'familiarized', 'say', 'wish', 'demonstrated', 
              'hired', 'discovered', 'indicate', 'instaled', 'anticipated', 'streamlined', 
              'projected', 'formulated', 'age', 'researched', 'underline', 'cover', 'asesed',
              'gathered', 'employer', 'proposed', 'sure', 'adreses', 'qualified', 'expanded',
              'detected', 'pionered', 'right', 'fashioned', 'clarified', 'indian', 'hindi', 'maried', 'male']

In [27]:
df_after_2 = remove_domain_stop_words(stop_words = stop_words, df_edit = data)

In [28]:
df_after_2

Unnamed: 0,TEXT,SPACY_TEXT
0,ris retna objective obtain exposure chalenging establish organization require tea player self enthusiastic continuous positive contribution organization comerce wolongong finance distinctions acounting organisations principles comerce investment analysis finance significant simulation socialy inovative enterprise analyse cause efects busines decisions distinction busines information technology polytechnics apr apr achiev overal cumulativ final create media platform java langua busines service centre student club polytechnics bartley secondary apointed vice president ibrary club bartley secondary organise events outdor indor xperience truston management pte fund acountant asist reconciliation fund fund adminis trator client nvestor service prepar fund transfer instructions upload updat hedge fund database comunicat clients busines party resolve open ide ntify exceptions problems fecting acounting recor price comunicating management resolution trade setlement broker calculate net value clients fund eview data iregularities fun activity price securities acordance aplicable valuation require payment fund expense aplicable system acord ance organization control cordinate resources ensure caried sla meet basi cash projection ensure suficient fund wekly newsleter investors come main finance operations citco fund service operation analyst trade cash reconciliation nav pack client monitor analyse price resolve discrepancies reconciliation items break betwen citco broker clients comunicated client broker cals resolve open junior hire partic ipated pre onth end counter otc master agrements confirmations liais departments within citco hoc data conectivity system fun reconciliations analyse profit fund nav pack conversion clients exist migrations suported busines units peak periods ibm sal reduce respond oportunities caled aproached every client solve problems help solve query data entries require coleague compet encies computers databases aplications paladyne mysql ace windows axi investor aexeo word powerpoint excel ace excelent bahasa excelent bahasa melayu excele teochew excelent mandarin basic atributes notice period strong analytical leader player interpersonal omunication decision problem solvin presure systematic orient,"(retnawati, pasir, ris, , retna, , objective, obtain, exposure, chalenging, , oportunity, , establish, organization, require, tea, player, self, motivate, enthusiastic, able, , continuous, positive, contribution, organization, , degre, comerce, , wolongong, australia, , finance, atained, , distinctions, acounting, organisations, principles, , comerce, investment, analysis, , finance, significant, , simulation, socialy, inovative, enterprise, analyse, cause, efects, busines, decisions, graduate, distinction, august, diploma, busines, information, technology, nanyang, polytechnics, , apr, apr, achiev, overal, cumulativ, gpa, final, , create, social, media, platform, , java, program, langua, , busines, service, centre, , student, club, nanyang, polytechnics, bartley, secondary, , apointed, vice, president, ibrary, ...)"
1,curiculam vitae hirok instrumentation enginering mail borahirokjyoti objective prestigious previous capabilities put eficient efective organiz ational ability develop final output father prodip borah permanent aders vil bamun pukhuri pachauni gaon bamun pukhuri pin dist jorhat state asam birt religion hindu nationalities marit unmaried sex asamese employe techno inc surface client oil designation daq perator swt equipments operate presure monitor temperature monitor field shet field analysis colecting liquid sample data header analysis analysis salinity analysis reflect meter liquid flow rate dep stick api analysis instrumentation conection rig swt equipments rig swt equipments load load inventory swt equipments spare maintain progres previous employe dreser rand pvt client oil natural gas corporation nazira asam operation maintenance gas compresor designation instrumentation technician period calibration instrument like presure gaug presure transmiter presure switch pneumatic controler main tain repair logic control system gas compresor check stop pump acording emergency conection field instru ments haz ardous area control panel maintain progres aprenticeship aprenticeship oil corporation qualification examination grade pasing instrumentation scte hrh prince wales enginering technology hslc seba bamun pukhuri hslc ahsec strength optimistic energetic build leadership enjoy responsibility like acept chalenges declaration declare statements aplication true corect jorhat regard hirok,"(curiculam, vitae, hirok, jyoti, borah, instrumentation, enginering, mail, borahirokjyoti, , objective, loking, , prestigious, , previous, , capabilities, put, eficient, , pose, efective, organiz, ational, , ability, supervise, develop, final, output, , father, , prodip, borah, permanent, aders, vil, bamun, pukhuri, pachauni, gaon, bamun, pukhuri, pin, dist, jorhat, state, asam, , birt, religion, hindu, nationalities, , marit, , unmaried, sex, male, language, know, asamese, english, hindi, , curent, employe, techno, canada, inc, , surface, , client, oil, , designation, daq, perator, , swt, equipments, operate, presure, monitor, temperature, monitor, writen, field, read, shet, field, analysis, colecting, liquid, sample, data, header, ...)"
2,graduation george brown enginerin rice houston statistics psychology graduation george brown enginering rice houston tec hnical python sql sas unix script excel ace pig latin tableau mapreduce data mine quantitative financial analytics multivariate analysis financial stochastic risk management regr esion statistical compute sas certify programer sas large scale web graph procesing aws sumer hadop pig amazon analyze dataset web graph data bilion vertices compute histograms graph show stribut ion webpage linkages conectivity web graph web paterns page rank algorithm measure webp twets sentiment analysis movie recomendation sumer derive sentiment score real twets evaluate popular movies twets python twiter stream api evaluate similarity betwen movies calculate corelation sentiment score produ ced recomendations similar movies stock performance earn visualization tableau sumer python analysis stock earn performances ipo technology pharmaceutical ene rgy entertainment financial industries create interactive dashboa post ipo analysis industries file amount tableau direct comunication interpretation result ice heat oil futures ana lysis analysis excel trend seasonality forward curve heatin oil futures propose holistic evaluations phenomenon observe predict general trend heat oil futures evaluati market suporting market customer risk prediction nationwide insurance sumer busines analytics consultant market strategy initiate statistical mod eling combine psycholo gical theories sas customer risk tolerance facilitate efective segmentation target customers acquire proficiency data retrieval teradata database sql ensure concurency coherence qual ity analysis depth understand database stru cture insurance products methodology result executives create sesions methods tols promote ovative data analysis aproaches kagle data mine competition fal titanic survival prediction achieve acuracy identify survive pasengers prediction system sing feature scale manipulation gausian kernel svm algorithm integr ated close inspection specific trend data overules general algorithm computer vision construc ted clasification system ensemble kern svm logistic regresion random forest algorithms clasify image categories achieve suces rate huricane damage rice frequency huricane ocurence damage result mix poison regresion methods predictions basis risk management windstorm insurance,"( , gpa, graduation, , george, brown, , enginerin, rice, , houston, texas, degre, statistics, psychology, gpa, graduation, , george, brown, , enginering, rice, , houston, texas, , qualifications, tec, hnical, , python, sql, sas, unix, script, excel, ace, pig, latin, tableau, mapreduce, coursework, , science, data, mine, quantitative, financial, analytics, multivariate, analysis, financial, , series, stochastic, , risk, management, regr, esion, statistical, compute, qualifications, sas, certify, programer, sas, , large, scale, web, graph, procesing, aws, sumer, , hadop, pig, amazon, analyze, dataset, web, graph, data, bilion, vertices, compute, degre, histograms, graph, show, stribut, ion, webpage, linkages, demonstrate, conectivity, web, graph, ...)"
3,cienega avenue angeles cloutie remix pati makeup artist cloutier remix patiramseybortoli celebrities aron fresh adam carola adrian brody amy adams antonio sabato apolo ohno bily blank bily ray cyrus blythe daner bob newhart bodhi elfman brian setzer oke burke bruce dern charlote cheryl burke chet adkins chris noth cody linley corey feldman daryl hanah denis miler derek luke dony osmond draw draw lachey dylan mcdermot edyta sliwinska eliot gould emit smith erin andrews franscico rod riquez gary dourdan george wendt hary hamlim holy madison ian ziering ice cube jack osbourne jery spinger joe pantoliano joey fatone karina smirnof kate goslin kely preston kely osbourne kery fisher kristi yamaguchi kym johnson lanc bas leza gibons mat blanc mathew mconaughey hamer nastasja kinski nick canon nicole egert nigel barker omar sharif peter frampton regina king robert wagner ron livingston suzane sumers tery chin tisha campbel waren sap man zac osen television dance sta season abc makeup celebrity circus ful season abc makeup dance war bruno carie ful season abc nitelife nicolodeon makeup bob newhart lifetime makeup film face value michael miler makeup determination dea michael miler makeup mother testimony julian chojnacki makeup riperman phil sear makeup tough deadly david katz makeup leprechaun rodman flender makeup pol scot bagley makeup zipers clown palace dan golden makeup aint nothin without shop art david katz makeup cheyene warior mark grifith makeup blade warior katz asistant secrets lake suces frank fisher makeup intern cienega avenue angeles cloutie remix pati makeup artist comercials calaway golf dany levinson makeup odysey golf dany levinson makeup payles shoe dany levinson makeup wels fargo dany levinson makeup hamburger helper dany levinson makeup chicken helper dany levinson makeup tuna helper dany levinson makeup music vidoes jaguar ice cube grag makeup ice frogy frog grag makeup empire sportswear catalog fit pregnancy glamour headshots muscle catalog kaiser permanente confidential natural health ocean parent readers digest robinson show beauty suit oscars golden globes emys sag amas emy tanding makeup dance star emy nomination makeup dance star,"(cienega, avenue, los, angeles, california, cloutie, remix, , pati, ramsey, bortoli, makeup, artist, cloutier, remix, , patiramseybortoli, celebrities, aron, fresh, adam, carola, adrian, brody, amy, adams, antonio, sabato, apolo, ohno, bily, blank, bily, ray, cyrus, blythe, daner, bob, newhart, bodhi, elfman, brian, setzer, oke, burke, bruce, dern, charlote, martin, cheryl, burke, chet, adkins, chris, noth, cody, linley, corey, feldman, daryl, hanah, denis, miler, derek, luke, dony, osmond, draw, draw, lachey, dylan, mcdermot, edyta, sliwinska, eliot, gould, emit, smith, erin, andrews, franscico, rod, riquez, gary, dourdan, george, wendt, hary, hamlim, holy, madison, ian, ziering, ice, cube, jack, osbourne, jery, spinger, joe, ...)"
4,page ahendra penwal barechina almora mahendra singh penwal goal orient land survey operations management survey draw operations civil construction management schedule survey documentation technical data mainte nance flair adopt modern methodologies systems compliance quality standards ancho ring survey infrastructure activ ities distinguish abilities understand scope technical discusio selection equipment ensure compliance prescribe clients statutory regu lations deftnes schedule monitor respect cost resource deployment quality compliance ensure timely execution asignments skiled design implement stringent systems quality manuals ensur quality standards stag adept inspect alyzing propose ensure caried specific standards implement hse ensure meet quality standards specifications materials technical submitals methodology rect construction eficient teach long implementation standards skiled development apropriate methods along improvement exist ones distinguish abilities maintain iso standards sound latest sur vey instrument total station auto along techniques onshore oil gas exploration dgps operations excavation blast methods rock deft colecting survey data highway survey onsite const ruction layout survey traverse survey crew supervision check gradient ogl colection alignment mark quantity check colection efective comunicator ability view larger picture skiled take busines critical decisions enact improvements align culture organization strategy vision mision objectives since gvk power infrastructures telangana surveyor apr shiv vani oil gas exploration service surveyor enginers surveyor aprentice abm enginering surveyor gurgaon traine surveyo enginering operations skil profile management survey construction operations page overal operations execute construction within cost norms organise metings evaluate progres botlenecking anchor construction ensure completion within cost parameters efective resource utilization maximize output survey structural draw operations maintain sketch map legal des criptions survey describe certify asume liability data chart plot map survey kind survey build con struction infrastructure knowledgeable autocad ofice software land survey operations data shape contour gravitation elevation dimension land land feature near earth surface enginering mapmaking mine land construction purpose direct survey establish legal boundaries properties legal deds write descriptions property boundary survey deds lease legal compute geodetic measurements interpret survey data determine shape elevat ions geom orphic topographic feature result survey shape contour elevation dime nsions land land feature monitor overal operations execute construction within cost norms metings evaluate progr botlenecking oversee progres entail progres enginering procurement construction comisioning schedule joint survey client consult ant order achieve target carying buil survey total station dgps system transfer shi fting cordinates bench mark carying section section permanent structure come route line intain sketch map legal descriptions survey order describe certify asume liability academic civil enginering karnataka state open mysore iti land survey govt iti skil autocad auto loter gogle earth glober maper arc gi ofice word powerpoint leica survey ofice geomax survey ofice seismic birth please anexure execute anexure gvk power infrastructures traverse traverse adjustment bench mark shift topographical survey prop osed power colecting ogl data border alignment revenue survey dam indo border bro section section quantity calculation biling page supervision wapcos proje hep goriganga river supervision excavation survey data entry curve fix ordinate establish bench mark traverse data dam calculatio survey conversant dgps system total station auto survey equipment survey verify cal pro file topographical survey cisa airport mumbai modification layout leve ling survey runway structure dgps survey traverse shiv vani oil gas exploration service seismic data acquisition survey oil tank lump area seismic data acquisition survey shiv vani ongc jorhat basin khowai kalyanpur block adjoin bangladesh border west tripura enginers topographical survey propose power generation reliance power generation dadari railway line survey propose pasenger cariage speed track khurja railway station mathura railway station metro station layout survey isbt utam nagar dmrc pier layout underground tunel layout underground utility survey dmrc rou survey dmrc direction dilshad garden noida sector length cng pipe line route survey dhoulakuan azadpur physical survey propose multi stories build plot rohini sector aprox acre fly lay survey molchand raja garden south extn ring lay dlf shop complex mal multi stories build gurgao lay dlf shop complex mal ulti stories build gurgao survey ntpc dadri pow mark bore hole propose draw contour survey bsf shoting range campus indore survey dmrc isbt utam nagar isbt rohi ier layout survey raja arden utam agar dmrc water pipe line route survey rutland island man nicobar island wapcos andaman harbor ahw transmision line route survey chandigarh panchkula aprox transmision line route survey nhpc contra basis tawaghat pithoragarh aprox utarakhand abm enginering alignment survey cpwd border division indo pak border guj rat sector dlf sewerage gurgaon rvey widen jalandhar bhogpur stretch punjab rites survey stel asansol west bengal isco rites aprox area acre topographical survey prop osed mihan multi hub airport nagpur nagpur maharashtra state consultancy rambol aprox area acre topographical survey propose airport ship park frequency radar establish aval seabird karwar karnataka state area acre mahendra singh penwal,"(page, ahendra, penwal, , barechina, almora, mahendra, singh, penwal, goal, orient, , land, survey, operations, management, survey, draw, operations, civil, construction, , management, schedule, , survey, , documentation, technical, data, mainte, nance, flair, adopt, modern, methodologies, systems, compliance, quality, standards, ancho, ring, , survey, infrastructure, activ, ities, distinguish, abilities, understand, , scope, , technical, discusio, advise, selection, equipment, ensure, compliance, prescribe, , clients, statutory, regu, lations, deftnes, , schedule, monitor, , respect, cost, resource, deployment, , run, quality, compliance, ensure, timely, execution, asignments, skiled, design, implement, stringent, systems, quality, , manuals, ensur, , quality, standards, stag, , adept, inspect, alyzing, ...)"
...,...,...
1529,tan xing rui mark markraytan deloite touche general audit intern audit fieldwork client sit stock take statutory audit require fieldwork asisted control mind map flow ensure apropriate control cast financial statements unite overseas uob trade operations centre troc import trade admin asistant contract procesed aplications clean invoice finance retirement acounts receipt due liaise external customers aplication setlement instructions dbs vickers securities equity setlements intern asisted setlement trade acounts contra acounts sort mail acount statements caried hoc melbourne comerce double acounting finance polytechnic apr mar financial service financial trade certifications computer competent microsoft word powerpoint excel fluent mandarin advance leadership workforce development agency wda september curicular mindef comisioned oficer arm force saf october capacity platon comander batal ion artilery asisted supervision sea game fun pack pack distribution polytechnic lifesaving society spls apr lifesaving competitions events certify lifeguard lifesaving society bronze medalion certify ability multitask manage adaptive situations interpersonal comunication,"(tan, xing, rui, mark, markraytan, , deloite, touche, , general, audit, intern, , audit, fieldwork, , client, sit, , stock, take, statutory, audit, require, fieldwork, asisted, , control, mind, map, , flow, ensure, apropriate, control, , cast, financial, statements, unite, overseas, , uob, , trade, operations, centre, troc, import, trade, admin, asistant, contract, procesed, aplications, clean, invoice, finance, retirement, acounts, , receipt, due, liaise, , external, customers, aplication, setlement, instructions, dbs, vickers, securities, , equity, setlements, , intern, asisted, setlement, trade, acounts, contra, acounts, sort, mail, acount, statements, caried, , hoc, duties, , administrative, suport, , melbourne, , comerce, double, ...)"
1530,tih investment management investment analyst tih close end equity fund exchange strategic partner parent hedge fund argyle stret management asm tih ben invest alongside asm situation deal acros tih asm manage asets exces bilion fund milion vanila loan indonesian conglomerate sucesfuly close within wek execution negotiation term investment proposal extensively legal documentation fund milion structure loan indonesian conglomerate face term liquidity squeze investment proposal investment return scenarios extensively legal documentation analysis publicly trade bond result signi cant within manage fund analysis close end fund sucesfuly result activist investment generate aproximately absolute return month period investment memo due diligence ntech startup thailand sed nancing round fund venture fund manage tih streamline back save man hours provident partner jakarta indonesia analyst provident investment management busines southeast analyst ben aspects source structure nancing optimise monitor investments culture entrepreneurial strong network particularly indonesia general finance investment analysis rst cut analysis often nancial sucinct investment memos range busineses acros industries invest telecomunications infrastructure technology metal mine unconventional oil gas biomas trade restructure growth startup operations malaysia indonesia deploy digital advertise technologies facilitate infusion substantial money investors create inventory management cash provident biomas busines encapsulate sales forecast inventory ows cash movements enable uper management beter alocate sales ciently manage valuation unconventional oil gas focus production eagle ford shale basin usa specialist determine typical curve compare management estimate arps decline formula sensitivity analysis mezanine nancing return feasibility grade indonesian coper asx cash waterfal mechanisms enable discovery range tolerable payment kind pik cash margin identify cash swep amount repayment horizon valuation potential mvna virtual network agregator busines aim expand operations south east structure actively engage parent executives input busines development primary secondary payment landscape indonesia principals strategic directions acquire debt market finance transactions closely management investment lawyers two bond isuances achieve highest subscribers indonesian tower bersama infrastructure tbk tbig click linkedin recomendations page junguang tan cfa junguang tbig inaugural bond milion regs paper isued par tbig bond milion regs paper isued par part tbig execute nancing debt programe facility agrements amount bilion syndication hedge counter party tbig market capitalisation aproximately bilion foreign curency risk management analyse investe rate hedge strategy recomended aproach swap option structure meet hedge objectives achieve target cost monitor price diferent hedge instrument worth bilion notional value result competitive bid save treasury milions dolars apic technologies founder apic pioner wireles queue management system restaurants ticktok ticktok invaluable operational tol restaurants manage large crowd efectively without compromise service quality service sit milion diners end recently expand spearhead busines development achieve kpis ful vest spring fund tranches acquire close merchant partnerships pitch ticktok numerous venture fund strategic investors advance subscription agrement ticktok acquire chope late undisclosed seven gure consideration deply aspects exit negotiation valuation acounting legal general asembly data part mas michigan coursera python ace web data python data structure charter financial analyst cfa pased cfa exams atempt management magna cum laude dean double degres acountancy busines management finance chian smu agship ful merit pro ciencies microsoft suite blomberg service python capabilities native pro ciency pro ciency mandarin ing hanban busines mandarin advance acreditation conversational bahasa indonesia click linkedin recomendations page,"( , tih, investment, management, , investment, analyst, tih, close, end, , equity, fund, , exchange, strategic, partner, parent, hedge, fund, argyle, stret, management, , asm, tih, ben, invest, alongside, asm, , special, situation, deal, acros, , tih, asm, manage, asets, exces, bilion, , fund, milion, vanila, loan, indonesian, conglomerate, sucesfuly, close, within, wek, execution, , negotiation, term, , investment, proposal, extensively, , legal, documentation, , fund, milion, structure, loan, indonesian, conglomerate, face, short, term, liquidity, squeze, , investment, proposal, , investment, return, , scenarios, extensively, , legal, documentation, , research, analysis, publicly, trade, bond, result, signi, ca, nt, , within, ...)"
1531,wamala simon peter clif mail theglobeug hotmail clif val yahomail mob profile enthusiastic enjoy part sucesful productive quick grasp ideas concepts developi inovative solutions problems initiative motivation require meet tightest deadlines objective build technical within suitably chalenging role ken achieve development makerere iversity ordinary telecomunications enginering uganda comunication technology nakawa radio television electronics technology prt nakawa vocational computer hardware network repair maintenances nakawa vocational uganda advance uganda cleaner production reneweable energy low cost productions uganda cleaner production unido demonstrable competence ind ependently ability wilingnes folow protocol enginering ethics span control staf apraisals depth analysis fault diagnose machine demonstrable competence industrial health safety environmental concepts fire fight computer literate comunication skil maintenance supervisor sadolin paint uganda plot stret industrial area box kampala uganda management overse repair factory overse maintenance factory outsource part asign maintenance dairy wekly maintenance overse machine operation overse health safety factory supervisor fire uganda plot faraday bugolobi box kampala uganda overse ordinate contractors source materials health safety instalations electrical efluent treatment supervisor phenix logistics uganda supervisor efluent treatment electrical section asist mulation implementation production execute instalations service maintenance repair machine function machine operate quality control package products enforce safety control measure ensure operational safety machine equipment safety staf initiate production improvement aproval head generator electrical generaly mechanical units pre operational inspection generator system adjust protective relay circuit breakers meet system requirement necesary require form necesary paper timely maner require spond user inquire courteous maner information area asignment resolve complaints eficient timely maner estimate materials equipment require asigned requisition materials require operate facility emergencies acordance establish necesary power management electrical electronics repair maintenance transformers switch gear boiler operation trouble shoting boiler maintenance repair compresor maintenance repair meter power distribution basic mechanics basic weld water treatment take keping safety precautions protection hardwar technician technologies repair maintenance computers trouble shoting computer instalations network repair software instalations coen oberholster enginering manager kansaiplascon buyungo charles manager phenix logistics semakula joseph manager core network uganda telecom,"(wamala, simon, peter, clif, mail, theglobeug, hotmail, , clif, val, yahomail, , mob, profile, enthusiastic, , enjoy, part, , sucesful, productive, , quick, grasp, , ideas, concepts, developi, inovative, creative, solutions, problems, able, , initiative, demonstrate, , motivation, require, meet, tightest, deadlines, objective, loking, build, technical, , within, suitably, chalenging, role, ken, achieve, , development, , qualifications, makerere, iversity, ordinary, diploma, telecomunications, enginering, uganda, , comunication, technology, nakawa, , radio, television, electronics, technology, prt, nakawa, vocational, , computer, hardware, network, repair, maintenances, nakawa, vocational, , uganda, advance, , uganda, , cleaner, production, reneweable, energy, low, cost, productions, uganda, cleaner, production, ...)"
1532,rice iversity houst texas master statistics cumulative gpa pected coursew ork advance statistical method financial data analysis machine learn gre quan titati verbal linois rbana champaign urban linois bachel ience tural resource environ mental ciences gpa aplied statistical methods natural resource economics gi natural resource management sit math write verbal experien rice houston fal quantitative financial analytics analysis financial statements available sec file devise compare selection strategies fundamental analysis historical data wrds strategy outperform benchmark percent cagr rice iversity houst sumer asistant alyzed pulation census data sas excel poison gresio alyses evalence rate hepatitis respe hnicity liver cancer inciden rate hypothesis heavy metal polution data gather houston ship chanel find city houston environmental mini stry environmen satel lite vironment center beijin sumer gi data analy utilize arcgis excel analyze geographical data create map grind census data satelite image data nature reserve maintain natural nature reserve database information improve acuracy eficiency sql excel asisted trans lation state nat ions cosystems chinese contain word champaign sumer campus mbasador overs campa ign result ver upload hero udy latform donation boks africa hero movement leverage viral market media facilitate significant increase active user trafic initiate establish partnership campus organization profesors earn hero hon rol mon october performan highes oks donate ils act ivities technic arcgis blomberg jmp matlab python sas sql fluent mandarin glish swimin cokin video game poker,"( , rice, iversity, houst, texas, master, science, statistics, cumulative, gpa, pected, coursew, ork, advance, statistical, method, financial, , series, data, analysis, machine, learn, gre, quan, titati, verbal, , linois, rbana, champaign, urban, linois, , bachel, ience, tural, resource, environ, mental, ciences, , gpa, coursework, aplied, statistical, methods, natural, resource, economics, gi, natural, resource, management, sit, math, write, verbal, experien, , rice, , houston, fal, quantitative, financial, analytics, , extensive, research, analysis, financial, statements, available, sec, file, devise, compare, , selection, strategies, fundamental, analysis, , historical, data, wrds, , strategy, outperform, benchmark, percent, , cagr, rice, iversity, houst, sumer, research, asistant, ...)"


In [29]:
tfidf2 = TfidfVectorizer(stop_words='english')
doc_words2 = tfidf2.fit_transform(df_after_2.TEXT)
tfidf_df2 = pd.DataFrame(doc_words2.toarray(),columns=tfidf2.get_feature_names())



In [30]:
topic_nums = list(np.arange(10, 20, 1))
for num in topic_nums:
    nmf = NMF(num)
    nmf.fit_transform(doc_words2)
    display_topics(nmf, tfidf2.get_feature_names(), 100)
    print("------------------------------------------")




Topic  0
investment, market, busines, financial, analysis, management, finance, equity, sales, analyst, valuation, deal, credit, industry, diligence, risk, clients, develop, build, trade, client, structure, stock, real, cfa, advisory, sector, potential, acros, strategy, development, institutional, products, intern, economics, estate, product, consumer, debt, target, manage, acquisition, pitch, blomberg, milion, ipo, loan, analyse, analyze, transaction, coverage, sel, investments, usd, securities, strategic, identify, exchange, manager, focus, data, gpa, oportunities, strategies, buy, investors, transactions, dcf, relationship, revenue, raise, retail, performance, service, sectors, asisted, create, select, beijing, rat, wealth, chinese, cros, comite, trend, shanghai, nus, relationships, fluent, firm, mandarin, generate, price, achieve, ideas, student, competition, share, term, bond

Topic  1
philipines, mandaluyong, terminals, client, city, pasay, apostolic, jaleco, bicutan, pasword, f




Topic  0
investment, market, busines, financial, analysis, management, finance, equity, sales, analyst, valuation, deal, credit, industry, diligence, risk, clients, build, develop, structure, real, client, stock, trade, cfa, sector, advisory, potential, strategy, acros, institutional, development, intern, products, estate, economics, product, consumer, debt, target, acquisition, pitch, blomberg, ipo, milion, manage, coverage, analyse, sel, transaction, analyze, loan, usd, investments, strategic, securities, identify, exchange, focus, oportunities, gpa, buy, manager, strategies, dcf, data, investors, transactions, relationship, raise, retail, revenue, sectors, performance, asisted, select, create, rat, wealth, service, chinese, comite, trend, beijing, cros, shanghai, nus, ideas, relationships, mandarin, fluent, generate, share, firm, achieve, student, competition, bond, term, comercial

Topic  1
philipines, mandaluyong, terminals, client, city, jaleco, apostolic, pasay, bicutan, paswor




Topic  0
investment, market, busines, financial, analysis, management, finance, equity, sales, analyst, valuation, deal, credit, industry, diligence, risk, clients, build, develop, structure, client, real, stock, trade, cfa, sector, advisory, potential, strategy, acros, institutional, development, intern, products, estate, economics, product, consumer, debt, acquisition, target, pitch, blomberg, ipo, milion, manage, coverage, analyse, sel, transaction, analyze, loan, usd, investments, strategic, securities, identify, exchange, focus, oportunities, gpa, buy, manager, strategies, dcf, data, investors, transactions, relationship, retail, raise, revenue, sectors, performance, asisted, select, create, rat, wealth, service, chinese, trend, beijing, comite, cros, shanghai, nus, ideas, relationships, mandarin, fluent, generate, share, firm, competition, achieve, bond, student, term, comercial

Topic  1
philipines, mandaluyong, terminals, client, city, pasay, bicutan, jaleco, apostolic, paswor




Topic  0
investment, market, busines, financial, analysis, management, finance, equity, sales, analyst, valuation, deal, credit, industry, diligence, risk, clients, build, develop, structure, client, real, stock, trade, cfa, sector, advisory, potential, strategy, acros, institutional, products, development, intern, estate, economics, product, consumer, debt, acquisition, target, pitch, blomberg, ipo, milion, manage, coverage, analyse, sel, transaction, loan, analyze, usd, investments, strategic, securities, identify, exchange, focus, oportunities, gpa, buy, manager, strategies, dcf, investors, data, transactions, relationship, revenue, retail, raise, sectors, performance, select, asisted, rat, create, wealth, service, chinese, trend, beijing, cros, comite, shanghai, nus, ideas, relationships, mandarin, generate, fluent, share, firm, achieve, competition, bond, term, student, comercial

Topic  1
philipines, mandaluyong, terminals, client, city, apostolic, bicutan, pasay, jaleco, paswor




Topic  0
investment, market, busines, financial, analysis, finance, management, equity, sales, analyst, valuation, deal, credit, industry, diligence, risk, clients, build, develop, client, structure, real, stock, trade, cfa, sector, advisory, potential, institutional, acros, strategy, products, intern, estate, economics, development, product, consumer, debt, acquisition, target, pitch, blomberg, ipo, milion, manage, coverage, analyse, sel, transaction, loan, analyze, investments, usd, strategic, securities, exchange, identify, focus, oportunities, gpa, buy, manager, strategies, dcf, investors, data, relationship, transactions, retail, raise, revenue, sectors, performance, asisted, select, service, rat, wealth, create, chinese, beijing, trend, cros, comite, shanghai, nus, ideas, relationships, mandarin, fluent, generate, share, firm, bond, achieve, competition, term, student, price

Topic  1
philipines, mandaluyong, terminals, client, city, apostolic, bicutan, pasay, jaleco, pasword, f




Topic  0
investment, financial, analysis, equity, finance, valuation, market, analyst, busines, deal, management, industry, diligence, cfa, real, sector, stock, estate, economics, structure, consumer, ipo, build, acquisition, advisory, credit, debt, investments, intern, blomberg, analyse, dcf, sel, gpa, fund, transaction, pitch, coverage, buy, potential, analyze, sectors, chinese, milion, transactions, strategy, select, risk, shanghai, usd, focus, exchange, asisted, beijing, acros, student, rat, fluent, mandarin, develop, trend, property, raise, loan, bilion, securities, data, investor, lbo, investors, native, competition, ideas, evaluate, sumer, quantitative, analysts, charter, fundamental, nus, institutional, trade, strategic, presentation, comparable, legal, consult, comite, performance, forecast, target, materials, value, master, sale, earn, write, share, honor, tmt

Topic  1
philipines, mandaluyong, terminals, city, client, bicutan, jaleco, pasay, apostolic, pasword, february, cu




Topic  0
investment, financial, analysis, equity, finance, valuation, market, analyst, busines, deal, management, industry, diligence, cfa, real, sector, stock, estate, economics, structure, consumer, ipo, build, acquisition, advisory, credit, debt, investments, intern, blomberg, analyse, dcf, sel, gpa, fund, transaction, pitch, coverage, buy, potential, analyze, sectors, chinese, milion, transactions, strategy, select, risk, shanghai, usd, focus, exchange, asisted, beijing, acros, student, rat, fluent, mandarin, develop, trend, property, raise, loan, bilion, securities, investor, lbo, data, investors, native, competition, ideas, evaluate, sumer, quantitative, analysts, charter, fundamental, nus, institutional, trade, strategic, presentation, comparable, legal, consult, comite, performance, forecast, target, materials, value, master, sale, earn, write, share, honor, tmt

Topic  1
philipines, mandaluyong, terminals, city, client, jaleco, pasay, apostolic, bicutan, pasword, february, cu




Topic  0
investment, market, busines, financial, analysis, finance, equity, management, sales, analyst, valuation, deal, credit, industry, diligence, risk, clients, build, develop, real, stock, structure, cfa, trade, sector, advisory, potential, institutional, acros, strategy, intern, economics, estate, products, consumer, client, debt, development, product, acquisition, pitch, blomberg, target, ipo, milion, manage, sel, analyse, coverage, transaction, analyze, loan, investments, usd, strategic, securities, exchange, gpa, oportunities, buy, focus, identify, dcf, manager, strategies, investors, transactions, data, sectors, raise, revenue, retail, relationship, asisted, performance, select, rat, chinese, wealth, beijing, create, trend, comite, service, cros, shanghai, nus, mandarin, ideas, fluent, share, relationships, firm, generate, competition, bond, term, bilion, investor, legal

Topic  1
philipines, mandaluyong, terminals, client, city, jaleco, bicutan, pasay, apostolic, pasword, f




Topic  0
investment, financial, analysis, equity, finance, valuation, analyst, market, busines, deal, management, industry, diligence, cfa, real, sector, stock, estate, economics, structure, consumer, ipo, build, acquisition, advisory, credit, debt, investments, intern, blomberg, dcf, analyse, fund, sel, gpa, transaction, pitch, coverage, buy, potential, analyze, sectors, chinese, milion, transactions, strategy, select, risk, shanghai, focus, usd, asisted, exchange, beijing, acros, student, rat, fluent, mandarin, trend, develop, property, raise, loan, bilion, investor, data, lbo, securities, investors, native, ideas, competition, evaluate, sumer, quantitative, analysts, charter, fundamental, nus, comparable, institutional, strategic, consult, legal, performance, comite, trade, presentation, forecast, target, materials, value, master, sale, earn, write, share, tmt, honor

Topic  1
philipines, mandaluyong, terminals, city, client, jaleco, apostolic, bicutan, pasay, pasword, february, cu


Topic  0
investment, financial, analysis, equity, finance, valuation, analyst, market, busines, deal, industry, management, diligence, cfa, sector, real, stock, estate, consumer, economics, ipo, structure, credit, acquisition, build, advisory, debt, intern, dcf, blomberg, analyse, gpa, sel, investments, transaction, coverage, buy, pitch, analyze, potential, sectors, chinese, transactions, milion, risk, select, strategy, shanghai, exchange, focus, asisted, beijing, usd, student, rat, loan, acros, fluent, property, trend, lbo, data, mandarin, bilion, develop, securities, native, evaluate, ideas, sumer, raise, competition, investor, quantitative, analysts, fundamental, charter, comparable, forecast, presentation, nus, comite, strategic, consult, earn, legal, trade, investors, performance, materials, write, tmt, master, fund, value, sale, merger, target, share, honor

Topic  1
philipines, mandaluyong, terminals, city, jaleco, bicutan, apostolic, pasay, client, pasword, february, customers



### Second Itration of Removing Domain Specific Word

In [31]:
stop_words_2 = ['resume', 'list', 'use', 'position',
                'job', 'present', 'include', 'special',
                'carer', 'adres', 'relevant', 'sumer', 'leter', 'fal',
                'state', 'parent','volunter', 'cover', 'wil',
                'section', 'comunity', 'member', 'objective','intern', 
                'society', 'title', 'make', 'clas', 'center', 'sample', 'august',
                'employer', 'internship', 'monash', 'page', 'graduate', 'profesional', 'asisted', 'language',
                'campus', 'honors', 'help', 'city', 'public', 'word', 'asociation', 'year', 'bachelor',
                'staf', 'contact','format', 'events', 'chicago', 'specific', 'date', 'social', 'nebraska', 
                'point', 'neds','project', 'use','team', 'end', 'create','proces','document','role',
                'experience', 'case', 'requirement', 'description', 'report', 'worked', 'used',
                'object', 'like', 'lead', 'source', 'custom', 'provide', 'enterprise', 'tol', 'hibernate',
                'new',  'team', 'representative', 'store', 'teritory', 
                'year','brand', 'experience', 'train', 'key', 'city', 'goals', 'profesional', 'years',
                'generate', 'present', 'regional', 'anual', 'quota', 'executive', 'representatives', 'lead',
                'achieved', 'build', 'skils', 'client', 'rep', 'oportunities', 'achievements', 
                'york', 'latin', 'region', 'achieve', 'texas', 'entries', 'knowledge', 
                'asociate', 'direct', 'industry', 'profitability', 'position', 'promotion', 'plan', 'consistently',
                'distribution', 'national', 'university', 'online', 'leadership', 'advertise',
                'kansas', 'profit', 'major', 'global', 'responsible', 'internet', 'bachelor', 'international',
                'statements','prepared', 'prepare', 'corporate', 'ledger', 'capital', 'company',
                'reconciliation', 'skils', 'experience','receivable','flow', 
                'internal', 'ability', 'preparation', 'entry', 'principles', 'parkland', 'general', 'project',
                'proces', 'plan', 'new', 'anual', 'quarterly','resume', 'cityland', 'review',
                'chicago', 'team', 'performed', 'work', 'cpa',
                'profesional', 'chartered', 'level', 'bachelor', 'fitzwiliam', 'email', 
                'strong', 'main', 'qualifications', 'performance', 'record', 'stret', 'senior', 'university'
                'exprience', 'details', 'months', 'les', 'description', 'maharashtra', 'year', 'company', 'pune',
                'january', 'monthscompany', 'mumbai', 'pvt', 'nagpur','india', 'june', 'universityhrskil', 
                'work', 'diploma', 'working', 'hibernate', 'worked', 'nashik', 'god', 'telangana', 'skils', 
                'requirement', 'learn', 'django', 'cricket', 'phule', 'savitribai', 'knowledge',
                'board', 'skil', 'amravati', 'secondary', 'university', 'role', 'servlet', 
                'institute', 'duration', 'title', 'solution', 'pradesh',
                'days', 'like', 'angular', 'job', 'solapur',
                'dayjob', 'personal', 'birmingham', 'template', 'coventry', 'able', 'use', 'info', 
                'available', 'work', 'skils', 'download', 'ability', 'distribute',
                'com', 'ccopyright', 'fod', 'copyright', 'make', 'permision', 'welcome', 
                'question', 'loking','prior', 'read', 'example', 'hons', 'cvs', 'page', 'help', 'nuneaton', 'gary',
                'company', 'duties','references', 'pas', 'jobsekers',
                'areas', 'relate', 'key', 'request', 'resume', 'competencies', 'ambitious', 'email',
                'driving', 'yes', 'restaurant', 'vyse', 'sekers', 'suitable', 'expertise', 'level',
                'qualifications', 'team', 'understand', 'pose', 'god', 'experience', 'maner',
                'knowledge', 'license', 'particular', 'highly', 'create', 'confident', 'nationality',
                'write', 'responsible', 'profesional', 'having', 'peg', 'staf', 'wel', 'excite', 
                'project', 'providing','project', 'record', 'environment',
                'train', 'company', 'performance',  'plan', 'implement', 'include', 'staf', 
                'team',  'new', 'resource', 'profesional', 'conduct', 'reduce', 'procedures',
                'relations', 'training', 'executive', 'personel', 'xae', 'senior', 'job', 'result', 
                'candidates', 'isues','department', 'division', 'improve','change', 'identify', 
                'leadership', 'lead', 'provide', 'time', 'work', 'level', 'save',
                'project', 'work','plant','team', 'specifications', 'plan', 'environmental','experience',
                'company', 'knowledge', 'include', 'india','phase', 'client',
                'complete', 'preparation', 'society','wind','scope', 'sub', 'completion', 
                'excelent', 'forte', 'god', 'skils', 'poses', 'key', 'air', 'structure', 'prepare',
                'farm','multitasking', 'agriculture', 'qualification', 'til', 'know', 'horticulture',
                'fluent', 'knowledge', 'profesional', 'template', 'florida', 'latin',
                'date', 'proficient', 'presentations', 'pest', 'gadgets', 'significant',
                'report', 'degre', 'achievement', 'player', 'resume', 'english', 'languages',
                'include', 'behavior', 'interests', 'skiled','various', 'cary', 'adept', 'bachelor', 'com',
                'crop', 'wild', 'extensive', 'stret', 'gmail', 'amigo',
                'nursery', 'supervisor', 'update', 'experience', 'miami', 
                'exceptionaly', 'wel', 'joye', 'diploma', 'harvest', 'won', 
                'mower', 'objective', 'sonata', 'carer', 'thompson', 'study', 'gren', 
                'display', 'understand','new', 'skils', 'qualifications', 'strong', 'open',
                'teler', 'cal', 'procedures', 'ability', 'work', 'knowledge',
                'check', 'phone', 'experience', 'asistant', 'excelent', 'client', 
                'god', 'georgia', 'representative', 'telers', 'daily', 'various', 
                'objective', 'asisted', 'sumary', 'profesional', 'provide', 'request', 'jersey', 
                'parkland', 'personalize', 'cals', 'policies', 'reference', 'cityland',
                'efective', 'position', 'handle', 'center', 'provided', 'carer',
                'handled', 'seking', 'fod', 'com', 'acount', 'email',
                'high', 'orient', 'interpersonal', 'properties', 'order', 'furnish', 'train', 'handling',
                'adept','york', 'present', 'nevada' ,'experience', 'campaign', 'frelance', 'online', 'work',
                'clients', 'xad','include', 'gogle','new', 'mobile', 'create',
                'facebok', 'galery', 'layout', 'university','mac', 'model', 'concept', 'layouts',
                'concepts', 'present', 'monash', 'team', 'york', 'lead', 'created', 
                'california', 'corporate', 'twiter', 'company', 'usability',
                'ideas', 'sample', 'client', 'skils', 'direction','care', 
                'asociation', 'center', 'comunity', 'xad', 'practice', 'present', 
                'university', 'member', 'department', 'terminology', 'eagan', 'staf', 'schol'
                'american', 'provide', 'education', 'public', 'colege', 'new'
                'carolina', 'york', 'conference', 'board', 'group', 
                'state', 'ability', 'experience', 'knowledge', 'kovner', 'perform', 'cordinator', 
                'profesional', 'condition', 'train', 'family', 'home', 'biling', 'registered', 
                'record', 'general', 'seidman', 'work','anual', 'ray', ]

In [32]:
df_after_3 = remove_domain_stop_words(stop_words = stop_words_2, df_edit = df_after_2)

In [33]:
tfidf3 = TfidfVectorizer(stop_words='english')
doc_words3 = tfidf3.fit_transform(df_after_3.TEXT)
tfidf_df3 = pd.DataFrame(doc_words3.toarray(),columns=tfidf3.get_feature_names())
tfidf_df3

Unnamed: 0,abac,abacus,abadian,abaete,abakus,abandon,abandonment,abas,abasc,abascivil,...,zurer,zurich,zwar,zwart,zycosoil,zydus,zyh,zymetry,zyngchen,zyrtec
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1530,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1531,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1532,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
topic_nums = list(np.arange(10, 20, 1))
for num in topic_nums:
    nmf = NMF(num)
    nmf.fit_transform(doc_words3)
    display_topics(nmf, tfidf3.get_feature_names(), 100)
    print("------------------------------------------")




Topic  0
investment, market, busines, financial, analysis, management, finance, equity, sales, analyst, valuation, deal, credit, risk, diligence, develop, trade, stock, real, cfa, advisory, acros, products, sector, potential, strategy, development, institutional, product, economics, estate, consumer, debt, manage, target, acquisition, milion, pitch, blomberg, ipo, loan, analyze, analyse, transaction, coverage, investments, sel, usd, manager, securities, strategic, focus, exchange, data, gpa, strategies, transactions, relationship, buy, investors, dcf, revenue, service, raise, retail, sectors, rat, wealth, beijing, select, cros, chinese, trend, comite, shanghai, nus, mandarin, relationships, price, firm, term, share, comercial, student, competition, bond, legal, solutions, asets, bilion, investor, materials, native, consult, evaluate, increase, forecast, asian, master, club

Topic  1
philipines, mandaluyong, terminals, pasay, apostolic, bicutan, jaleco, pasword, february, customers, pr




Topic  0
investment, market, busines, financial, analysis, management, finance, equity, sales, analyst, valuation, deal, credit, diligence, risk, develop, real, trade, stock, cfa, advisory, sector, institutional, potential, strategy, products, acros, development, product, estate, economics, consumer, debt, acquisition, target, pitch, milion, blomberg, ipo, manage, coverage, sel, transaction, analyse, investments, loan, usd, analyze, securities, strategic, exchange, focus, manager, gpa, strategies, buy, dcf, investors, relationship, data, transactions, retail, raise, revenue, sectors, service, select, rat, wealth, chinese, cros, trend, comite, beijing, shanghai, nus, mandarin, relationships, share, firm, legal, competition, bond, student, comercial, term, price, materials, investor, bilion, asets, consult, solutions, evaluate, native, asian, analysts, club, increase, quantitative

Topic  1
philipines, mandaluyong, terminals, bicutan, jaleco, pasay, apostolic, pasword, february, custome




Topic  0
investment, market, busines, financial, analysis, management, finance, equity, sales, analyst, valuation, deal, credit, diligence, risk, develop, real, trade, stock, cfa, advisory, sector, institutional, potential, acros, strategy, products, estate, development, product, economics, consumer, debt, acquisition, target, pitch, milion, blomberg, ipo, manage, coverage, transaction, sel, analyse, investments, loan, usd, analyze, securities, strategic, focus, exchange, manager, gpa, strategies, buy, dcf, investors, relationship, transactions, data, retail, raise, revenue, sectors, service, select, rat, wealth, chinese, cros, trend, comite, beijing, shanghai, nus, mandarin, share, relationships, firm, legal, bond, comercial, competition, term, student, price, investor, materials, bilion, asets, consult, evaluate, native, solutions, asian, analysts, quantitative, presentation, club

Topic  1
philipines, mandaluyong, terminals, jaleco, apostolic, bicutan, pasay, pasword, february, cus




Topic  0
investment, market, busines, financial, analysis, management, finance, equity, sales, analyst, valuation, deal, credit, diligence, risk, develop, trade, real, stock, cfa, advisory, sector, institutional, potential, acros, strategy, products, product, estate, economics, development, consumer, debt, acquisition, target, pitch, milion, blomberg, ipo, manage, transaction, sel, coverage, analyse, loan, investments, usd, analyze, securities, strategic, exchange, focus, manager, gpa, strategies, buy, dcf, investors, relationship, transactions, data, retail, revenue, raise, sectors, service, rat, wealth, select, chinese, trend, comite, beijing, cros, shanghai, nus, mandarin, relationships, share, firm, legal, bond, term, competition, comercial, price, investor, bilion, materials, asets, consult, native, evaluate, student, asian, solutions, analysts, quantitative, sale, club

Topic  1
philipines, mandaluyong, terminals, bicutan, jaleco, pasay, apostolic, pasword, february, customers, 




Topic  0
sales, market, busines, management, product, trade, products, service, manage, customer, risk, development, relationship, credit, develop, manager, branch, ensure, institutional, relationships, revenue, customers, acros, target, exist, wealth, solutions, strategies, operations, increase, strategy, maintain, compliance, head, fix, cros, strategic, requirements, loan, potential, seling, retail, insurance, local, regulatory, thailand, execute, income, delivery, quality, securities, finance, worth, launch, data, operational, stakeholders, sucesfuly, network, term, kyc, managers, apac, information, proceses, dealers, deal, mar, meet, line, closely, departments, initiatives, monitor, grow, australia, comunication, growth, treasury, timely, documentation, president, establish, exchange, promote, derivatives, franchise, price, implementation, oficer, director, comercial, vice, leader, midle, usd, institutions, uae, asets, investors

Topic  1
philipines, mandaluyong, terminals, bicuta




Topic  0
sales, market, busines, management, product, trade, products, service, manage, risk, customer, relationship, credit, develop, development, manager, branch, ensure, institutional, relationships, revenue, customers, acros, target, exist, wealth, solutions, strategies, operations, increase, strategy, maintain, compliance, head, fix, cros, strategic, requirements, loan, seling, potential, retail, insurance, local, regulatory, thailand, income, execute, securities, delivery, finance, launch, worth, operational, data, stakeholders, network, quality, term, kyc, sucesfuly, managers, information, apac, proceses, deal, mar, meet, dealers, line, closely, departments, initiatives, grow, australia, monitor, treasury, comunication, growth, exchange, president, oficer, establish, derivatives, promote, timely, price, franchise, director, implementation, vice, documentation, midle, usd, control, comercial, institutions, leader, uae, asets

Topic  1
philipines, mandaluyong, terminals, apostoli




Topic  0
sales, market, busines, management, product, trade, products, service, manage, risk, customer, relationship, credit, develop, development, manager, branch, ensure, institutional, relationships, revenue, customers, acros, target, exist, wealth, solutions, strategies, operations, increase, strategy, maintain, compliance, head, fix, cros, strategic, loan, requirements, potential, seling, retail, insurance, local, regulatory, thailand, income, execute, securities, delivery, finance, launch, worth, operational, data, stakeholders, term, kyc, sucesfuly, managers, network, quality, apac, information, proceses, deal, mar, meet, dealers, line, closely, departments, initiatives, grow, australia, monitor, treasury, comunication, president, establish, derivatives, exchange, growth, promote, timely, price, franchise, oficer, director, implementation, vice, documentation, midle, control, usd, institutions, leader, comercial, uae, asets

Topic  1
philipines, mandaluyong, terminals, bicutan,




Topic  0
sales, market, busines, management, product, trade, products, service, manage, risk, customer, relationship, credit, develop, development, manager, branch, institutional, ensure, relationships, revenue, customers, acros, target, exist, wealth, strategies, solutions, operations, increase, strategy, maintain, compliance, head, fix, cros, strategic, loan, requirements, potential, seling, retail, local, regulatory, insurance, income, thailand, execute, securities, delivery, finance, launch, worth, operational, data, stakeholders, kyc, term, sucesfuly, network, quality, managers, apac, information, proceses, deal, mar, meet, dealers, closely, departments, line, initiatives, grow, australia, monitor, treasury, comunication, timely, exchange, president, establish, derivatives, promote, growth, price, oficer, franchise, director, implementation, vice, documentation, midle, control, usd, leader, institutions, comercial, asets, investors

Topic  1
philipines, mandaluyong, terminals, pa




Topic  0
sales, market, busines, management, product, trade, products, service, manage, risk, customer, relationship, credit, develop, development, manager, branch, institutional, ensure, relationships, revenue, customers, acros, target, exist, wealth, strategies, solutions, operations, increase, strategy, maintain, compliance, head, fix, cros, strategic, loan, requirements, potential, seling, retail, local, regulatory, insurance, thailand, income, execute, securities, delivery, finance, launch, worth, operational, data, stakeholders, kyc, term, sucesfuly, quality, network, apac, managers, information, proceses, deal, mar, meet, dealers, closely, departments, line, initiatives, grow, australia, treasury, monitor, comunication, timely, president, derivatives, exchange, promote, establish, price, oficer, franchise, growth, director, implementation, vice, documentation, control, midle, usd, leader, institutions, comercial, asets, investors

Topic  1
philipines, mandaluyong, terminals, bi


Topic  0
sales, market, busines, management, product, products, trade, customer, manage, service, risk, relationship, credit, manager, develop, development, branch, institutional, ensure, relationships, revenue, acros, target, customers, strategies, exist, wealth, solutions, increase, compliance, maintain, operations, strategy, head, fix, strategic, loan, requirements, cros, seling, potential, retail, regulatory, local, insurance, income, thailand, execute, worth, launch, securities, operational, delivery, quality, term, kyc, investment, apac, deal, network, stakeholders, dealers, sucesfuly, finance, managers, departments, initiatives, grow, line, closely, meet, treasury, growth, usd, franchise, monitor, timely, information, control, australia, proceses, implementation, investors, asets, comercial, establish, midle, uae, documentation, price, mar, institutions, prospect, comunication, derivatives, milion, focus, aml, president, bangkok

Topic  1
philipines, mandaluyong, terminals, pas



### Third Itration of Removing Domain Specific Word

In [35]:
stop_words_3 =['diligence', 'real', 'sector', 'institutional','potential',  'acros',
               'acquisition', 'target', 'pitch', 'blomberg', 'coverage','focus', 'gpa',
               'relationship', 'mandaluyong','relationships',
               'raise', 'sectors', 'select', 'ra', 'chinese', 'cros', 'beijing', 'shanghai', 'nus', 'mandarin',
               'term','evaluate', 'native', 'asian', 'club', 'increase', 'quantitative' ,'philipines',
               'bicutan', 'jaleco', 'pasay', 'apostolic', 'pasword', 'february',
               'newly','ofers', 'centre',
               'utilize', 'profile','edemed', 'makati', 'adamson', 'leadsrus', 'sugestive', 'askim', 
               'teletech', 'turkish', 'roxas','masterfile', 'mutena',
               'aque', 'acomodating', 'impresion', 'yah', 'destinations', 'roduct', 'tutorial', 'deira', 'kfc', 'enrol',
               'para', 'philipine', 'merchants', 'satisfy', 'preference', 'character', 'discusing', 'roy',
               'prevention','skype', 'reward', 'detect', 'productivity', 'setup', 'fit', 'discusion', 'acurately',
               'proposal','dubai', 'uae', 'admin', 'items', 'rate', 'listen', 'employ', 'block', 'necesary',
               'promote', 'availability', 'acurate', 'december', 'exist', 'rat', 'standard',
               'dubai','designation', 'supervision', 'saudi', 'progres', 'pip', 'uae', 'aproved','structural', 'cable',
               'kenya', 'chenai', 'flor', 'taif',  'quantity','ksa', 'authority', 'calculation',
               'pile','pasport', 'tamil', 'delhi', 'wals', 'wekly', 'load','arabia', 'require', 'ering', 'gi', 'clearance', 'standards',
               'workers', 'area', 'comisioning', 'selection','auxiliary', 'foundations', 'split', 'aproval', 'erection', 'snag'
               'malaysia', 'charter', 'ernst', 'estate', 'standards', 'regulatory', 'real', 'charge', 'statutory', 'nanyang',
               'mas', 'chinese','australia', 'pricewaterhousecopers', 'consolidation', 'entities',
               'gap', 'touche', 'local', 'property', 'sgx', 'members', 'overseas', 'paper', 'junior', 'partner',
               'mandarin', 'club', 'students', 'student', 'acordance', 'proceses',
               'lim', 'recomendations', 'pte', 'diferent', 'honour', 'programe',
               'equity', 'involvement', 'weakneses', 'statement', 'advance', 'udit', 'cycle', 'cantonese', 'tight', 'evaluate', 'deadlines', 'ntu'
               'angeles', 'cloutier', 'remix', 'jenifer', 'avenue', 'cienega', 'los', 'ele', 'david', 'michael',
               'oreal', 'kate', 'marie', 'jesica', 'cloutieremix', 'cali', 'fornia', 'claire', 'kely', 'lopez',
               'beyonce', 'harper', 'bazar', 'elizabeth','jones', 'michele', 
               'diane', 'rachel', 'mathew', 'stone', 'christina', 'sarah', 'vanity', 'paul', 'jam', 'pantene', 'chris',
               'fair', 'kim', 'robert', 'smith', 'taylor', 'vanesa', 'melisa', 'wiliams', 'marcus',
               'tom', 'eva', 'lisa', 'mary', 'laura', 'rolston', 'davis', 'jeans', 'lauren', 'elen', 'julia', 'girl', 'peter', 'ryan', 'jane',
               'mark', 'nicole', 'graham', 'jason', 'wilson', 'italy', 'patrick', 'tyler', 'ashley', 'groming', 'andrew', 'miler',
               'hunter', 'scot', 'secret', 'wekly', 'fox', 'videos', 'victoria', 'ane','hedge', 'nav', 'operations',
               'compliance',  'resolve', 'citco', 'net','fes', 'calculation',  'blomberg','prime','regulatory', 'timely', 'external', 
               'setlement', 'equities','futures', 'calculations', 'redemption', 'pte','derivatives', 'agrements', 'valuations',
               'polytechnic', 'apr', 'party', 'geneva', 'closely', 'custodians', 'hoc', 'break',  'bond', 'migration',
               'london','acurate','function', 'midle', 'options','act', 'requirements', 'honour', 'acordance', 'eficiency', 
               'alternative', 'basis', 'launch', 'mar','fix' ,'maters', 'minutes','resolutions', 'file', 'legal',
               'liaise','incorporation','leave', 'malaysia', 'register', 'ofshore', 'pte', 'governance', 'chinese', 'reason', 
               'share', 'regulatory','acra', 'ful', 'notice','requirements', 'hoc', 'ing', 'form', 'month', 'act', 'comites', 'external',
               'march', 'proper', 'kuala', 'lumpur', 'return', 'cordinate', 'atend', 'viewpoint', 'function', 'agenda',
               'party', 'aranging', 'wpm', 'registration', 'spectrum', 'availability', 'range','bhd','sdn', 'travel', 'hkics',
               'require', 'tunku', 'keping','malaysian', 'cantonese',
               'authority', 'counsel', 'agrements', 'overseas', 'islands', 'wong', 'sin', 'lci','regulations', 'renewal','chenai',
               'declaration', 'declare', 'true', 'birth', 'hobies', 'marital',  'singh', 'jaipur',
               'tamil', 'play', 'pasing', 'pasport', 'hard', 'delhi', 'kumar', 'gender',
               'self', 'single', 'mentionvidyalaya','listen','dist', 'higher', 'percentage', 'engine', 'kerala', 'institution', 'belief',
               'cbse', 'kendriya','divyank', 'nagar', 'mark', 'mail',  'pin', 'corect', 'odisha', 'lucknow', 'sex', 
               'chalenging', 'strength', 'vitae', 'production', 'positive', 'cgpa', 'abilities', 
               'utilize', 'profile', 'permanent', 'operate', 'atitude', 'father', 'mob', 
               'extra', 'basic', 'topic', 'active', 'problem', 'auto', 'noida', 'curicular', 'quick' 
               'erection', 'relay', 'circuit', 'grid','equipments', 'transformers','earth', 'switch',
               'troubleshoting', 'wire', 'light', 'instrument', 'scada', 'engine','comisioningpanel', 'equipment',
               'saudi', 'thermal', 'mysore', 'breakers', 'line', 'stel', 'instrumentation', 'material',
               'schedule', 'field', 'breaker', 'inverter', 'gujarat', 'maintain', 'diesel',
               'water', 'execution', 'profile', 'capacity', 'batery', 'pcb', 'calibration', 'qatar',
               'generation', 'tech', 'termination', 'tamilnadu', 'operate', 'bms', 'lab', 'generator',
               'starters', 'sri', 'lay', 'inverters', 'suply','takahashi','indiana', 'pensylvania', 'sameshima', 'american',
               'delhi', 'sport','joint', 'paulo',  'bacal', 'ahmedabad', 'atended','ortho', 'ankit', 'hand', 'vadodara',
               'vitae', 'bangalore','september', 'nitrini', 'carameli', 'goacon', 'gujarat', 'kenya',
               'chenai', 'paper', 'usa', 'total', 'march', 'cadaveric', 'mckirgan', 
               'post', 'ioacon', 'iowa', 'comite', 'broklyn', 'poster',
               'posterior', 'november', 'october', 'unite', 'february', 'centre','princeton',
               'ganga', 'mangalore', 'maryland', 'presentation', 'brazil', 'tamilnadu', 'orthop',
               'kentucky', 'philipsburg','month','pte','ful', 'timely', 'payrol', 'file', 'gst', 'schedule', 'forecast', 'liaise',
               'return', 'close','statement', 'function', 'submision', 'external', 'basis','period', 'operations', 'reconcile', 'apr',
               'treasury', 'erp','branch', 'wekly', 'task','consolidation', 'hoc', 'query', 'myob', 'yearly', 'ing', 'cheque', 'consolidate',
               'polytechnic','claim', 'journal', 'subsidiaries', 'require', 'fix', 'verify', 'supliers', 'mar', 'kenya', 'receive', 'count',
               'powerpoint'] 

In [36]:
df_after_4 = remove_domain_stop_words(stop_words = stop_words_3, df_edit = df_after_3)

In [37]:
tfidf4 = TfidfVectorizer(stop_words='english')
doc_words4 = tfidf4.fit_transform(df_after_4.TEXT)
tfidf_df4 = pd.DataFrame(doc_words4.toarray(),columns = tfidf4.get_feature_names())
tfidf_df4

Unnamed: 0,abac,abacus,abadian,abaete,abakus,abandon,abandonment,abas,abasc,abascivil,...,zurer,zurich,zwar,zwart,zycosoil,zydus,zyh,zymetry,zyngchen,zyrtec
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1529,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1530,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1531,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1532,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [38]:
topic_nums = list(np.arange(7, 15, 1))
for num in topic_nums:
    nmf = NMF(num)
    nmf.fit_transform(doc_words4)
    display_topics(nmf, tfidf4.get_feature_names(), 100)
    print("------------------------------------------")




Topic  0
market, investment, busines, financial, analysis, management, finance, sales, analyst, credit, deal, valuation, risk, trade, develop, products, development, stock, strategy, advisory, cfa, economics, product, manage, consumer, debt, loan, analyze, milion, manager, transaction, analyse, ipo, usd, strategic, data, exchange, securities, strategies, sel, investments, revenue, retail, transactions, buy, investors, dcf, wealth, service, trend, solutions, price, comercial, budget, firm, asets, competition, materials, consult, master, insurance, bilion, president, head, track, execute, investor, growth, sucesfuly, analysts, income, value, thailand, customer, sale, information, cost, multiple, pacific, cordinated, economic, media, establish, depth, mba, rank, director, energy, worth, indonesia, fundamental, network, database, taiwan, honor, formulate, lbo, japan, vice, managers

Topic  1
terminals, customers, product, hotels, visa, card, hotel, rom, credit, service, analyst, fraudulen




Topic  0
market, investment, busines, financial, analysis, management, finance, sales, analyst, credit, deal, valuation, risk, trade, develop, products, stock, development, strategy, advisory, cfa, economics, product, manage, consumer, debt, loan, analyze, milion, manager, transaction, analyse, ipo, usd, data, strategic, exchange, securities, strategies, sel, investments, revenue, retail, transactions, buy, investors, dcf, wealth, service, trend, solutions, price, comercial, budget, firm, asets, competition, materials, consult, master, insurance, bilion, president, head, track, execute, investor, sucesfuly, growth, analysts, income, value, thailand, sale, customer, information, multiple, cost, pacific, cordinated, economic, media, establish, depth, energy, mba, rank, director, worth, indonesia, fundamental, network, database, taiwan, formulate, honor, lbo, japan, vice, managers

Topic  1
terminals, customers, product, hotels, visa, card, hotel, rom, credit, service, analyst, fraudulen




Topic  0
market, investment, busines, financial, analysis, management, sales, finance, analyst, deal, valuation, credit, risk, develop, trade, products, development, stock, strategy, advisory, cfa, product, economics, consumer, debt, manage, milion, loan, analyze, ipo, manager, usd, transaction, analyse, exchange, sel, strategic, securities, strategies, investments, retail, data, revenue, buy, dcf, investors, wealth, transactions, trend, service, solutions, firm, comercial, price, competition, materials, asets, consult, bilion, insurance, master, president, investor, head, analysts, growth, execute, track, thailand, sale, sucesfuly, value, media, pacific, multiple, energy, depth, cordinated, rank, income, worth, fundamental, establish, information, indonesia, taiwan, mba, lbo, economic, formulate, network, director, managers, japan, vice, budget, honor, customer, comparable, database

Topic  1
terminals, customers, product, hotels, visa, card, hotel, rom, credit, service, analyst, fra




Topic  0
investment, financial, analysis, finance, valuation, analyst, market, busines, deal, management, cfa, credit, stock, economics, advisory, ipo, consumer, debt, risk, investments, transaction, analyse, dcf, sel, analyze, buy, transactions, milion, loan, strategy, exchange, usd, trend, securities, data, bilion, develop, lbo, investor, analysts, energy, fundamental, investors, trade, consult, comparable, materials, strategic, competition, master, earn, value, tmt, depth, formulate, sale, multiple, pacific, price, merger, firm, media, budget, rank, retail, economic, propose, healthcare, asets, honor, comercial, restructure, fund, rmb, invest, excel, track, cordinated, industries, infrastructure, statistics, development, largest, precedent, portfolios, products, indonesia, ofering, vba, stake, japan, yield, game, mba, strategies, ebitda, asean, chalenge, long, president

Topic  1
terminals, customers, hotels, product, visa, card, hotel, rom, credit, analyst, service, fraudulent, ca




Topic  0
investment, financial, analysis, finance, valuation, analyst, market, busines, deal, management, cfa, credit, stock, economics, advisory, ipo, consumer, debt, risk, investments, transaction, analyse, dcf, sel, analyze, buy, transactions, milion, loan, strategy, exchange, usd, trend, securities, data, bilion, develop, lbo, investor, analysts, energy, fundamental, investors, consult, trade, comparable, materials, strategic, competition, earn, master, value, tmt, depth, formulate, sale, multiple, pacific, price, merger, firm, media, budget, rank, retail, economic, honor, healthcare, asets, fund, restructure, propose, comercial, rmb, invest, excel, track, cordinated, industries, infrastructure, statistics, largest, precedent, development, portfolios, products, indonesia, stake, vba, ofering, yield, game, japan, mba, ebitda, strategies, chalenge, asean, long, president

Topic  1
terminals, customers, hotels, product, visa, card, hotel, rom, credit, analyst, service, fraudulent, ca




Topic  0
investment, financial, analysis, finance, valuation, analyst, market, busines, deal, management, cfa, credit, stock, economics, advisory, ipo, consumer, debt, risk, investments, transaction, analyse, dcf, sel, analyze, buy, transactions, milion, loan, strategy, exchange, usd, trend, securities, data, bilion, develop, lbo, investor, analysts, energy, fundamental, investors, trade, consult, comparable, materials, strategic, competition, earn, master, value, tmt, depth, formulate, sale, multiple, pacific, price, merger, firm, media, budget, retail, rank, economic, honor, asets, healthcare, fund, restructure, propose, comercial, rmb, invest, excel, track, cordinated, industries, infrastructure, statistics, largest, development, precedent, portfolios, products, indonesia, stake, ofering, vba, yield, japan, game, mba, ebitda, strategies, asean, chalenge, long, president

Topic  1
terminals, customers, hotels, product, visa, card, hotel, rom, credit, analyst, service, fraudulent, ca




Topic  0
investment, financial, analysis, finance, valuation, analyst, market, busines, deal, management, cfa, credit, stock, economics, advisory, ipo, consumer, debt, risk, investments, transaction, analyse, dcf, sel, analyze, buy, transactions, milion, loan, strategy, exchange, usd, trend, securities, data, bilion, develop, lbo, investor, analysts, fundamental, investors, trade, consult, comparable, materials, strategic, competition, earn, master, value, tmt, formulate, depth, multiple, pacific, sale, price, merger, firm, media, budget, retail, rank, economic, energy, fund, asets, honor, healthcare, restructure, propose, rmb, comercial, invest, excel, track, cordinated, industries, infrastructure, statistics, precedent, largest, development, portfolios, products, indonesia, ofering, stake, vba, japan, yield, game, ebitda, strategies, chalenge, asean, mba, oil, president

Topic  1
terminals, customers, hotels, product, visa, card, hotel, rom, credit, analyst, service, fraudulent, cas


Topic  0
investment, financial, analysis, finance, valuation, analyst, market, busines, deal, management, cfa, credit, stock, economics, advisory, ipo, consumer, debt, risk, investments, transaction, analyse, dcf, sel, analyze, buy, transactions, milion, loan, strategy, exchange, usd, trend, securities, data, bilion, develop, lbo, investor, analysts, fundamental, consult, investors, comparable, materials, trade, strategic, competition, earn, master, value, tmt, formulate, depth, multiple, pacific, sale, price, merger, media, firm, economic, rank, fund, energy, budget, retail, propose, healthcare, honor, restructure, rmb, asets, invest, comercial, excel, track, cordinated, industries, precedent, infrastructure, largest, statistics, development, portfolios, stake, vba, indonesia, products, ebitda, ofering, yield, game, japan, mba, asean, oil, leverage, chalenge, strategies

Topic  1
terminals, customers, hotels, product, visa, card, hotel, rom, credit, analyst, service, fraudulent, cash



In [39]:
nmf = NMF(11)
doc_topic_nmf = nmf.fit_transform(doc_words4)
display_topics(nmf, tfidf4.get_feature_names(), 100)




Topic  0
investment, financial, analysis, finance, valuation, analyst, market, busines, deal, management, cfa, credit, stock, economics, advisory, ipo, consumer, debt, risk, investments, transaction, analyse, dcf, sel, analyze, buy, transactions, milion, loan, strategy, exchange, usd, trend, securities, data, bilion, develop, lbo, investor, analysts, energy, fundamental, investors, consult, trade, comparable, materials, strategic, competition, earn, master, value, tmt, depth, formulate, sale, multiple, pacific, price, merger, firm, media, budget, rank, retail, economic, honor, healthcare, asets, fund, restructure, propose, comercial, rmb, invest, excel, track, cordinated, industries, infrastructure, statistics, largest, precedent, development, portfolios, products, indonesia, stake, vba, ofering, yield, game, japan, mba, ebitda, strategies, chalenge, asean, long, president

Topic  1
terminals, customers, hotels, product, visa, card, hotel, rom, credit, analyst, service, fraudulent, ca



In [40]:
df_after_4.shape

(1534, 2)

### Data Labeling

In [41]:
topics_of_resume = pd.DataFrame(doc_topic_nmf, columns = ['Finance', 'Hospitality' , 
                                                          'Electrical_and_Mechanical_Engineering' ,
                                                          'Accounting' , 'Investment' ,
                                                          'Beauty_Artist', 
                                                          'Sales_and_Marketing' , 'Others' , 
                                                          'Secretarial' ,'Accounting' , 
                                                          'Health'])
topics_of_resume['Topic_Name'] = topics_of_resume.idxmax(axis=1)
topics_of_resume

Unnamed: 0,Finance,Hospitality,Electrical_and_Mechanical_Engineering,Accounting,Investment,Beauty_Artist,Sales_and_Marketing,Others,Secretarial,Accounting.1,Health,Topic_Name
0,0.009687,0.000000,0.011401,0.000000,0.175887,0.000000,0.028869,0.004299,0.000000,0.040244,0.000351,Investment
1,0.015300,0.000000,0.041701,0.000000,0.000000,0.001571,0.000000,0.013237,0.000000,0.009503,0.001021,Electrical_and_Mechanical_Engineering
2,0.056844,0.007926,0.011682,0.001453,0.000000,0.007448,0.020149,0.012745,0.000000,0.002218,0.013549,Finance
3,0.000000,0.000000,0.000000,0.000000,0.000000,0.154658,0.000000,0.006791,0.000000,0.000000,0.000000,Beauty_Artist
4,0.007469,0.001939,0.000000,0.000000,0.000000,0.001411,0.002370,0.137347,0.005156,0.003749,0.009514,Others
...,...,...,...,...,...,...,...,...,...,...,...,...
1529,0.000000,0.003313,0.000000,0.071415,0.009937,0.000000,0.013525,0.000538,0.000000,0.087486,0.000000,Accounting
1530,0.094719,0.000038,0.001573,0.000000,0.079198,0.001339,0.040604,0.001415,0.000000,0.000000,0.000634,Finance
1531,0.000000,0.001271,0.122962,0.001864,0.002761,0.003197,0.009317,0.021323,0.005336,0.002365,0.003010,Electrical_and_Mechanical_Engineering
1532,0.042409,0.008345,0.010665,0.000000,0.000000,0.004946,0.001534,0.005337,0.001020,0.012079,0.009350,Finance


### Store the Labeld Dataset into Pickle

In [42]:
write_pickle('Resumes_PDF.pickle', topics_of_resume)