In [42]:
import joblib
import gensim 
from gensim.parsing.preprocessing import remove_stopwords
import pickle
import pandas as pd

class Title_Classifier:
    """Predict job function, job level and (for IT functions) job role from titles.

    The constructor loads three fitted vectorizers and three fitted classifiers
    from a pickle file; ``classify`` applies them to the ``Job Title`` column of
    a CSV file.
    """

    def __init__(self, model_path='dtm_models.pkl'):
        """Load the pickled vectorizers and classifiers.

        Args:
            model_path: Path to a pickle file containing a 6-item sequence, in
                this order: function vectorizer, IT-role vectorizer, level
                vectorizer, function classifier, role classifier, level
                classifier. Defaults to ``'dtm_models.pkl'`` in the current
                working directory.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If the pickled object does not unpack into six items.
        """
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only point model_path at files from a trusted source.
        with open(model_path, 'rb') as f:
            (
                self.dtm,
                self.dtm_it,
                self.dtm_icp,
                self.dtm_function_classifier,
                self.dtm_role_classifier,
                self.dtm_level_classifier,
            ) = pickle.load(f)

    def classify(self, path, encoding='latin1'):
        """Read a CSV of job titles and return it with prediction columns added.

        Args:
            path: Path of the CSV file; it must contain a ``Job Title`` column.
            encoding: Text encoding passed to ``pandas.read_csv``.

        Returns:
            The CSV contents as a DataFrame with ``Job Function``, ``Job Level``
            and ``Job Role`` columns appended. ``Job Role`` is only predicted
            for rows whose predicted function contains ``'IT'``; other rows get
            ``'N/A'``. Remaining missing values anywhere in the frame are also
            replaced by ``'N/A'``.
        """
        df = pd.read_csv(path, encoding=encoding)
        df = self.preprocess_titles(df)
        clean_titles = df['Title_Clean']

        df['Job Function'] = self.dtm_function_classifier.predict(
            self.dtm.transform(clean_titles)
        )
        df['Job Level'] = self.dtm_level_classifier.predict(
            self.dtm_icp.transform(clean_titles)
        )

        # Roles are assigned by row position (boolean mask) rather than by
        # merging on 'Record ID': a merge multiplies rows when IDs repeat and
        # assigning into a chained-indexing slice raises SettingWithCopy.
        it_mask = df['Job Function'].str.contains('IT')
        df['Job Role'] = 'N/A'
        # Skip the role model entirely when nothing was classified as IT;
        # estimators reject an input with zero samples.
        if it_mask.any():
            role_vecs = self.dtm_it.transform(df.loc[it_mask, 'Title_Clean'])
            df.loc[it_mask, 'Job Role'] = self.dtm_role_classifier.predict(role_vecs)

        df = df.drop(columns=['Title_Clean'])
        # Kept for backward compatibility: missing values in *any* column
        # (not just 'Job Role') are reported as 'N/A'.
        return df.fillna('N/A')

    def preprocess_titles(self, df):
        """Add a ``Title_Clean`` column with normalised job titles.

        Each ``Job Title`` has stopwords removed, is tokenised with
        ``gensim.utils.simple_preprocess`` and is re-joined with single spaces.
        Missing titles are treated as empty strings.

        Args:
            df: DataFrame with a ``Job Title`` column. It is modified in place.

        Returns:
            The same DataFrame, with ``Title_Clean`` added (or overwritten if
            it already exists).
        """
        # read_csv yields NaN (a float) for empty cells; remove_stopwords
        # needs str, so coerce before applying it.
        raw_titles = df['Job Title'].fillna('').astype(str)
        cleaned = raw_titles.apply(
            lambda title: " ".join(
                gensim.utils.simple_preprocess(remove_stopwords(title))
            )
        )
        if 'Title_Clean' in df.columns:
            # insert() refuses duplicate column names; overwrite instead so the
            # method can be called more than once on the same frame.
            df['Title_Clean'] = cleaned
        else:
            # Position 2 as before, clamped so narrow frames do not raise.
            df.insert(min(2, len(df.columns)), 'Title_Clean', cleaned)
        return df



    

In [44]:
# Build the classifier; with no arguments the constructor reads
# 'dtm_models.pkl' from the current working directory.
classifier = Title_Classifier()
# Classify the job titles in 'test.csv'. The returned DataFrame is not
# assigned; as the last expression of a notebook cell it is echoed as output.
classifier.classify(path = 'test.csv')

Unnamed: 0,Record ID,Job Title,Job Function,Job Level,Job Role
0,1,Manager-Cybersecurity,IT General,Manager,Information Security
1,2,"Manager, Information Security",Information Security,Manager,
2,3,User Experience Analyst,Information Security,Contributor,
3,4,Network Specialist,IT General,Contributor,Networking
4,5,Director of Privacy and Compliance,Systems,Director,
5,6,"Assistant Vp, Network Architecture Ccie 1676",Information Security,Contributor,
6,7,"Director, Information Security",Information Security,Director,
7,8,CIO,IT General,C-level,Information Security
8,9,Director Cloud Operations,Information Security,Director,
9,10,"Director, Enterprise Applications",IT General,Director,Networking
