In [7]:
import pandas as pd
import joblib
import logging
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import xgboost as xgb
from sklearn.metrics import accuracy_score
from catboost import CatBoostClassifier, Pool
import numpy as np

In [8]:
logging.basicConfig(level=logging.INFO)

In [9]:
input_csv = '../csv/output_senior.csv'
df = pd.read_csv(input_csv)
logging.info(df.head())

INFO:root:   id                                               text     class  \
0   1  benefit advocate supervisor summary represent ...  ADVOCATE   
1   2  vzw customer tech advocate overview flexible i...  ADVOCATE   
2   4  personal banker(safe)1 business advocate profi...  ADVOCATE   
3   5  patient advocate summary customer service prof...  ADVOCATE   
4   6  patient advocate summary highly drive organize...  ADVOCATE   

   class_number  senior  
0             1       2  
1             1       0  
2             1       0  
3             1       0  
4             1       1  


In [10]:
def train(df):
    vectorizer = CountVectorizer()
    X_train_vec = vectorizer.fit_transform(df['text'])
    clf = CatBoostClassifier(
        max_depth=6,
        learning_rate=0.1,
        n_estimators=1000,
        l2_leaf_reg=0
    )
    clf.fit(X_train_vec, df['senior'])
    logging.info("Model trained!")
    return clf, vectorizer

In [11]:
model,vector = train(df)

0:	learn: 1.0216147	total: 93.7ms	remaining: 1m 33s
1:	learn: 0.9697059	total: 171ms	remaining: 1m 25s
2:	learn: 0.9287491	total: 243ms	remaining: 1m 20s
3:	learn: 0.8882130	total: 317ms	remaining: 1m 18s
4:	learn: 0.8611942	total: 398ms	remaining: 1m 19s
5:	learn: 0.8326531	total: 467ms	remaining: 1m 17s
6:	learn: 0.8142249	total: 537ms	remaining: 1m 16s
7:	learn: 0.7985662	total: 614ms	remaining: 1m 16s
8:	learn: 0.7813876	total: 686ms	remaining: 1m 15s
9:	learn: 0.7660875	total: 756ms	remaining: 1m 14s
10:	learn: 0.7552298	total: 829ms	remaining: 1m 14s
11:	learn: 0.7434094	total: 898ms	remaining: 1m 13s
12:	learn: 0.7355823	total: 970ms	remaining: 1m 13s
13:	learn: 0.7300038	total: 1.04s	remaining: 1m 13s
14:	learn: 0.7223202	total: 1.12s	remaining: 1m 13s
15:	learn: 0.7153724	total: 1.2s	remaining: 1m 13s
16:	learn: 0.7072523	total: 1.28s	remaining: 1m 14s
17:	learn: 0.6974734	total: 1.36s	remaining: 1m 14s
18:	learn: 0.6918650	total: 1.44s	remaining: 1m 14s
19:	learn: 0.6878576	t

INFO:root:Model trained!


In [12]:
joblib.dump(model,"../../../app/AIModel/domain/model_senior.pkl")
joblib.dump(vector,"../../../app/AIModel/domain/vector_senior.pkl")

['../../../app/AIModel/domain/vector_senior.pkl']