In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
df = pd.read_csv('cleaned_fake_job_dataset.csv')

print(df.shape)
df.head()

(17589, 17)


Unnamed: 0,title,company_profile,description,requirements,benefits,telecommuting,has_company_logo,has_questions,employment_type,required_experience,required_education,industry,function,fraudulent,country,state,city
0,Marketing Intern,"We're Food52, and we've created a groundbreaki...","Food52, a fast-growing, James Beard Award-winn...",Experience with content management systems a m...,No benefits listed,0,1,0,Other,Internship,Unknown,Unknown,Marketing,0,US,NY,New York
1,Customer Service - Cloud Video Production,"90 Seconds, the worlds Cloud Video Production ...",Organised - Focused - Vibrant - Awesome!Do you...,What we expect from you:Your key responsibilit...,What you will get from usThrough being part of...,0,1,0,Full-time,Not Applicable,Unknown,Marketing and Advertising,Customer Service,0,NZ,Unknown,Auckland
2,Commissioning Machinery Assistant (CMA),Valor Services provides Workforce Solutions th...,"Our client, located in Houston, is actively se...",Implement pre-commissioning and commissioning ...,No benefits listed,0,1,0,Unknown,Unknown,Unknown,Unknown,Unknown,0,US,IA,Wever
3,Account Executive - Washington DC,Our passion for improving quality of life thro...,THE COMPANY: ESRI – Environmental Systems Rese...,"EDUCATION: Bachelor’s or Master’s in GIS, busi...",Our culture is anything but corporate—we have ...,0,1,0,Full-time,Mid-Senior level,Bachelor's Degree,Computer Software,Sales,0,US,DC,Washington
4,Bill Review Manager,SpotSource Solutions LLC is a Global Human Cap...,JOB TITLE: Itemization Review ManagerLOCATION:...,QUALIFICATIONS:RN license in the State of Texa...,Full Benefits Offered,0,1,1,Full-time,Mid-Senior level,Bachelor's Degree,Hospital & Health Care,Health Care Provider,0,US,FL,Fort Worth


In [4]:
df.isnull().sum()

title                  0
company_profile        0
description            0
requirements           0
benefits               0
telecommuting          0
has_company_logo       0
has_questions          0
employment_type        0
required_experience    0
required_education     0
industry               0
function               0
fraudulent             0
country                0
state                  0
city                   0
dtype: int64

In [5]:
df['fraudulent'].value_counts()

0    16734
1      855
Name: fraudulent, dtype: int64

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Combine relevant text columns
df['combined_text'] = (
    df['title'] + ' ' +
    df['company_profile'] + ' ' +
    df['description'] + ' ' +
    df['requirements'] + ' ' +
    df['benefits']
)

# Vectorize
tfidf = TfidfVectorizer(max_features=5000, ngram_range=(1, 2), stop_words='english')
X_text = tfidf.fit_transform(df['combined_text'])


In [7]:
df.nunique()

title                  11231
company_profile         1710
description            14801
requirements           11969
benefits                6206
telecommuting              2
has_company_logo           2
has_questions              2
employment_type            6
required_experience        8
required_education        14
industry                 132
function                  38
fraudulent                 2
country                   91
state                    325
city                    2388
combined_text          16065
dtype: int64

In [8]:
categorical_columns = ['employment_type', 'required_experience', 'required_education']
for col in categorical_columns:
    print(df[col].value_counts())

Full-time    11414
Unknown       3427
Contract      1514
Part-time      772
Temporary      237
Other          225
Name: employment_type, dtype: int64
Unknown             6961
Mid-Senior level    3768
Entry level         2631
Associate           2265
Not Applicable      1070
Director             382
Internship           372
Executive            140
Name: required_experience, dtype: int64
Unknown                              8008
Bachelor's Degree                    5096
High School or equivalent            1987
Unspecified                          1366
Master's Degree                       416
Associate Degree                      263
Certification                         165
Some College Coursework Completed     100
Professional                           73
Vocational                             47
Some High School Coursework            27
Doctorate                              26
Vocational - HS Diploma                 9
Vocational - Degree                     6
Name: required_educati

In [9]:
from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder(drop='first')
employment_type_encoded = ohe.fit_transform(df[['employment_type']])

In [10]:
from sklearn.preprocessing import OrdinalEncoder

experience_order = [[
    'Internship',
    'Entry level',
    'Associate',
    'Mid-Senior level',
    'Director',
    'Executive',
    'Not Applicable',
    'Unknown'
]]

oe = OrdinalEncoder(categories=experience_order)
df['experience_encoded'] = oe.fit_transform(df[['required_experience']])


In [11]:
from sklearn.preprocessing import OrdinalEncoder

education_order = [[
    'Some High School Coursework',
    'High School or equivalent',
    'Vocational - HS Diploma',
    'Some College Coursework Completed',
    'Associate Degree',
    'Vocational',
    'Vocational - Degree',
    'Certification',
    "Bachelor's Degree",
    "Master's Degree",
    'Professional',
    'Doctorate',
    'Unspecified',
    'Unknown'
]]

oe = OrdinalEncoder(categories=education_order)
df['education_encoded'] = oe.fit_transform(df[['required_education']])


In [12]:
categorical_columns2 = ['industry', 'function', 'country', 'state', 'city']
df['industry'].value_counts()

Unknown                                4836
Information Technology and Services    1708
Computer Software                      1362
Internet                               1054
Education Management                    819
                                       ... 
Shipbuilding                              1
Sporting Goods                            1
Museums and Institutions                  1
Wine and Spirits                          1
Ranching                                  1
Name: industry, Length: 132, dtype: int64

In [13]:
# Step 1: Group rare categories
industry_counts = df['industry'].value_counts()
rare_industries = industry_counts[industry_counts < 10].index
df['industry_grouped'] = df['industry'].replace(rare_industries, 'Other')

# Step 2: One-hot encode the grouped version
from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
industry_encoded = ohe.fit_transform(df[['industry_grouped']])


In [14]:
df['function'].value_counts()

Unknown                   6362
Information Technology    1730
Sales                     1446
Engineering               1339
Customer Service          1176
Marketing                  815
Administrative             613
Design                     336
Health Care Provider       326
Other                      325
Education                  325
Management                 306
Business Development       226
Accounting/Auditing        210
Human Resources            201
Project Management         183
Finance                    165
Consulting                 138
Writing/Editing            131
Art/Creative               131
Production                 115
Product Management         113
Quality Assurance          110
Advertising                 90
Business Analyst            83
Data Analyst                82
Public Relations            76
Manufacturing               73
General Business            68
Research                    50
Strategy/Planning           46
Legal                       44
Training

In [15]:
function_counts = df['function'].value_counts()
rare_functions = function_counts[function_counts < 50].index
df['function_grouped'] = df['function'].replace(rare_functions, 'Other')

In [16]:
from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
function_encoded = ohe.fit_transform(df[['function_grouped']])


In [17]:
df['country'].value_counts()

US    10451
GB     2329
GR      938
CA      450
DE      382
      ...  
SI        1
CO        1
SD        1
UG        1
KH        1
Name: country, Length: 91, dtype: int64

In [18]:
country_counts = df['country'].value_counts()
rare_countries = country_counts[country_counts < 50].index
df['country_grouped'] = df['country'].replace(rare_countries, 'Other')

In [19]:
df['state'].value_counts()

Unknown    2540
CA         2010
NY         1228
LND         991
TX          955
           ... 
ENF           1
D             1
061           1
82            1
NLE           1
Name: state, Length: 325, dtype: int64

In [20]:
state_counts = df['state'].value_counts()
rare_states = state_counts[state_counts < 10].index
df['state_grouped'] = df['state'].replace(rare_states, 'Other')

In [21]:
df['city'].value_counts()

Unknown                2051
London                 1052
New York                655
Athens                  540
San Francisco           471
                       ... 
Oronoco                   1
Skagit County             1
Cassopolis Dowagiac       1
Menlo Park, CA            1
los Angeles               1
Name: city, Length: 2388, dtype: int64

In [22]:
city_counts = df['city'].value_counts()
rare_cities = city_counts[city_counts < 10].index
df['city_grouped'] = df['city'].replace(rare_cities, 'Other')

In [23]:
df['city_grouped'].value_counts()

Other            4646
Unknown          2051
London           1052
New York          655
Athens            540
                 ... 
Springfield        10
Ebene              10
Golden Valley      10
ATHENS             10
Charleston         10
Name: city_grouped, Length: 232, dtype: int64

In [24]:
df.head()

Unnamed: 0,title,company_profile,description,requirements,benefits,telecommuting,has_company_logo,has_questions,employment_type,required_experience,...,state,city,combined_text,experience_encoded,education_encoded,industry_grouped,function_grouped,country_grouped,state_grouped,city_grouped
0,Marketing Intern,"We're Food52, and we've created a groundbreaki...","Food52, a fast-growing, James Beard Award-winn...",Experience with content management systems a m...,No benefits listed,0,1,0,Other,Internship,...,NY,New York,"Marketing Intern We're Food52, and we've creat...",0.0,13.0,Unknown,Marketing,US,NY,New York
1,Customer Service - Cloud Video Production,"90 Seconds, the worlds Cloud Video Production ...",Organised - Focused - Vibrant - Awesome!Do you...,What we expect from you:Your key responsibilit...,What you will get from usThrough being part of...,0,1,0,Full-time,Not Applicable,...,Unknown,Auckland,Customer Service - Cloud Video Production 90 S...,6.0,13.0,Marketing and Advertising,Customer Service,NZ,Unknown,Auckland
2,Commissioning Machinery Assistant (CMA),Valor Services provides Workforce Solutions th...,"Our client, located in Houston, is actively se...",Implement pre-commissioning and commissioning ...,No benefits listed,0,1,0,Unknown,Unknown,...,IA,Wever,Commissioning Machinery Assistant (CMA) Valor ...,7.0,13.0,Unknown,Unknown,US,IA,Other
3,Account Executive - Washington DC,Our passion for improving quality of life thro...,THE COMPANY: ESRI – Environmental Systems Rese...,"EDUCATION: Bachelor’s or Master’s in GIS, busi...",Our culture is anything but corporate—we have ...,0,1,0,Full-time,Mid-Senior level,...,DC,Washington,Account Executive - Washington DC Our passion ...,3.0,8.0,Computer Software,Sales,US,DC,Washington
4,Bill Review Manager,SpotSource Solutions LLC is a Global Human Cap...,JOB TITLE: Itemization Review ManagerLOCATION:...,QUALIFICATIONS:RN license in the State of Texa...,Full Benefits Offered,0,1,1,Full-time,Mid-Senior level,...,FL,Fort Worth,Bill Review Manager SpotSource Solutions LLC i...,3.0,8.0,Hospital & Health Care,Health Care Provider,US,FL,Fort Worth


In [25]:
df.columns

Index(['title', 'company_profile', 'description', 'requirements', 'benefits',
       'telecommuting', 'has_company_logo', 'has_questions', 'employment_type',
       'required_experience', 'required_education', 'industry', 'function',
       'fraudulent', 'country', 'state', 'city', 'combined_text',
       'experience_encoded', 'education_encoded', 'industry_grouped',
       'function_grouped', 'country_grouped', 'state_grouped', 'city_grouped'],
      dtype='object')

In [26]:
df2 = df[['telecommuting','has_company_logo', 'has_questions', 'employment_type', 'required_experience', 'required_education', 'industry_grouped', 'function_grouped', 'country_grouped', 'state_grouped', 'city_grouped', 'fraudulent']]
df2.head()

Unnamed: 0,telecommuting,has_company_logo,has_questions,employment_type,required_experience,required_education,industry_grouped,function_grouped,country_grouped,state_grouped,city_grouped,fraudulent
0,0,1,0,Other,Internship,Unknown,Unknown,Marketing,US,NY,New York,0
1,0,1,0,Full-time,Not Applicable,Unknown,Marketing and Advertising,Customer Service,NZ,Unknown,Auckland,0
2,0,1,0,Unknown,Unknown,Unknown,Unknown,Unknown,US,IA,Other,0
3,0,1,0,Full-time,Mid-Senior level,Bachelor's Degree,Computer Software,Sales,US,DC,Washington,0
4,0,1,1,Full-time,Mid-Senior level,Bachelor's Degree,Hospital & Health Care,Health Care Provider,US,FL,Fort Worth,0


In [27]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

# Custom order for ordinal features
experience_order = [
    'Internship',
    'Entry level',
    'Associate',
    'Mid-Senior level',
    'Director',
    'Executive',
    'Not Applicable',
    'Unknown'
]

education_order = [
    'Some High School Coursework',
    'High School or equivalent',
    'Vocational - HS Diploma',
    'Some College Coursework Completed',
    'Associate Degree',
    'Vocational',
    'Vocational - Degree',
    'Certification',
    "Bachelor's Degree",
    "Master's Degree",
    'Professional',
    'Doctorate',
    'Unspecified',
    'Unknown'
]

preprocessor = ColumnTransformer(
    transformers=[
        # Ordinal encoding for ordered columns
        ('ord', OrdinalEncoder(categories=[experience_order, education_order]), 
         ['required_experience', 'required_education']),
        
        # One-hot encoding for nominal columns
        ('ohe', OneHotEncoder(drop='first'),
         ['employment_type','industry_grouped','function_grouped', 'country_grouped', 'state_grouped', 'city_grouped'])
    ],
    remainder='passthrough'  # Keep other features 
)


In [28]:
data = df2.drop('fraudulent', axis=1)

y = df2['fraudulent']
X_meta = preprocessor.fit_transform(data)

In [29]:
from scipy.sparse import hstack

X_final = hstack([X_text, X_meta])  

In [30]:
X_final

<17589x5503 sparse matrix of type '<class 'numpy.float64'>'
	with 3123837 stored elements in Compressed Sparse Row format>

### Modelling

In [31]:
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from xgboost import XGBClassifier

from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X_final, y, test_size=0.2, random_state=42, stratify=y)

In [33]:
clf_lr = LogisticRegression(class_weight='balanced', max_iter=1000)

clf_lr.fit(X_train, y_train)

In [34]:
y_pred = clf_lr.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      0.97      0.98      3347
           1       0.58      0.91      0.70       171

    accuracy                           0.96      3518
   macro avg       0.79      0.94      0.84      3518
weighted avg       0.97      0.96      0.97      3518



In [35]:
probs = clf_lr.predict_proba(X_test)[:, 1]

for t in [0.5, 0.6, 0.7, 0.8]:
    preds = (probs >= t).astype(int)
    p, r, f1, _ = precision_recall_fscore_support(y_test, preds, average='binary')
    print(f"Threshold: {t:.2f} → Precision: {p:.2f}, Recall: {r:.2f}, F1: {f1:.2f}")


Threshold: 0.50 → Precision: 0.58, Recall: 0.91, F1: 0.70
Threshold: 0.60 → Precision: 0.66, Recall: 0.89, F1: 0.75
Threshold: 0.70 → Precision: 0.77, Recall: 0.88, F1: 0.82
Threshold: 0.80 → Precision: 0.84, Recall: 0.80, F1: 0.82


In [36]:
clf_rf = RandomForestClassifier()
clf_rf.fit(X_train, y_train)

y_pred = clf_rf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      3347
           1       0.98      0.60      0.75       171

    accuracy                           0.98      3518
   macro avg       0.98      0.80      0.87      3518
weighted avg       0.98      0.98      0.98      3518



In [37]:
clf_sgd = SGDClassifier()
clf_sgd.fit(X_train, y_train)

y_pred = clf_sgd.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      3347
           1       0.91      0.68      0.78       171

    accuracy                           0.98      3518
   macro avg       0.95      0.84      0.89      3518
weighted avg       0.98      0.98      0.98      3518



In [38]:
ratio = 16734/855
ratio

19.571929824561405

In [39]:
clf_xgb = XGBClassifier(scale_pos_weight=19.57)
clf_xgb.fit(X_train, y_train)

y_pred = clf_xgb.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99      3347
           1       0.89      0.79      0.84       171

    accuracy                           0.99      3518
   macro avg       0.94      0.89      0.92      3518
weighted avg       0.98      0.99      0.98      3518



In [40]:
from catboost import CatBoostClassifier

clf_catboost = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.1,
    depth=6,
    eval_metric='F1',
    verbose=100,
    class_weights=[1.0, 19.57],  
    random_seed=42
)

clf_catboost.fit(X_train, y_train)

y_pred = clf_catboost.predict(X_test)
print(classification_report(y_test, y_pred))

0:	learn: 0.8401292	total: 562ms	remaining: 9m 21s
100:	learn: 0.9706942	total: 35.6s	remaining: 5m 16s
200:	learn: 0.9946869	total: 1m 9s	remaining: 4m 35s
300:	learn: 0.9992535	total: 1m 43s	remaining: 3m 59s
400:	learn: 0.9999626	total: 2m 17s	remaining: 3m 25s
500:	learn: 1.0000000	total: 2m 51s	remaining: 2m 50s
600:	learn: 1.0000000	total: 3m 24s	remaining: 2m 15s
700:	learn: 1.0000000	total: 3m 58s	remaining: 1m 41s
800:	learn: 1.0000000	total: 4m 32s	remaining: 1m 7s
900:	learn: 1.0000000	total: 5m 6s	remaining: 33.7s
999:	learn: 1.0000000	total: 5m 40s	remaining: 0us
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      3347
           1       0.82      0.84      0.83       171

    accuracy                           0.98      3518
   macro avg       0.91      0.92      0.91      3518
weighted avg       0.98      0.98      0.98      3518



## Track Experiments using MLFlow

In [41]:
models = [
    (
        "Logistic Regression", 
        LogisticRegression(class_weight='balanced', max_iter=1000), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "Logistic Regression with Threshold Tuning", 
        LogisticRegression(class_weight='balanced', max_iter=1000), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "Random Forest", 
        RandomForestClassifier(), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "SGDClassifier",
        SGDClassifier(), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "XGBClassifier",
        XGBClassifier(scale_pos_weight=16734/855), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "CatBoostClassifier",
        CatBoostClassifier(
            iterations=1000,
            learning_rate=0.1,
            depth=6,
            eval_metric='F1',
            verbose=100,
            class_weights=[1.0, 19.57],  
            random_seed=42
        ), 
        (X_train, y_train),
        (X_test, y_test)
    )
]

In [42]:
reports = []

for model_name, model, train_set, test_set in models:
    X_train = train_set[0]
    y_train = train_set[1]
    X_test = test_set[0]
    y_test = test_set[1]
    
    model.fit(X_train, y_train)
    
    if model_name == "Logistic Regression with Threshold Tuning":
        probs = model.predict_proba(X_test)[:, 1]
        y_pred = (probs >= 0.8).astype(int)
    else:
        y_pred = model.predict(X_test)
        
    report = classification_report(y_test, y_pred, output_dict=True)
    reports.append(report)

0:	learn: 0.8401292	total: 362ms	remaining: 6m 1s
100:	learn: 0.9706942	total: 35.7s	remaining: 5m 17s
200:	learn: 0.9946869	total: 1m 10s	remaining: 4m 41s
300:	learn: 0.9992535	total: 1m 46s	remaining: 4m 6s
400:	learn: 0.9999626	total: 2m 23s	remaining: 3m 34s
500:	learn: 1.0000000	total: 2m 58s	remaining: 2m 57s
600:	learn: 1.0000000	total: 3m 32s	remaining: 2m 21s
700:	learn: 1.0000000	total: 4m 7s	remaining: 1m 45s
800:	learn: 1.0000000	total: 4m 42s	remaining: 1m 10s
900:	learn: 1.0000000	total: 5m 17s	remaining: 34.9s
999:	learn: 1.0000000	total: 5m 51s	remaining: 0us


In [43]:
reports[0]

{'0': {'precision': 0.9950754078177901,
  'recall': 0.9659396474454736,
  'f1-score': 0.9802910855063675,
  'support': 3347.0},
 '1': {'precision': 0.5762081784386617,
  'recall': 0.9064327485380117,
  'f1-score': 0.7045454545454546,
  'support': 171.0},
 'accuracy': 0.9630471859010802,
 'macro avg': {'precision': 0.7856417931282259,
  'recall': 0.9361861979917426,
  'f1-score': 0.8424182700259111,
  'support': 3518.0},
 'weighted avg': {'precision': 0.9747154600566101,
  'recall': 0.9630471859010802,
  'f1-score': 0.9668878726313487,
  'support': 3518.0}}

In [44]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost

In [51]:
# Initialize MLflow

from mlflow.tracking import MlflowClient

client = MlflowClient()
experiment = client.get_experiment_by_name("Fake Job Detection")
client.restore_experiment(experiment.experiment_id)

mlflow.set_experiment("Fake Job Detection")
mlflow.set_tracking_uri("http://127.0.0.1:5000/")

for i, element in enumerate(models):
    model_name = element[0]
    model = element[1]
    report = reports[i]
    
    with mlflow.start_run(run_name=model_name):        
        mlflow.log_param("model", model_name)
        mlflow.log_metric('accuracy', report['accuracy'])
        mlflow.log_metric('recall_class_1', report['1']['recall'])
        mlflow.log_metric('recall_class_0', report['0']['recall'])
        mlflow.log_metric('f1_score_macro', report['macro avg']['f1-score'])  
        mlflow.log_metric('precision_class_1', report['1']['precision'])
        mlflow.log_metric('precision_class_0', report['0']['precision'])
        mlflow.log_metric('f1_score_class_1', report['1']['f1-score'])
        mlflow.log_metric('f1_score_class_0', report['0']['f1-score'])
        mlflow.log_metric('precision_macro', report['macro avg']['precision'])
        mlflow.log_metric('recall_macro', report['macro avg']['recall'])

        
        if "XGB" in model_name:
            mlflow.xgboost.log_model(model, "model")
        elif "CatBoost" in model_name:
            mlflow.catboost.log_model(model, "model")
        else:
            mlflow.sklearn.log_model(model, "model")  



🏃 View run Logistic Regression at: http://127.0.0.1:5000/#/experiments/218890209698209928/runs/fdf1cacb07fa46e2909c225694cc7cf1
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/218890209698209928




🏃 View run Logistic Regression with Threshold Tuning at: http://127.0.0.1:5000/#/experiments/218890209698209928/runs/3507d2b85e8e4e8880ad15bc1c6a9fc4
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/218890209698209928




🏃 View run Random Forest at: http://127.0.0.1:5000/#/experiments/218890209698209928/runs/3c4c8381513d4b0dadb61855737d361b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/218890209698209928




🏃 View run SGDClassifier at: http://127.0.0.1:5000/#/experiments/218890209698209928/runs/73a46eea3eea46eb8fcc6f094ff954a5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/218890209698209928


  self.get_booster().save_model(fname)


🏃 View run XGBClassifier at: http://127.0.0.1:5000/#/experiments/218890209698209928/runs/9ac5051e94fa4cbf88c3c7d4867602c0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/218890209698209928




🏃 View run CatBoostClassifier at: http://127.0.0.1:5000/#/experiments/218890209698209928/runs/5dcf999de29048d2b6f29ac01fbd694d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/218890209698209928
