In [1]:
import numpy as np
import pandas as pd  

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

import shallow_pipeline as pipeline 

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [2]:
COMPLAINTS_CSV = 'data/complaints.csv'

In [3]:
# Load data 
cfpb = pipeline.load_data(verbose=True)

# Process features  
cat_columns = {('product', 50), 
               ('sub-product', 50), 
               ('issue', 50), 
               ('sub-issue', 50), 
               ('company', 100), 
               ('state', None), 
               ('tags', None), 
               ('consumer_disputed', None)}

cfpb_X = pipeline.process_cat_features(cfpb, cat_columns)

# Process label 
cfpb_y = np.where(cfpb['company_response_to_consumer'].isin(
    ['Closed with non-monetary relief', 'Closed with monetary relief']), 1, 0)
print('\nLabel balance:', cfpb_y.mean())

Date range: 2015-03-19 00:00:00 to 2019-12-31 00:00:00
Number of complaints: 491146

Distribution of company response: 
Closed with explanation            0.808145
Closed with non-monetary relief    0.124195
Closed with monetary relief        0.053729
Closed                             0.007617
Untimely response                  0.006314
Name: company_response_to_consumer, dtype: float64

Distribution of missing values: 
date_received                   0.000000
product                         0.000000
sub-product                     0.106225
issue                           0.000000
sub-issue                       0.260953
consumer_complaint_narrative    0.000000
company_public_response         0.520477
company                         0.000000
state                           0.003848
zip_code                        0.224844
tags                            0.827795
consumer_consent_provided       0.000000
submitted_via                   0.000000
date_sent_to_company            0.000000
c

In [4]:
cfpb_X.head()

Unnamed: 0,tags_Missing,tags_Older American,"tags_Older American, Servicemember",tags_Servicemember,consumer_disputed_Missing,consumer_disputed_No,consumer_disputed_Yes,product_Bank account or service,product_Checking or savings account,product_Consumer Loan,product_Credit card,product_Credit card or prepaid card,product_Credit reporting,"product_Credit reporting, credit repair services, or other personal consumer reports",product_Debt collection,"product_Money transfer, virtual currency, or money service",product_Money transfers,product_Mortgage,product_Other financial service,product_Payday loan,"product_Payday loan, title loan, or personal loan",product_Prepaid card,product_Student loan,product_Vehicle loan or lease,product_Virtual currency,"company_ACE CASH EXPRESS, INC.",company_AES/PHEAA,company_AFNI INC.,company_ALLY FINANCIAL INC.,company_AMERICAN EXPRESS COMPANY,company_AMERICAN HONDA FINANCE CORP,company_Ad Astra Recovery Services Inc,company_Alliance Data Card Services,"company_BANK OF AMERICA, NATIONAL ASSOCIATION",company_BARCLAYS BANK DELAWARE,"company_BAYVIEW LOAN SERVICING, LLC",company_BB&T CORPORATION,company_BBVA FINANCIAL CORPORATION,"company_Big Picture Loans, LLC",company_CAPITAL ONE FINANCIAL CORPORATION,"company_CARRINGTON MORTGAGE SERVICES, LLC","company_CAVALRY INVESTMENTS, LLC","company_CCS Financial Services, Inc.","company_CITIBANK, N.A.","company_CITIZENS FINANCIAL GROUP, INC.",company_CL Holdings LLC,company_CORELOGIC INC,company_CREDIT ACCEPTANCE CORPORATION,company_CURO Intermediate Holdings,"company_Caliber Home Loans, Inc.","company_Coinbase, Inc.","company_Commonwealth Financial Systems, Inc.","company_Conn's, Inc.","company_Convergent Resources, Inc.",company_DISCOVER BANK,company_Ditech Financial LLC,"company_Diversified Consultants, Inc.",company_ENCORE CAPITAL GROUP INC.,"company_EOS Holdings, Inc.","company_EQUIFAX, INC.",company_ERC,"company_Empowerment Ventures, LLC",company_Experian Information Solutions Inc.,company_FIFTH THIRD FINANCIAL CORPORATION,"company_FLAGSTAR BANK, FSB","company_Fidelity National Information Services, Inc. (FNIS)",company_Freedom Mortgage Company,company_GREAT LAKES,"company_General Motors Financial Company, Inc.","company_HCFS Health Care Financial Services, Inc.",company_HSBC NORTH AMERICA HOLDINGS INC.,"company_HUNTINGTON NATIONAL BANK, THE",company_HYUNDAI CAPITAL AMERICA,"company_Hunter Warfield, Inc.","company_I.C. System, Inc.","company_I.Q. DATA INTERNATIONAL, INC.",company_JPMORGAN CHASE & CO.,company_KEYCORP,company_LEXISNEXIS,"company_LoanCare, LLC",company_M&T BANK CORPORATION,company_MONEYGRAM PAYMENT SYSTEMS WORLDWIDE INC,company_Midwest Recovery Systems,company_NATIONSTAR MORTGAGE,company_NAVY FEDERAL CREDIT UNION,company_NETSPEND CORPORATION,company_NISSAN MOTOR ACCEPTANCE CORPORATION,"company_National Credit Systems,Inc.","company_Navient Solutions, LLC.","company_Nelnet, Inc.","company_ONEMAIN FINANCIAL HOLDINGS, LLC.",company_Ocwen Financial Corporation,company_Other,"company_PENNYMAC LOAN SERVICES, LLC.",company_PHH Mortgage Services Corporation,company_PNC Bank N.A.,company_PORTFOLIO RECOVERY ASSOCIATES INC,"company_Paypal Holdings, Inc","company_ProCollect, Inc","company_QUICKEN LOANS, INC.",company_REGIONS FINANCIAL CORPORATION,company_RUSHMORE LOAN MANAGEMENT SERVICES LLC,"company_Receivables Performance Management, LLC",company_Resurgent Capital Services L.P.,company_RoundPoint Mortgage Servicing Corporation,"company_SANTANDER BANK, NATIONAL ASSOCIATION","company_SELECT PORTFOLIO SERVICING, INC.",company_SLM CORPORATION,"company_SUNTRUST BANKS, INC.",company_SYNCHRONY FINANCIAL,company_Santander Consumer USA Holdings Inc.,company_Selene Finance LP,"company_Seterus, Inc.","company_Shellpoint Partners, LLC","company_Southwest Credit Systems, L.P.",company_Specialized Loan Servicing Holdings LLC,company_TD BANK US HOLDING COMPANY,company_TENET HEALTHCARE CORPORATION,company_TOYOTA MOTOR CREDIT CORPORATION,"company_TRANSUNION INTERMEDIATE HOLDINGS, INC.",company_TRANSWORLD SYSTEMS INC,company_U.S. BANCORP,company_UNITED SERVICES AUTOMOBILE ASSOCIATION,company_WELLS FARGO & COMPANY,"company_WESTERN UNION COMPANY, THE","company_Westlake Services, LLC","issue_Account opening, closing, or management","issue_Advertising and marketing, including promotional offers","issue_Application, originator, mortgage broker",issue_Applying for a mortgage or refinancing an existing mortgage,issue_Attempts to collect debt not owed,issue_Billing disputes,issue_Can't repay my loan,issue_Closing an account,issue_Closing on a mortgage,issue_Closing your account,issue_Communication tactics,issue_Cont'd attempts collect debt not owed,issue_Credit monitoring or identity theft protection services,issue_Credit reporting company's investigation,issue_Dealing with my lender or servicer,issue_Dealing with your lender or servicer,issue_Deposits and withdrawals,issue_Disclosure verification of debt,issue_False statements or representation,issue_Fees or interest,issue_Fraud or scam,issue_Getting a credit card,issue_Improper contact or sharing of info,issue_Improper use of your report,issue_Incorrect information on credit report,issue_Incorrect information on your report,"issue_Loan modification,collection,foreclosure","issue_Loan servicing, payments, escrow account",issue_Managing an account,issue_Managing the loan or lease,issue_Opening an account,issue_Other,"issue_Other features, terms, or problems",issue_Problem when making payments,issue_Problem with a credit reporting company's investigation into an existing problem,issue_Problem with a lender or other company charging your account,issue_Problem with a purchase shown on your statement,issue_Problem with fraud alerts or security freezes,issue_Problems caused by my funds being low,issue_Problems when you are unable to pay,issue_Settlement process and costs,issue_Struggling to pay mortgage,issue_Struggling to pay your loan,issue_Struggling to repay your loan,issue_Taking/threatening an illegal action,issue_Threatened to contact someone or share information improperly,issue_Took or threatened to take negative or legal action,issue_Trouble during payment process,issue_Unable to get credit report/credit score,issue_Unable to get your credit report or credit score,issue_Written notification about debt,sub-product_Auto,sub-product_Auto debt,sub-product_Checking account,sub-product_Conventional adjustable mortgage (ARM),sub-product_Conventional fixed mortgage,sub-product_Conventional home mortgage,sub-product_Credit card,sub-product_Credit card debt,sub-product_Credit repair services,sub-product_Credit reporting,sub-product_Domestic (US) money transfer,sub-product_FHA mortgage,sub-product_Federal student loan debt,sub-product_Federal student loan servicing,sub-product_General purpose card,sub-product_General-purpose credit card or charge card,sub-product_General-purpose prepaid card,sub-product_Home equity loan or line of credit,sub-product_Home equity loan or line of credit (HELOC),sub-product_I do not know,sub-product_Installment loan,sub-product_International money transfer,sub-product_Lease,sub-product_Loan,sub-product_Medical,sub-product_Medical debt,sub-product_Missing,sub-product_Mobile or digital wallet,sub-product_Mortgage,sub-product_Mortgage debt,sub-product_Non-federal student loan,sub-product_Other,"sub-product_Other (i.e. phone, health club, etc.)",sub-product_Other bank product/service,sub-product_Other banking product or service,sub-product_Other debt,sub-product_Other mortgage,sub-product_Other personal consumer report,sub-product_Other type of mortgage,sub-product_Payday loan,sub-product_Payday loan debt,sub-product_Personal line of credit,sub-product_Private student loan,sub-product_Private student loan debt,sub-product_Reverse mortgage,sub-product_Savings account,sub-product_Store credit card,sub-product_Title loan,sub-product_VA mortgage,sub-product_Vehicle lease,sub-product_Vehicle loan,sub-product_Virtual currency,state_AA,state_AE,state_AK,state_AL,state_AP,state_AR,state_AS,state_AZ,state_CA,state_CO,state_CT,state_DC,state_DE,state_FL,state_FM,state_GA,state_GU,state_HI,state_IA,state_ID,state_IL,state_IN,state_KS,state_KY,state_LA,state_MA,state_MD,state_ME,state_MH,state_MI,state_MN,state_MO,state_MP,state_MS,state_MT,state_Missing,state_NC,state_ND,state_NE,state_NH,state_NJ,state_NM,state_NV,state_NY,state_OH,state_OK,state_OR,state_PA,state_PR,state_PW,state_RI,state_SC,state_SD,state_TN,state_TX,state_UNITED STATES MINOR OUTLYING ISLANDS,state_UT,state_VA,state_VI,state_VT,state_WA,state_WI,state_WV,state_WY,sub-issue_Account information incorrect,sub-issue_Account status,sub-issue_Account status incorrect,sub-issue_Account terms,sub-issue_Attempted to collect wrong amount,sub-issue_Billing problem,sub-issue_Can't get other flexible options for repaying your loan,sub-issue_Card was charged for something you did not purchase with the card,sub-issue_Company closed your account,sub-issue_Credit card company isn't resolving a dispute about a purchase on your statement,sub-issue_Credit inquiries on your report that you don't recognize,sub-issue_Debt is not mine,sub-issue_Debt is not yours,sub-issue_Debt resulted from identity theft,sub-issue_Debt was paid,sub-issue_Debt was result of identity theft,sub-issue_Deposits and withdrawals,sub-issue_Didn't receive enough information to verify debt,sub-issue_Didn't receive notice of right to dispute,sub-issue_Difficulty submitting a dispute or getting information about a dispute over the phone,sub-issue_Frequent or repeated calls,sub-issue_Information belongs to someone else,sub-issue_Information is missing that should be on the report,sub-issue_Information is not mine,sub-issue_Investigation took more than 30 days,sub-issue_Missing,sub-issue_No notice of investigation status/result,sub-issue_Not given enough info to verify debt,sub-issue_Old information reappears or never goes away,sub-issue_Other,sub-issue_Other problem,sub-issue_Other problem getting your report or credit score,sub-issue_Personal information incorrect,sub-issue_Problem during payment process,sub-issue_Problem using a debit or ATM card,sub-issue_Problem with fees,sub-issue_Problem with personal statement of dispute,sub-issue_Problem with statement of dispute,sub-issue_Public record,sub-issue_Public record information inaccurate,sub-issue_Received bad information about my loan,sub-issue_Received bad information about your loan,sub-issue_Reinserted previously deleted info,sub-issue_Reporting company used your report improperly,sub-issue_Right to dispute notice not received,sub-issue_Their investigation did not fix an error on your report,sub-issue_Threatened or suggested your credit would be damaged,sub-issue_Transaction was not authorized,sub-issue_Trouble with how payments are being handled,sub-issue_Trouble with how payments are handled,sub-issue_Was not notified of investigation status or results,"sub-issue_You told them to stop contacting you, but they keep trying"
0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [5]:
# Split training and testing data 
X_train, X_test, y_train, y_test = train_test_split(
    cfpb_X, cfpb_y, test_size=0.2, random_state=42)

# Config setup 
MODELS = {
    'RF': RandomForestClassifier()}  
GRID = {
    'RF': [{'n_estimators': x, 'max_depth': y} 
                           for x in (10, 100) \
                           for y in (5, 50)]} 

# Initialize results dataframe 
results_df =  pd.DataFrame(columns=('model_name', 'threshold', 'parameters', 
                                    'accuracy', 'precision', 'recall'))                    

# Iterate over thresholds and classifiers 
for threshold in [0.1, 0.2]: 
    print("Using probability threshold:", threshold)
    
    for model_key in MODELS.keys():
        for params in GRID[model_key]:
            
            print("Training model:", model_key, "|", params)

            # Fit model 
            model = MODELS[model_key]
            model.set_params(**params)
            model.fit(X_train, y_train)

            # Predict 
            y_pred = (model.predict_proba(X_test)[:,1] >= threshold).astype(bool)

            # Evaluate 
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred)
            recall = recall_score(y_test, y_pred)

            # Record results
            results_df.loc[len(results_df)] = [model_key, threshold, str(params), 
                                               accuracy, precision, recall]
        
results_df

Using probability threshold: 0.1
Training model: RF | {'n_estimators': 10, 'max_depth': 5}
Training model: RF | {'n_estimators': 10, 'max_depth': 50}
Training model: RF | {'n_estimators': 100, 'max_depth': 5}
Training model: RF | {'n_estimators': 100, 'max_depth': 50}
Using probability threshold: 0.2
Training model: RF | {'n_estimators': 10, 'max_depth': 5}
Training model: RF | {'n_estimators': 10, 'max_depth': 50}
Training model: RF | {'n_estimators': 100, 'max_depth': 5}
Training model: RF | {'n_estimators': 100, 'max_depth': 50}


Unnamed: 0,model_name,threshold,parameters,accuracy,precision,recall
0,RF,0.1,"{'n_estimators': 10, 'max_depth': 5}",0.177359,0.177334,1.0
1,RF,0.1,"{'n_estimators': 10, 'max_depth': 50}",0.59097,0.280631,0.835754
2,RF,0.1,"{'n_estimators': 100, 'max_depth': 5}",0.177329,0.177329,1.0
3,RF,0.1,"{'n_estimators': 100, 'max_depth': 50}",0.588364,0.282045,0.854929
4,RF,0.2,"{'n_estimators': 10, 'max_depth': 5}",0.743826,0.346165,0.500258
5,RF,0.2,"{'n_estimators': 10, 'max_depth': 50}",0.725664,0.353206,0.658132
6,RF,0.2,"{'n_estimators': 100, 'max_depth': 5}",0.753965,0.35743,0.485677
7,RF,0.2,"{'n_estimators': 100, 'max_depth': 50}",0.732454,0.36121,0.662036


In [6]:
# Get feature importance
best_model = RandomForestClassifier(n_estimators=100, max_depth=50)
best_model.fit(X_train, y_train)

coefs = pd.DataFrame({'feature': X_train.columns.values, 
                      'coef': best_model.feature_importances_.ravel()})
coefs.sort_values(by='coef', ascending=False)[:10]

Unnamed: 0,feature,coef
62,company_Experian Information Solutions Inc.,0.06634
92,company_Other,0.025211
0,tags_Missing,0.022388
119,"company_TRANSUNION INTERMEDIATE HOLDINGS, INC.",0.018583
3,tags_Servicemember,0.017007
242,state_FL,0.015659
237,state_CA,0.015436
1,tags_Older American,0.014222
314,sub-issue_Information belongs to someone else,0.014081
283,state_TX,0.014019
