In [91]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import xgboost as xgb

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_columns', None)

In [92]:
df = pd.read_csv('./dataset/brca_metabric_clinical_data.tsv', sep='\t')
df.to_csv('./dataset/brca.csv')

In [93]:
df.drop(columns=['Study ID', 'Patient ID', 'Sample ID'], inplace=True)

In [94]:
n_nan_columns = df.isnull().sum()
print(n_nan_columns.head(50))

Age at Diagnosis                   11
Type of Breast Surgery            554
Cancer Type                         0
Cancer Type Detailed                0
Cellularity                       592
Chemotherapy                      529
Pam50 + Claudin-low subtype       529
Cohort                             11
ER status measured by IHC          83
ER Status                          40
Neoplasm Histologic Grade         121
HER2 status measured by SNP6      529
HER2 Status                       529
Tumor Other Histologic Subtype    135
Hormone Therapy                   529
Inferred Menopausal State         529
Integrative Cluster               529
Primary Tumor Laterality          639
Lymph nodes examined positive     266
Mutation Count                    151
Nottingham prognostic index       222
Oncotree Code                       0
Overall Survival (Months)         528
Overall Survival Status           528
PR Status                         529
Radio Therapy                     529
Relapse Free

In [95]:
df.dropna(inplace=True)

In [96]:
df

Unnamed: 0,Age at Diagnosis,Type of Breast Surgery,Cancer Type,Cancer Type Detailed,Cellularity,Chemotherapy,Pam50 + Claudin-low subtype,Cohort,ER status measured by IHC,ER Status,Neoplasm Histologic Grade,HER2 status measured by SNP6,HER2 Status,Tumor Other Histologic Subtype,Hormone Therapy,Inferred Menopausal State,Integrative Cluster,Primary Tumor Laterality,Lymph nodes examined positive,Mutation Count,Nottingham prognostic index,Oncotree Code,Overall Survival (Months),Overall Survival Status,PR Status,Radio Therapy,Relapse Free Status (Months),Relapse Free Status,Number of Samples Per Patient,Sample Type,Sex,3-Gene classifier subtype,TMB (nonsynonymous),Tumor Size,Tumor Stage,Patient's Vital Status
1,43.19,BREAST CONSERVING,Breast Cancer,Breast Invasive Ductal Carcinoma,High,NO,LumA,1.0,Positve,Positive,3.0,NEUTRAL,Negative,Ductal/NST,YES,Pre,4ER+,Right,0.0,2.0,4.020,IDC,84.633333,0:LIVING,Positive,YES,83.52,0:Not Recurred,1,Primary,Female,ER+/HER2- High Prolif,2.615035,10.0,1.0,Living
4,76.97,MASTECTOMY,Breast Cancer,Breast Mixed Ductal and Lobular Carcinoma,High,YES,LumB,1.0,Positve,Positive,3.0,NEUTRAL,Negative,Mixed,YES,Post,9,Right,8.0,2.0,6.080,MDLC,41.366667,1:DECEASED,Positive,YES,18.55,1:Recurred,1,Primary,Female,ER+/HER2- High Prolif,2.615035,40.0,2.0,Died of Disease
5,78.77,MASTECTOMY,Breast Cancer,Breast Invasive Ductal Carcinoma,Moderate,NO,LumB,1.0,Positve,Positive,3.0,NEUTRAL,Negative,Ductal/NST,YES,Post,7,Left,0.0,4.0,4.062,IDC,7.800000,1:DECEASED,Positive,YES,2.89,1:Recurred,1,Primary,Female,ER+/HER2- High Prolif,5.230071,31.0,4.0,Died of Disease
10,86.41,BREAST CONSERVING,Breast Cancer,Breast Invasive Ductal Carcinoma,Moderate,NO,LumB,1.0,Positve,Positive,3.0,GAIN,Negative,Ductal/NST,YES,Post,9,Right,1.0,4.0,5.032,IDC,36.566667,1:DECEASED,Negative,YES,36.09,0:Not Recurred,1,Primary,Female,ER+/HER2- High Prolif,5.230071,16.0,2.0,Died of Other Causes
11,84.22,MASTECTOMY,Breast Cancer,Breast Invasive Lobular Carcinoma,High,NO,Her2,1.0,Negative,Positive,2.0,LOSS,Negative,Lobular,NO,Post,3,Left,0.0,5.0,3.056,ILC,36.266667,1:DECEASED,Negative,NO,35.79,1:Recurred,1,Primary,Female,ER+/HER2- High Prolif,6.537589,28.0,2.0,Died of Disease
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1697,71.22,MASTECTOMY,Breast Cancer,Breast Mixed Ductal and Lobular Carcinoma,High,NO,LumA,5.0,Positve,Positive,2.0,NEUTRAL,Negative,Mixed,YES,Post,3,Left,4.0,11.0,5.060,MDLC,85.000000,1:DECEASED,Positive,NO,83.88,0:Not Recurred,1,Primary,Female,ER+/HER2- Low Prolif,14.382695,30.0,2.0,Died of Other Causes
1698,70.65,BREAST CONSERVING,Breast Cancer,Breast Mixed Ductal and Lobular Carcinoma,High,NO,LumB,5.0,Positve,Positive,1.0,NEUTRAL,Negative,Mixed,NO,Post,8,Left,0.0,9.0,2.040,MDLC,201.166667,0:LIVING,Positive,YES,198.52,0:Not Recurred,1,Primary,Female,ER+/HER2- Low Prolif,11.767659,20.0,1.0,Living
1700,75.62,MASTECTOMY,Breast Cancer,Breast Invasive Ductal Carcinoma,High,NO,Basal,5.0,Negative,Negative,3.0,GAIN,Negative,Ductal/NST,NO,Post,10,Right,0.0,4.0,4.040,IDC,105.200000,1:DECEASED,Negative,NO,103.82,0:Not Recurred,1,Primary,Female,ER-/HER2-,5.230071,20.0,1.0,Died of Other Causes
1702,52.84,BREAST CONSERVING,Breast Cancer,Breast Mixed Ductal and Lobular Carcinoma,High,YES,Normal,5.0,Positve,Positive,2.0,NEUTRAL,Negative,Mixed,NO,Post,8,Right,6.0,5.0,5.040,MDLC,200.333333,0:LIVING,Positive,YES,197.70,0:Not Recurred,1,Primary,Female,ER+/HER2- High Prolif,6.537589,20.0,2.0,Living


In [97]:
X = df.iloc[:, :-2]
y = df.iloc[:, -2]

In [98]:
X = pd.get_dummies(X)

In [99]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [100]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

accuracy


0.8904109589041096

In [101]:
svm_model = SVC(kernel='rbf')
svm_model.fit(X_train, y_train)

y_pred_svm = svm_model.predict(X_test)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

accuracy_svm

0.7442922374429224

In [103]:
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
accuracies = []

for kernel in kernels:
    svm_model = SVC(kernel=kernel)
    svm_model.fit(X_train, y_train)
    y_pred_svm = svm_model.predict(X_test)
    accuracy_svm = accuracy_score(y_test, y_pred_svm)
    accuracies.append(accuracy_svm)
    print(kernel, accuracy_svm)

best_kernel = kernels[accuracies.index(max(accuracies))]
best_accuracy = max(accuracies)

best_kernel, best_accuracy


linear 0.7945205479452054
poly 0.6757990867579908
rbf 0.7442922374429224
sigmoid 0.7945205479452054


('linear', 0.7945205479452054)

In [90]:
correlation_matrix = X.corr()
correlation_matrix
# highly_correlated_features = correlation_matrix[abs(correlation_matrix) > 0.8].stack().index.tolist()


Unnamed: 0,Age at Diagnosis,Cohort,Neoplasm Histologic Grade,Lymph nodes examined positive,Mutation Count,Nottingham prognostic index,Overall Survival (Months),Relapse Free Status (Months),Number of Samples Per Patient,TMB (nonsynonymous),Tumor Size,Type of Breast Surgery_BREAST CONSERVING,Type of Breast Surgery_MASTECTOMY,Cancer Type_Breast Cancer,Cancer Type Detailed_Breast,Cancer Type Detailed_Breast Invasive Ductal Carcinoma,Cancer Type Detailed_Breast Invasive Lobular Carcinoma,Cancer Type Detailed_Breast Invasive Mixed Mucinous Carcinoma,Cancer Type Detailed_Breast Mixed Ductal and Lobular Carcinoma,Cellularity_High,Cellularity_Low,Cellularity_Moderate,Chemotherapy_NO,Chemotherapy_YES,Pam50 + Claudin-low subtype_Basal,Pam50 + Claudin-low subtype_Her2,Pam50 + Claudin-low subtype_LumA,Pam50 + Claudin-low subtype_LumB,Pam50 + Claudin-low subtype_NC,Pam50 + Claudin-low subtype_Normal,Pam50 + Claudin-low subtype_claudin-low,ER status measured by IHC_Negative,ER status measured by IHC_Positve,ER Status_Negative,ER Status_Positive,HER2 status measured by SNP6_GAIN,HER2 status measured by SNP6_LOSS,HER2 status measured by SNP6_NEUTRAL,HER2 status measured by SNP6_UNDEF,HER2 Status_Negative,HER2 Status_Positive,Tumor Other Histologic Subtype_Ductal/NST,Tumor Other Histologic Subtype_Lobular,Tumor Other Histologic Subtype_Medullary,Tumor Other Histologic Subtype_Mixed,Tumor Other Histologic Subtype_Mucinous,Tumor Other Histologic Subtype_Other,Tumor Other Histologic Subtype_Tubular/ cribriform,Hormone Therapy_NO,Hormone Therapy_YES,Inferred Menopausal State_Post,Inferred Menopausal State_Pre,Integrative Cluster_1,Integrative Cluster_10,Integrative Cluster_2,Integrative Cluster_3,Integrative Cluster_4ER+,Integrative Cluster_4ER-,Integrative Cluster_5,Integrative Cluster_6,Integrative Cluster_7,Integrative Cluster_8,Integrative Cluster_9,Primary Tumor Laterality_Left,Primary Tumor Laterality_Right,Oncotree Code_BREAST,Oncotree Code_IDC,Oncotree Code_ILC,Oncotree Code_IMMC,Oncotree Code_MDLC,Overall Survival Status_0:LIVING,Overall Survival Status_1:DECEASED,PR Status_Negative,PR Status_Positive,Radio Therapy_NO,Radio Therapy_YES,Relapse Free Status_0:Not Recurred,Relapse Free Status_1:Recurred,Sample Type_Primary,Sex_Female,3-Gene classifier subtype_ER+/HER2- High Prolif,3-Gene classifier subtype_ER+/HER2- Low Prolif,3-Gene classifier subtype_ER-/HER2-,3-Gene classifier subtype_HER2+
Age at Diagnosis,1.000000,0.031611,-0.096408,0.025244,0.032214,-0.026063,-0.163522,-0.098289,,0.031076,0.064657,-0.106074,0.106074,,-0.018473,-0.050553,0.045938,0.001387,0.030238,0.001852,-0.002958,0.000026,0.413432,-0.413432,-0.131482,-0.083122,0.065528,0.176390,0.022667,-0.109099,-0.070598,-0.257898,0.257898,-0.218024,0.218024,-0.093138,0.032310,0.068773,0.019469,0.133792,-0.133792,-0.037677,0.045938,-0.011374,0.030238,0.001387,-0.018473,-0.029287,-0.284142,0.284142,0.746013,-0.746013,-0.062088,-0.166168,0.077695,0.047245,0.016625,-0.029235,-0.119722,0.026465,0.099650,0.063043,0.054816,0.038351,-0.038351,-0.018473,-0.050553,0.045938,0.001387,0.030238,-0.300358,0.300358,-0.052677,0.052677,0.079122,-0.079122,0.061335,-0.061335,,,0.131150,0.057212,-0.130077,-0.132104
Cohort,0.031611,1.000000,-0.011275,-0.137843,0.304067,-0.113454,0.193670,0.141868,,0.304784,-0.059629,0.058305,-0.058305,,0.026939,-0.059870,-0.007468,-0.016526,0.079494,0.027417,-0.031333,-0.007769,0.233979,-0.233979,0.064224,0.025519,0.000452,-0.006717,0.012529,-0.003997,-0.078249,0.059014,-0.059014,0.029173,-0.029173,0.013440,-0.036341,0.002125,0.034957,-0.015861,0.015861,-0.068352,-0.007468,0.045367,0.079494,-0.016526,0.026939,-0.002558,0.243939,-0.243939,0.014575,-0.014575,0.007516,0.002592,-0.024595,0.029715,-0.018073,0.000879,0.017372,-0.029885,0.018236,0.012805,-0.044366,0.036630,-0.036630,0.026939,-0.059870,-0.007468,-0.016526,0.079494,-0.194617,0.194617,-0.001887,0.001887,0.164449,-0.164449,-0.055598,0.055598,,,-0.032474,-0.012496,0.038784,0.022289
Neoplasm Histologic Grade,-0.096408,-0.011275,1.000000,0.163060,0.033601,0.715346,-0.110314,-0.082108,,0.033176,0.150270,-0.064222,0.064222,,-0.004902,0.214050,-0.086265,-0.103508,-0.162674,0.089392,-0.033127,-0.070513,-0.289945,0.289945,0.240002,0.188619,-0.418005,0.132144,0.003639,-0.037689,0.110936,0.373192,-0.373192,0.376639,-0.376639,0.213571,0.045857,-0.224890,0.037310,-0.220448,0.220448,0.241024,-0.086265,0.082332,-0.162674,-0.103508,-0.004902,-0.206435,-0.009460,0.009460,-0.068646,0.068646,0.133056,0.269340,-0.010856,-0.226842,-0.142657,0.139187,0.204752,0.062732,-0.158125,-0.213258,0.130728,0.027605,-0.027605,-0.004902,0.214050,-0.086265,-0.103508,-0.162674,-0.080712,0.080712,0.333348,-0.333348,-0.127007,0.127007,-0.114132,0.114132,,,0.110993,-0.494213,0.305102,0.224915
Lymph nodes examined positive,0.025244,-0.137843,0.163060,1.000000,-0.076690,0.618333,-0.256570,-0.247280,,-0.076893,0.337898,-0.213232,0.213232,,-0.026239,0.013462,0.026514,-0.051679,-0.014828,0.030271,-0.016692,-0.020295,-0.310180,0.310180,0.000247,0.119226,-0.123024,0.042434,-0.004331,0.002496,0.023928,0.088985,-0.088985,0.109486,-0.109486,0.099647,0.034736,-0.108376,-0.015459,-0.130406,0.130406,0.021242,0.026514,0.014127,-0.014828,-0.051679,-0.026239,-0.043614,-0.150111,0.150111,0.024420,-0.024420,-0.008798,-0.018339,0.069992,-0.068771,-0.032725,0.046140,0.124722,0.029750,-0.048859,-0.052076,0.040595,-0.024418,0.024418,-0.026239,0.013462,0.026514,-0.051679,-0.014828,-0.165401,0.165401,0.119425,-0.119425,-0.149248,0.149248,-0.224109,0.224109,,,0.030741,-0.108475,0.029491,0.084438
Mutation Count,0.032214,0.304067,0.033601,-0.076690,1.000000,-0.023916,0.057572,0.035709,,0.999347,0.016239,-0.001849,0.001849,,-0.019269,-0.031113,0.028770,-0.018355,0.026102,0.084782,-0.077187,-0.036919,0.095311,-0.095311,0.063145,0.162372,-0.008113,-0.016513,-0.005437,-0.085328,-0.113920,0.082602,-0.082602,0.075710,-0.075710,-0.014896,-0.037122,0.034890,-0.027882,-0.022731,0.022731,-0.031820,0.028770,0.025887,0.026102,-0.018355,-0.019269,-0.017040,0.156473,-0.156473,0.028280,-0.028280,-0.009654,0.018528,-0.019449,0.054902,-0.069042,0.062100,0.020309,-0.069975,0.006945,-0.005772,0.003944,0.013354,-0.013354,-0.019269,-0.031113,0.028770,-0.018355,0.026102,-0.126762,0.126762,0.080512,-0.080512,0.100676,-0.100676,-0.052842,0.052842,,,0.026604,-0.066118,0.034142,0.020166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Sex_Female,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3-Gene classifier subtype_ER+/HER2- High Prolif,0.131150,-0.032474,0.110993,0.030741,0.026604,0.102282,-0.004889,-0.021539,,0.025806,0.082248,-0.051599,0.051599,,0.034974,0.034417,-0.061491,0.060136,-0.021794,0.143560,-0.131814,-0.061785,0.130205,-0.130205,-0.217593,-0.064146,-0.154456,0.575160,0.057931,-0.135054,-0.207271,-0.339115,0.339115,-0.357457,0.357457,-0.056300,0.062574,0.015854,0.057931,0.200643,-0.200643,0.055060,-0.061491,0.007149,-0.021794,0.060136,0.034974,-0.087263,-0.240636,0.240636,0.117117,-0.117117,0.245293,-0.184671,0.159638,-0.134123,-0.115013,-0.133985,-0.211488,0.198479,0.077122,0.056340,0.211801,0.026570,-0.026570,0.034974,0.034417,-0.061491,0.060136,-0.021794,-0.104441,0.104441,-0.146363,0.146363,0.001107,-0.001107,-0.058931,0.058931,,,1.000000,-0.562171,-0.333944,-0.261020
3-Gene classifier subtype_ER+/HER2- Low Prolif,0.057212,-0.012496,-0.494213,-0.108475,-0.066118,-0.381150,0.102288,0.083431,,-0.065012,-0.113122,0.045279,-0.045279,,-0.023410,-0.211800,0.109843,-0.013352,0.185630,-0.181218,0.093331,0.125814,0.221268,-0.221268,-0.244039,-0.229596,0.591203,-0.367561,-0.032567,0.173765,-0.117168,-0.350830,0.350830,-0.382678,0.382678,-0.276266,-0.021653,0.270772,-0.032567,0.261174,-0.261174,-0.229058,0.109843,-0.065927,0.185630,-0.013352,-0.023410,0.155225,-0.037390,0.037390,0.040393,-0.040393,-0.157435,-0.270874,-0.064815,0.362775,0.209008,-0.097661,-0.250582,-0.074420,0.105508,0.167922,-0.170592,-0.035476,0.035476,-0.023410,-0.211800,0.109843,-0.013352,0.185630,0.128983,-0.128983,-0.326667,0.326667,0.078737,-0.078737,0.125942,-0.125942,,,-0.562171,1.000000,-0.343367,-0.268386
3-Gene classifier subtype_ER-/HER2-,-0.130077,0.038784,0.305102,0.029491,0.034142,0.188499,-0.032867,-0.003577,,0.033278,0.006156,0.077731,-0.077731,,0.005530,0.116038,-0.038571,-0.027064,-0.105646,0.018585,0.061500,-0.059423,-0.307108,0.307108,0.610054,-0.009360,-0.373131,-0.254467,-0.019346,-0.055767,0.452256,0.656377,-0.656377,0.703126,-0.703126,-0.128496,0.016744,0.113079,-0.019346,0.109385,-0.109385,0.098553,-0.038571,0.107988,-0.105646,-0.027064,0.005530,-0.053299,0.283298,-0.283298,-0.101407,0.101407,-0.057642,0.673048,-0.082557,-0.167734,-0.036703,0.288806,-0.124218,-0.102003,-0.134933,-0.170973,-0.017992,0.000546,-0.000546,0.005530,0.116038,-0.038571,-0.027064,-0.105646,0.004932,-0.004932,0.423928,-0.423928,-0.099098,0.099098,0.005364,-0.005364,,,-0.333944,-0.343367,1.000000,-0.159428


In [104]:
from sklearn.decomposition import PCA

pca = PCA(n_components=5)
X_pca = pca.fit_transform(X)

X_pca


array([[-1.23665044,  0.2202125 , -3.09361505,  1.02745183,  1.77974988],
       [-1.59454971, -1.76233055,  4.08218269, -3.64502699,  2.05577789],
       [-1.17956251, -3.58123623, -0.62675025, -2.4791757 ,  0.1068351 ],
       ...,
       [ 4.42243899,  2.02382307, -0.11360488, -1.54316362, -2.42986336],
       [-2.51213788,  3.17292885,  2.08430487,  0.60336875,  0.65321312],
       [-4.10198652,  3.1148563 , -0.62134321,  3.24305329,  6.25205119]])

In [105]:
explained_variance = pca.explained_variance_ratio_
variance_covered = sum(explained_variance[:5]) * 100
variance_covered

35.529029449549874

In [86]:
le = LabelEncoder()
y_train_transformed = le.fit_transform(y_train)
y_test_transformed = le.fit_transform(y_test)

print(y_train_transformed)
print(y_train)


xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train, y_train_transformed)

y_pred_xgb = xgb_model.predict(y_test_transformed)
accuracy_xgb = accuracy_score(y_test_transformed, y_pred_xgb)

accuracy_xgb

[1 1 0 0 1 0 1 2 0 2 0 0 1 0 0 0 1 1 0 1 1 1 1 2 1 0 1 1 1 0 0 1 1 1 0 1 1
 1 1 0 1 1 0 0 0 0 1 1 1 1 1 0 0 3 0 1 1 1 1 1 0 1 1 0 0 1 1 1 2 1 0 1 1 0
 0 1 1 2 1 2 1 0 1 0 0 2 1 1 0 1 0 1 1 0 1 0 1 1 0 1 0 1 1 1 1 1 0 0 0 1 0
 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 0 0 0 3 2 1 1 1 1 1 1 0 0 0 0
 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 2 1 0 1 0 2 0 1 1 0 1 0 0 0 1 1 0 0 1 2 1 1
 0 2 1 1 2 1 1 1 1 1 1 0 1 1 1 1 1 2 1 1 1 0 1 1 0 0 1 2 1 1 0 1 1 2 0 1 1
 1 1 1 0 1 0 2 0 1 1 1 0 0 0 1 2 0 0 0 1 1 0 0 1 0 0 1 0 1 1 0 0 1 1 0 0 1
 0 0 1 0 1 2 1 2 1 0 0 1 1 0 1 1 0 1 1 0 0 1 0 1 0 1 0 1 0 0 0 1 0 0 0 1 1
 1 1 1 1 1 1 2 1 1 1 0 1 0 1 1 2 1 0 0 1 0 1 0 0 0 0 0 1 1 1 0 1 2 1 1 1 1
 0 1 0 1 1 0 1 1 0 1 1 1 2 0 2 1 0 2 1 1 0 0 1 1 0 0 1 1 1 2 1 0 1 0 0 0 1
 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 0 1 1 1 0 0 1 1 1 0 0 0 1 0 1
 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 1 0 1 1 1 1 0 1 2 0 1 1 1 1 0 1 0 1 1
 1 0 0 1 1 1 1 1 1 0 1 2 0 1 2 1 0 0 1 1 0 0 0 2 1 1 1 1 1 0 0 0 1 0 1 0 1
 1 2 1 0 0 1 1 1 1 0 0 1 

XGBoostError: [11:54:40] /Users/runner/work/xgboost/xgboost/src/predictor/cpu_predictor.cc:719: Check failed: m->NumColumns() == model.learner_model_param->num_feature (1 vs. 84) : Number of columns in data must equal to trained model.
Stack trace:
  [bt] (0) 1   libxgboost.dylib                    0x0000000145edb355 dmlc::LogMessageFatal::~LogMessageFatal() + 117
  [bt] (1) 2   libxgboost.dylib                    0x0000000146123497 void xgboost::predictor::CPUPredictor::DispatchedInplacePredict<xgboost::data::ArrayAdapter, 64ul>(std::__1::any const&, std::__1::shared_ptr<xgboost::DMatrix>, xgboost::gbm::GBTreeModel const&, float, xgboost::PredictionCacheEntry*, unsigned int, unsigned int) const + 407
  [bt] (2) 3   libxgboost.dylib                    0x0000000146118147 xgboost::predictor::CPUPredictor::InplacePredict(std::__1::shared_ptr<xgboost::DMatrix>, xgboost::gbm::GBTreeModel const&, float, xgboost::PredictionCacheEntry*, unsigned int, unsigned int) const + 1463
  [bt] (3) 4   libxgboost.dylib                    0x000000014602de6c xgboost::gbm::GBTree::InplacePredict(std::__1::shared_ptr<xgboost::DMatrix>, float, xgboost::PredictionCacheEntry*, int, int) const + 444
  [bt] (4) 5   libxgboost.dylib                    0x000000014604f4ad xgboost::LearnerImpl::InplacePredict(std::__1::shared_ptr<xgboost::DMatrix>, xgboost::PredictionType, float, xgboost::HostDeviceVector<float>**, int, int) + 141
  [bt] (5) 6   libxgboost.dylib                    0x0000000145f00986 InplacePredictImpl(std::__1::shared_ptr<xgboost::DMatrix>, char const*, xgboost::Learner*, unsigned long long const**, unsigned long long*, float const**) + 278
  [bt] (6) 7   libxgboost.dylib                    0x0000000145f012f3 XGBoosterPredictFromDense + 483
  [bt] (7) 8   libffi.8.dylib                      0x000000010ee49972 ffi_call_unix64 + 82

