# Bosques aleatorios

In [1]:
import numpy as np
import pandas as pd

## Importando datos

In [2]:
# Load the mammographic-masses CSV stored under ./processed and preview
# its first ten rows.
mammographic_standard = pd.read_csv(
    './processed/mammographic_masses_standard.csv'
)
mammographic_standard.head(n=10)

Unnamed: 0,Age,Shape,Margin,Density,Severity
0,0.765804,0.17446,1.395631,0.240313,1
1,0.151666,0.979883,1.395631,0.240313,1
2,-1.895458,-1.436386,-1.158927,0.240313,0
3,0.083429,-1.436386,1.395631,0.240313,1
4,1.379941,-1.436386,0.756992,0.240313,1
5,-0.940133,-0.630963,-1.158927,0.240313,1
6,-1.349558,0.17446,-1.158927,-2.612545,0
7,0.288141,-0.630963,-1.158927,-2.612545,0
8,-0.121284,-1.436386,-1.158927,0.240313,0
9,-0.257759,0.17446,0.756992,0.240313,0


## Selección de datos de prueba

In [3]:
# Feature matrix: every column of the loaded table except the 'Severity'
# label, which is split off separately as the prediction target.
df_feat = mammographic_standard.drop(columns='Severity')
df_feat.head()

Unnamed: 0,Age,Shape,Margin,Density
0,0.765804,0.17446,1.395631,0.240313
1,0.151666,0.979883,1.395631,0.240313
2,-1.895458,-1.436386,-1.158927,0.240313
3,0.083429,-1.436386,1.395631,0.240313
4,1.379941,-1.436386,0.756992,0.240313


In [4]:
# Target: the 'Severity' label kept as a one-column DataFrame
# (double brackets select a frame rather than a Series).
df_target = mammographic_standard[['Severity']]
df_target.head()

Unnamed: 0,Severity
0,1
1,1
2,0
3,1
4,1


In [5]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; the fixed random_state makes the
# split repeatable. The one-column target frame is flattened to a 1-D
# label array, the shape scikit-learn classifiers expect for y.
X_train, X_test, y_train, y_test = train_test_split(
    df_feat,
    df_target.values.ravel(),
    test_size=0.25,
    random_state=42,
)

## Entrenamiento del bosque

In [6]:
from sklearn.ensemble import RandomForestClassifier
# Random forest of 100 trees splitting on Gini impurity; random_state pins
# the forest's internal randomness so a refit reproduces the same model.
rfc = RandomForestClassifier(
    criterion='gini',
    n_estimators=100,
    random_state=101,
)
# fit() is kept as the cell's last expression so the fitted estimator's
# repr is displayed.
rfc.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=101, verbose=0, warm_start=False)

In [7]:
# Predict a class label for every row of the held-out test features.
rfc_pred = rfc.predict(X_test)

## Prueba del modelo

In [9]:
from sklearn.metrics import classification_report,confusion_matrix

In [10]:
# Confusion matrix on the test split: rows are the true classes and
# columns are the predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=rfc_pred))

[[79 27]
 [18 84]]


In [11]:
# Per-class precision, recall, F1 and support on the test split.
print(classification_report(y_true=y_test, y_pred=rfc_pred))

              precision    recall  f1-score   support

           0       0.81      0.75      0.78       106
           1       0.76      0.82      0.79       102

   micro avg       0.78      0.78      0.78       208
   macro avg       0.79      0.78      0.78       208
weighted avg       0.79      0.78      0.78       208

