# Random Forest Regression - Predição do Peso da Carcaça

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix
from sklearn import metrics
from sklearn.metrics import mean_squared_error

In [2]:
# Load the transformed dataset; the file uses ';' as field separator.
df = pd.read_csv(
    '../../dados/dadosTransformados_v1.csv',
    sep=';',
)

In [3]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(960939, 53)

In [4]:
# Raise pandas' display limits so wide/long frames are rendered without truncation.
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", 999)

In [5]:
# Preview the first five rows (5 is the default for head()).
df.head()

Unnamed: 0,tipificacao_1,tipificacao_2,tipificacao_3,maturidade,acabamento,peso,classificacao_estabelecimento,outros_incentivos,pratica_recuperacao_pastagem,fertiirrigacao,ilp,ilpf,ifp,fabrica_racao,identificacao_individual,regua_de_manejo,bpa,participa_aliancas_mercadologicas,rastreamento_sisbov,tot1m_chuva,med1m_formituinst,med1m_ndvi,med1m_prer_soja,med1m_prer_milho,med1m_prer_boi,tot3m_chuva,med3m_formituinst,med3m_ndvi,med3m_prer_soja,med3m_prer_milho,med3m_prer_boi,tot6m_chuva,med6m_formituinst,med6m_ndvi,med6m_prer_soja,med6m_prer_milho,med6m_prer_boi,tot12m_chuva,med12m_formituinst,med12m_ndvi,med12m_prer_soja,med12m_prer_milho,med12m_prer_boi,cnt1m_cl_ituinst,cnt3m_cl_ituinst,cnt6m_cl_ituinst,cnt12m_cl_ituinst,ano,categoria,classificacao,longitude,latitude,mes
0,1,0,0,1,3,345.4,26.0,0,0,0,0,0,0,1,1,1,0,1,1,197.61,72.22,0.64,74.68,36.31,145.47,601.79,72.21,0.63,75.32,36.04,147.1,1190.49,72.21,0.63,75.32,36.04,147.1,2012.51,74.54,0.63,75.32,36.04,147.1,0.6129,0.5882,0.5882,0.5882,2017,1,1,-54.36319,-19.92092,2
1,1,0,0,1,3,268.8,26.0,0,0,0,0,0,0,1,1,1,0,1,1,197.61,72.22,0.64,74.68,36.31,145.47,601.79,72.21,0.63,75.32,36.04,147.1,1190.49,72.21,0.63,75.32,36.04,147.1,2012.51,74.54,0.63,75.32,36.04,147.1,0.6129,0.5882,0.5882,0.5882,2017,1,1,-54.36319,-19.92092,2
2,1,0,0,1,3,288.4,26.0,0,0,0,0,0,0,1,1,1,0,1,1,197.61,72.22,0.64,74.68,36.31,145.47,601.79,72.21,0.63,75.32,36.04,147.1,1190.49,72.21,0.63,75.32,36.04,147.1,2012.51,74.54,0.63,75.32,36.04,147.1,0.6129,0.5882,0.5882,0.5882,2017,1,1,-54.36319,-19.92092,2
3,1,0,0,1,3,357.2,26.0,0,0,0,0,0,0,1,1,1,0,1,1,197.61,72.22,0.64,74.68,36.31,145.47,601.79,72.21,0.63,75.32,36.04,147.1,1190.49,72.21,0.63,75.32,36.04,147.1,2012.51,74.54,0.63,75.32,36.04,147.1,0.6129,0.5882,0.5882,0.5882,2017,1,1,-54.36319,-19.92092,2
4,1,0,0,1,3,267.6,26.0,0,0,0,0,0,0,1,1,1,0,1,1,197.61,72.22,0.64,74.68,36.31,145.47,601.79,72.21,0.63,75.32,36.04,147.1,1190.49,72.21,0.63,75.32,36.04,147.1,2012.51,74.54,0.63,75.32,36.04,147.1,0.6129,0.5882,0.5882,0.5882,2017,1,1,-54.36319,-19.92092,2


## Criando conjunto de treino e de teste

In [20]:
# Target: the 'peso' column.
y = df['peso']

# Features: every remaining column except 'Unnamed: 0'. That column holds
# 0, 1, 2, ... in df.head() and looks like a leftover row index from a
# previous export, not a real predictor -- keeping it lets the model key on
# row order. errors='ignore' keeps this cell working if the CSV is later
# loaded without that column (e.g. with index_col=0).
X = df.drop(columns=['peso', 'Unnamed: 0'], errors='ignore')

In [21]:
# Seed passed as random_state to train_test_split so the partition is reproducible.
SEED = 43

In [22]:
# Hold out 30% of the rows for testing; SEED fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=SEED,
)

In [23]:
# Show the (rows, columns) dimensions of the training feature matrix.
X_train.shape

(672657, 52)

## Treinando o Modelo

In [24]:
# Fit on the training split only. Fitting on the full (X, y) would expose the
# model to the held-out rows, so every metric later computed on X_test would
# be optimistically biased.
# n_jobs=-1 builds the trees in parallel on all available cores; the recorded
# run of this cell ended in a KeyboardInterrupt, suggesting it was too slow.
model = RandomForestRegressor(n_estimators=100, random_state=0, n_jobs=-1)
model.fit(X_train, y_train)

KeyboardInterrupt: 

## Previsões nos dados de treino e de teste

In [None]:
# Predictions for the held-out rows and for the training rows.
y_predicted = model.predict(X_test)
y_predicted_train = model.predict(X_train)

## Analisando o Modelo

In [None]:
# Model score on the held-out split (for scikit-learn regressors, score() is R^2).
print(model.score(X=X_test, y=y_test))

In [None]:
# Root mean squared error on the held-out split.
print(np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_predicted)))

In [None]:
# Error metrics on the held-out split: MAE, MSE and its square root.
print(
    'Mean Absolute Error (MAE):',
    metrics.mean_absolute_error(y_true=y_test, y_pred=y_predicted),
)
print(
    'Mean Squared Error (MSE):',
    metrics.mean_squared_error(y_true=y_test, y_pred=y_predicted),
)
print(
    'Root Mean Squared Error (RMSEp):',
    np.sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=y_predicted)),
)

## Validação do Modelo

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate

In [None]:
# NOTE: this rebinds SEED (43 in the train/test split cell); re-running the
# split cell after this one would produce a different partition.
SEED = 1580
np.random.seed(SEED)

# Pass the seed to KFold explicitly so the fold assignment does not depend on
# whatever has consumed the global NumPy RNG between seeding and splitting.
cv = KFold(n_splits=10, shuffle=True, random_state=SEED)

# 10-fold cross-validation over the full (X, y); only test scores are kept.
results = cross_validate(model, X, y, cv=cv, return_train_score=False)

In [None]:
def imprime_resulado(results):
    """Print the mean cross-validation score and a +/- 2 std interval, both in percent.

    The original printed the mean multiplied by 100 but the interval as a raw
    fraction, so the two lines were on different scales; both are now in
    percent. The label was also "Accuracy", which does not apply here: the
    scores passed in this notebook come from cross_validate on a
    RandomForestRegressor with default scoring, i.e. R^2.

    Args:
        results: Mapping with a 'test_score' entry holding the per-fold scores
            as a NumPy array (the structure returned by cross_validate).

    Returns:
        None. The summary is written to stdout.
    """
    scores = results['test_score']
    media = scores.mean()
    desvio_padrao = scores.std()
    limite_inferior = (media - 2 * desvio_padrao) * 100
    limite_superior = (media + 2 * desvio_padrao) * 100
    print('R² médio %.2f' % (media * 100))
    print('R² intervalo [ %.2f, %.2f ]' % (limite_inferior, limite_superior))
    

In [None]:
# Print the summary of the cross-validation scores stored in `results`.
imprime_resulado(results)