In [65]:
# Importar bibliotecas 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, ExtraTreesRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, cross_val_score

# Constantes
TESTSIZE = 0.2
RANDOMSTATE = 42

# Leitura dos dados
df = pd.read_csv('housing.csv', sep=',', decimal='.')

# Separar entre features e labels
X=df.drop(columns='MEDV')
y=df.loc[:,'MEDV']

# Separar entre dados de treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TESTSIZE, random_state=RANDOMSTATE)

# Lista de nomes e Instanciamento
grad_boost=GradientBoostingRegressor(random_state=RANDOMSTATE)
rand_forest=RandomForestRegressor(random_state=RANDOMSTATE)
extra_trees=ExtraTreesRegressor(random_state=RANDOMSTATE)

lista_model=[('Gradient Boosting', grad_boost),
             ('Random Forest', rand_forest), 
             ('Extra Trees', extra_trees)]

kfold=KFold(n_splits=10, shuffle=True, random_state=RANDOMSTATE)

In [63]:
# Treinando o Modelo e Métricas
for name, model in lista_model:
    model.fit (X_train, y_train)
    y_pred=model.predict(X_test)
    rmse=mean_squared_error(y_test, y_pred)
    print (f'{name} MSE: {rmse:.2f}')

Gradient Boosting MSE: 3359779588.84
Random Forest MSE: 3418694550.00
Extra Trees MSE: 4115725650.00


In [64]:
# Treinando o Modelo e Métricas com cross-validation
for name, model in lista_model:
    scores = cross_val_score(model, X_train, y_train, cv=kfold, scoring='neg_mean_squared_error')
    print (f'Métricas do estimador {name}')
    print (f'Média              : {-scores.mean():2f}')
    print (f'Desvio Padrão      : {scores.std():2f}')
    print ('--------')

Métricas do estimador Gradient Boosting
Média              : 4762555625.676668
Desvio Padrão      : 1218851699.439169
--------
Métricas do estimador Random Forest
Média              : 5516809559.423078
Desvio Padrão      : 1152555391.102071
--------
Métricas do estimador Extra Trees
Média              : 5528829789.000000
Desvio Padrão      : 1204553554.861484
--------


## Manipulação Dados e Correção

In [73]:
# Importando biblioteca pandas
import pandas as pd

# variavel endereço e nome do arquivo
ender_arquivo='dados_2.csv'

# leitura dos dados
df = pd.read_csv(ender_arquivo, sep=';')
df

Unnamed: 0,Feature_0,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Label
0,-0.137864,1.532235,2.196646,0.729666,0.767479,-1.226179,-1.560715,0.681743,0.979455,0.962503,0
1,-1.579388,2.229179,0.227496,0.227534,0.635968,1.609521,1.577512,-0.474178,-3.391835,-0.771772,1
2,-0.177830,0.940990,0.585904,-1.391726,-0.736770,-1.406815,-0.483805,-0.322028,-0.703056,1.325432,0
3,-0.391865,1.320738,2.015275,-1.018709,1.290644,-1.889649,-0.404848,0.363116,-0.923430,0.311110,0
4,0.716359,0.533305,1.483744,-2.165546,-0.734811,-0.640154,-2.329548,-1.085748,1.428992,0.029796,1
...,...,...,...,...,...,...,...,...,...,...,...
995,-0.849169,1.073884,-0.385673,-0.113270,0.434503,-1.334611,1.026824,1.040062,-2.035535,-0.739771,1
996,-1.005058,1.234090,-0.459657,0.158606,-1.079842,0.918915,1.224641,1.193113,-2.273007,1.021515,1
997,-0.700731,1.491741,0.472630,0.180478,-1.366858,0.018017,0.084138,-0.863494,-1.128284,-0.031203,0
998,-0.483023,0.827078,-0.671721,0.101566,-1.254161,-0.276889,0.303358,-1.315441,-0.923523,1.309444,0


In [74]:
# Importando biblioteca pandas
import pandas as pd

# variavel endereço e nome do arquivo
ender_arquivo='dados_3.csv'

# leitura dos dados
df = pd.read_csv(ender_arquivo, sep=' ')
df

Unnamed: 0,Feature_0,Feature_1,Feature_2,Feature_3,Feature_4,Label
0,1.901371,0.858262,1.584843,1.130733,-1.522889,1
1,0.600063,0.225434,0.520463,-0.074065,-0.255904,1
2,-1.832360,2.199767,-1.689085,2.345099,-0.323531,1
3,-1.839614,0.741564,-1.688487,2.199702,-0.244150,1
4,-0.939304,0.110936,-0.857044,1.014978,-0.068246,0
...,...,...,...,...,...,...
2995,-0.759921,1.353863,-0.615804,-0.825844,0.803644,0
2996,-1.187728,-0.066599,-1.088481,1.384687,-0.139106,1
2997,1.539066,0.850768,1.316687,0.196862,-0.858073,0
2998,1.552319,0.177792,1.266162,1.512109,-1.550442,1


In [96]:
# Importando biblioteca pandas
import pandas as pd

# variavel endereço e nome do arquivo
ender_arquivo='dados_4.csv'

# leitura dos dados
df = pd.read_csv(ender_arquivo, sep='\t')
df

Unnamed: 0,-0.8489047958996515,-2.1929335980140996,-1.0439204483042874,-1.0109765681430902,0.04967923444446559,1.0325366448748625,-0.12252434423634756,2.4436145259057063,-0.30100583659157576,-0.8149833399034592,3
0,-0.919321,1.456459,0.147485,-3.275885,-0.333815,1.000514,0.004069,-0.360054,-0.113266,1.690327,3
1,-1.295318,0.411843,-1.159148,-2.495045,-1.221641,0.369156,-1.533276,0.941197,0.081715,1.801694,1
2,-0.186614,-0.384290,0.441852,-0.139822,-0.581831,0.628671,-0.786553,0.703124,0.718486,1.241956,1
3,0.133624,-0.700827,-0.605545,-1.746508,-0.515908,0.001855,0.426752,1.175182,0.415367,-0.560101,1
4,-0.379289,-0.904491,-0.425612,0.117369,-1.369218,0.381671,0.379926,2.041094,-1.142475,0.195370,1
...,...,...,...,...,...,...,...,...,...,...,...
994,-1.278002,0.042174,-1.043923,-0.582927,2.087794,-0.382436,-2.192232,-1.673785,-1.408238,-0.422837,3
995,0.179333,-1.337413,1.824631,0.303217,-0.185231,1.024515,0.264696,0.954682,0.684744,0.555915,0
996,-0.625184,-0.823048,0.765353,2.195479,-0.746329,-1.012400,0.496052,0.398102,0.166193,-0.706404,0
997,-1.026824,-1.730290,0.849879,1.500112,-0.309153,0.737387,-0.409057,1.439680,-0.068949,0.326736,1


In [97]:
# Importando biblioteca pandas
import pandas as pd

# variavel endereço e nome do arquivo
ender_arquivo='dados_5.csv'
lista_cols=['Feature_0', 'Feature_1','Target']

# leitura dos dados
df = pd.read_csv(ender_arquivo, sep='\t', usecols=lista_cols)
df

Unnamed: 0,Feature_0,Feature_1,Target
0,-0.021182,0.012130,-5.194355
1,0.000074,0.019679,-3.376057
2,-0.045825,-0.033578,-0.459187
3,0.026060,-0.004648,4.352418
4,-0.036686,-0.026732,-0.668197
...,...,...,...
995,0.009579,-0.008893,0.831118
996,-0.033153,-0.050921,0.988730
997,-0.056353,-0.046244,2.941199
998,-0.032132,-0.063062,-3.273184


In [82]:
# Importando biblioteca pandas
import pandas as pd

# variavel endereço e nome do arquivo
ender_arquivo='dados_6.csv'

# leitura dos dados
df = pd.read_csv(ender_arquivo, sep='\t', header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,-1.626850,-2.675881,3.029605,-0.323830,0.190790,-2.559812,0.406867,0.202089,2.047027,-2.568138,0
1,0.124887,-1.025044,0.521990,-2.095166,0.221847,-1.693476,-0.426833,1.320412,1.138497,-0.102198,0
2,-0.541229,-2.368121,1.827388,-1.370912,1.680010,-2.452930,0.490978,1.023766,1.988620,-2.198035,0
3,-4.046177,4.052463,3.835139,-0.558254,-0.859638,1.935893,0.892909,-0.045699,-1.254822,2.212408,0
4,-2.791270,1.776555,2.142707,0.279566,1.242548,1.178050,-0.258540,1.273198,0.254972,1.165803,0
...,...,...,...,...,...,...,...,...,...,...,...
999995,1.841844,-1.081227,-0.601514,-1.041679,-1.380062,-0.929006,-0.597511,0.067266,0.017762,-2.544658,0
999996,-2.805988,1.737186,1.404542,1.806035,-0.436407,1.975983,-0.535935,0.175755,0.249403,1.371157,0
999997,-3.909082,1.589143,0.114744,4.023767,1.252311,3.577290,-1.188318,0.993558,2.153193,2.584481,0
999998,-2.581747,2.413372,2.464464,0.606348,0.679737,1.412132,0.311622,-1.438362,-1.169999,1.087373,0
