In [1]:
import pandas as pd
import numpy as np
from os.path import join, dirname
from os import getcwd

In [2]:
path = dirname(getcwd())
path = join(path, "data")

In [3]:
data = pd.read_csv(join(path, "completion_short.csv")).drop('Unnamed: 0', axis=1)

In [4]:
X = data[["easting", "northing"]]
y = data.drop(["easting", "northing"], axis=1)

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
train_data = [y_train[x] for x in y_train.columns]
test_data = [y_test[x] for x in y_test.columns]

In [7]:
def create_forest():
    return RandomForestRegressor()
def create_bagging():
    return BaggingRegressor()

def create_boosting():
    return GradientBoostingRegressor()

def create_perceptron():
    return MLPRegressor()

In [8]:
models = [create_forest, create_bagging, create_boosting, create_perceptron]

In [9]:
data_sets = []

In [10]:
def print_scores(model, train_x, train_y, test_x, test_y, output=True):
    train_score = model.score(train_x, train_y)
    test_score = model.score(test_x, test_y)
    if output:
        print("Training: {}".format(train_score))
        print("Testing: {}".format(test_score))
    return train_score, test_score

In [11]:
good_models = []
for i in range(len(train_data)):
    max_score = -99999999
    print("---------")
    print(y.columns[i], "\n")

    for model in models:
        current_model = model().fit(X_train, train_data[i])
        print(current_model)
        (train, test) = print_scores(current_model, X_train, train_data[i], X_test, test_data[i])
        if test > max_score:
            best_model = current_model
            max_score = test
        print("\n")
    good_models.append(best_model)
    print("---------")

---------
porosity 

RandomForestRegressor()
Training: 0.9489192219266385
Testing: 0.6221910555865455


BaggingRegressor()
Training: 0.9367188784034076
Testing: 0.6035065948322278


GradientBoostingRegressor()
Training: 0.7551861246777627
Testing: 0.7004084223097987






MLPRegressor()
Training: -705848.7115029792
Testing: -694352.6681932248


---------
---------
permeability 

RandomForestRegressor()
Training: 0.945286402599563
Testing: 0.5559854060051777


BaggingRegressor()
Training: 0.9241332567520356
Testing: 0.5275375158224687


GradientBoostingRegressor()
Training: 0.731453438863709
Testing: 0.6436036488974213






MLPRegressor()
Training: -1533786.786431807
Testing: -1521687.6237978388


---------
---------
Poisson's ratio 

RandomForestRegressor()
Training: 0.999838024725761
Testing: 0.9980435390762231


BaggingRegressor()
Training: 0.9997430173257394
Testing: 0.9978135277091353


GradientBoostingRegressor()
Training: 0.9581262703513427
Testing: 0.9503956318167007






MLPRegressor()
Training: -80569.54380969769
Testing: -71838.09269722679


---------
---------
Young's Modulus 

RandomForestRegressor()
Training: 0.9998413809776404
Testing: 0.9994601929006112


BaggingRegressor()
Training: 0.9997103126042143
Testing: 0.9992684850295878


GradientBoostingRegressor()
Training: 0.9637357975496214
Testing: 0.9612351862867387






MLPRegressor()
Training: -8.938721332967697
Testing: -9.012204803405035


---------
---------
water saturation 

RandomForestRegressor()
Training: 0.9998549954698919
Testing: 0.9994763124461191


BaggingRegressor()
Training: 0.9997684056223696
Testing: 0.9993155950830268


GradientBoostingRegressor()
Training: 0.9751798151577556
Testing: 0.9699959527897701






MLPRegressor()
Training: -12341.967136169254
Testing: -10549.610299036874


---------
---------
oil saturation 

RandomForestRegressor()
Training: 0.9998584553225245
Testing: 0.999473712573045


BaggingRegressor()
Training: 0.9997641842038887
Testing: 0.9990657491826738


GradientBoostingRegressor()
Training: 0.9751798151577556
Testing: 0.9699959527897702






MLPRegressor()
Training: -17721.16036904177
Testing: -18247.720627271963


---------
---------
proppant weight (lbs) 

RandomForestRegressor()
Training: 0.8291927412657754
Testing: -0.254447876696813


BaggingRegressor()
Training: 0.7803300351226953
Testing: -0.3237151828203648


GradientBoostingRegressor()
Training: 0.11372789949066253
Testing: -0.005266183881839748


MLPRegressor()
Training: -0.4883122158848805
Testing: -0.4769346965556669


---------
---------
pump rate (cubic feet/min) 

RandomForestRegressor()
Training: 0.8240576414056061
Testing: -0.26170230385920834


BaggingRegressor()
Training: 0.7737960294832823
Testing: -0.324042719088818


GradientBoostingRegressor()
Training: 0.11124123393215968
Testing: -0.01487054738526683


MLPRegressor()
Training: -3.7432680296019853
Testing: -4.030900373908632


---------




# for i in range(len(train_data)):
    current_model = good_models[i]
    train, test = print_scores(current_model, X_train, train_data[i], X_test, test_data[i], output=False)
    print("Feature: {}, model: {}".format(train_data[i].name, current_model))
    print("train score: {}, test score: {}".format(train, test), end="\n\n")