In [1]:
from os import getcwd, makedirs
from os.path import dirname, join

import numpy as np
import pandas as pd

In [2]:
# Data directory: the "data" folder that sits next to (not inside) the current
# working directory.
path = join(dirname(getcwd()), "data")

In [3]:
# Read the completion dataset and discard the columns that are not used below.
# NOTE(review): 'Unnamed: 0' is presumably an index column written by an earlier
# DataFrame.to_csv call — confirm against the CSV.
dropped_columns = ['Unnamed: 0', 'pump rate (cubic feet/min)', 'proppant weight (lbs)']
data = pd.read_csv(join(path, "completion_short.csv")).drop(columns=dropped_columns)

In [4]:
# Inputs are the two coordinate columns; every remaining column is a target.
feature_columns = ["easting", "northing"]
X = data[feature_columns]
y = data.drop(columns=feature_columns)

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor

In [6]:
# Hold out 30% of the rows for testing. The seed is fixed so the split (and
# therefore every score computed from it) is the same after a kernel restart;
# without it each run shuffles the rows differently.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SPLIT_SEED
)

# One Series per target column, in the same order as y.columns, so that a
# separate single-output model can be fitted for each target.
train_data = [y_train[column] for column in y_train.columns]
test_data = [y_test[column] for column in y_test.columns]

In [7]:
def create_forest():
    """Return a new, unfitted RandomForestRegressor built with no arguments."""
    regressor = RandomForestRegressor()
    return regressor
def create_bagging():
    """Return a new, unfitted BaggingRegressor built with no arguments."""
    regressor = BaggingRegressor()
    return regressor

def create_boosting():
    """Return a new, unfitted GradientBoostingRegressor built with no arguments."""
    regressor = GradientBoostingRegressor()
    return regressor

def create_perceptron():
    """Return a new, unfitted MLPRegressor built with no arguments."""
    regressor = MLPRegressor()
    return regressor

In [8]:
# Factories for the candidate estimators. Each entry is called with no
# arguments to obtain a fresh, unfitted model. create_perceptron is defined
# but is not part of this list.
models = [
    create_forest,
    create_bagging,
    create_boosting,
]

In [9]:
# Starts out empty.
# NOTE(review): nothing in the visible cells reads or fills this list — confirm
# whether it is still needed.
data_sets = list()

In [10]:
def print_scores(model, train_x, train_y, test_x, test_y, output=True):
    """Score a fitted model on the training and testing splits.

    Parameters
    ----------
    model : object exposing ``score(x, y)``
        The model to evaluate; it is scored first on the training pair and
        then on the testing pair.
    train_x, train_y : inputs and targets passed to the first ``score`` call.
    test_x, test_y : inputs and targets passed to the second ``score`` call.
    output : bool, default True
        When true, print both scores ("Training: ..." then "Testing: ...").

    Returns
    -------
    tuple
        ``(train_score, test_score)`` exactly as returned by ``model.score``.
    """
    scores = (
        model.score(train_x, train_y),
        model.score(test_x, test_y),
    )
    if output:
        for template, value in zip(("Training: {}", "Testing: {}"), scores):
            print(template.format(value))
    return scores

In [11]:
# For every target column, fit each candidate model on the training split and
# keep the one with the highest score on the held-out split. The winners are
# collected in good_models, in the same order as train_data.
#
# NOTE(review): the held-out split is used both to choose the winner and to
# report its score, so the reported testing score is optimistically biased.
# Consider a separate validation split or cross-validation for the selection.
good_models = []
for i in range(len(train_data)):
    # Reset the winner for each target so a model fitted for a previous target
    # can never be carried over, and start from -inf so any real score wins.
    best_model = None
    max_score = float("-inf")
    print("---------")
    print(y.columns[i], "\n")

    for model in models:
        current_model = model().fit(X_train, train_data[i])
        print(current_model)
        (train, test) = print_scores(current_model, X_train, train_data[i], X_test, test_data[i])
        if test > max_score:
            best_model = current_model
            max_score = test
        print("\n")

    # Happens when `models` is empty or no candidate returned a comparable
    # (non-NaN) score; fail loudly instead of storing a wrong or missing model.
    if best_model is None:
        raise ValueError(
            "No candidate model produced a usable test score for target {!r}".format(y.columns[i])
        )
    good_models.append(best_model)
    print("---------")

---------
porosity 

RandomForestRegressor()
Training: 0.9488948731611883
Testing: 0.6303648899082275


BaggingRegressor()
Training: 0.9352850771871193
Testing: 0.612226562121716


GradientBoostingRegressor()
Training: 0.7492256250043323
Testing: 0.7153342215093992


---------
---------
permeability 

RandomForestRegressor()
Training: 0.9441769581889431
Testing: 0.5801090969958762


BaggingRegressor()
Training: 0.927697132592809
Testing: 0.5511181958305351


GradientBoostingRegressor()
Training: 0.7174068888429944
Testing: 0.6803352040755686


---------
---------
Poisson's ratio 

RandomForestRegressor()
Training: 0.9997769097754128
Testing: 0.999167657952816


BaggingRegressor()
Training: 0.9996810241950863
Testing: 0.999039183531348


GradientBoostingRegressor()
Training: 0.9623968012322783
Testing: 0.9604242069302834


---------
---------
Young's Modulus 

RandomForestRegressor()
Training: 0.9998252734699052
Testing: 0.9992613688557564


BaggingRegressor()
Training: 0.99980738367502

In [12]:
# Summarise, for each target, which model was kept and how it scores on the
# training and testing splits (print_scores is silenced so only the summary
# lines below are shown).
for idx, target_train in enumerate(train_data):
    chosen = good_models[idx]
    scores = print_scores(chosen, X_train, target_train, X_test, test_data[idx], output=False)
    print("{}, model: {}".format(target_train.name, chosen))
    print("train score: {}, test score: {}".format(*scores), end="\n\n")

porosity, model: GradientBoostingRegressor()
train score: 0.7492256250043323, test score: 0.7153342215093992

permeability, model: GradientBoostingRegressor()
train score: 0.7174068888429944, test score: 0.6803352040755686

Poisson's ratio, model: RandomForestRegressor()
train score: 0.9997769097754128, test score: 0.999167657952816

Young's Modulus, model: RandomForestRegressor()
train score: 0.9998252734699052, test score: 0.9992613688557564

water saturation, model: RandomForestRegressor()
train score: 0.9998959518749305, test score: 0.999432678158618

oil saturation, model: RandomForestRegressor()
train score: 0.9999036577729523, test score: 0.9994117735962884



In [13]:
from joblib import dump

In [14]:
# Persist the selected model for each target as "<target name>.model" inside a
# "models" directory under the current working directory.
model_path = join(getcwd(), 'models')
# dump() writes to the given path but does not create missing directories, so
# make sure the output directory exists (no-op when it is already there).
makedirs(model_path, exist_ok=True)
for i in range(len(train_data)):
    dump(good_models[i], join(model_path, train_data[i].name + '.model'))