In [65]:
import timeit

# Wall-clock reference so the total run time of this cell can be printed at the end.
start_time = timeit.default_timer()

# Number of random parameter sets to evaluate (one FractureStress run per set).
num_cases = 10000

# One array of num_cases values per input parameter. get_random is defined
# elsewhere; presumably it draws values between the two bounds -- TODO confirm.
# The call order is kept as-is in case get_random consumes shared RNG state.
beta      = get_random(0, 90, num_cases)
TVD       = get_random(2900, 3300, num_cases)
FG        = get_random(0.014, 0.022, num_cases)
k         = get_random(0.88, 1, num_cases)
OverPress = get_random(1.4, 1.8, num_cases)
miu       = get_random(0.4, 0.8, num_cases)
P_net     = get_random(1, 5, num_cases)

# Quantities derived element-wise from the sampled inputs.
# (The original cell annotated these lines with "KPa/m"; the units cannot be
# verified from this cell -- TODO confirm.)
S2 = TVD * FG
S1 = S2 / k
P_pore = TVD * 0.0098 * OverPress

# Settings shared by every case; they are passed unchanged to FractureStress.
half_length, y_wide, x_wide, delta = 10, 5, 3, 0.01

# Evaluate each case and keep only the .area attribute of the result.
# area inherits its shape and dtype from beta.
area = np.empty_like(beta)
for i in range(num_cases):
    RE = FractureStress(
        half_length, S1[i], S2[i], beta[i], P_net[i], miu[i], P_pore[i],
        delta, x_wide, y_wide,
    )
    area[i] = RE.area

# Collect the per-case inputs and the resulting area in one table and save it.
HFNF_PSZ_area = pd.DataFrame({
    'S1': S1,
    'S2': S2,
    'beta': beta,
    'P_net': P_net,
    'miu': miu,
    'P_pore': P_pore,
    'Area': area,
})

HFNF_PSZ_area.to_csv('HFNF_PSZ_area_RESULTS.csv')

elapsed = timeit.default_timer() - start_time
print('Computing Time = %.2f sec' % elapsed)

KeyboardInterrupt: 

In [5]:
import math                                                 # basic calculations like square root
import numpy as np                                          # arrays and matrix math
import scipy.stats as st                                    # statistical methods

import seaborn as sns

import pandas.plotting as pd_plot

from sklearn import metrics                                 # measures to check our models
from sklearn.model_selection import cross_val_score         # cross validation methods
from sklearn.model_selection import train_test_split        # train and test split
from sklearn.ensemble import BaggingRegressor   
from sklearn.ensemble import RandomForestRegressor          # random forest method

In [None]:
import warnings

# NOTE: this silences every warning for the rest of the kernel session,
# not only the ones raised by this cell.
warnings.filterwarnings('ignore')

# Sweep the maximum tree depth from 1 to max_dep (inclusive) and record how the
# random forest performs on the held-out test set at each depth.
max_dep = 40
# Integer depths: scikit-learn's parameter validation expects max_depth to be
# an int (or None), so avoid the floats that np.linspace would produce.
max_depths = np.arange(1, max_dep + 1)

num_trees = 100
# Despite their names, oob_mse / oob_score hold metrics computed on the test
# split (x_test, y_test), not out-of-bag estimates. The names are kept in case
# other cells read them.
fit_forests = []; oob_mse = []; oob_score = []

for max_depth in max_depths:
    forest = RandomForestRegressor(
        max_depth=int(max_depth),
        random_state=seed,
        n_estimators=int(num_trees),   # was `num_tree`, which is not defined in this cell
        max_features=max_features,
        n_jobs=-1,
    )
    forest.fit(X=x_train, y=y_train)
    fit_forests.append(forest)

    # Predict once per model and reuse the result for both metrics.
    y_test_pred = forest.predict(x_test)
    oob_mse.append(metrics.mean_squared_error(y_test, y_test_pred))
    oob_score.append(metrics.explained_variance_score(y_test, y_test_pred))

# Left panel: test-set MSE as a function of maximum depth.
plt.subplot(121)
plt.title('Testing MSE vs. Maximum Depth')
plt.xlabel('Maximum Tree Depth')
plt.ylabel('Testing MSE')
plt.plot(max_depths, oob_mse)
plt.xlim(0, max_dep); plt.grid(linestyle=':')

# Right panel: test-set explained variance as a function of maximum depth.
# The title previously said "Out of Bag Score", but the plotted values come
# from explained_variance_score on the test split, so it now matches the data.
plt.subplot(122)
plt.title('Testing Variance Explained vs. Maximum Depth')
plt.xlabel('Maximum Tree Depth')
plt.ylabel('Testing Variance Explained')
plt.plot(max_depths, oob_score)
plt.xlim(0, max_dep); plt.grid(linestyle=':')

plt.subplots_adjust(left=0.0, bottom=0.0, right=2, top=0.8, wspace=0.22, hspace=0)
plt.savefig("RandFore_Oil_03_HP_MaxDepth.png", dpi=300, bbox_inches='tight')

In [None]:
# Fit the final random forest with the chosen hyperparameters, then produce a
# three-panel figure: predicted-vs-actual on the training data (left), feature
# importances (middle) and predicted-vs-actual on the testing data (right).
num_tree = 100
max_depth = 20

regressor = RandomForestRegressor(
    max_depth=max_depth,
    random_state=seed,
    n_estimators=num_tree,
    max_features=max_features,
    n_jobs=-1,
)
regressor.fit(X=x_train, y=y_train)

# The training and testing panels are identical apart from their data, so draw
# them in one loop. After the loop y_pred / MSE / Var_Explained / R2 hold the
# testing-data values.
for subplot_pos, data_label, x_data, y_actual in (
        (131, 'Training', x_train, y_train[PARA_3]),
        (133, 'Testing', x_test, y_test[PARA_3]),
):
    plt.subplot(subplot_pos)
    y_pred = regressor.predict(x_data)

    # Only non-default scatter options are passed. The former `verts=None`
    # argument is not accepted by current Matplotlib releases and would raise.
    plt.scatter(y_actual, y_pred, c='red', alpha=0.2, linewidths=0.3, edgecolors="black")
    plt.xlabel('Actual Oil EUR'); plt.ylabel('Estimated Oil EUR')
    plt.xlim(0, 5e5); plt.ylim(0, 5e5)
    # Zero-headed arrow used as the 1:1 reference line.
    plt.arrow(0, 0, 6e5, 6e5, width=0.02, color='black', head_length=0.0, head_width=0.0)

    MSE = metrics.mean_squared_error(y_actual, y_pred)
    Var_Explained = metrics.explained_variance_score(y_actual, y_pred)
    # Report R^2 itself: the title labels the value "R^2", and taking
    # math.sqrt of it fails with a domain error whenever R^2 is negative.
    R2 = metrics.r2_score(y_actual, y_pred)
    plt.title('Model Check with %s Data \nMSE = %.2f, VE = %.2f, R^2 = %.2f'
              % (data_label, MSE, Var_Explained, R2), fontsize=12)
    plt.ticklabel_format(style='sci', scilimits=(0, 0))

# Middle panel: feature importances, largest first.
plt.subplot(132)
importances = regressor.feature_importances_
# Assumes the model features are the leading columns of Data_Subset (the
# original hard-coded the first 18) -- TODO confirm against how x_train is built.
feature_names = Data_Subset.columns[:len(importances)]
feature_imp = pd.Series(importances, index=feature_names).sort_values(ascending=False)
sns.barplot(x=feature_imp, y=feature_imp.index)
plt.xlabel('Feature Importance Score')
plt.ylabel('Features')
plt.title("Visualizing Important Features")

plt.subplots_adjust(left=0.0, bottom=0.0, right=2.6, top=1, wspace=0.25, hspace=0)
plt.savefig("RandFore_Oil_04_Final_Result.png", dpi=300, bbox_inches='tight')