### Import the required libraries 

In [None]:
import odbind as odb
from odbind.survey import Survey
from odbind.well import Well
import pandas as pd
import numpy as np


In [None]:
# Load IPython's autoreload extension and use mode 2, which reloads imported
# modules before each cell is executed so edits to them are picked up.
%load_ext autoreload
%autoreload 2

### Load the survey from OpendTect

In [None]:
# Open the survey named "UnderGrad_Proj" through odbind's Survey class.
sdata = Survey("UnderGrad_Proj")

### Well Data

In [None]:
# Well identifiers for this survey; each entry is later passed to
# Well(sdata, <entry>) to open the corresponding well.
wells = Well.names(sdata)

In [None]:
# Display the list of wells found in the survey.
wells

#### Well log information

In [None]:
# Open the first well in the list.
well1 = Well(sdata,wells[0])


# Display the result of log_info_dataframe() for this well
# (presumably one row of metadata per log -- confirm against odbind docs).
well1.log_info_dataframe()

In [None]:
# Display the result of track_dataframe() for the same well
# (presumably the well track as a DataFrame -- confirm against odbind docs).
well1.track_dataframe()

#### Load all the well logs into a pandas DataFrame

In [None]:
# For every well, take element [0] of what logs_dataframe() returns, tag each
# row with the well it came from, and collect the frames so they can be
# concatenated afterwards.
well_list = []

for well_name in wells:
    logs = Well(sdata, well_name).logs_dataframe()[0]
    logs['WELL'] = well_name
    well_list.append(logs)

In [None]:
# Stack the per-well frames row-wise into one table. No ignore_index is given,
# so each frame keeps its own index labels and they may repeat across wells.
well_df = pd.concat(well_list)

In [None]:
# Display the transposed frame, i.e. one row per original column.
well_df.T

#### Names of all the available columns in the df

In [None]:
# Collect the column names of the combined frame into a plain list.
hd = list(well_df.columns)

#### Select the columns to be used in the example

In [None]:
# Columns kept for this example: the 'dah' column, the PEM2010 input logs,
# and the 'WELL' label that was added while loading the logs.
sel_hd = [
    'dah',
    'PEM2010_INPUT_DTC_ISO_1',
    'PEM2010_INPUT_NP_1',
    'PEM2010_INPUT_RHOB_1',
    'PEM2010_INPUT_RT_1',
    'PEM2010_INPUT_SWE_1',
    'PEM2010_INPUT_SWT_1',
    'PEM2010_INPUT_VCL_1',
    'WELL',
]

In [None]:
# Restrict the combined frame to the selected columns.
df = well_df[sel_hd]


In [None]:
# Preview the first rows of the selection.
df.head()

In [None]:
# Preview the last rows of the selection.
df.tail()

### Exploratory Data Analysis

In [None]:
# Print column dtypes and non-null counts to see where values are missing.
df.info()

In [None]:
# Summary statistics of the selected columns.
df.describe()

In [None]:
import seaborn as sn

In [None]:
# Pairwise scatter plots of the selected logs, coloured per well.
# Rows with missing values are dropped, the index is turned into a regular
# 'index' column by reset_index(), and 'dah' is left out of the plot.
pair_data = df.dropna().reset_index().drop(columns='dah')
sn.pairplot(pair_data, hue='WELL')

#### Automatic EDA using ydata-profiling

In [None]:
from ydata_profiling.profile_report import ProfileReport
from ydata_profiling.compare_reports import compare

In [None]:
# Build a ydata-profiling report for the selection; as the last expression in
# the cell its value is what the notebook displays.
ProfileReport(df)

#### Drop null values

In [None]:
# Keep only the rows that have no missing value in any selected column.
df_nan = df.dropna()

In [None]:
# Profile of the selection before dropping rows with missing values.
pr1 = ProfileReport(df)

In [None]:
# Profile of the selection after dropping rows with missing values.
pr2 = ProfileReport(df_nan)

In [None]:
# Compare the two profiles (with and without rows containing nulls).
pr1.compare(pr2)

#### Remove outliers using PyOD

In [None]:
# Local Outlier Factor detector module from PyOD.
from pyod.models import lof
# NOTE(review): this cell originally ended with the unfinished statement
# `from pyod.utils.ut`, which is a SyntaxError. No visible cell uses a name
# from `pyod.utils`, so the fragment is dropped; restore the full import here
# if a later cell needs it.

In [None]:
# LOF detector; contamination=0.01 sets the expected share of outliers to 1%.
lf = lof.LOF(contamination=0.01)

In [None]:
# Fit the detector on the log columns only. 'WELL' holds the well name that
# was attached to every row while loading, i.e. a text label the detector
# cannot use as a numeric feature, so it is left out of the fit.
lf.fit(df_nan.drop(columns='WELL'))

In [None]:
# Independent copy, so the anomaly columns added next do not alter df_nan.
df_nan_an = df_nan.copy()

In [None]:
# Flag every fitted row: 0 = inlier, 1 = outlier.
# The labels computed during fit() are used instead of calling predict() on
# the training rows again: they come from the same training-time scores as
# lf.decision_scores_, and they do not require passing the non-numeric 'WELL'
# column back to the detector.
df_nan_an['anomaly'] = lf.labels_
df_nan_an

In [None]:
# NOTE(review): `fo2_1` and `somelogs` are not defined in any visible cell of
# this notebook -- confirm whether this cell belongs here.
# Writes a log named 'Double GR' whose values are the 'Gamma Ray' (API)
# samples multiplied by two, along the 'dah' (m) samples.
# The call was previously closed with `]` instead of `)`, a SyntaxError.
fo2_1.put_log(
    'Double GR',
    somelogs['dah']['m'].to_numpy(),
    somelogs['Gamma Ray']['API'].to_numpy() * 2,
    'API',
    'GR',
    True,
)

In [None]:
# Attach the fitted detector's decision_scores_ (its outlier scores for the
# rows it was fitted on) next to the anomaly flag, then display the frame.
df_nan_an["scores"] = lf.decision_scores_
df_nan_an

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Count how many rows carry each value of the anomaly flag.
df_nan_an["anomaly"].value_counts()

In [None]:
# Split the null-free rows by the anomaly flag stored in df_nan_an
# (0 -> inlier, 1 -> outlier). Both frames share the same index, so the
# boolean masks line up row by row.
anomaly_flag = df_nan_an['anomaly']
inlier = df_nan.loc[anomaly_flag == 0]
outlier = df_nan.loc[anomaly_flag == 1]

In [None]:
# Display the rows kept as inliers.
inlier

In [None]:
# Keep only the DTC column of the outlier rows, ordered by index.
outlier = outlier[["PEM2010_INPUT_DTC_ISO_1"]].sort_index()

In [None]:
# Line plot of the inlier DTC values, ordered by index.
inlier["PEM2010_INPUT_DTC_ISO_1"].sort_index().plot()
# Mark the outlier samples in red at their index positions.
plt.scatter(outlier.index,outlier["PEM2010_INPUT_DTC_ISO_1"],c='red' )

In [None]:
# Independent copy, so columns added from here on do not modify `inlier`.
df_inliers = inlier.copy()

#### Features and target (in this example we use SWE as the target)

In [None]:
# List the columns available for building features and target.
df_inliers.columns

In [None]:
# Natural logarithm of the RT log, stored as a new 'RT_log' column.
rt_values = df_inliers['PEM2010_INPUT_RT_1']
df_inliers['RT_log'] = np.log(rt_values)

In [None]:
# Plot the log-transformed RT values, ordered by index.
df_inliers['RT_log'].sort_index().plot()

### Prepare data for Model Building

In [None]:
# Target: the SWE log, kept as a one-column DataFrame.
# Features: every remaining column except SWT, the target itself and 'dah'.
# NOTE(review): 'WELL' (text label) and both 'PEM2010_INPUT_RT_1' and 'RT_log'
# stay in Xdata -- confirm this is intended before fitting a model.
target_column = "PEM2010_INPUT_SWE_1"
excluded_columns = ["PEM2010_INPUT_SWT_1", target_column, "dah"]
Xdata = df_inliers.drop(columns=excluded_columns)
ydata = df_inliers[[target_column]]

#### Split into training and test data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing. A fixed random_state makes the shuffle
# reproducible, so the same rows end up in each set on every run and results
# can be compared between runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    Xdata, ydata, test_size=0.25, random_state=42
)


In [None]:
# Display the training features.
xtrain