# Well Log Analyzer

## 1.0 - SAM - Exploratory Data Analysis

This notebook explores the relevant features of the well-log dataset.  
*Dataset:* 15/9-F-11 B  
*Provider:* Statoil Petroleum AS

### Data ingestion

In [1]:
#Importing required libraries
import pandas as pd
import matplotlib as mpl
import numpy as np

In [2]:
#Excel file uploading
# Load the raw workbook for well 15/9-F-11 B into a single DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
relative_path_to_excel_file = '../data/raw/15_9-F-11 B.xlsx'
# NOTE(review): read_excel is called with its defaults, so the sheet's first row
# presumably becomes the column header and the positional (iloc) offsets used in the
# extraction cell below are counted from the row after it -- confirm against the file.
complete_excel_file = pd.read_excel(relative_path_to_excel_file)

In [3]:
#Extracting relevant data from file
# The raw sheet is split into a leading metadata block and the log table itself.
# All offsets below are positional (iloc) rows of `complete_excel_file`.
METADATA_ROW_COUNT = 63  # rows 0..62 hold the metadata block
MEASUREMENT_ROW = 65     # row holding the measurement names
UNITS_ROW = 66           # row holding the unit of each measurement
FIRST_DATA_ROW = 67      # first row of actual samples

# .copy() makes both frames independent of the raw frame, so relabelling or later
# assignments never act on (or warn about) a slice of `complete_excel_file`.
metadata = complete_excel_file.iloc[:METADATA_ROW_COUNT, :].copy()

# Column labels are built as "<measurement> (<units>)", e.g. "GR (API)".
data_headers = pd.Series([
    "{} ({})".format(measurement, units)
    for measurement, units in zip(
        complete_excel_file.iloc[MEASUREMENT_ROW],
        complete_excel_file.iloc[UNITS_ROW],
    )
])

data = complete_excel_file.iloc[FIRST_DATA_ROW:, :].copy()
data.columns = data_headers
# Renumber the rows 0..n-1 so label-based lookups (data.loc[5, ...]) match row positions.
data = data.reset_index(drop=True)



### Data Overview

In [4]:
#Verify dimensions of metadata
# Shows (rows, columns) of the metadata block sliced from the raw sheet.
metadata.shape

(63, 20)

In [5]:
#Verify dimensions of data
# Shows (rows, columns) of the log table; the column count should match the metadata block's.
data.shape

(45818, 20)

In [6]:
#Data overview
# Display the first 10 rows of the log table with the "<measurement> (<units>)" column labels.
data.head(10)

Unnamed: 0,#Depth (#M),ABDCQF01 (g/cm3),ABDCQF02 (g/cm3),ABDCQF03 (g/cm3),ABDCQF04 (g/cm3),BS (inches),CALI (inches),DRHO (g/cm3),GR (API),NPHI (v/v),PEF (b/elec),RACEHM (ohm.m),RACELM (ohm.m),RD (ohm.m),RHOB (g/cm3),RM (ohm.m),ROP (m/hr),RPCEHM (ohm.m),RPCELM (ohm.m),RT (ohm.m)
0,188.5,-999.25,-999.25,-999.25,-999.25,36,-999.25,-999.25,4.3,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25
1,188.6,-999.25,-999.25,-999.25,-999.25,36,-999.25,-999.25,3.801,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25
2,188.7,-999.25,-999.25,-999.25,-999.25,36,-999.25,-999.25,4.301,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25
3,188.8,-999.25,-999.25,-999.25,-999.25,36,-999.25,-999.25,4.6,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25
4,188.9,-999.25,-999.25,-999.25,-999.25,36,-999.25,-999.25,4.599,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25
5,189.0,-999.25,-999.25,-999.25,-999.25,36,-999.25,-999.25,4.299,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25
6,189.1,-999.25,-999.25,-999.25,-999.25,36,-999.25,-999.25,3.801,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25
7,189.2,-999.25,-999.25,-999.25,-999.25,36,-999.25,-999.25,4.101,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25
8,189.3,-999.25,-999.25,-999.25,-999.25,36,-999.25,-999.25,4.302,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25
9,189.4,-999.25,-999.25,-999.25,-999.25,36,-999.25,-999.25,5.201,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25


In [7]:
#Data description
# The columns are object dtype at this point, so describe() reports
# count/unique/top/freq rather than numeric statistics (mean, std, quartiles).
# The most frequent value in most columns is -999.25, the file's missing-value marker.
data.describe()

Unnamed: 0,#Depth (#M),ABDCQF01 (g/cm3),ABDCQF02 (g/cm3),ABDCQF03 (g/cm3),ABDCQF04 (g/cm3),BS (inches),CALI (inches),DRHO (g/cm3),GR (API),NPHI (v/v),PEF (b/elec),RACEHM (ohm.m),RACELM (ohm.m),RD (ohm.m),RHOB (g/cm3),RM (ohm.m),ROP (m/hr),RPCEHM (ohm.m),RPCELM (ohm.m),RT (ohm.m)
count,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0,45818.0
unique,45818.0,953.0,869.0,960.0,866.0,10.0,401.0,178.0,36145.0,403.0,4476.0,11794.0,10674.0,11108.0,840.0,12644.0,18407.0,12307.0,10765.0,12644.0
top,3888.1,-999.25,-999.25,-999.25,-999.25,8.5,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25,-999.25
freq,1.0,30623.0,30623.0,30623.0,30623.0,15843.0,30275.0,30280.0,147.0,30352.0,30326.0,24043.0,24043.0,961.0,30326.0,744.0,407.0,24043.0,24260.0,744.0


In [8]:
#Check for existing data types
# Lists the dtype pandas assigned to each column; `object` means the column was not
# inferred as a numeric dtype, even if the individual values are numbers.
data.dtypes

#Depth (#M)         object
ABDCQF01 (g/cm3)    object
ABDCQF02 (g/cm3)    object
ABDCQF03 (g/cm3)    object
ABDCQF04 (g/cm3)    object
BS (inches)         object
CALI (inches)       object
DRHO (g/cm3)        object
GR (API)            object
NPHI (v/v)          object
PEF (b/elec)        object
RACEHM (ohm.m)      object
RACELM (ohm.m)      object
RD (ohm.m)          object
RHOB (g/cm3)        object
RM (ohm.m)          object
ROP (m/hr)          object
RPCEHM (ohm.m)      object
RPCELM (ohm.m)      object
RT (ohm.m)          object
dtype: object

### Data Preparation

In [12]:
#Spot-check the Python type of one individual value
# Looks at a fixed cell (row label 5, column 'GR (API)'), not a random sample, to see
# what type the values inside an object-dtype column actually have.
type(data.loc[5, 'GR (API)'])

float

In [13]:
#Missing readings are encoded as -999.25 in the source file; state them as NaN in the DataFrame.
MISSING_VALUE_SENTINEL = -999.25

# DataFrame.replace returns a NEW frame, so the result has to be bound back to `data`;
# otherwise every later cell still sees the -999.25 sentinel.
# infer_objects() then lets all-numeric object columns become float columns, independent
# of whether the installed pandas version downcasts inside replace().
# The cell is idempotent: re-running it on the already cleaned frame changes nothing.
data = data.replace(MISSING_VALUE_SENTINEL, np.nan).infer_objects()

# Show a preview only, instead of dumping the whole frame.
data.head(10)

Unnamed: 0,#Depth (#M),ABDCQF01 (g/cm3),ABDCQF02 (g/cm3),ABDCQF03 (g/cm3),ABDCQF04 (g/cm3),BS (inches),CALI (inches),DRHO (g/cm3),GR (API),NPHI (v/v),PEF (b/elec),RACEHM (ohm.m),RACELM (ohm.m),RD (ohm.m),RHOB (g/cm3),RM (ohm.m),ROP (m/hr),RPCEHM (ohm.m),RPCELM (ohm.m),RT (ohm.m)
0,188.5,,,,,36.0,,,4.300,,,,,,,,,,,
1,188.6,,,,,36.0,,,3.801,,,,,,,,,,,
2,188.7,,,,,36.0,,,4.301,,,,,,,,,,,
3,188.8,,,,,36.0,,,4.600,,,,,,,,,,,
4,188.9,,,,,36.0,,,4.599,,,,,,,,,,,
5,189.0,,,,,36.0,,,4.299,,,,,,,,,,,
6,189.1,,,,,36.0,,,3.801,,,,,,,,,,,
7,189.2,,,,,36.0,,,4.101,,,,,,,,,,,
8,189.3,,,,,36.0,,,4.302,,,,,,,,,,,
9,189.4,,,,,36.0,,,5.201,,,,,,,,,,,
