In [1]:
#importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go


Loading the dataset into a DataFrame

In [2]:
# Read the well-log CSV into a DataFrame, then preview its first five rows
csv_path = '../data/welldata.csv'
df = pd.read_csv(csv_path)
df.head(n=5)

Unnamed: 0,DEPT,CALI,SGR,CGR,THOR,URAN,POTA,ILD,ILM,SFLU,PEF,NPHI,RHOB
0,50.5,5.902,145.146,24.131,2.477,13.727,0.701,18.533,17.162,2000.0,3.938,0.098,2.12
1,51.0,5.895,150.785,28.006,2.884,13.927,0.811,19.029,19.305,1955.824,3.924,0.049,2.1
2,51.5,5.895,155.732,31.508,3.259,14.091,0.909,18.66,20.851,1955.826,3.861,0.049,2.084
3,52.0,5.895,154.033,30.359,3.149,14.028,0.874,17.425,21.394,1955.826,3.629,0.244,2.074
4,52.5,5.895,169.844,40.371,4.755,14.686,1.022,16.477,20.181,2000.001,3.658,3.467,2.064


Exploring the data to check its quality: whether there are any nulls and what the data types are

In [3]:
# Summarise the DataFrame: per-column non-null counts and dtypes, plus memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 840 entries, 0 to 839
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   DEPT    840 non-null    float64
 1   CALI    840 non-null    float64
 2   SGR     840 non-null    float64
 3   CGR     840 non-null    float64
 4   THOR    840 non-null    float64
 5   URAN    840 non-null    float64
 6   POTA    840 non-null    float64
 7   ILD     840 non-null    float64
 8   ILM     840 non-null    float64
 9   SFLU    840 non-null    float64
 10  PEF     840 non-null    float64
 11  NPHI    840 non-null    float64
 12  RHOB    840 non-null    float64
dtypes: float64(13)
memory usage: 85.4 KB


Creating standard line plots in Python

In [17]:
# Plot a subset of the logs (CALI, NPHI, RHOB, PEF) as line graphs against
# depth (DEPT), one stacked panel per curve with a shared x-axis.
curves = ['CALI', 'NPHI', 'RHOB', 'PEF']
fig = sp.make_subplots(rows=len(curves), cols=1, shared_xaxes=True, vertical_spacing=0.02)
for row, curve in enumerate(curves, start=1):
    fig.add_trace(go.Scatter(x=df['DEPT'], y=df[curve], mode='lines', name=curve), row=row, col=1)
    # Title each panel's y-axis so the figure can be read without the legend.
    fig.update_yaxes(title_text=curve, row=row, col=1)
# The x-axis is shared down the column, so a single title on the bottom panel is enough.
fig.update_xaxes(title_text='DEPT', row=len(curves), col=1)
fig.update_layout(height=600, width=800, title_text="Well Data")
fig.show()


Create oil industry logs using Plotly

In [11]:
# Select every log column except the depth column (DEPT).
# Dropping by name rather than slicing by position keeps the selection correct
# even if the column order changes; a missing DEPT column raises a KeyError
# here instead of silently producing the wrong set of logs.
logs = df.columns.drop('DEPT')
logs

Index(['CALI', 'SGR', 'CGR', 'THOR', 'URAN', 'POTA', 'ILD', 'ILM', 'SFLU',
       'PEF', 'NPHI', 'RHOB'],
      dtype='object')

In [12]:
# Create a single-row canvas with one subplot (track) per log.
# subplot_titles is documented as a list of str, so pass a plain list rather
# than the pandas Index: an Index has no unambiguous truth value, which can
# raise a ValueError if the library tests the argument for emptiness.
fig = sp.make_subplots(rows=1, cols=len(logs), subplot_titles=list(logs))


In [13]:
# Populate the subplot canvas created in the previous step: each log becomes a
# line trace in its own column, with the curve values on the x-axis and the
# well depth (DEPT) on the y-axis.
for col_idx, curve_name in zip(range(1, len(logs) + 1), logs):
    fig.add_trace(
        go.Scatter(x=df[curve_name], y=df['DEPT'], mode='lines', name=curve_name),
        row=1,
        col=col_idx,
    )

In [15]:
# Title the figure, hide the legend and fix the canvas height
fig.update_layout(
    height=500,
    showlegend=False,
    title_text="Wireline Log",
)
# Reverse every y-axis so that depth increases down the page
fig.update_yaxes(autorange="reversed")

# Render the finished figure
fig.show()