In [2]:
# package imports

import pandas as pd
from pandas import DataFrame
from datetime import date, time, datetime
import numpy as np
import gc
import time
import os
import math

In [3]:
# Column names applied positionally to the line-list spreadsheet.
# 'Unnamed13' / 'Unnamed14' are placeholder names for the last two columns.
cols = [
    'line_no', 'line_name', 'UKOOA',
    'datestring', 'timestring', 'shot_num',
    'latitude_dmd', 'longitude_dmd', 'depth_m',
    'shots_per_line', 'line_length_km', 'line_bearing_deg', 'line_spacing_m',
    'Unnamed13', 'Unnamed14',
]

# Read the raw line list, replacing the sheet's own header names with `cols`.
lines = pd.read_excel(
    './OriginalData/KR16-05_MCS_Line_List_160503_0800.xlsx',
    names=cols,
)

## Formatting

In [4]:
# Drop rows 0, 1, 2, 99 and 100: per the original note these are the
# FGSP / LGSP / LSP label rows and the 'total' row, which are otherwise null.
# The index is then rebuilt so row positions run 0..n-1 again.
lines = lines.drop([0, 1, 2, 99, 100], axis=0).reset_index(drop=True)

# Forward-fill the survey-wide values down each column.
# DataFrame.ffill is the supported spelling; fillna(method='ffill') is deprecated.
lines = lines.ffill(axis=0)

# Shot numbers are needed as numbers rather than objects.
lines['shot_num'] = pd.to_numeric(lines['shot_num'])

In [5]:
# Strip the surrounding text from the shot-spacing cell and return it as a number.
def distancetidy(row):
    """Return the numeric shot spacing held in ``row['line_spacing_m']``.

    The raw cell looks like ``'Distance\\n(200m)'``; the ``'Distance\\n('``
    prefix and ``'m)'`` suffix are removed and the remainder is parsed as a
    float. Values that are not strings (for example when this cell is re-run
    after the column has already been converted) are passed through ``float``
    unchanged instead of raising ``AttributeError``.

    Raises:
        ValueError: if the remaining text cannot be parsed as a float.
    """
    s = row['line_spacing_m']
    if not isinstance(s, str):
        # Already numeric: nothing to strip.
        return float(s)
    s = s.replace('Distance\n(', '').replace('m)', '')
    return float(s)
    
# Row-wise conversion of the spacing column to floats.
lines['line_spacing_m'] = lines.apply(distancetidy, axis=1)

In [6]:
# Merge the separate date and time cells of a row into one timestamp string.
# NOTE: this function rebinds the name `datetime`, hiding the class imported
# at the top of the notebook.
def datetime(row):
    """Return ``'Y-M-D h:m:s'`` text built from a row's date and time cells.

    ``row['datestring']`` must expose ``year``/``month``/``day`` and
    ``row['timestring']`` must expose ``hour``/``minute``/``second``.
    The components are not zero-padded.
    """
    day_part = row['datestring']
    clock_part = row['timestring']
    return '{}-{}-{} {}:{}:{}'.format(
        str(day_part.year),
        str(day_part.month),
        str(day_part.day),
        str(clock_part.hour),
        str(clock_part.minute),
        str(clock_part.second),
    )

# Build the timestamp text for every row and parse it into datetime values.
lines['datetime'] = pd.to_datetime(lines.apply(datetime, axis=1))

In [7]:
# Helpers that turn the 'DD_MM.mmmmm'H' latitude/longitude strings into
# usable decimal-degree numbers.
def LatDeg(row):
    """Return the whole-degree part of ``row['latitude_dmd']`` as an int.

    The text is split on ``'_'``; the value is negated when the second piece
    ends in ``'S'``.
    """
    pieces = row['latitude_dmd'].split('_')
    is_south = pieces[1][-1] == 'S'
    degrees = int(pieces[0])
    return -degrees if is_south else degrees

def LatMin(row):
    """Return the decimal-minutes part of ``row['latitude_dmd']`` as a float.

    The piece after ``'_'`` has its final two characters removed before
    parsing.
    """
    minutes_text = row['latitude_dmd'].split('_')[1]
    return float(minutes_text[:-2])

def LongDeg(row):
    """Return the whole-degree part of ``row['longitude_dmd']`` as an int.

    The text is split on ``'_'``; the value is negated when the second piece
    ends in ``'W'``.
    """
    pieces = row['longitude_dmd'].split('_')
    is_west = pieces[1][-1] == 'W'
    degrees = int(pieces[0])
    return -degrees if is_west else degrees

def LongMin(row):
    """Return the decimal-minutes part of ``row['longitude_dmd']`` as a float.

    The piece after ``'_'`` has its final two characters removed before
    parsing.
    """
    minutes_text = row['longitude_dmd'].split('_')[1]
    return float(minutes_text[:-2])

def Lat_dd(row):
    """Return the latitude of ``row`` in signed decimal degrees.

    Reads ``row['lat_deg']`` (signed whole degrees) and ``row['lat_min']``
    (decimal minutes). The result is negative when ``lat_deg`` is negative,
    or when the row also carries the raw ``latitude_dmd`` text and that text
    ends in ``'S'``. The second check is needed for latitudes between 0 and 1
    degree south: their integer degree value is 0, which cannot hold a sign,
    so the hemisphere would otherwise be lost.
    """
    try:
        raw_text = row['latitude_dmd']
    except KeyError:
        # Row only has the split columns; fall back to the sign of lat_deg.
        raw_text = None

    southern = row['lat_deg'] < 0 or (
        isinstance(raw_text, str) and raw_text.endswith('S')
    )
    sign = -1 if southern else 1
    return sign * (abs(row['lat_deg']) + (row['lat_min'] / 60))

def Long_dd(row):
    """Return the longitude of ``row`` in signed decimal degrees.

    Reads ``row['long_deg']`` (signed whole degrees) and ``row['long_min']``
    (decimal minutes). The result is negative when ``long_deg`` is negative,
    or when the row also carries the raw ``longitude_dmd`` text and that text
    ends in ``'W'``. The second check is needed for longitudes between 0 and 1
    degree west: their integer degree value is 0, which cannot hold a sign,
    so the hemisphere would otherwise be lost.
    """
    try:
        raw_text = row['longitude_dmd']
    except KeyError:
        # Row only has the split columns; fall back to the sign of long_deg.
        raw_text = None

    western = row['long_deg'] < 0 or (
        isinstance(raw_text, str) and raw_text.endswith('W')
    )
    sign = -1 if western else 1
    return sign * (abs(row['long_deg']) + (row['long_min'] / 60))

# Split each coordinate string into whole degrees and decimal minutes ...
lines['lat_deg'] = lines.apply(LatDeg, axis=1)
lines['lat_min'] = lines.apply(LatMin, axis=1)
lines['long_deg'] = lines.apply(LongDeg, axis=1)
lines['long_min'] = lines.apply(LongMin, axis=1)
# ... then combine them; these two read the columns created just above,
# so they must run last.
lines['lat_dd'] = lines.apply(Lat_dd, axis=1)
lines['long_dd'] = lines.apply(Long_dd, axis=1)

In [8]:
# Preview only the first rows rather than rendering the whole frame.
lines.head(10)

Unnamed: 0,line_no,line_name,UKOOA,datestring,timestring,shot_num,latitude_dmd,longitude_dmd,depth_m,shots_per_line,...,line_spacing_m,Unnamed13,Unnamed14,datetime,lat_deg,lat_min,long_deg,long_min,lat_dd,long_dd
0,1.0,EWobs_0,EWobs_0.0.p190\nEWobs_0.0.p291,2016-04-03,06:58:41,881,27_15.43969'S,155_41.02666'E,4782.0,2766,...,200.0,,11249.0,2016-04-03 06:58:41,-27,15.43969,155,41.02666,-27.257328,155.683778
1,1.0,EWobs_0,EWobs_0.0.p190\nEWobs_0.0.p291,2016-04-03,07:37:05,989,27_15.57921'S,155_44.29178'E,4782.0,2766,...,200.0,,11249.0,2016-04-03 07:37:05,-27,15.57921,155,44.29178,-27.259653,155.738196
2,1.0,EWobs_0,EWobs_0.0.p190\nEWobs_0.0.p291,2016-04-06,04:17:21,12049,27_23.06125'S,161_19.48603'E,1717.0,2766,...,200.0,,11249.0,2016-04-06 04:17:21,-27,23.06125,161,19.48603,-27.384354,161.324767
3,1.0,EWobs_0,EWobs_0.0.p190\nEWobs_0.0.p291,2016-04-06,04:46:30,12129,27_23.07089'S,161_21.91162'E,1687.0,(+4),...,200.0,,11249.0,2016-04-06 04:46:30,-27,23.07089,161,21.91162,-27.384515,161.365194
4,2.0,EWobs_1,EWobs_1.0.p190\nEWobs_1.0.p291,2016-04-07,00:09:44,11241,27_22.96810'S,160_54.98404'E,1919.0,798,...,200.0,,3289.0,2016-04-07 00:09:44,-27,22.96810,160,54.98404,-27.382802,160.916401
5,2.0,EWobs_1,EWobs_1.0.p190\nEWobs_1.0.p291,2016-04-07,00:46:01,11341,27_22.98474'S,160_58.01667'E,1877.0,798,...,200.0,,3289.0,2016-04-07 00:46:01,-27,22.98474,160,58.01667,-27.383079,160.966945
6,2.0,EWobs_1,EWobs_1.0.p190\nEWobs_1.0.p291,2016-04-07,19:40:50,14529,27_22.90896'S,162_34.65093'E,1360.0,798,...,200.0,,3289.0,2016-04-07 19:40:50,-27,22.90896,162,34.65093,-27.381816,162.577516
7,2.0,EWobs_1,EWobs_1.0.p190\nEWobs_1.0.p291,2016-04-07,19:40:50,14529,27_22.90896'S,162_34.65093'E,1360.0,(+4),...,200.0,,3289.0,2016-04-07 19:40:50,-27,22.90896,162,34.65093,-27.381816,162.577516
8,3.0,EWobs_2,EWobs_2.0.p190\nEWobs_2.0.p291,2016-04-07,23:40:50,14407,27_23.35776'S,162_30.75251'E,1387.0,402,...,200.0,,1701.0,2016-04-07 23:40:50,-27,23.35776,162,30.75251,-27.389296,162.512542
9,3.0,EWobs_2,EWobs_2.0.p190\nEWobs_2.0.p291,2016-04-08,00:23:24,14311,27_22.94812'S,162_27.84082'E,1410.0,402,...,200.0,,1701.0,2016-04-08 00:23:24,-27,22.94812,162,27.84082,-27.382469,162.464014


In [11]:
# Label each row with the type of shot point it holds.
# FSP  = first shot point
# FGSP = first good shot point
# LGSP = last good shot point
# LSP  = last shot point
def shot_desc(row):
    """Return the shot-point label for ``row`` from its index label.

    ``row.name`` modulo 4 selects the label: 0 -> 'FSP', 1 -> 'FGSP',
    2 -> 'LGSP', anything else -> 'LSP'.
    """
    labels_by_remainder = {0: 'FSP', 1: 'FGSP', 2: 'LGSP'}
    return labels_by_remainder.get(row.name % 4, 'LSP')

# Tag every row with its shot-point type.
lines['shot_desc'] = lines.apply(shot_desc, axis=1)

# Keep only the first/last *good* shot rows; FSP and LSP rows are not used.
# set_index is applied to the filtered result (not in place on a slice of
# `lines`), and verify_integrity raises if a line_no appears more than once.
fgsp = lines[lines['shot_desc'] == 'FGSP'].set_index('line_no', verify_integrity=True)

# For the LGSP rows keep only the per-shot columns; the survey-wide columns
# are already present on the FGSP rows.
lgsp = lines.loc[
    lines['shot_desc'] == 'LGSP',
    ['line_no', 'datetime', 'shot_num', 'lat_dd', 'long_dd', 'depth_m', 'shot_desc'],
].copy()

# Prefix the LGSP columns with 'l_' so they do not collide with the FGSP ones.
# This must be a flat list: a nested list would turn the columns into a
# MultiIndex and the plain-string column lookups later on would fail.
new_cols = ['line_no', 'l_datetime', 'l_shot_num', 'l_lat_dd', 'l_long_dd', 'l_depth_m', 'l_shot_desc']
lgsp.columns = new_cols
lgsp = lgsp.set_index('line_no', verify_integrity=True)

# Join the LGSP shot data onto the FGSP row (both shot and survey data),
# keeping only line numbers present in both frames.
goodLines = pd.concat([fgsp, lgsp], axis=1, join='inner')

In [12]:
# Survey-wide columns, taken from the FGSP row of each line.
survey_cols = ['line_name',
               'UKOOA',
               'shots_per_line',
               'line_length_km',
               'line_bearing_deg',
               'line_spacing_m']

# Per-shot columns as they are named on the FGSP side of goodLines ...
fgsp_shot_cols = ['shot_desc',
                  'shot_num',
                  'datetime',
                  'lat_dd',
                  'long_dd',
                  'depth_m']

# ... and their LGSP counterparts, which already carry the l_ prefix.
lgsp_shot_cols = ['l_' + name for name in fgsp_shot_cols]

# Select the output columns in order, then fix the names:
#   f_ prefix for data specific to fgsp
#   l_ prefix for data specific to lgsp
# Renaming by name (rather than overwriting .columns positionally) keeps the
# mapping correct even if the selection list above is edited.
# drop_duplicates removes the duplicate entry for survey line D2A-Line2_0;
# it compares column values only, not the line_no index.
final = (
    goodLines[survey_cols + fgsp_shot_cols + lgsp_shot_cols]
    .rename(columns={name: 'f_' + name for name in fgsp_shot_cols})
    .drop_duplicates(keep='first')
)

# Final column order, kept under its original name.
final_cols = list(final.columns)
final

Unnamed: 0_level_0,line_name,UKOOA,shots_per_line,line_length_km,line_bearing_deg,line_spacing_m,f_shot_desc,f_shot_num,f_datetime,f_lat_dd,f_long_dd,f_depth_m,l_shot_desc,l_shot_num,l_datetime,l_lat_dd,l_long_dd,l_depth_m
line_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1.0,EWobs_0,EWobs_0.0.p190\nEWobs_0.0.p291,2766,553.0,91.223,200.0,FGSP,989,2016-04-03 07:37:05,-27.259653,155.738196,4782.0,LGSP,12049,2016-04-06 04:17:21,-27.384354,161.324767,1717.0
2.0,EWobs_1,EWobs_1.0.p190\nEWobs_1.0.p291,798,159.4,91.223,200.0,FGSP,11341,2016-04-07 00:46:01,-27.383079,160.966945,1877.0,LGSP,14529,2016-04-07 19:40:50,-27.381816,162.577516,1360.0
3.0,EWobs_2,EWobs_2.0.p190\nEWobs_2.0.p291,402,80.2,271.223,200.0,FGSP,14311,2016-04-08 00:23:24,-27.382469,162.464014,1410.0,LGSP,12707,2016-04-08 10:47:29,-27.384835,161.653821,1537.0
4.0,D3A-Line2_0,D3A-Line2_0.0.p190\nD3A-Line2_0.0.p291,587,29.3,120.358,50.0,FGSP,936,2016-04-08 21:30:46,-27.318728,161.530517,1463.0,LGSP,1522,2016-04-09 00:47:40,-27.447439,161.789111,1527.0
5.0,D3A-Line7_0,D3A-Line7_0.0.p190\nD3A-Line7_0.0.p291,588,29.35,209.317,50.0,FGSP,1468,2016-04-09 06:28:43,-27.280966,161.727907,1474.0,LGSP,881,2016-04-09 09:54:03,-27.512145,161.583347,1568.0
6.0,D3A-Line4_0,D3A-Line4_0.0.p190\nD3A-Line4_0.0.p291,589,29.4,120.357,50.0,FGSP,934,2016-04-09 15:30:07,-27.342302,161.51461,1500.0,LGSP,1522,2016-04-09 18:39:01,-27.471469,161.774132,1575.0
7.0,D3A-Line5_0,D3A-Line5_0.0.p190\nD3A-Line5_0.0.p291,106,5.25,210.361,50.0,FGSP,1468,2016-04-10 00:40:14,-27.270951,161.707774,1442.0,LGSP,1363,2016-04-10 01:19:40,-27.312573,161.682499,1538.0
8.0,D3A-Line5_1,D3A-Line5_1.0.p190\nD3A-Line5_1.0.p291,586,29.25,30.361,50.0,FGSP,936,2016-04-10 07:29:42,-27.478053,161.578992,1593.0,LGSP,1521,2016-04-10 10:42:15,-27.247154,161.721979,1453.0
9.0,D1B-Line08_0,D1B-Line08_0.1.p190\nD1B-Line08_0.1.p291,629,31.4,18.875,50.0,FGSP,934,2016-04-10 23:35:08,-26.546036,160.93497,1599.0,LGSP,1562,2016-04-11 03:48:18,-26.276487,161.032104,1651.0
10.0,D1B-Line09_0,D1B-Line09_0.0.p190\nD1B-Line09_0.0.p291,253,12.6,260.538,50.0,FGSP,1371,2016-04-11 07:43:59,-26.384179,161.050762,1621.0,LGSP,1119,2016-04-11 09:14:37,-26.404665,160.926562,1669.0


In [25]:
# Write the per-line summary (indexed by line_no) next to the notebook.
final.to_csv(path_or_buf='SeismicLineDetails.csv')