In [None]:
# calculate statistics for the groups:
# 1. number of faults in a group
# 2. average dip within fault group
# 3. corridor width => sqrt((x2-x1)^2 + (y2-y1)^2) between the first and last fault,
#    averaged over their top and bottom nodes
# 4. average offset between faults in a group 

In [1]:
import pandas as pd

from shapely.geometry import Point
import geopandas as gpd

import numpy as np
import os

from scipy.spatial import distance

# Switch the kernel's working directory to the data folder, then load the
# grouped-faults table; the relative CSV name resolves against the new cwd.
# NOTE(review): this is an absolute, machine-specific path, and os.chdir
# changes the working directory for every cell that runs afterwards.
data_dir = '/home/paulina/Desktop/NRCan/scripts/data/'
os.chdir(data_dir)
faults = pd.read_csv('grouped_faults_kmean17.csv')

In [2]:
# Approximate depth from the picked time using a constant velocity of 1500 m/s.
# NOTE(review): if time_s is two-way travel time, depth would be v*t/2 — confirm.
velocity_m_per_s = 1500
faults['depth'] = faults['time_s'].mul(velocity_m_per_s)

The number of faults in a group can be determined by counting the unique stick_idx values for each cluster label.

In [3]:
# One group per cluster label; each distinct stick_idx inside a group is one fault.
count_faults = faults.groupby(['label'])
count_faults.agg(
    fault_number=pd.NamedAgg(column='stick_idx', aggfunc='nunique')
)

Unnamed: 0_level_0,fault_number
label,Unnamed: 1_level_1
0,6
1,10
2,2
3,6
4,7
5,6
6,2
7,3
8,2
9,5


Average dip within fault group
We need to access info about the first and last node of each fault stick. The code below will help us do so:


In [4]:
# Re-index the node table by (label, stick_idx, node_idx), keeping only the
# coordinate columns. pivot_table aggregates with the mean (its default), so
# rows sharing the same three-part key are averaged into one.
faults_grouped = faults.pivot_table(
    values=['X_nad27', 'Y_nad27', 'depth'],
    index=['label', 'stick_idx', 'node_idx'],
    aggfunc='mean',
)

In [5]:
# Build the (label, stick_idx) keys, one per fault stick, for looping later.
idx_list = faults.pivot_table(index=['label', 'stick_idx']).index

# Sanity check: X coordinates of every node of the first fault stick.
faults_grouped.loc[idx_list[0], 'X_nad27'].tolist()


[480071.21875, 479695.46875, 479403.140625]

In [6]:
# Display the node table indexed by (label, stick_idx, node_idx).
faults_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,X_nad27,Y_nad27,depth
label,stick_idx,node_idx,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,24,0,480071.218750,6.184824e+06,1866.492026
0,24,1,479695.468750,6.184826e+06,2532.322793
0,24,2,479403.140625,6.184827e+06,2823.054602
0,41,0,488963.039307,6.184790e+06,1927.371280
0,41,1,488822.921875,6.184790e+06,2380.524880
...,...,...,...,...,...
13,30,0,521343.609375,6.141600e+06,1420.091751
13,30,1,521354.859375,6.141663e+06,2123.448293
13,30,2,521203.687500,6.140751e+06,2906.359527
14,62,0,493685.218938,6.184779e+06,1850.608947


In [7]:
# Show the first row of each cluster label (rows keep their full index).
faults_grouped.groupby(level='label').head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,X_nad27,Y_nad27,depth
label,stick_idx,node_idx,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,24,0,480071.21875,6184824.0,1866.492026
1,7,0,334872.187595,6194453.0,2351.481104
2,28,0,519449.75,6131422.0,1529.248793
3,0,0,294654.039062,6199396.0,2611.063526
4,17,0,394486.359375,6189377.0,2225.055468
5,10,0,377854.5,6193065.0,2237.054888
6,44,0,517925.320485,6149184.0,1847.380179
7,26,0,502195.0,6162429.0,1821.868045
8,32,0,519509.828125,6124561.0,1981.899578
9,21,0,443871.125,6185140.0,2052.616255


In [18]:

# Corridor width for a single fault cluster: the horizontal distance between
# the first and last fault stick of the cluster, measured once between their
# top nodes and once between their bottom nodes, then averaged.

# Cluster labels present in the table; kept under this name for the whole cell
# (the stick indices below get their own name instead of overwriting it).
label_list = faults_grouped.index.get_level_values('label').unique()
i = label_list[3]  # cluster inspected here; change the position to inspect another
group_df = faults_grouped.loc[faults_grouped.index.get_level_values('label') == i]

# Fault sticks that belong to this cluster, in the order they appear in the index.
# NOTE(review): "first" and "last" are therefore chosen by stick_idx order, which
# is not guaranteed to follow spatial position — confirm this matches the
# intended definition of corridor width.
stick_list = group_df.index.get_level_values('stick_idx').unique()


def _stick_end_points(stick_df):
    """Return the (top, bottom) nodes of one fault stick as shapely Points.

    Parameters
    ----------
    stick_df : pandas.DataFrame
        Rows of a single fault stick with 'X_nad27' and 'Y_nad27' columns.
        The first row is taken as the top node and the last row as the
        bottom node.

    Returns
    -------
    tuple of (Point, Point)
        Map-view (X, Y) positions of the top and bottom node.
    """
    top_node = stick_df.iloc[0]
    bottom_node = stick_df.iloc[-1]
    # Scalars are passed to Point instead of 1-element arrays, so no implicit
    # array-to-scalar conversion is needed.
    return (
        Point(top_node['X_nad27'], top_node['Y_nad27']),
        Point(bottom_node['X_nad27'], bottom_node['Y_nad27']),
    )


stick_idx_values = group_df.index.get_level_values('stick_idx')
fault_first = group_df.loc[stick_idx_values == stick_list[0]]
fault_last = group_df.loc[stick_idx_values == stick_list[-1]]

top_start, bottom_start = _stick_end_points(fault_first)
top_end, bottom_end = _stick_end_points(fault_last)

width_top = top_start.distance(top_end)
width_bottom = bottom_start.distance(bottom_end)

# Coordinates are divided by 1000 below to report kilometres.
width_mean = np.mean([width_top, width_bottom])
print("Fault cluster: " + str(i))
print("Width of the fault corridor in km", width_mean/1000)


Fault cluster: 3
Width of the fault corridor in km 33.037134965467516


Calculate dip of each fault

In [19]:
# Manual spot-check: mean of two point-to-point distances computed from
# hand-typed coordinates.
# NOTE(review): this cell does not compute a dip; it presumably repeats the
# corridor-width calculation for another cluster — confirm, and derive the
# coordinates from faults_grouped instead of typing them in.
check_pair_a = (Point(334872.187595, 6.194453e+06), Point(362893.317266, 6.192993e+06))
check_pair_b = (Point(334243.839147, 6.194465e+06), Point(362574.460039, 6.193097e+06))

a = check_pair_a[0].distance(check_pair_a[1])
b = check_pair_b[0].distance(check_pair_b[1])
np.mean([a, b])

28211.384713793625

In [16]:
# Display the rows of the cluster currently held in group_df.
group_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,X_nad27,Y_nad27,depth
label,stick_idx,node_idx,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,0,0,294654.039062,6199396.0,2611.063526
3,0,1,295039.689453,6199377.0,3165.129696
3,38,0,290236.346987,6200058.0,2489.458716
3,38,1,291269.978361,6199830.0,2905.306353
3,38,2,291870.722656,6199718.0,3267.007176
3,39,0,297791.384766,6199249.0,2511.4695
3,39,1,297872.927761,6199245.0,2871.483983
3,39,2,297791.384766,6199249.0,3217.579876
3,48,0,282125.182312,6201551.0,2484.461068
3,48,1,284736.796784,6201469.0,3393.845666


Calculate average offset (in progress)

In [17]:
# # iterate through df using label + stick_idx to access
# # coords for the specific fault
# # we will use it to calculate CORRIDOR WIDTH

# for i in idx_list:
    
#     x_start_top = faults_grouped.groupby(level=['label', 'stick_idx']).head(1).loc[i,'X_nad27'].values
#     y_start_top = faults_grouped.groupby(level=['label', 'stick_idx']).head(1).loc[i,'Y_nad27'].values
#     z_start_top = faults_grouped.groupby(level=['label', 'stick_idx']).head(1).loc[i,'depth'].values

# for i in idx_list:
    
#     x_end = faults_grouped.groupby(level=['label', 'stick_idx']).tail(1).loc[i,'X_nad27'].values
#     y_end = faults_grouped.groupby(level=['label', 'stick_idx']).tail(1).loc[i,'Y_nad27'].values
#     z_end = faults_grouped.groupby(level=['label', 'stick_idx']).tail(1).loc[i,'depth'].values
