In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
{Geochemical Clustering - Notebook 2
Geochemical dataset cleaning, filtering, and enrichment.}

{INTERNAL USE ONLY}
"""

__author__ = '{Malte Schade}'
__copyright__ = 'Copyright {2022}, {Geochemical Clustering - Notebook 2}'
__version__ = '{1}.{1}.{0}'
__maintainer__ = '{Malte Schade}'
__email__ = '{contact@malteschade.com}'
__status__ = '{FINISHED}'

# other modules
import pandas as pd
import numpy as np

import plotly.express as px

# constants
# IN_PATH:  csv file the samples are read from
# OUT_PATH: suffix of the exported csv; the file name is SECTION + OUT_PATH
IN_PATH = 'chem_data.csv'
OUT_PATH = '_prep.csv'

# SECTION selects which samples are kept by the regional filter:
#   'all' -> no regional filtering
#   'in'  -> only samples inside the inner region REG
#   'out' -> only samples outside the inner region REG
SECTION = 'all'

# borders of the inner region (both ends inclusive in the filter)
REG = [
    60300, 61000,   # y_min, y_max
    42700, 43700,   # x_min, x_max
]

# settings
# show every column when a DataFrame is displayed (no column truncation)
pd.set_option('display.max_columns', None)


In [2]:
# read data csv into df (first csv column becomes the index)
df = pd.read_csv(IN_PATH, index_col=0)

# drop all rows whose category is 'PT'; every other category is kept
df = df[df['cat'] != 'PT']

# regional filter
# fail early on a misspelled SECTION instead of silently exporting the
# unfiltered data under a misleading file name
if SECTION not in ('all', 'in', 'out'):
    raise ValueError(
        "SECTION must be 'all', 'in' or 'out', got {!r}".format(SECTION))

# one row mask built on the current frame: True where the sample lies inside
# the inner region (REG = [y_min, y_max, x_min, x_max], borders inclusive).
# Rows with a missing x or y are never inside and therefore count as 'out'.
in_region = (df['y'].between(REG[0], REG[1])
             & df['x'].between(REG[2], REG[3]))

if SECTION == 'in':
    df = df[in_region]

elif SECTION == 'out':
    df = df[~in_region]

# approximate sample position
# for_dip / for_dir are converted from degrees to radians; the x offset uses
# sin(for_dir) and the y offset cos(for_dir), i.e. for_dir is treated as an
# angle measured from the +y axis towards +x.
dip = df['for_dip'] * np.pi/180
direction = df['for_dir'] * np.pi/180

# 0.5 * (dpasse + fpasse) is the mean of both values - presumably the start
# and end depth of the sampled interval along the hole (TODO confirm)
depth_sum = df['dpasse'] + df['fpasse']

# assign() returns a new frame with x1, y1, z1 appended instead of writing
# into the filtered frame in place
df = df.assign(
    x1=df['x'] + 0.5*np.cos(dip) * np.sin(direction)*depth_sum,
    y1=df['y'] + 0.5*np.cos(dip) * np.cos(direction)*depth_sum,
    z1=df['z'] + 0.5 * np.sin(dip)*depth_sum,
)

df


Unnamed: 0,key,cat,alias,site,year,type,x,y,z,for_type,for_len,for_dir,for_dip,tab,dpasse,fpasse,cao,mgo,sio2,fe2o3,al2o3,s,mno,pf,x1,y1,z1
0,TQC4825,LN,2019_B11_R4_D11,TQC,2019,SO,43263.3740,60619.6640,4.0990,BLHL,9.10,0.0,-90,ext,0.00,9.10,54.19,0.68,0.16,0.026,0.07,0.005,0.0049,,43263.374000,60619.664000,-0.451000
1,BIZ136,LN,BIZ136,BIZ,2020,SO,43404.9640,60548.0340,5.9120,CORE,97.00,290.0,-40,ext,91.00,93.00,54.25,0.40,0.18,0.031,0.07,0.005,0.0098,,43338.738139,60572.138242,-53.224460
2,TQC4385,LN,2018_B109_R3_C4,TQC,2018,SO,43249.9050,60473.6920,1.9940,BLHL,6.99,0.0,-90,ext,0.00,6.99,55.08,0.40,0.08,0.019,0.04,0.001,0.0033,,43249.905000,60473.692000,-1.501000
3,BIZ008,LN,BIZ008,BIZ,2006,SO,43167.3400,61055.1400,27.5300,CORE,200.31,290.0,-40,lci,128.75,130.83,55.44,0.29,0.07,0.090,0.03,0.002,0.0078,43.88,43073.911147,61089.145321,-55.897404
4,BIZ146,LN,BIZ146,BIZ,2020,SO,42969.3040,60467.7710,-3.4630,CORE,73.00,290.0,-40,ext,57.50,59.50,54.39,0.63,0.13,0.051,0.07,0.017,0.0083,,42927.192991,60483.098154,-41.066075
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15923,TQC4973,LN,2019_B24_R3_C2,TQC,2019,SO,43068.6540,60668.5210,4.8720,BLHL,9.87,0.0,-90,ext,0.00,9.87,54.82,0.33,0.12,0.027,0.07,0.000,0.0027,,43068.654000,60668.521000,-0.063000
15924,TQC5204,LN,2019_B46_R3_C1,TQC,2019,SO,43263.4910,60654.7010,4.5990,BLHL,9.60,0.0,-90,ext,0.00,9.60,54.31,0.86,0.24,0.040,0.11,0.007,0.0052,,43263.491000,60654.701000,-0.201000
15925,TQC0790,LN,2016_B40_Pin_2,TQC,2016,SO,43046.7383,60493.9575,8.7449,BLHL,3.74,0.0,-90,ext,0.00,3.74,55.35,0.33,0.10,0.040,0.04,0.003,0.0031,,43046.738300,60493.957500,6.874900
15926,BIZ135,LN,BIZ135,BIZ,2020,SO,43109.7270,60702.9660,-2.9270,CORE,76.00,290.0,-40,ext,18.60,21.00,54.73,0.58,0.16,0.045,0.06,0.002,0.0035,,43095.474043,60708.153652,-15.654195


In [3]:
# 3d scatter plot of the approximated sample positions (x1, y1, z1),
# colored by 'for_type'; 'key' is added to the hover tooltip
layout = {
    'title': {'text': '3D Datapoints for Specified Quarry and Selection'},
    # 'data' aspect mode scales the axes in proportion to their data ranges
    'scene': {'aspectmode': 'data'}
}

fig = px.scatter_3d(
    data_frame=df,
    x='x1',
    y='y1',
    z='z1',
    color='for_type',
    hover_data=['key'],
    opacity=0.5,
)

# small markers keep the point cloud readable; update_layout returns the
# figure, so it is rendered as the cell output
fig.update_traces(marker_size=3)
fig.update_layout(layout)


In [4]:
# export the prepared frame; the file name is the selected section followed
# by the output suffix
out_file = SECTION + OUT_PATH
df.to_csv(out_file)
