GEOROC major/minor rocks
--

This notebook contains the code that generates GEOROC major/minor rock files. 
The code relies on the code developped for DashVolcano.

So we start by loading the code from DashVolcano. For this to work, you need the notebook to be located within the DashVolcano folder (e.g. within DashVolcano.1.0).

In [1]:
import numpy as np
import os
from config_variables import *
from Georoc_functions import load_georoc

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downcast=downcast,


##########################################
#                                        #
# Basic Statistics                       #
#                                        #
##########################################
Number of GVP volcanoes:  1416
Number of GVP eruptions (confirmed):  9833
Number of volcanoes with known eruption(s):  863
Number of GVP volcanoes with major rock 1:  1349
Number of GVP volcanoes with known eruption(s) and major rock 1:  846

Number of GEOROC volcanoes:  1048
Number of GEOROC volcanoes with eruption data:  740
Number of GEOROC volcanoes with rocks:  987


In [3]:
import warnings

# Computes, for every matched volcano and for each material (WR, GL, INC),
# the major (>= 10 %) and minor (< 10 %) rock names with their percentages.
# Each row appended to all_majorrocks has the layout:
#   [GVP number, GVP name, material] + 5 x (major rock, %) + 5 x (minor rock, %)

# GVP numbers for matched names that are found in neither dfv nor dfvne.
# 'Bayah Dome' has no entry here and therefore gets an empty number.
fallback_gvp_numbers = {
    'Acoculco': '341827',
    'Los Azufres': '341824',
    'Badi': '221805',
}
# there are 4 volcanoes that are part of the matching names but not the list of GVP volcanoes;
# they are left out of the output
excluded_gvp_names = ['Los Azufres', 'Acoculco', 'Badi', 'Bayah Dome']

all_majorrocks = []

# loops over all GVP volcanoes which have been matched to GEOROC samples
for cnt, thisvolcano in enumerate(grnames, start=1):
    # warnings are useful but it makes the notebook cleaner to hide them
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        # loads the data from GEOROC
        thisdf = load_georoc(thisvolcano)

    # this is just to keep track of the progress of the code (it takes a bit of time)
    if cnt % 20 == 1:
        print(cnt)

    # in case a sample has two materials or more, the first material is chosen
    newvalues = {
        x: x.split('[')[0].strip()
        for x in thisdf['MATERIAL'].unique()
        if isinstance(x, str) and '[' in x
    }
    if newvalues:
        # the result is assigned back: replace(inplace=True) on a selected column
        # may act on a temporary copy and leave thisdf unchanged
        thisdf['MATERIAL'] = thisdf['MATERIAL'].replace(newvalues)

    # this retrieves the GVP name and number; it is done once per volcano, in a
    # separate variable, so that the loop variable is never re-mapped
    if thisvolcano in dict_Georoc_sl:
        georocname = dict_Georoc_sl[thisvolcano]
    else:
        georocname = thisvolcano
    gvpname = dict_Georoc_GVP[georocname]

    # looks for the number in dfv first, then in dfvne, then in the hard-coded fallbacks
    for gvptable in (dfv, dfvne):
        matches = gvptable[gvptable['Volcano Name'] == gvpname]['Volcano Number'].values
        if len(matches) > 0:
            gvpnumber = matches[0]
            break
    else:
        # a dictionary lookup replaces the former if/if/if-else chain, whose final
        # else reset the numbers of 'Acoculco' and 'Los Azufres' to ''
        gvpnumber = fallback_gvp_numbers.get(gvpname, '')

    # processes each material separately
    for mat in ['WR', 'GL', 'INC']:
        # na=False: samples without a material never match
        thisdftmp = thisdf[thisdf['MATERIAL'].str.contains(mat, na=False)]
        # keeps only the samples with 35 < SIO2 < 80 and FEOT > 0
        thisdftmp = thisdftmp[(thisdftmp['SIO2(WT%)'] < 80)
                              & (thisdftmp['SIO2(WT%)'] > 35)
                              & (thisdftmp['FEOT(WT%)'] > 0)]

        totalsamples = len(thisdftmp.index)
        # number of samples per rock name, computed once (most frequent first)
        rockcounts = thisdftmp['ROCK'].value_counts()

        # this stores the minor and major rocks in the right format
        minorrocks = []
        majorrocks = []
        for r, rcount in rockcounts.items():
            # skips samples for which no rock name could be computed based on TAS diagram;
            # they still count towards totalsamples
            if r == 'UNNAMED':
                continue
            # computes percentages
            rv = round(100 * (rcount / totalsamples), 1)
            if rv < 10:
                minorrocks += [r, rv]
            else:
                majorrocks += [r, rv]

        # keeps at most five (rock, percentage) pairs per list, padded with 'No Data'
        majorrocks = (majorrocks + ['No Data'] * 10)[:10]
        minorrocks = (minorrocks + ['No Data'] * 10)[:10]

        # combines the data
        if majorrocks + minorrocks != ['No Data'] * 20 and gvpname not in excluded_gvp_names:
            all_majorrocks.append([gvpnumber, gvpname, mat] + majorrocks + minorrocks)

# creates a dataframe with the data computed above
# column headers: five (name, percentage) pairs for the major rocks, then five for the minor rocks
mjr = []
mnr = []
for rank in [str(idx) for idx in range(1, 6)]:
    mjr.extend(['major rock' + rank, '% of major rock' + rank])
    mnr.extend(['minor rock' + rank, '% of minor rock' + rank])

column_names = ['Volcano Number', 'Volcano Name', 'material'] + mjr + mnr
thisdf = pd.DataFrame(all_majorrocks, columns=column_names).sort_values('Volcano Number')
# prints the first csv
thisdf.to_csv('../GeorocDataset/completeGEOROCmajorminorrocks2021.txt')

# replaces the values listed in GEOROC_rocks by those in GEOROC_rock_col,
# then replaces rock_col by rock_sorted
longrockgvp = dict(zip(rock_col, rock_sorted))
thisdf = thisdf.replace(GEOROC_rocks, GEOROC_rock_col).replace(longrockgvp)
# prints the second csv, with the renamed rocks
thisdf.to_csv('../GeorocDataset/completeGEOROCmajorminorrocks2021_GVPnames.txt')

1
21
41
61
81
101
121
141
161
181
201
221
241
261
281
301
321
341
361
381
401
421
441
461
481
501
521
541
561
581
601
621
641
661
681
701
721
741
761
781
801
821
841
861
881
901
921
941
961
981
1001
1021
1041


Example: Taranaki
--
This runs the same code as above but just for Taranaki, so we can inspect the content.

In [4]:
# lists the entries of grnames whose name contains 'TARAN'
[volcanoname for volcanoname in grnames if 'TARAN' in volcanoname]

['EGMONT (MOUNT TARANAKI)']

In [5]:
# Same computation as for the full dataset, restricted to Taranaki, with the
# raw oxide records of the GL and INC samples printed for inspection.
# Each row appended to all_majorrocks has the layout:
#   [GVP number, GVP name, material] + 5 x (major rock, %) + 5 x (minor rock, %)

# GVP numbers for matched names that are found in neither dfv nor dfvne.
# 'Bayah Dome' has no entry here and therefore gets an empty number.
fallback_gvp_numbers = {
    'Acoculco': '341827',
    'Los Azufres': '341824',
    'Badi': '221805',
}
# matched names that are not in the list of GVP volcanoes; they are left out of the output
excluded_gvp_names = ['Los Azufres', 'Acoculco', 'Badi', 'Bayah Dome']

# columns shown when printing the raw records
oxide_columns = ['SIO2(WT%)', 'TIO2(WT%)', 'AL2O3(WT%)', 'FE2O3(WT%)', 'FEO(WT%)', 'FEOT(WT%)',
                 'CAO(WT%)', 'MGO(WT%)', 'MNO(WT%)', 'K2O(WT%)', 'NA2O(WT%)', 'P2O5(WT%)']

all_majorrocks = []

for thisvolcano in ['EGMONT (MOUNT TARANAKI)']:
    thisdf = load_georoc(thisvolcano)

    # in case a sample has two materials or more, the first material is chosen
    newvalues = {
        x: x.split('[')[0].strip()
        for x in thisdf['MATERIAL'].unique()
        if isinstance(x, str) and '[' in x
    }
    if newvalues:
        # the result is assigned back: replace(inplace=True) on a selected column
        # may act on a temporary copy and leave thisdf unchanged
        thisdf['MATERIAL'] = thisdf['MATERIAL'].replace(newvalues)

    # this retrieves the GVP name and number; it is done once per volcano, in a
    # separate variable, so that the loop variable is never re-mapped
    if thisvolcano in dict_Georoc_sl:
        georocname = dict_Georoc_sl[thisvolcano]
    else:
        georocname = thisvolcano
    gvpname = dict_Georoc_GVP[georocname]

    # looks for the number in dfv first, then in dfvne, then in the hard-coded fallbacks
    for gvptable in (dfv, dfvne):
        matches = gvptable[gvptable['Volcano Name'] == gvpname]['Volcano Number'].values
        if len(matches) > 0:
            gvpnumber = matches[0]
            break
    else:
        # a dictionary lookup replaces the former if/if/if-else chain, whose final
        # else reset the numbers of 'Acoculco' and 'Los Azufres' to ''
        gvpnumber = fallback_gvp_numbers.get(gvpname, '')

    for mat in ['WR', 'GL', 'INC']:
        # na=False: samples without a material never match
        thisdftmp = thisdf[thisdf['MATERIAL'].str.contains(mat, na=False)]
        # keeps only the samples with 35 < SIO2 < 80 and FEOT > 0
        thisdftmp = thisdftmp[(thisdftmp['SIO2(WT%)'] < 80)
                              & (thisdftmp['SIO2(WT%)'] > 35)
                              & (thisdftmp['FEOT(WT%)'] > 0)]
        # this is just to see the raw records
        if mat in ['GL', 'INC']:
            print(mat)
            print(thisdftmp[oxide_columns])

        totalsamples = len(thisdftmp.index)
        # number of samples per rock name, computed once (most frequent first)
        rockcounts = thisdftmp['ROCK'].value_counts()

        # this stores the minor and major rocks in the right format
        minorrocks = []
        majorrocks = []
        for r, rcount in rockcounts.items():
            # skips the 'UNNAMED' rock; its samples still count towards totalsamples
            if r == 'UNNAMED':
                continue
            rv = round(100 * (rcount / totalsamples), 1)
            if rv < 10:
                minorrocks += [r, rv]
            else:
                majorrocks += [r, rv]

        # keeps at most five (rock, percentage) pairs per list, padded with 'No Data'
        majorrocks = (majorrocks + ['No Data'] * 10)[:10]
        minorrocks = (minorrocks + ['No Data'] * 10)[:10]

        # combines the data
        if majorrocks + minorrocks != ['No Data'] * 20 and gvpname not in excluded_gvp_names:
            all_majorrocks.append([gvpnumber, gvpname, mat] + majorrocks + minorrocks)
    
# column headers: five (name, percentage) pairs for the major rocks, then five for the minor rocks
mjr = []
mnr = []
for rank in [str(idx) for idx in range(1, 6)]:
    mjr.extend(['major rock' + rank, '% of major rock' + rank])
    mnr.extend(['minor rock' + rank, '% of minor rock' + rank])

# builds the dataframe from the rows collected above, ordered by volcano number
column_names = ['Volcano Number', 'Volcano Name', 'material'] + mjr + mnr
thisdf = pd.DataFrame(all_majorrocks, columns=column_names).sort_values('Volcano Number')
print(thisdf)

GL
      SIO2(WT%)  TIO2(WT%)  AL2O3(WT%)  FE2O3(WT%)  FEO(WT%)  FEOT(WT%)  \
3073  70.867562   0.390703   15.628131         0.0       0.0   1.662993   
3074  67.791350   0.560673   16.559872         0.0       0.0   2.523028   
3075  70.376678   0.520938   15.467842         0.0       0.0   1.923462   
3076  70.186645   0.608843   15.001497         0.0       0.0   2.515221   
3077  71.943527   0.600781   14.098328         0.0       0.0   2.843697   
3173  70.644563   0.525124   15.197694         0.0       0.0   2.285832   

      CAO(WT%)  MGO(WT%)  MNO(WT%)  K2O(WT%)  NA2O(WT%)  P2O5(WT%)  
3073  1.703066  0.380685       0.0  4.888800   4.478061        0.0  
3074  3.083700  0.911093       0.0  4.175010   4.395274        0.0  
3075  1.883390  0.641154       0.0  4.858746   4.327790        0.0  
3076  2.275676  0.748578       0.0  4.780916   3.882623        0.0  
3077  2.443176  0.640833       0.0  3.844998   3.584660        0.0  
3173  2.172570  0.607496       0.0  4.468699   4.098023  

In [6]:
# displays the first twelve columns of the dataframe built in the previous cell
thisdf[thisdf.columns[:12].tolist()]

Unnamed: 0,Volcano Number,Volcano Name,material,major rock1,% of major rock1,major rock2,% of major rock2,major rock3,% of major rock3,major rock4,% of major rock4,major rock5
0,241030,Taranaki,WR,BASALTIC TRACHYANDESITE,38.7,BASALT,17.5,TRACHYBASALT,12.4,TRACHYANDESITE,11.7,BASALTIC ANDESITE
1,241030,Taranaki,GL,RHYOLITE,83.3,TRACHYTE/TRACHYDACITE,16.7,No Data,No Data,No Data,No Data,No Data
2,241030,Taranaki,INC,TRACHYTE/TRACHYDACITE,100.0,No Data,No Data,No Data,No Data,No Data,No Data,No Data
