In [4]:
# Import the needed libraries

import pandas as pd
import pyrolite

In [3]:
# Load the rock table from its CSV file into a pandas.DataFrame,
# then preview the first five records
df_rocas = pd.read_csv(filepath_or_buffer="files/rocas.csv")

df_rocas.head(n=5)

Unnamed: 0,Nombre,SiO2,Al2O3,FeOT,CaO,MgO,Na2O,K2O,MnO,TiO2
0,Peridotita,45.16,1.56,8.79,0.97,44.47,0.1,0.02,0.13,0.1
1,Peridotita,45.97,2.94,8.9,2.83,39.89,0.17,0.04,0.13,0.19
2,Peridotita,46.91,3.62,8.23,2.73,39.55,0.14,0.01,0.13,0.11
3,Peridotita,44.96,2.01,9.04,1.1,43.39,0.1,0.02,0.13,0.11
4,Peridotita,45.24,0.73,7.92,0.42,46.79,0.01,0.01,0.11,0.03


In [5]:
# Report the table dimensions: shape[0] is the number of rows, shape[1] the number of columns
print(f"The data set has {df_rocas.shape[0]} rows and {df_rocas.shape[1]} columns")

The data set has 10537 rows and 10 columns


In [14]:
# Summarise the rock types: how many distinct labels exist in "Nombre"
# and how many rows carry each label.
print(f"There are {df_rocas['Nombre'].nunique()} different rock types within the table and these are:\n{df_rocas['Nombre'].unique()}")

print("The quantities of each type are:\n")

# GroupBy.size() counts the rows of each group directly. Naming the index
# ("Tipo de roca") and the values ("Cantidad") explicitly avoids aggregating
# the grouping key onto itself, which would leave "Nombre" as both the index
# name and a column name and require a reset_index(names=...) workaround.
df_cantidad_rocas = (
    df_rocas.groupby("Nombre")
    .size()
    .rename_axis("Tipo de roca")
    .reset_index(name="Cantidad")
)

df_cantidad_rocas

There are 3 different rock types within the table and these are:
['Peridotita' 'Granodiorita' 'Andesita']
The quantities of each type are:



Unnamed: 0,Tipo de roca,Cantidad
0,Andesita,5973
1,Granodiorita,2993
2,Peridotita,1571


We can see that Andesita is the most common rock type in the table with 5973 records.

Now that we know how many rock types are in the table, we can compute the mean composition of each one.

In [15]:
# Inspect the column labels available in df_rocas
df_rocas.columns

Index(['Nombre', 'SiO2', 'Al2O3', 'FeOT', 'CaO', 'MgO', 'Na2O', 'K2O', 'MnO',
       'TiO2'],
      dtype='object')

In [16]:
# Oxide columns whose per-rock-type average is reported
oxide_columns = ["SiO2", "Al2O3", "FeOT", "CaO", "MgO", "Na2O", "K2O", "MnO", "TiO2"]

# Every oxide column gets the same "mean" aggregation
mean_per_oxide = dict.fromkeys(oxide_columns, "mean")

# One row per rock type ("Nombre" kept as a regular column), one mean per oxide
df_mean_composition = df_rocas.groupby("Nombre", as_index=False).agg(mean_per_oxide)

print("The mean composition of each rock type is:\n")

df_mean_composition

The mean composition of each rock type is:



Unnamed: 0,Nombre,SiO2,Al2O3,FeOT,CaO,MgO,Na2O,K2O,MnO,TiO2
0,Andesita,57.270542,16.658308,7.139491,6.864083,3.82686,3.493394,1.715592,0.143708,0.90854
1,Granodiorita,66.478101,15.494279,3.736919,3.448311,1.618093,3.725909,2.983752,0.078362,0.517362
2,Peridotita,43.415374,2.630939,8.564758,2.391172,39.636627,0.226822,0.117431,0.138131,0.186882


Now we can obtain the range of values (minimum and maximum) of the major components in the table for each rock type.

In [18]:
# Oxide columns reported as "major components" in the range listing
major_components = ["SiO2", "Al2O3", "CaO", "MgO", "Na2O", "K2O"]

# Rock types are visited in their order of first appearance in the table
for rock_type in pd.unique(df_rocas["Nombre"]):

    # Keep only the rows labelled with the current rock type, as an independent copy
    belongs_to_type = df_rocas["Nombre"].eq(rock_type)
    df_type = df_rocas.loc[belongs_to_type].copy()

    print(f"For {rock_type}, these are the ranges for each major component:\n")

    # One "component: minimum - maximum" line per major component
    for component in major_components:
        print(f"{component}: {df_type[component].min()} - {df_type[component].max()}")

    # Visual separator between rock types
    print("-------------------------------------------------------")
    

For Peridotita, these are the ranges for each major component:

SiO2: 23.49 - 58.16
Al2O3: 0.01 - 25.4
CaO: 0.0 - 25.265
MgO: 0.71 - 53.1
Na2O: 0.0 - 7.3
K2O: 0.0 - 5.1
-------------------------------------------------------
For Granodiorita, these are the ranges for each major component:

SiO2: 35.3 - 79.9
Al2O3: 5.4 - 22.71
CaO: 0.0 - 22.91
MgO: 0.0 - 16.0
Na2O: 0.04 - 7.9
K2O: 0.01 - 8.13
-------------------------------------------------------
For Andesita, these are the ranges for each major component:

SiO2: 40.448 - 84.91
Al2O3: 2.53 - 25.47
CaO: 0.07 - 19.0
MgO: 0.01 - 17.6
Na2O: 0.0 - 10.05
K2O: 0.0 - 9.46
-------------------------------------------------------
