In [3]:
from scipy.stats import skew, kurtosis
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from correlation_matrix import plot_correlation_matrix

In [5]:
def calculate_stats(df_place):
    """Summarise the 'Porosity (%)' column of a DataFrame.

    Missing porosity values are dropped before any statistic is computed.
    Without that step the results are inconsistent: NumPy hands mean/min/max
    of a pandas Series to the Series methods (which skip NaN), whereas
    np.median and scipy's skew/kurtosis propagate NaN, and len() would
    still count the missing rows.

    Parameters
    ----------
    df_place : pandas.DataFrame
        Must contain the columns 'sample_name' and 'Porosity (%)'.

    Returns
    -------
    tuple
        (count, sample_count, mean, median, min, max, skew, kurtosis).
        ``count`` is the number of non-missing porosity values and
        ``sample_count`` is the number of 'sample_name' groups among all
        rows passed in. The remaining entries are rounded to 2 decimals;
        skewness and excess (Fisher) kurtosis are computed with bias=False.
    """
    sample_count = len(df_place.groupby(by='sample_name'))

    # Drop NaN once so every statistic below describes the same values.
    porosity_values = df_place['Porosity (%)'].dropna()

    return (len(porosity_values),
            sample_count,
            round(np.mean(porosity_values), 2),
            round(np.median(porosity_values), 2),
            round(np.min(porosity_values), 2),
            round(np.max(porosity_values), 2),
            round(skew(porosity_values, bias=False), 2),
            round(kurtosis(porosity_values, bias=False, fisher=True), 2))

In [11]:
def print_stats(data, places=None):
    """Print porosity summary statistics for each requested place.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'place' column in addition to the columns that
        ``calculate_stats`` reads.
    places : iterable of str, optional
        Place names to report, in the order given. When omitted, the three
        places this function originally hard-coded are used, so existing
        calls print exactly the same report as before.

    Places that have no rows in ``data`` are skipped without any output.
    """
    if places is None:
        places = ('Cachoeira_do_Roncador', 'Lajedo_Soledade', 'Pedreira_Sal')

    # Labels in the same order as the tuple returned by calculate_stats.
    labels = ('count', 'sample_count', 'mean', 'median',
              'min', 'max', 'skew', 'kurt')

    for place in places:
        df_place = data[data['place'] == place]
        if len(df_place) == 0:
            continue

        stats = calculate_stats(df_place)

        report = [f'{place} Stats:']
        report.extend(f'  {label} {value}'
                      for label, value in zip(labels, stats))
        # The explicit trailing '\n' plus print's own newline leaves one
        # blank line between consecutive places.
        print('\n'.join(report) + '\n')

In [12]:
# Combined dataset: semicolon-separated CSV with '.' as the decimal mark.
dataset = pd.read_csv('../data/all_data.csv', sep=';', decimal='.')
print_stats(dataset)

Cachoeira_do_Roncador Stats:
  count 135
  sample_count 27
  mean 16.99
  median 18.01
  min 4.22
  max 39.8
  skew 0.59
  kurt 0.53

Lajedo_Soledade Stats:
  count 52
  sample_count 13
  mean 10.61
  median 10.07
  min 1.74
  max 17.67
  skew -0.16
  kurt -1.06

Pedreira_Sal Stats:
  count 48
  sample_count 19
  mean 1.26
  median 1.14
  min 0.6
  max 2.43
  skew 0.86
  kurt -0.26



In [13]:
# exp1 (effective porosity) file: semicolon-separated, '.' decimal mark.
dataset = pd.read_csv('../data/exp1_effective_porosity.csv', sep=';', decimal='.')
print_stats(dataset)

Cachoeira_do_Roncador Stats:
  count 94
  sample_count 14
  mean 15.64
  median 18.47
  min 4.22
  max 35.87
  skew 0.36
  kurt 0.5

Pedreira_Sal Stats:
  count 48
  sample_count 19
  mean 1.26
  median 1.14
  min 0.6
  max 2.43
  skew 0.86
  kurt -0.26



In [14]:
# exp2 (total porosity) file: semicolon-separated, '.' decimal mark.
dataset = pd.read_csv('../data/exp2_total_porosity.csv', sep=';', decimal='.')
print_stats(dataset)

Cachoeira_do_Roncador Stats:
  count 41
  sample_count 13
  mean 20.09
  median 17.53
  min 8.31
  max 39.8
  skew 0.74
  kurt -0.18

Lajedo_Soledade Stats:
  count 52
  sample_count 13
  mean 10.61
  median 10.07
  min 1.74
  max 17.67
  skew -0.16
  kurt -1.06

