In [13]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

### Loading the data

In [2]:
# Load the raw CSV into a DataFrame; the path is relative, so the file must sit
# in the notebook's working directory.
df = pd.read_csv("BRSA_778_cast_gr_rhob_drho_hdrs_nphi_pe_dt.csv")

### Removing rows with None values

In [3]:
# Keep only the rows at positions 756 through 26278 (the slice end is exclusive).
# The bounds are hard-coded for this specific file.
# NOTE(review): this rebinds `df`, so re-running the cell slices the already
# sliced frame again — restart from the load cell before re-executing.
df = df.iloc[756:26279]

### Resetting the index of `df`

In [4]:
# drop=True discards the old row labels instead of keeping them as a new
# column, so the index restarts at 0 after the slice above.
df = df.reset_index(drop=True)

### Taking a closer look at `energy`

In [None]:
""" 
plt.figure(dpi=300)

sns.histplot(df['energy'], color='darkgreen')
plt.title("Energia - Original")
"""

In [None]:
"""
plt.figure(dpi=300)

sns.histplot(np.log10(df['energy']), color='darkgreen')
plt.title("Energia - Logaritmo Natural")
"""

In [None]:
"""
# Define o tema uma vez
sns.set_theme(style="whitegrid")

# Cria figura com 1 linha e 2 colunas de subplots
fig, axes = plt.subplots(
    nrows=1, ncols=2, 
    figsize=(10, 8),   # largura x altura em polegadas
    dpi=300
)

# Propriedades dos outliers
flier_props = dict(marker='o', alpha=0.3)

# 1º boxplot: energia no escala log2
sns.boxplot(
    y=np.log10(df['energy']),
    fill=False,
    linewidth=1,
    color='darkblue',
    flierprops=flier_props,
    ax=axes[1]           # desenha no primeiro eixo
)
axes[1].set_xlabel("BRSA_778", fontsize=12)
axes[1].set_ylabel("Log10(Energy)", fontsize=12)
axes[1].set_title("Boxplot Energy (Log10) - BRSA_778", fontsize=14)

# 2º boxplot: energia original
sns.boxplot(
    y=df['energy'],
    fill=False,
    linewidth=1,
    color='darkgreen',
    flierprops=flier_props,
    ax=axes[0]           # desenha no segundo eixo
)
axes[0].set_xlabel("BRSA_778", fontsize=12)
axes[0].set_ylabel("Energy (Original)", fontsize=12)
axes[0].set_title("Boxplot Energy - BRSA_778", fontsize=14)

# Ajusta espaços automaticamente
plt.tight_layout()

plt.savefig("boxplots/boxplot_BRSA_778_energy_comparison.png", dpi=300)

plt.show()
"""

### Plotting the boxplots

In [29]:

# Names of the df columns treated as texture properties; the summary loop
# further down iterates over this list in this order.
texture_props = [
    'contrast',
    'dissimilarity',
    'homogeneity',
    'energy',
    'correlation',
    'entropy',
]

# Disabled on purpose. Kept as a real comment rather than a trailing string
# literal: a bare string as the cell's last expression is echoed as the cell
# output.
# texture_props_df = df[texture_props]

'\ntexture_props_df = df[texture_props]\n'

In [None]:
"""
for prop in texture_props:
    sns.set_theme(style="whitegrid")

    plt.figure(dpi=300)

    # Deixando os outliers meio transparentes
    flier_props = dict(marker='o', alpha=0.3)

    sns.boxplot(
        y=texture_props_df[prop],
        fill=False,
        linewidth=1,
        color='darkgreen',
        flierprops=flier_props
    )

    prop_label = prop.capitalize()

    plt.xlabel("BRSA_778", fontsize=12)
    plt.ylabel(prop_label, fontsize=12)
    plt.title(f"Boxplot {prop_label} - BRSA_778", fontsize=14)
    plt.tight_layout()
    
    plt.savefig(f'boxplots/boxplot_BRSA_778_{prop}.png', dpi=300)
    
    plt.show()
    """


### Checking mean, median, etc. for each variable

In [27]:
# Scratch check: a column selected from the frame carries its label in `.name`;
# this previews the capitalized form used as the heading in display_info.
df['contrast'].name.capitalize()

'Contrast'

In [33]:
def display_info(prop):
    """Print summary statistics for one numeric pandas Series.

    The report lists count, mean, median, min, max, range (max - min),
    standard deviation, coefficient of variation (std / mean) and skewness.
    Every value except the count is rounded to two decimals. The heading is
    the capitalized Series name.

    Parameters
    ----------
    prop : pandas.Series
        Numeric column to summarize. Its ``name`` is used for the heading; an
        unnamed Series is reported under the heading "None".

    Returns
    -------
    None
        The report is written to stdout.
    """
    count = prop.count()
    mean = prop.mean()
    median = prop.median()
    min_ = prop.min()
    max_ = prop.max()
    # Deliberately not called `range`, which would shadow the builtin.
    value_range = max_ - min_
    std = prop.std()
    # Coefficient of variation (std relative to mean); a zero mean is not
    # special-cased here.
    cv = std / mean
    skew = prop.skew()

    # The builtin round() is used instead of the `.round()` method: a reduction
    # can come back as a plain Python float (e.g. a NaN skew when there are too
    # few values), and plain floats have no `.round` attribute.
    print(
        f"""
        {str(prop.name).capitalize()}:
        count: {count}
        mean: {round(mean, 2)}
        median: {round(median, 2)}
        min: {round(min_, 2)}
        max: {round(max_, 2)}
        range: {round(value_range, 2)}
        std: {round(std, 2)}
        cv: {round(cv, 2)}
        skew: {round(skew, 2)}
        \n
        """
    )

In [34]:
# Print the summary block for each texture property column, in list order.
for prop in texture_props:
    display_info(df[prop])


        Contrast:
        count: 25523
        mean: 213.6
        median: 152.32
        min: 3.43
        max: 848.74
        range: 845.31
        std: 160.83
        cv: 0.75
        skew: 1.02
        

        

        Dissimilarity:
        count: 25523
        mean: 9.84
        median: 8.8
        min: 0.87
        max: 22.46
        range: 21.59
        std: 4.05
        cv: 0.41
        skew: 0.49
        

        

        Homogeneity:
        count: 25523
        mean: 0.14
        median: 0.13
        min: 0.04
        max: 0.76
        range: 0.71
        std: 0.06
        cv: 0.46
        skew: 1.89
        

        

        Energy:
        count: 25523
        mean: 0.04
        median: 0.04
        min: 0.03
        max: 0.59
        range: 0.57
        std: 0.02
        cv: 0.52
        skew: 6.2
        

        

        Correlation:
        count: 25523
        mean: 0.87
        median: 0.88
        min: 0.01
        max: 1.0
        range: 0.99
        std