In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings


# NOTE(review): this silences every warning for the whole notebook, including
# deprecation notices from the plotting libraries — consider narrowing it.
warnings.filterwarnings('ignore')

# Seaborn theme applied to the figures drawn after this cell runs.
sns.set_theme(
    style="whitegrid",
    color_codes=True,
    context="notebook",
    rc={"grid.linewidth": 0.25, "grid.color": "grey", "grid.linestyle": "-"},
    font_scale=1,
    palette="husl",
)

# Matplotlib style sheet, applied after the seaborn theme.
plt.style.use('dark_background')

# Load the processed dataset (the sample) into the working DataFrame.
df = pd.read_csv('../data/processed/dataset.csv')

# coolors: https://coolors.co/palette/edae49-d1495b-00798c-30638e-003d5b
# Colour per value of the "sex" column; passed as `palette` wherever hue="sex".
colors = {"f": "#D1495B", "m": "#30638E"}
categorical_columns = ["sex", "education", "ethnicity"]
numerical_columns = ["age", "main_src", "total_src"]

# Preview the data: report its dimensions, then let the notebook render the
# first rows as a rich table instead of printing the frame as plain text.
print(df.shape)
df.head()

In [None]:
### DESCRIPTIVE ANALYSIS ###

# Categorical variables: print one frequency table per column, headed by the
# upper-cased column name.
for column_name in ("education", "ethnicity"):
    frequencies = df[column_name].value_counts()
    print(f'{column_name.upper()}:\n{frequencies}\n')

# Numerical variables: summary statistics for the columns listed in
# `numerical_columns`.
numeric_summary = df[numerical_columns].describe()
print(f'DESCRIPTION:\n{numeric_summary}')

In [None]:
# Two stacked panels of univariate KDEs split by sex (multiple="fill"):
# age on top, total_src below.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))

# Options common to both panels.
shared_kde_options = dict(
    data=df,
    hue="sex",
    multiple="fill",
    levels=10,
    palette=colors,
    alpha=0.5,
    legend=True,
)
kde_panels = [
    ("age", 'Distribución segun Edad'),
    ("total_src", 'Distribución segun Ingreso Total'),
]
for panel_ax, (column, panel_title) in zip(ax, kde_panels):
    sns.kdeplot(x=column, ax=panel_ax, **shared_kde_options)
    panel_ax.set_title(panel_title)

# Restrict the visible x-range of the total_src panel.
ax[1].set_xlim(0, 15_000)

plt.show()

In [None]:
# Two stacked panels of step histograms with a KDE overlay, split by sex:
# age on top, total_src below.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))

hist_panels = (
    (ax[0], "age", 'Distribución segun Edad'),
    (ax[1], "total_src", 'Distribución segun Ingreso Total'),
)
for panel_ax, column, panel_title in hist_panels:
    sns.histplot(
        df,
        x=column,
        hue="sex",
        palette=colors,
        element="step",
        fill=True,
        alpha=0.5,
        kde=True,
        ax=panel_ax,
    )
    panel_ax.set_title(panel_title)

# Restrict the visible x-range of the total_src panel.
ax[1].set_xlim(0, 8_000)

plt.show()

In [None]:
# Bivariate KDE contours of age (x) against total_src (y), one panel per value
# of the "sex" column, side by side.
fig, ax = plt.subplots(1, 2, figsize=(15, 10))

sex_panels = [
    ("f", 'Densidad de mujeres segun Edad e Ingreso Total'),
    ("m", 'Densidad de varones segun Edad e Ingreso Total'),
]
for panel_ax, (sex_code, panel_title) in zip(ax, sex_panels):
    sns.kdeplot(
        data=df[df["sex"] == sex_code],
        x="age",
        y="total_src",
        levels=15,
        # Reuse the notebook-wide mapping so both panels stay in sync with the
        # colours used by the other figures.
        color=colors[sex_code],
        ax=panel_ax,
    )
    panel_ax.set_title(panel_title)
    # Same y-range on both panels so they can be compared directly.
    panel_ax.set(ylim=(0, 9000))

# No plt.legend() here: neither panel has labelled artists, so the call only
# produced an empty legend; each panel's title already names the group.
plt.show()

In [None]:
# Boxen plot of age per education level, split by sex.
plt.rcParams.update({'font.size': 12})

# catplot is figure-level: it creates its own figure sized by `height` and
# `aspect`. A separate plt.figure(...) beforehand would only leave an empty,
# unused figure in the cell output, so none is created here.
g = sns.catplot(
    data=df,
    x="age",
    y="education",
    hue="sex",
    kind="boxen",
    height=6,
    aspect=1.5,
    palette=colors,
    margin_titles=True,
    sharex=False,
    legend=True,
    alpha=0.75,
)
# Keep `fig` bound to the figure that actually holds the plot.
fig = g.figure

g.set(xlabel="Edad", ylabel="")
g.set_titles(row_template="{row_name} education")

# Show x tick labels as whole numbers.
for facet_ax in g.axes.flat:
    facet_ax.xaxis.set_major_formatter('{x:.0f}')

In [None]:
# Boxen plot of total_src per education level, split by sex.
plt.rcParams.update({'font.size': 12})

# catplot is figure-level: it creates its own figure sized by `height` and
# `aspect`. A separate plt.figure(...) beforehand would only leave an empty,
# unused figure in the cell output, so none is created here.
g = sns.catplot(
    data=df,
    x="total_src",
    y="education",
    hue="sex",
    kind="boxen",
    height=6,
    aspect=1,
    palette=colors,
    margin_titles=True,
    sharex=False,
    legend=True,
)
# Keep `fig` bound to the figure that actually holds the plot.
fig = g.figure

# Restrict the visible x-range.
g.set(xlim=(0, 12000))
# The x-axis shows total_src, so label it as total income (it was previously
# labelled "Edad", carried over from the age plot).
g.set(xlabel="Ingreso Total", ylabel="")
g.set_titles(row_template="{row_name} education")

# Show x tick labels as whole numbers.
for facet_ax in g.axes.flat:
    facet_ax.xaxis.set_major_formatter('{x:.0f}')

In [None]:

# Split violin plot of total_src per ethnicity, one half per value of "sex",
# with quartile lines drawn inside and unfilled bodies.
fig = plt.figure(figsize=(15, 10))
violin_ax = fig.add_subplot(1, 1, 1)

# violinplot returns the Axes it drew on, so `g` is an Axes here.
g = sns.violinplot(
    df,
    x="ethnicity",
    y="total_src",
    hue="sex",
    palette=colors,
    split=True,
    inner="quart",
    fill=False,
    ax=violin_ax,
)
g.set_title('Ingreso Total segun Etnia y Sexo')

plt.show()

In [None]:
# Grid of bar plots: total_src by ethnicity and sex, one panel per education
# level. Bar heights use seaborn's default estimator (the mean).

# Distinct education levels; missing values are dropped because filtering on
# them would only yield an empty panel.
education_values = df['education'].dropna().unique()

# Size the grid from the data instead of assuming at most 8 levels, which
# would raise an IndexError on a ninth one.
n_cols = 2
n_rows = max(1, -(-len(education_values) // n_cols))  # ceiling division
fig, ax = plt.subplots(
    n_rows,
    n_cols,
    figsize=(20, 3.75 * n_rows),  # 15 in tall for the usual 4 rows
    squeeze=False,  # keep `ax` two-dimensional even with a single row
)

for i, education in enumerate(education_values):
    # Position of this panel, filling the grid row by row.
    row, col = divmod(i, n_cols)

    # Rows belonging to the current education level.
    data = df[df['education'] == education]

    barplot = sns.barplot(
        data=data,
        x='ethnicity',
        y='total_src',
        hue='sex',
        ax=ax[row, col],
        errorbar=None,  # no error bars; replaces the deprecated ci=None
        palette=colors,
        alpha=0.5,
        legend=False,
    )

    ax[row, col].set_title(f'Education: {education}')

# Hide any panel left without data (e.g. an odd number of levels).
for unused_ax in ax.flat[len(education_values):]:
    unused_ax.set_visible(False)

# Adjust spacing so titles and tick labels do not overlap.
fig.tight_layout()

plt.show()