In [1]:
import pandas as pd
from shared import utils_data_preparation
from shared import utils_analysis

Preparazione dataset ed input

In [2]:
# Load the 2017 and 2020 datasets.
df_2017 = pd.read_csv("data/Financia_literacy_2017.csv")
df_2020 = pd.read_csv("data/Financia_literacy_2020.csv")

# Check that the two frames have a compatible structure.
db_check = utils_data_preparation.check_dataframe_structure(df_2017, df_2020)

# Everything below needs the transformed frames, so stop right here when the
# check fails. Otherwise a fresh kernel would hit a NameError further down and
# a warm kernel would silently reuse frames left over from an earlier run.
if not db_check:
    raise ValueError(
        "Structure check failed for the 2017 and 2020 datasets; "
        "aborting before transformation."
    )

df_2017_transformed = utils_data_preparation.transform_dataframe(df_2017)
df_2020_transformed = utils_data_preparation.transform_dataframe(df_2020)

# New "education" column, derived from "qd9" via categorize_education.
df_2017_transformed["education"] = df_2017_transformed["qd9"].apply(utils_data_preparation.categorize_education)
df_2020_transformed["education"] = df_2020_transformed["qd9"].apply(utils_data_preparation.categorize_education)

# New "generation" column, derived from "qd7" via categorize_generation.
df_2017_transformed["generation"] = df_2017_transformed["qd7"].apply(utils_data_preparation.categorize_generation)
df_2020_transformed["generation"] = df_2020_transformed["qd7"].apply(utils_data_preparation.categorize_generation)

# The two variables used for the segmentation.
var1 = "education"  # replace with the name of the first variable
var2 = "generation"

# Segment label: the string values of var1 and var2 joined by an underscore.
df_2017_transformed["segmentation"] = df_2017_transformed[var1].astype(str) + "_" + df_2017_transformed[var2].astype(str)
df_2020_transformed["segmentation"] = df_2020_transformed[var1].astype(str) + "_" + df_2020_transformed[var2].astype(str)

df_2017_transformed = utils_data_preparation.calculate_scores(df_2017_transformed)
df_2020_transformed = utils_data_preparation.calculate_scores(df_2020_transformed)

# Minimum number of 2017 rows a segment needs in order to be analysed.
MIN_SEGMENT_ROWS = 200

# Keep only the segments that reach the minimum size in the 2017 frame;
# the 2020 frame is not consulted for this filter.
segmentation_counts = df_2017_transformed["segmentation"].value_counts()
segmentation_values = segmentation_counts[segmentation_counts >= MIN_SEGMENT_ROWS].index.tolist()

PASS
No missing values.
No missing values.


Analisi

In [3]:
# Knowledge vs. behavioural score variables.
# The 2017 frame is passed as df_train and the 2020 frame as df_test; only the
# segments listed in segmentation_values are handed to the analysis.
# NOTE(review): export_name presumably labels the exported output — confirm
# in utils_analysis.
knowledge_behavioral_kwargs = dict(
    df_train=df_2017_transformed,
    df_test=df_2020_transformed,
    segmentation_column="segmentation",
    segmentation_values=segmentation_values,
    columns_A=utils_data_preparation.knowledge_score_variables,
    columns_B=utils_data_preparation.behavioral_score_variables,
    export_name="knowledge_behavioral",
)
final_results, summary_results = utils_analysis.analyze_association_rules(
    **knowledge_behavioral_kwargs
)

In [4]:
# Knowledge vs. attitude score variables.
# The 2017 frame is passed as df_train and the 2020 frame as df_test; only the
# segments listed in segmentation_values are handed to the analysis.
# NOTE(review): export_name presumably labels the exported output — confirm
# in utils_analysis.
knowledge_attitude_kwargs = dict(
    df_train=df_2017_transformed,
    df_test=df_2020_transformed,
    segmentation_column="segmentation",
    segmentation_values=segmentation_values,
    columns_A=utils_data_preparation.knowledge_score_variables,
    columns_B=utils_data_preparation.attitude_score_variables,
    export_name="knowledge_attitude",
)
final_results, summary_results = utils_analysis.analyze_association_rules(
    **knowledge_attitude_kwargs
)

In [5]:
# Attitude vs. behavioural score variables.
# The 2017 frame is passed as df_train and the 2020 frame as df_test; only the
# segments listed in segmentation_values are handed to the analysis.
# NOTE(review): export_name presumably labels the exported output — confirm
# in utils_analysis.
attitude_behavioral_kwargs = dict(
    df_train=df_2017_transformed,
    df_test=df_2020_transformed,
    segmentation_column="segmentation",
    segmentation_values=segmentation_values,
    columns_A=utils_data_preparation.attitude_score_variables,
    columns_B=utils_data_preparation.behavioral_score_variables,
    export_name="attitude_behavioral",
)
final_results, summary_results = utils_analysis.analyze_association_rules(
    **attitude_behavioral_kwargs
)