In [1]:
import pandas as pd
from shared import utils_data_preparation
from shared import utils_analysis
import os 

# Make sure the output directory exists before anything is written to it;
# exist_ok=True keeps a re-run of this cell from failing when it is already there.
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)

Preparazione dataset ed input

In [2]:
# Load the two survey waves.
df_2017 = pd.read_csv("data/Financia_literacy_2017.csv")
df_2020 = pd.read_csv("data/Financia_literacy_2020.csv")

# Structure check across the two waves (result is used as a truthy/falsy flag).
db_check = utils_data_preparation.check_dataframe_structure(df_2017, df_2020)

# Every later step reads df_2017_transformed / df_2020_transformed. Previously
# these were only assigned when the check passed, so a failed check surfaced
# further down as an unrelated NameError. Stop here with an explicit message.
if not db_check:
    raise ValueError(
        "The 2017 and 2020 DataFrames do not have a compatible structure; "
        "cannot continue with the transformation."
    )

# The structure is compatible: transform both DataFrames.
df_2017_transformed = utils_data_preparation.transform_dataframe(df_2017)
df_2020_transformed = utils_data_preparation.transform_dataframe(df_2020)

# Add the two derived categorical columns to both waves:
#   "education"  <- categorize_education applied to column "qd9"
#   "generation" <- categorize_generation applied to column "qd7"
for wave_df in (df_2017_transformed, df_2020_transformed):
    wave_df["education"] = wave_df["qd9"].apply(utils_data_preparation.categorize_education)
    wave_df["generation"] = wave_df["qd7"].apply(utils_data_preparation.categorize_generation)

# The two variables whose values define a segment.
var1 = "education"
var2 = "generation"

# "segmentation" is the string "<var1 value>_<var2 value>", built the same way
# for both waves so the segment labels are comparable between them.
for wave_df in (df_2017_transformed, df_2020_transformed):
    wave_df["segmentation"] = wave_df[var1].astype(str) + "_" + wave_df[var2].astype(str)

# Run the score calculation on each wave and rebind both names to the results.
df_2017_transformed, df_2020_transformed = [
    utils_data_preparation.calculate_scores(wave_df)
    for wave_df in (df_2017_transformed, df_2020_transformed)
]

# Minimum number of 2017 respondents a segment needs in order to be analysed.
MIN_SEGMENT_SIZE = 200

# Segment sizes are measured on the 2017 wave only; the 2020 wave does not
# influence which segments are kept.
segmentation_counts = df_2017_transformed["segmentation"].value_counts()
segmentation_values = segmentation_counts[segmentation_counts >= MIN_SEGMENT_SIZE].index.tolist()

PASS
No missing values.
No missing values.


In [3]:
# Summary statistics of total_score for each segment of the 2017 wave.
total_score_by_segment = df_2017_transformed.groupby("segmentation")["total_score"]
total_score_by_segment.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
segmentation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Diploma_Boomers,139.0,10.338129,2.70146,4.0,8.0,10.0,12.0,16.0
Diploma_Gen_X,262.0,9.973282,3.08178,1.0,8.0,10.0,12.0,18.0
Diploma_Gen_Z,172.0,8.767442,2.837404,1.0,7.0,9.0,11.0,16.0
Diploma_Millennials,337.0,9.548961,2.804886,3.0,8.0,10.0,11.0,18.0
Diploma_Silent_Generation,15.0,10.0,2.13809,7.0,8.0,10.0,11.5,13.0
No_diploma_Boomers,309.0,9.038835,2.747805,2.0,7.0,9.0,11.0,16.0
No_diploma_Gen_X,232.0,8.982759,2.654679,2.0,7.0,9.0,11.0,17.0
No_diploma_Gen_Z,105.0,7.952381,2.595784,2.0,6.0,8.0,10.0,13.0
No_diploma_Millennials,191.0,8.445026,2.636957,2.0,7.0,8.0,10.0,18.0
No_diploma_Silent_Generation,77.0,8.402597,2.961417,2.0,6.0,8.0,11.0,15.0


Analisi

In [4]:
# Knowledge variables (columns_A) against behavioral variables (columns_B),
# restricted to the selected segments. The 2017 wave is passed as df_train and
# the 2020 wave as df_test.
# NOTE(review): export_name presumably labels the files the helper writes — confirm.
(
    final_results_knowledge_behavioral,
    summary_results_knowledge_behavioral,
) = utils_analysis.analyze_association_rules(
    df_train=df_2017_transformed,
    df_test=df_2020_transformed,
    segmentation_column="segmentation",
    segmentation_values=segmentation_values,
    columns_A=utils_data_preparation.knowledge_score_variables,
    columns_B=utils_data_preparation.behavioral_score_variables,
    export_name="knowledge_behavioral",
)

In [5]:
# Build the plots location with os.path.join instead of a hard-coded "\\":
# the backslash is a path separator only on Windows, so elsewhere the old
# literal named a single entry called "output\output_plots" rather than a
# sub-folder of the output directory. On Windows the resulting path is unchanged.
plots_dir = os.path.join(output_dir, "output_plots")
utils_analysis.plot_metrics_distribution(final_results_knowledge_behavioral, save_path=plots_dir)

In [6]:
# Knowledge variables (columns_A) against attitude variables (columns_B),
# restricted to the selected segments. The 2017 wave is passed as df_train and
# the 2020 wave as df_test.
# NOTE(review): export_name presumably labels the files the helper writes — confirm.
(
    final_results_knowledge_attitude,
    summary_results_knowledge_attitude,
) = utils_analysis.analyze_association_rules(
    df_train=df_2017_transformed,
    df_test=df_2020_transformed,
    segmentation_column="segmentation",
    segmentation_values=segmentation_values,
    columns_A=utils_data_preparation.knowledge_score_variables,
    columns_B=utils_data_preparation.attitude_score_variables,
    export_name="knowledge_attitude",
)