In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from fcapy.lattice import ConceptLattice
from fcapy.context import FormalContext
import os 

from shared import utils_data_preparation
from shared import utils_analysis

    
# Folder reserved for the notebook's output files.
output_dir = 'output'

# Create the folder if it is missing; an already existing folder is accepted.
os.makedirs(name=output_dir, exist_ok=True)
 

In [2]:
# Load the 2017 and 2020 survey files.
df_2017 = pd.read_csv("data/Financia_literacy_2017.csv")
df_2020 = pd.read_csv("data/Financia_literacy_2020.csv")

# Check that the two frames have a compatible structure before transforming them.
db_check = utils_data_preparation.check_dataframe_structure(df_2017, df_2020)

# Every later cell needs both transformed frames. Stop here with an explicit
# message instead of silently skipping the transformation and letting a later
# cell fail with an unrelated NameError.
if not db_check:
    raise ValueError(
        "df_2017 and df_2020 did not pass check_dataframe_structure; "
        "the transformed DataFrames cannot be built."
    )

df_2017_transformed = utils_data_preparation.transform_dataframe(df_2017)
df_2020_transformed = utils_data_preparation.transform_dataframe(df_2020)


PASS
No missing values.
No missing values.


In [3]:

# Names of the two derived columns that are combined into the segment label.
var1 = "education"
var2 = "generation"

# Apply the same derivations to both survey years.
for frame in (df_2017_transformed, df_2020_transformed):
    # "education" is derived from qd9 and "generation" from qd7, using the
    # categorisation helpers of utils_data_preparation.
    frame["education"] = frame["qd9"].apply(utils_data_preparation.categorize_education)
    frame["generation"] = frame["qd7"].apply(utils_data_preparation.categorize_generation)

    # Segment label: the var1 value and the var2 value joined by an underscore.
    frame["segmentation"] = frame[var1].astype(str) + "_" + frame[var2].astype(str)

In [4]:
# Run calculate_scores on the 2017 frame and then on the 2020 frame, rebinding
# each name to the value returned for it.
df_2017_transformed, df_2020_transformed = map(
    utils_data_preparation.calculate_scores,
    (df_2017_transformed, df_2020_transformed),
)

In [None]:
#train_data_2017, test_data_2017 = utils_data_preparation.create_train_test_set(df_2017_transformed)

In [43]:
# Number of 2017 rows in each segment (most frequent segment first).
segmentation_counts = df_2017_transformed["segmentation"].value_counts()

# Keep only the segments that have at least 200 rows.
segmentation_values = [
    segment_label
    for segment_label, n_rows in segmentation_counts.items()
    if n_rows >= 200
]

In [77]:
import pandas as pd

# Column sets A and B handed to AssociationRules. They do not depend on the
# segment, so they are assigned once before the loop; this also keeps them
# defined when segmentation_values is empty.
columns_A = utils_data_preparation.knowledge_score_variables
columns_B = utils_data_preparation.behavioral_score_variables

# Collects one filtered rule table per segment.
all_results = []

# Iterate over every retained segment.
for segment in segmentation_values:
    # Restrict the 2017 data to the current segment.
    df_iter = df_2017_transformed[df_2017_transformed["segmentation"] == segment]

    # Build the AssociationRules object for the current segment.
    ar = utils_analysis.AssociationRules(df_iter, columns_A=columns_A, columns_B=columns_B, group=segment)

    # Compute the association metrics.
    results = ar.calculate_all_metrics_for_selected_sets()

    # Keep only the rules that meet the support / confidence / lift thresholds.
    filtered_results_db = ar.filter_by_values(results, min_support=0.12, min_confidence=0.6, min_lift=1.35)

    all_results.append(filtered_results_db)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the actual cause instead.
if not all_results:
    raise ValueError(
        "segmentation_values is empty: there is no segment to compute association rules for."
    )

# Stack the per-segment tables into a single DataFrame.
final_results_db = pd.concat(all_results, ignore_index=True)

In [None]:
# Number of retained rules per value of the "Group" column.
rules_per_group = final_results_db["Group"].value_counts()
rules_per_group

Group
Diploma_Millennials    6
Diploma_Gen_X          3
No_diploma_Gen_X       1
Name: count, dtype: int64

In [None]:
# A and B item sets of every retained rule, in row order.
lista_A = final_results_db["A"].tolist()
lista_B = final_results_db["B"].tolist()

# Accumulators for the metrics recomputed on the 2020 data.
support_test_values = []
confidence_test_values = []
lift_test_values = []

# The column sets are read directly from utils_data_preparation rather than
# from columns_A / columns_B, which are only assigned inside the body of the
# per-segment loop in another cell and may therefore be undefined here.
# NOTE(review): the rules were computed per segment on the 2017 data, but they
# are re-evaluated here on the whole 2020 frame, not on the matching 2020
# segment - confirm this is intended.
ar = utils_analysis.AssociationRules(
    df_2020_transformed,
    columns_A=utils_data_preparation.knowledge_score_variables,
    columns_B=utils_data_preparation.behavioral_score_variables,
    group="test_2020",
)

# Walk through the rules pairwise (A side, B side).
for item_a, item_b in zip(lista_A, lista_B):
    # Support is computed on the concatenation of the A and B items;
    # confidence and lift receive the two sides separately.
    support_test = ar.support(list(item_a + item_b))
    confidence_test = ar.confidence(list(item_a), list(item_b))
    lift_test = ar.lift(list(item_a), list(item_b))

    support_test_values.append(support_test)
    confidence_test_values.append(confidence_test)
    lift_test_values.append(lift_test)

# Attach the 2020 metrics to the rule table as new columns.
final_results_db["supporto_test"] = support_test_values
final_results_db["confidenza_test"] = confidence_test_values
final_results_db["lift_test"] = lift_test_values

In [82]:
# Rules whose 2020 support and confidence are positive and whose 2020 lift
# is above 1.
has_support = final_results_db["supporto_test"].gt(0)
has_confidence = final_results_db["confidenza_test"].gt(0)
has_lift = final_results_db["lift_test"].gt(1)
result = final_results_db.loc[has_support & has_confidence & has_lift]

# Number of such rules per group.
result["Group"].value_counts()

Group
Diploma_Millennials    6
Diploma_Gen_X          3
No_diploma_Gen_X       1
Name: count, dtype: int64

In [83]:
# Raggruppa per "Group" e calcola le medie
df_summary = final_results_db.groupby("Group").agg({
    "Support": "mean",
    "Confidence": "mean",
    "Lift": "mean",
    "supporto_test": "mean",
    "confidenza_test": "mean",
    "lift_test": "mean"
}).reset_index()

# Rinomina le colonne per chiarezza
df_summary.rename(columns={
    "Support": "Support Medio",
    "Confidence": "Confidence Media",
    "Lift": "Lift Medio",
    "supporto_test": "Support Test Medio",
    "confidenza_test": "Confidence Test Media",
    "lift_test": "Lift Test Medio"
}, inplace=True)

In [92]:
# Write the rule table (including its index) to a CSV file in the working directory.
# NOTE(review): output_dir, created in the first cell, is not used here -
# confirm whether the file was meant to be written there.
export_path = "estrazione.csv"
final_results_db.to_csv(export_path)