In [None]:
import json

# Directory that holds the actimetry JSON files. Defined once so the source
# and output paths stay in sync if the share is moved.
BIDS_DIR = "/home/ndecaux/NAS_EMPENN/share/projects/actidep/bids"

# Input: JSON with the description of every actimetry feature.
src_json = f"{BIDS_DIR}/actimetry_feature_descriptions.json"
# Input: JSON listing the selected features per task. Relative path, so it is
# resolved against the kernel's current working directory.
significant_features = "global_significant_features.json"
# Output: JSON restricted to the descriptions of the significant features.
output_json = f"{BIDS_DIR}/actimetry_significant_features.json"

# Chargement des données

In [2]:
# Load the complete feature descriptions.
# Read explicitly as UTF-8 (the encoding this notebook uses when it writes its
# JSON output) instead of depending on the platform's locale default.
with open(src_json, 'r', encoding='utf-8') as f:
    all_features = json.load(f)

# Load the per-task significant feature selections.
with open(significant_features, 'r', encoding='utf-8') as f:
    significant_data = json.load(f)

print(f"Nombre total de features : {len(all_features)}")
print(f"Tâches analysées : {list(significant_data['tasks'].keys())}")

Nombre total de features : 41
Tâches analysées : ['depressed_vs_control', 'apathy_vs_nonapathy', 'ami', 'aes']


# Extraction des features significatives uniques

In [3]:
# Collect the unique significant feature names across all tasks, normalised
# so they can be compared with the 'name' field of the feature descriptions.
acti_prefix = 'acti_'
all_significant_features = set()

for task_name, task_data in significant_data['tasks'].items():
    print(f"\nTâche: {task_name} ({task_data['type']})")

    # 12h features selected for this task (empty list when the key is absent)
    features_12h = task_data.get('selected_features_12h', [])
    print(f"  Features 12h: {len(features_12h)}")

    # 3d features selected for this task (empty list when the key is absent)
    features_3d = task_data.get('selected_features_3d', [])
    print(f"  Features 3d: {len(features_3d)}")

    for feat in features_12h + features_3d:
        # Strip only a *leading* 'acti_'. str.replace() would also delete any
        # later occurrence of that substring inside the feature name.
        clean_feat = feat[len(acti_prefix):] if feat.startswith(acti_prefix) else feat

        # Names containing '_12h_' carry one extra trailing token (the
        # original comment calls it a numeric suffix); drop that last
        # '_'-separated token to recover the base name.
        if '_12h_' in clean_feat:
            clean_feat = clean_feat.rsplit('_', 1)[0]

        all_significant_features.add(clean_feat)

print(f"\nNombre total de features uniques significatives : {len(all_significant_features)}")
print(f"Features : {sorted(all_significant_features)}")


Tâche: depressed_vs_control (classification)
  Features 12h: 60
  Features 3d: 10

Tâche: apathy_vs_nonapathy (classification)
  Features 12h: 60
  Features 3d: 10

Tâche: ami (regression)
  Features 12h: 6
  Features 3d: 3

Tâche: aes (regression)
  Features 12h: 13
  Features 3d: 3

Nombre total de features uniques significatives : 30
Features : ['activity_max_12h', 'activity_max_3d', 'activity_mean_12h', 'activity_mean_3d', 'activity_min_12h', 'activity_min_3d', 'activity_rate_3d', 'freq_max_12h', 'freq_max_3d', 'freq_mean_12h', 'freq_mean_3d', 'freq_min_12h', 'freq_min_3d', 'freq_std_12h', 'freq_std_3d', 'inactivity_max_12h', 'inactivity_max_3d', 'inactivity_mean_12h', 'inactivity_min_12h', 'inactivity_min_3d', 'inactivity_std_3d', 'oadl_fft_mean_12h', 'oadl_fft_min_12h', 'oadl_min_12h', 'walk_fft_max_12h', 'walk_fft_mean_12h', 'walk_fft_min_12h', 'walk_max_12h', 'walk_mean_12h', 'walk_min_12h']


# Filtrage du dictionnaire complet

In [4]:
# Keep only the descriptions whose 'name' is one of the significant features,
# preserving the keys and the order of the full dictionary.
filtered_features = {}

for feature_id, description in all_features.items():
    name = description['name']

    # Guard clause: skip anything that was not selected as significant.
    if name not in all_significant_features:
        continue

    filtered_features[feature_id] = description
    print(f"✓ Conservé : {feature_id} - {name}")

print(f"\nNombre de features dans le JSON filtré : {len(filtered_features)}")

✓ Conservé : 2 - inactivity_std_3d
✓ Conservé : 3 - inactivity_min_3d
✓ Conservé : 4 - inactivity_max_3d
✓ Conservé : 5 - activity_mean_3d
✓ Conservé : 7 - activity_min_3d
✓ Conservé : 8 - activity_max_3d
✓ Conservé : 9 - freq_mean_3d
✓ Conservé : 10 - freq_std_3d
✓ Conservé : 11 - freq_min_3d
✓ Conservé : 12 - freq_max_3d
✓ Conservé : 13 - activity_rate_3d
✓ Conservé : 14 - inactivity_mean_12h
✓ Conservé : 16 - inactivity_min_12h
✓ Conservé : 17 - inactivity_max_12h
✓ Conservé : 18 - activity_mean_12h
✓ Conservé : 20 - activity_min_12h
✓ Conservé : 21 - activity_max_12h
✓ Conservé : 22 - freq_mean_12h
✓ Conservé : 23 - freq_std_12h
✓ Conservé : 24 - freq_min_12h
✓ Conservé : 25 - freq_max_12h
✓ Conservé : 26 - walk_mean_12h
✓ Conservé : 28 - walk_min_12h
✓ Conservé : 29 - walk_max_12h
✓ Conservé : 30 - walk_fft_mean_12h
✓ Conservé : 32 - walk_fft_min_12h
✓ Conservé : 33 - walk_fft_max_12h
✓ Conservé : 36 - oadl_min_12h
✓ Conservé : 38 - oadl_fft_mean_12h
✓ Conservé : 40 - oadl_fft_min

# Sauvegarde du JSON filtré

In [5]:
# Serialise the filtered descriptions (2-space indent, non-ASCII characters
# kept as-is) and write them to the output file as UTF-8.
serialized = json.dumps(filtered_features, indent=2, ensure_ascii=False)
with open(output_json, 'w', encoding='utf-8') as out_file:
    out_file.write(serialized)

kept_count = len(filtered_features)
print(f"JSON filtré sauvegardé dans : {output_json}")
print(f"Nombre de features conservées : {kept_count}")

JSON filtré sauvegardé dans : /home/ndecaux/NAS_EMPENN/share/projects/actidep/bids/actimetry_significant_features.json
Nombre de features conservées : 30


# Statistiques par type de feature

In [6]:
# Split the kept feature names into the '3d' and '12h' families.
# A name ending in '_3d' goes to '3d'; otherwise a name containing '_12h'
# goes to '12h'; anything else is left out of both lists.
kept_names = [entry['name'] for entry in filtered_features.values()]

features_by_type = {
    '3d': [name for name in kept_names if name.endswith('_3d')],
    '12h': [
        name for name in kept_names
        if not name.endswith('_3d') and '_12h' in name
    ],
}

print("Répartition des features significatives :")

names_3d = features_by_type['3d']
print(f"  Features 3d : {len(names_3d)}")
for name in sorted(names_3d):
    print(f"    - {name}")

names_12h = features_by_type['12h']
print(f"\n  Features 12h : {len(names_12h)}")
for name in sorted(names_12h):
    print(f"    - {name}")

Répartition des features significatives :
  Features 3d : 11
    - activity_max_3d
    - activity_mean_3d
    - activity_min_3d
    - activity_rate_3d
    - freq_max_3d
    - freq_mean_3d
    - freq_min_3d
    - freq_std_3d
    - inactivity_max_3d
    - inactivity_min_3d
    - inactivity_std_3d

  Features 12h : 19
    - activity_max_12h
    - activity_mean_12h
    - activity_min_12h
    - freq_max_12h
    - freq_mean_12h
    - freq_min_12h
    - freq_std_12h
    - inactivity_max_12h
    - inactivity_mean_12h
    - inactivity_min_12h
    - oadl_fft_mean_12h
    - oadl_fft_min_12h
    - oadl_min_12h
    - walk_fft_max_12h
    - walk_fft_mean_12h
    - walk_fft_min_12h
    - walk_max_12h
    - walk_mean_12h
    - walk_min_12h


# Groupes de features par préfixe

In [None]:
# Group feature names by prefix (the first '_'-separated word).
def group_by_prefix(features):
    """Bucket feature names by the text that precedes their first underscore.

    Parameters
    ----------
    features : iterable of str
        Feature names to group.

    Returns
    -------
    dict
        Maps each prefix to the list of names carrying it. Names keep their
        input order inside each list, and prefixes appear in the order in
        which they are first encountered. A name without any underscore is
        its own prefix.
    """
    buckets = {}
    for name in features:
        head, _, _ = name.partition('_')
        buckets.setdefault(head, []).append(name)
    return buckets

# Names of every kept feature, in the order of the filtered dictionary.
all_filtered_features = [entry['name'] for entry in filtered_features.values()]
feature_groups = group_by_prefix(all_filtered_features)

# Print one section per prefix, prefixes and members both in sorted order.
print("Features significatives par groupe :")
for prefix in sorted(feature_groups):
    members = feature_groups[prefix]
    print(f"\n{prefix.upper()} ({len(members)} features):")
    for name in sorted(members):
        print(f"  - {name}")

Corrélations attendues pour FA :
  Positives (+) :
    - activity_mean_3d
    - activity_min_3d
    - activity_max_3d
    - freq_mean_3d
    - freq_min_3d
    - freq_max_3d
    - activity_rate_3d
    - activity_mean_12h
    - activity_min_12h
    - activity_max_12h
    - freq_mean_12h
    - freq_min_12h
    - freq_max_12h
    - walk_mean_12h
    - walk_min_12h
    - walk_max_12h
    - walk_fft_mean_12h
    - walk_fft_min_12h
    - walk_fft_max_12h
    - oadl_min_12h
    - oadl_fft_mean_12h
    - oadl_fft_min_12h

  Négatives (-) :
    - inactivity_std_3d
    - inactivity_min_3d
    - inactivity_max_3d
    - freq_std_3d
    - inactivity_mean_12h
    - inactivity_min_12h
    - inactivity_max_12h
    - freq_std_12h


Corrélations attendues pour IFW :
  Positives (+) :
    - inactivity_std_3d
    - inactivity_min_3d
    - inactivity_max_3d
    - freq_std_3d
    - inactivity_mean_12h
    - inactivity_min_12h
    - inactivity_max_12h
    - freq_std_12h

  Négatives (-) :
    - activity_mean_

# Vérification des corrélations attendues

In [None]:
# Print the kept features by expected correlation sign, one section per metric.
# The four copy-pasted loops are folded into a single nested loop; the text
# that gets printed is unchanged.
sign_headers = (
    ('+', "  Positives (+) :"),
    ('-', "\n  Négatives (-) :"),
)

for metric_index, metric in enumerate(('FA', 'IFW')):
    # Every metric section after the first is preceded by two blank lines.
    section_gap = "\n\n" if metric_index else ""
    print(f"{section_gap}Corrélations attendues pour {metric} :")

    for sign, header in sign_headers:
        print(header)
        for feature_data in filtered_features.values():
            # 'excepted_correlation' (sic) is the key spelling this notebook
            # reads. `or {}` also tolerates an explicit null under that key,
            # which would otherwise raise AttributeError on .get().
            expected = (feature_data.get('excepted_correlation') or {}).get(metric)
            if expected == sign:
                print(f"    - {feature_data['name']}")

# Statistiques par groupe et direction de corrélation

In [None]:
# Analyser la direction des corrélations par groupe
import pandas as pd

# Index the kept descriptions by name once, instead of rescanning the whole
# filtered dictionary for every feature. setdefault keeps the first entry
# seen for a given name, i.e. the same entry a first-match scan would pick.
description_by_name = {}
for feature_data in filtered_features.values():
    description_by_name.setdefault(feature_data['name'], feature_data)

# One row per (prefix group, metric) with the count of '+' and '-' expectations.
stats_data = []
for prefix, feats in feature_groups.items():
    for metric in ['FA', 'IFW']:
        # Expected sign of each group member for this metric. Members with no
        # matching description are skipped; `or {}` tolerates an explicit
        # null under the 'excepted_correlation' (sic) key.
        expected_signs = [
            (description_by_name[feat].get('excepted_correlation') or {}).get(metric)
            for feat in feats
            if feat in description_by_name
        ]

        stats_data.append({
            'Groupe': prefix,
            'Métrique': metric,
            'Total': len(feats),
            'Corrélation +': expected_signs.count('+'),
            'Corrélation -': expected_signs.count('-')
        })

stats_df = pd.DataFrame(stats_data)
print("\nStatistiques par groupe et métrique:")
print(stats_df.to_string(index=False))