In [8]:
import csv
import numpy as np
from scipy.stats import chisquare
from scipy import stats
import pandas as pd

with open('../Data/infos_algopol.csv', 'r') as to_read:
    csv_r = csv.reader(to_read, delimiter = ';')
    age_per_ego = {}
    en_tete = csv_r.next()
    id_age = en_tete.index('ego_age')
    for line in csv_r:
        if line[id_age] == '':
            age_per_ego[line[0]] = None
        else:
            age_per_ego[line[0]] = int(line[id_age])

def job(ego, id_job):
    job_per_id = {
        '1' : 'Agriculteur',
        '2' : 'Artisan',
        '3' : 'Commerçant',
        '4' : 'Chef d\'entreprise de 10 salariés ou plus',
        '5' : 'Profession libérale',
        '6' : 'Cadre de la fonction publique, profession intellectuelle et artistique',
        '7' : 'Cadre d\'entreprise',
        '8' : 'Professions intermédiaires de l\'enseignement, de la santé, de la fonction publique',
        '9' : 'Professions intermédiaires administratives et commerciales des entreprises',
        '10' : 'Technicien',
        '11' : 'Contremaître, agent de maîtrise',
        '12' : 'Employé de la fonction publique',
        '13' : 'Employé administratifs d\'entreprise',
        '14' : 'Employé de commerce',
        '15' : 'Personnel des services directs aux particuliers',
        '16' : 'Ouvrier',
        '17' : 'Chômeur ou inactif',
        '18' : 'Étudiant' if age_per_ego[ego] < 35 
            else 'Retraité' if age_per_ego[ego] > 60 
            else 'Chômeur ou inactif'
    }
    
    return job_per_id[id_job]

def super_job(job):
    super_job_per_job = {
        'Agriculteur' : 'Agriculteur',
        'Artisan' : 'Artisans, commerçants et chefs d’entreprise',
        'Commerçant' : 'Artisans, commerçants et chefs d’entreprise',
        'Chef d\'entreprise de 10 salariés ou plus' : 'Artisans, commerçants et chefs d’entreprise',
        'Profession libérale' : 'Cadres et professions intellectuelles supérieures',
        'Cadre de la fonction publique, profession intellectuelle et artistique': 'Cadres et professions intellectuelles supérieures',
        'Cadre d\'entreprise' : 'Cadres et professions intellectuelles supérieures',
        'Professions intermédiaires de l\'enseignement, de la santé, de la fonction publique'  : 'Professions intermédiaires',
        'Professions intermédiaires administratives et commerciales des entreprises' : 'Professions intermédiaires',
        'Technicien' : 'Professions intermédiaires',
        'Contremaître, agent de maîtrise' : 'Professions intermédiaires',
        'Employé de la fonction publique' : 'Employé',
        'Employé administratifs d\'entreprise' : 'Employé',
        'Employé de commerce' : 'Employé',
        'Personnel des services directs aux particuliers' : 'Employé',
        'Ouvrier' : 'Ouvrier',
        'Chômeur ou inactif' : 'Chômeur ou inactif',
        'Étudiant' : 'Etudiant',
        'Retraité' : 'Retraité'
        
    }
    return super_job_per_job[job]

def gender_to_int(gender):
    if gender in ['male', 'female']:
        return gender
    else:
        return '_'

def slice_age(age):
    age = int(age)
    if age < 25:
        return '18-25'
    if age < 40:
        return '26-40'
    if age < 60:
        return '41-60'
    return '60+'

def restrict_relationship(indic_value):
    if indic_value in ['En couple', 
                       'Fiancé(e)', 
                       'En couple (en attente)']:
        return 'couple'
    if indic_value in ['Marié(e)', 
                       'Marié(e) (en attente)', 
                       'En union civile (en attente)', 
                       'En union civile', 
                       'En concubinage']:
        return 'marié'
    if indic_value in ['Célibataire']:
        return 'célibataire'
    return '_'

def filter_corpus(corpus):
    # fonction qui retourne la liste des égos inclus dans corpus (all, csa ou p5)
    result = []
    with open('../Data/corpus_per_ego.csv', 'r') as to_read:
        csv_r = csv.reader(to_read, delimiter = ';')
        for line in csv_r:
            if line[1] == corpus:
                result.append(line[0])
    return result

num_indics = [
  'fb_female_friends_count',
  'fb_male_friends_count',
  'fb_friends_count',
  'publi_count',
  'comments',  
  'likes'   
]

non_num_indics = [
  'ego_age',
  'fb_relationship',
  'ego_gender',
  'ego_profession', 
]

interesting_ind = num_indics + non_num_indics

rs_indics = [
    'rs_density',
    'rs_modularity',
    'rs_diameter',
    'rs_betweenness',
    'rs_nb_links',
    'rs_size_max_cc',
    'rs_louvain_2',
    'rs_louvain_2_max_cc',
    'rs_louvain_5',
    'rs_louvain_5_max_cc',
    'rs_clustering_coeff',
]

indics = num_indics + non_num_indics + rs_indics + ['new_classe']
name_per_cluster = ['dense', 'fork', 'star-like', 'holes', 'bowties']
nb_to_name = {
    'all' : {424 : 'dense', 861 : 'fork', 649 : 'star-like', 667 : 'holes', 543 : 'bowties'},
    'csa' : {62 : 'dense', 143 : 'fork', 106 : 'star-like', 89 : 'holes', 117 : 'bowties'}
}

In [33]:
#Construction dictionnaire de données

corpus = 'csa'
list_egos = filter_corpus(corpus)

size_per_ego = {}
with open('../Data/size.csv', 'r') as to_read:
    csv_r = csv.reader(to_read, delimiter = ';')
    csv_r.next()
    for line in csv_r:
        ego = line[0]
        if not ego in list_egos:
            continue
        size_per_ego[ego] = int(line[1])
        
classe_per_ego = {}

for classe in range(5):
    with open('../Results/facebook_k5/Typo_5/egos_classe_%s.csv' % classe, 'r') as to_read:
        csv_r = csv.reader(to_read, delimiter = ';')
        csv_r.next()
        for line in csv_r:
            ego = line[0]
            if not ego in list_egos:
                continue
            classe_per_ego[ego] = classe 

algopol_per_ego = {}
with open('../Data/infos_algopol.csv', 'r') as to_read:
    csv_r = csv.reader(to_read, delimiter = ';')
    en_tete = csv_r.next()[1:]
    for line in csv_r:
        ego, indics = line[0], line[1:]
        if not ego in list_egos:
            continue
        algopol_per_ego[ego] = {}
        for indic in num_indics:
            if indic in ['publi_count', 'comments','likes']:
                continue
            if indics[en_tete.index(indic)] in ['', '_']:
                continue
            if indic in ['fb_male_friends_count', 'fb_female_friends_count']:
                ind_indic = en_tete.index(indic)
                nb_friends = float(indics[en_tete.index('fb_friends_count')])
                algopol_per_ego[ego][indic] = float(indics[ind_indic]) / nb_friends if not nb_friends == 0 else 0
                continue
            algopol_per_ego[ego][indic] = float(indics[en_tete.index(indic)])
        for indic in non_num_indics:
            indic_value = indics[en_tete.index(indic)]
            if indic_value in ['', '_']:
                continue
            if indic == 'ego_profession':
                indic_value = super_job(job(ego, indic_value.decode('utf8')))
            elif indic == 'ego_gender':
                indic_value = gender_to_int(indic_value)
            elif indic == 'ego_age':
                indic_value = slice_age(indic_value)
            elif indic == 'fb_relationship':
                indic_value = restrict_relationship(indic_value)
            if indic_value in ['', '_']:
                continue
            algopol_per_ego[ego][indic] = indic_value
        for indic in rs_indics:
            if indics[en_tete.index(indic)] in ['', '_', 'undetermined']:
                continue
            algopol_per_ego[ego][indic] = float(indics[en_tete.index(indic)])
            
with open('../Data/pubs_per_ego.csv', 'r') as to_read:
    csv_r = csv.reader(to_read, delimiter = ';')
    csv_r.next()
    for line in csv_r:
        nb_pub, comments, likes = [int(x) for x in line[1:]]
        ego = line[0].split('.')[0]
        if not ego in algopol_per_ego:
            continue
        algopol_per_ego[ego]['publi_count'] = int(nb_pub)
        if nb_pub == 0:
            algopol_per_ego[ego]['comments'] = 0
            algopol_per_ego[ego]['likes'] = 0
        else:
            algopol_per_ego[ego]['comments'] = int(comments)/float(nb_pub)
            algopol_per_ego[ego]['likes'] = int(likes)/float(nb_pub)
            
with open('../Data/classesnew.csv', 'r') as to_read:
    csv_r = csv.reader(to_read, delimiter = ';')
    for line in csv_r:
        if line[0] in algopol_per_ego:
            algopol_per_ego[line[0]]['new_classe'] = line[1]
            
###On passe les num indics en labelisé
for indic in num_indics:
    cut = pd.qcut([float(algopol_per_ego[ego][indic]) for ego in classe_per_ego], 6)
    i = 0
    for ego in classe_per_ego:
        split_cut = cut[i].split(',')
        algopol_per_ego[ego][indic] = '%s-%s' % (split_cut[0][1:], split_cut[1][1:-1])
        i += 1

In [34]:
indic_results = {indic : list(set([algopol_per_ego[ego][indic]
                   for ego in classe_per_ego if indic in algopol_per_ego[ego]]))
                    for indic in interesting_ind}

list_egos = filter_corpus('csa')
print len(list_egos)

with open('../Data/indic_stats_csa.csv', 'w') as to_write:
    csv_w = csv.writer(to_write, delimiter = ';')
    new_entete = ['ego', 'classe']
    for ind in interesting_ind:
        new_entete.extend(['%s_%s' % (ind, res) for res in indic_results[ind]])
    csv_w.writerow(new_entete)
    for ego in list_egos:
        new_line = [ego, classe_per_ego[ego]]
        try:
            for ind in interesting_ind:
                for res in indic_results[ind]:
                    if algopol_per_ego[ego][ind] == res:
                        new_line.append(1)
                    else:
                        new_line.append(0)
        except:
            continue
        csv_w.writerow(new_line)

517


In [51]:
###Test Chi² avec Valérie

csv.QUOTE_ALL = True

def compute_chi_2(indic):
    indic_all = np.array([algopol_per_ego[ego][indic]
                   for ego in classe_per_ego if indic in algopol_per_ego[ego]])
    
    indic_results = list(set([algopol_per_ego[ego][indic]
                   for ego in classe_per_ego if indic in algopol_per_ego[ego]]))
    
    classe = np.array([classe_per_ego[ego] for ego in classe_per_ego if indic in algopol_per_ego[ego]])

    classe_temp = []
    nb_per_cluster = {cluster : 0 for cluster in range(5)}
    for ego in classe_per_ego:
        nb_per_cluster[classe_per_ego[ego]] += 1
        if indic in algopol_per_ego[ego]:
            classe_temp.append(classe_per_ego[ego])
       
    name_of_cluster_per_id = {i : nb_to_name[corpus][nb_per_cluster[i]] for i in range(5)}

    egos = pd.DataFrame({
        'indic' : indic_all,
        'classe' : classe            
                    })
    
    egos_tab = pd.crosstab(egos.indic, egos.classe, margins = True)
    egos_tab.columns = nb_to_name[corpus].values() + ['rows_total']
    egos_tab.index = indic_results + ['col_total']
    
    observed = egos_tab.ix[0:len(indic_results),0:5]
    
    expected =  np.outer(egos_tab["rows_total"][0:len(indic_results)],
                     egos_tab.ix["col_total"][0:5]) / float(egos_tab.iloc[len(indic_results)][5])

    expected = pd.DataFrame(expected)
    expected.columns = nb_to_name[corpus].values()
    expected.index = indic_results
    exp_dict = expected.to_dict()
    
    chi_squared_stat = (((observed-expected)**2)/expected).sum().sum()
    p_value = 1 - stats.chi2.cdf(x=chi_squared_stat,  # Find the p-value
                                 df=1)
    
    with open('../Results/chi_square_%s.csv' % corpus, 'a') as to_write:
        csv_w = csv.writer(to_write, delimiter = ';')
        csv_w.writerow([indic])
        
        obs_dict = observed.to_dict()
        csv_w.writerow(['']+name_per_cluster)
        for result in indic_results:
            csv_w.writerow(
                [result] + 
                [round(obs_dict[cluster][result] / float(sum(obs_dict[cluster].values())), 2) \
                     for cluster in name_per_cluster]
            )
            csv_w.writerow(
                [''] + 
                [round((obs_dict[cluster][result] - exp_dict[cluster][result])**2 \
                       / exp_dict[cluster][result], 2) \
                     for cluster in name_per_cluster]
            )
            
        csv_w.writerow(['chi² : %s' % chi_squared_stat, 'p value : %s ' % p_value])
        csv_w.writerow([])
        
touch = open('../Results/chi_square_%s.csv' % corpus, 'w')
touch.close()

for indic in non_num_indics:
    compute_chi_2(indic)
    
for indic in num_indics:
    compute_chi_2(indic)

In [33]:
### CORPUS CSA

csv.QUOTE_ALL = True

typo = 5
main_dir = 'facebook_k5'
corpus = 'all'
list_egos = filter_corpus(corpus)

infos = []
list_results = {indic : [] for indic in non_num_indics+num_indics+['new_classe']}
list_results['ego_age'] = ['18-25','26-40','41-60','60+']
list_results['fb_relationship'] = ['Célibataire', 'En couple', 'Marié(e)']
    
size = []

for k in range(typo):
    infos.append({indic : {} for indic in non_num_indics+['new_classe']})
    for age in ['18-25','26-40','41-60','60+']:
        infos[k]['ego_age'][age] = 0
    with open('../Results/%s/Typo_%s/egos_classe_%s.csv' % (main_dir, typo, k), 'r') as to_read:
        csv_r = csv.reader(to_read, delimiter = ';')
        en_tete = csv_r.next()
        this_size = 0
        for line in csv_r:
            ego = line[0]
            if not ego in list_egos:
                continue
            this_size += 1
            for indic in non_num_indics+['new_classe']:
                if not indic in algopol_per_ego[ego]:
                    continue
                if not algopol_per_ego[ego][indic] in infos[k][indic]:
                    infos[k][indic][algopol_per_ego[ego][indic]] = 0
                if indic == 'ego_age':
                    infos[k][indic][slice_age(algopol_per_ego[ego][indic])] += 1
                    continue
                if not algopol_per_ego[ego][indic] in list_results[indic]:
                    if indic == 'fb_relationship':
                        continue
                    list_results[indic].append(algopol_per_ego[ego][indic])
                infos[k][indic][algopol_per_ego[ego][indic]] += 1

            for indic in num_indics:
                if not indic in infos[k]:
                    infos[k][indic] = []
                if not indic in algopol_per_ego[ego]:
                    continue
                infos[k][indic].append(algopol_per_ego[ego][indic])
        size.append(this_size)
        


with open('../Results/%s/Typo_%s/clusters_vs_facebook_%s.csv' % (main_dir, typo, corpus), 'w') as to_write:
    csv_w = csv.writer(to_write, delimiter = ';')
    csv_w.writerow([''] + [name_per_cluster[x] for x in range(typo)])
    csv_w.writerow([''] + size)
    for indic in non_num_indics + ['new_classe']:
        csv_w.writerow([indic])
        sum_res = {group : sum([infos[group][indic].get(result, 0) 
                       for result in list_results[indic]]) for group in range(typo)}
        for result in list_results[indic]:
            csv_w.writerow([result] + 
                           [round(100*round(infos[group][indic].get(result, 0) / float(sum_res[group]), 2))
                            for group in range(typo)])
        csv_w.writerow([])
    
    for indic in num_indics:
        csv_w.writerow([indic] +
                       [round(np.mean(infos[group][indic]), 2) 
                        for group in range(typo)])

ValueError: invalid literal for int() with base 10: '18-25'

In [1]:
###Regression logistique

from sklearn import linear_model

logit_reg = linear_model.LogisticRegression()

classe_per_ego = {}

for classe in range(5):
    with open('../Results/facebook_k5/Typo_5/egos_classe_%s.csv' % classe, 'r') as to_read:
        csv_r = csv.reader(to_read, delimiter = ';')
        csv_r.next()
        for line in csv_r:
            classe_per_ego[line[0]] = classe 
            
for ego in algopol_per_ego:
    relationship = algopol_per_ego[ego].get('fb_relationship', '')
    new_rel = 0
    if relationship == 'Marié(e)':
        new_rel = 1
    if relationship == 'En couple':
        new_rel = 0.5
    if relationship == 'Célibataire':
        new_rel = -1
    algopol_per_ego[ego]['fb_relationship'] = new_rel
    
list_indics_soc_dem = ['comments',
  'ego_age',
  'ego_gender',
  'ego_profession',
  'fb_female_friends_count',
  'fb_friends_count',
  'fb_male_friends_count',
  'fb_relationship',
  'likes',
  'publi_count']

all_ages = [float(algopol_per_ego[ego]['ego_age']) for ego in algopol_per_ego if 'ego_age' in algopol_per_ego[ego]]

csv_w = csv.writer(to_write, delimiter = ';')
csv_w.writerow(['classe'] + list_indics_soc_dem)

X, Y = [], []
for ego in classe_per_ego:
    temp = []
    for indic in list_indics_soc_dem:
        if indic == 'ego_age':
            if not indic in algopol_per_ego[ego]:
                temp.append(average_age)
            else:
                temp.append(algopol_per_ego[ego][indic])
        elif indic == 'ego_gender':
            if not indic in algopol_per_ego[ego]:
                temp.append(0)
            else:
                temp.append(algopol_per_ego[ego][indic])
        elif indic == 'ego_profession':
            if not indic in algopol_per_ego[ego]:
                temp.append(0)
            else:
                temp.append(algopol_per_ego[ego][indic])
        else:
            temp.append(float(algopol_per_ego[ego][indic]))

    csv_w.writerow([classe_per_ego[ego]] + temp)

    X.append(temp)
    Y.append(classe_per_ego[ego])

logit_reg.fit(X, Y)

NameError: name 'csv' is not defined

In [66]:
for classe in range(5):
    print classe
    i = 0
    for indic in list_indics_soc_dem:
        print '%s : %s' % (indic, round(logit_reg.coef_[classe][i], 2))
        i += 1
    print

0
comments : 0.0
ego_age : -0.03
ego_gender : -0.06
ego_profession : 0.01
fb_female_friends_count : -0.12
fb_friends_count : -0.01
fb_male_friends_count : -0.1
fb_relationship : 0.0
likes : -0.01
publi_count : -0.0

1
comments : 0.0
ego_age : 0.01
ego_gender : 0.15
ego_profession : -0.01
fb_female_friends_count : -0.5
fb_friends_count : 0.0
fb_male_friends_count : -0.51
fb_relationship : 0.0
likes : -0.01
publi_count : 0.0

2
comments : -0.02
ego_age : 0.02
ego_gender : -0.11
ego_profession : -0.02
fb_female_friends_count : 0.01
fb_friends_count : -0.0
fb_male_friends_count : -1.25
fb_relationship : 0.0
likes : 0.0
publi_count : 0.0

3
comments : 0.01
ego_age : -0.01
ego_gender : 0.02
ego_profession : 0.02
fb_female_friends_count : -1.18
fb_friends_count : 0.01
fb_male_friends_count : -0.12
fb_relationship : 0.0
likes : 0.05
publi_count : -0.0

4
comments : -0.03
ego_age : -0.01
ego_gender : -0.05
ego_profession : -0.01
fb_female_friends_count : -0.1
fb_friends_count : -0.01
fb_male_fr

In [43]:
typo = 5
main_dir = 'facebook_k5'

infos = []

list_results = {indic : [] for indic in non_num_indics+num_indics+['new_classe']}
size = []

for k in range(typo):
    infos.append({indic : {} for indic in non_num_indics+['new_classe']})
    with open('../Results/%s/Typo_%s/egos_classe_%s.csv' % (main_dir, typo, k), 'r') as to_read:
    #with open('../Results/%s/egos_classe_%s.csv' % (main_dir, k), 'r') as to_read:
        csv_r = csv.reader(to_read, delimiter = ';')
        en_tete = csv_r.next()
        this_size = 0
        for line in csv_r:
            this_size += 1
            ego = line[0]
            for indic in non_num_indics+['new_classe']:
                if not indic in algopol_per_ego[ego]:
                    continue
                if not algopol_per_ego[ego][indic] in infos[k][indic]:
                    infos[k][indic][algopol_per_ego[ego][indic]] = 0
                if not algopol_per_ego[ego][indic] in list_results[indic]:
                    list_results[indic].append(algopol_per_ego[ego][indic])
                infos[k][indic][algopol_per_ego[ego][indic]] += 1
                
            for indic in num_indics:
                if not indic in infos[k]:
                    infos[k][indic] = []
                if not indic in algopol_per_ego[ego]:
                    continue
                infos[k][indic].append(algopol_per_ego[ego][indic])
        size.append(this_size)
                
with open('../Results/%s/Typo_%s/clusters_vs_facebook.csv' % (main_dir, typo), 'w') as to_write:
#with open('../Results/%s/clusters_vs_facebook.csv' % (main_dir), 'w') as to_write:
    csv_w = csv.writer(to_write, delimiter = ';')
    csv_w.writerow([''] + range(typo))
    csv_w.writerow([''] + size)
    for indic in non_num_indics + ['new_classe']:
        csv_w.writerow([indic])
        sum_res = {group : sum([infos[group][indic].get(result, 0) 
                       for result in list_results[indic]]) for group in range(typo)}
        for result in list_results[indic]:
            csv_w.writerow([result] + 
                           [round(100*round(infos[group][indic].get(result, 0) / float(sum_res[group]), 2))
                            for group in range(typo)])

    for indic in num_indics:
        csv_w.writerow([indic] +
                       [round(np.mean(infos[group][indic]), 2) 
                        for group in range(typo)])       

In [51]:
typo = 5
main_dir = 'facebook_k5'

for size_min_max in [(0,50), (51, 100), (101, 150)]:
    infos = []
    list_results = {indic : [] for indic in non_num_indics+num_indics+['new_classe']}
    size = []
    for k in range(typo):
        infos.append({indic : {} for indic in non_num_indics+['new_classe']})
        with open('../Results/%s/Typo_%s/egos_classe_%s.csv' % (main_dir, typo, k), 'r') as to_read:
        #with open('../Results/%s/egos_classe_%s.csv' % (main_dir, k), 'r') as to_read:
            csv_r = csv.reader(to_read, delimiter = ';')
            en_tete = csv_r.next()
            this_size = 0
            for line in csv_r:
                ego = line[0]
                size_this_ego = size_per_ego[ego]
                if size_this_ego < size_min_max[0] or size_this_ego > size_min_max[1]:
                    continue
                this_size += 1
                for indic in non_num_indics+['new_classe']:
                    if not indic in algopol_per_ego[ego]:
                        continue
                    if not algopol_per_ego[ego][indic] in infos[k][indic]:
                        infos[k][indic][algopol_per_ego[ego][indic]] = 0
                    if not algopol_per_ego[ego][indic] in list_results[indic]:
                        list_results[indic].append(algopol_per_ego[ego][indic])
                    infos[k][indic][algopol_per_ego[ego][indic]] += 1

                for indic in num_indics:
                    if not indic in infos[k]:
                        infos[k][indic] = []
                    if not indic in algopol_per_ego[ego]:
                        continue
                    infos[k][indic].append(algopol_per_ego[ego][indic])
            size.append(this_size)

    with open('../Results/%s/Typo_%s/clusters_vs_facebook_%s.csv' % (main_dir, typo, size_min_max), 'w') as to_write:
    #with open('../Results/%s/clusters_vs_facebook.csv' % (main_dir), 'w') as to_write:
        csv_w = csv.writer(to_write, delimiter = ';')
        csv_w.writerow([''] + range(typo))
        csv_w.writerow([''] + size)
        for indic in non_num_indics + ['new_classe']:
            csv_w.writerow([indic])
            sum_res = {group : sum([infos[group][indic].get(result, 0) 
                           for result in list_results[indic]]) for group in range(typo)}
            for result in list_results[indic]:
                csv_w.writerow([result] + 
                               [round(100*round(infos[group][indic].get(result, 0) / float(sum_res[group]), 2))
                                for group in range(typo)])

        for indic in num_indics:
            csv_w.writerow([indic] +
                           [round(np.mean(infos[group][indic]), 2) 
                            for group in range(typo)])

In [28]:
typo = 5
main_dir = 'facebook_k5'

with open('../Results/%s/Typo_%s/algopol_vs_clusters.csv' % (main_dir, typo), 'w') as to_write:
    csv_w = csv.writer(to_write, delimiter = ';')
    csv_w.writerow(['cluster', 'size', ''] + num_indics)

for k in range(typo):
    size= 0 
    infos = {indic : [] for indic in num_indics}
    with open('../Results/facebook_k5/Typo_%s/egos_classe_%s.csv' % (typo, k), 'r') as to_read:
        csv_r = csv.reader(to_read, delimiter = ';')
        en_tete = csv_r.next()
        for line in csv_r:
            ego = line[0]
            for indic in num_indics:
                if not indic in algopol_per_ego[ego]:
                    continue
                infos[indic].append(algopol_per_ego[ego][indic])
            size += 1        
                
        with open('../Results/%s/Typo_%s/algopol_vs_clusters.csv' % (main_dir, typo), 'a') as to_write:
            csv_w = csv.writer(to_write, delimiter = ';')
            csv_w.writerow([k, size,''] + [round(np.mean(infos[indic]), 2) for indic in num_indics])

In [16]:
typo = 5
main_dir = 'facebook_k5'

with open('../Results/%s/Typo_%s/rs_vs_clusters.csv' % (main_dir, typo), 'w') as to_write:
    csv_w = csv.writer(to_write, delimiter = ';')
    csv_w.writerow(['cluster', 'nb'] + rs_indics)

for k in range(typo):
    infos = {indic : [] for indic in rs_indics}
    nb = 0
    with open('../Results/facebook_k5/Typo_%s/egos_classe_%s.csv' % (typo, k), 'r') as to_read:
        csv_r = csv.reader(to_read, delimiter = ';')
        en_tete = csv_r.next()
        for line in csv_r:
            ego = line[0]
            nb += 1
            for indic in rs_indics:
                if not indic in algopol_per_ego[ego]:
                    continue
                infos[indic].append(algopol_per_ego[ego][indic])
         
        with open('../Results/%s/Typo_%s/rs_vs_clusters.csv' % (main_dir, typo), 'a') as to_write:
            csv_w = csv.writer(to_write, delimiter = ';')
            try:
                csv_w.writerow([k, nb] + [round(np.mean(infos[indic]), 2) for indic in rs_indics])
            except:
                print ego
                print [algopol_per_ego[ego].get(indic, '') for indic in rs_indics]