In [None]:
! pip install -r requirements.txt

# Compte Rendu - Analyse 60 Millions de consommateurs

## Partie 1 : Analyse d’un classement de couches-culottes pour enfants

Code Source

In [None]:
import csv
import numpy as np
from optlang import Model, Variable, Constraint, Objective

In [None]:
def readFileCSVData(file):
    """
    Read the ';'-separated criteria file and split it into weights, grades and scores.

    Layout implied by the parsing below:
      * a row whose first cell is empty carries the criterion weights in every
        column except the first and the last;
      * any other row describes one product: name in the first column, one grade
        per criterion in the middle columns, overall score in the last column.

    Blank lines are skipped instead of raising IndexError.

    :param file: path of the CSV file to read (opened as UTF-8)
    :return: ponderation : list of float, one weight per criterion
             data : list of [name, grades]; grades are kept as strings because
                    the model builders use them as keys of the interval table
             result : list of [name, score], score converted to float
    """
    ponderation = []
    data = []
    result = []  # last column of each product row
    with open(file, newline='', encoding='utf-8') as f:
        for row in csv.reader(f, delimiter=';'):
            if not row:
                # csv.reader yields [] for a blank line; row[0] would fail on it
                continue
            if row[0] == '':
                # weights row
                ponderation.extend(float(cell) for cell in row[1:-1])
            else:
                # product row: grades in the middle, score at the end
                data.append([row[0], row[1:-1]])
                result.append([row[0], float(row[-1])])

    return ponderation, data, result


In [None]:
def readFileCSVInterval(file):
    """
    Read the ';'-separated interval file into a lookup table.

    Each non-blank row is expected to hold a grade label followed by two numbers;
    the label becomes the key and the two numbers are stored in file order.
    The model builders read index 0 as the lower bound and index 1 as the upper bound.
    Blank lines are skipped instead of raising IndexError.

    :param file: path of the CSV file to read (opened as UTF-8)
    :return: dict mapping label -> [float(column 1), float(column 2)]
    """
    intervalle = {}
    with open(file, newline='', encoding='utf-8') as f:
        for row in csv.reader(f, delimiter=';'):
            if not row:
                # csv.reader yields [] for a blank line
                continue
            intervalle[row[0]] = [float(row[1]), float(row[2])]
    return intervalle


In [None]:
def createModel2(ponderation, data, result, intervalle, fixed):
    """
    Build the linear program that checks the additive model against the published ranking.

    Variables: one score f_i per product and one utility U_ij per (product, criterion),
    all non-negative. The objective maximises f_1, the score of the first product.

    :param ponderation: criterion weights
    :param data: list of [name, grades]; each grade is a key of ``intervalle``
    :param result: list of [name, published score], compared pairwise in list order
    :param intervalle: dict grade -> [lower bound, upper bound]
    :param fixed: when truthy, every f_i is pinned to its published score result[i][1].
                  The original note read "True => question 2.1, False => question 2.2".
                  NOTE(review): CheckAdditiveModel passes False for "2.1" and True
                  for "2.2" -- confirm which mapping is intended.
    :return: the optlang Model, not yet optimised
    """
    nb_produits = len(data)
    nb_criteres = len(ponderation)

    # variables: f_i first, then the row U_i1 .. U_in
    scores = []
    utilites = []
    for i in range(nb_produits):
        scores.append(Variable("f" + str(i + 1), lb=0))
        utilites.append([Variable("U" + str(i + 1) + str(j + 1), lb=0)
                         for j in range(nb_criteres)])

    contraintes = []

    # constraints (1) to (15): f_i equals the weighted sum of the product's utilities
    for i in range(nb_produits):
        somme = 0
        for poids_j, utilite in zip(ponderation, utilites[i]):
            somme += poids_j * utilite
        contraintes.append(Constraint(somme - scores[i], ub=0, lb=0))

    # constraints (16) to (19): consecutive products with equal published scores get
    # equal f; otherwise the earlier one must lead by at least 0.1
    # (presumably result is sorted best-first -- verify against the CSV)
    for i in range(len(result) - 1):
        if result[i][1] == result[i + 1][1]:
            contraintes.append(Constraint(scores[i] - scores[i + 1], ub=0, lb=0))
        else:
            contraintes.append(Constraint(scores[i] - scores[i + 1] - 0.1, lb=0))

    # constraints (20) to (29): each utility stays inside the interval of its grade
    for i in range(nb_produits):
        for j in range(nb_criteres):
            note = data[i][1][j]
            contraintes.append(Constraint(utilites[i][j],
                                          ub=intervalle.get(note)[1],
                                          lb=intervalle.get(note)[0]))

    # optionally pin every f_i to the published score
    if fixed:
        for i in range(nb_produits):
            contraintes.append(Constraint(scores[i], lb=result[i][1], ub=result[i][1]))

    objectif = Objective(scores[0], direction='max')
    model = Model(name='Modele')
    model.objective = objectif
    model.add(contraintes)
    return model

In [None]:
def createModel3(ponderation, data, result, intervalle,
                 rangs_ignores=(3, 8), bornes_inf_libres=((1, 2),)):
    """
    Variant of the additive-model program with some constraints relaxed.

    Same variables, weighted-sum constraints and objective (maximise f_1) as
    createModel2 without pinned scores, except that selected ranking constraints
    are dropped and selected utilities lose the lower bound of their grade interval.
    The relaxed positions used to be hard-coded; they are now parameters whose
    defaults reproduce the previous behaviour.

    :param ponderation: criterion weights
    :param data: list of [name, grades]; each grade is a key of ``intervalle``
    :param result: list of [name, published score], compared pairwise in list order
    :param intervalle: dict grade -> [lower bound, upper bound]
    :param rangs_ignores: 0-based indices i for which no constraint links f_i and
                          f_(i+1) (default: 3 and 8)
    :param bornes_inf_libres: 0-based (product, criterion) pairs whose utility only
                              keeps the interval's upper bound (default: (1, 2));
                              the variable's own lb=0 still applies
    :return: the optlang Model, not yet optimised
    """
    nbelements = len(data)
    nbcriteres = len(ponderation)
    ignores = set(rangs_ignores)
    libres = {tuple(paire) for paire in bornes_inf_libres}

    # variables: one score f_i and one utility U_ij per criterion, all >= 0
    U = []
    f = []
    for i in range(nbelements):
        f.append(Variable("f" + str(i + 1), lb=0))
        U.append([Variable("U" + str(i + 1) + str(j + 1), lb=0)
                  for j in range(nbcriteres)])

    # constraints (1) to (15): f_i equals the weighted sum of the product's utilities
    C = []
    for i in range(nbelements):
        calcul = 0
        for j in range(nbcriteres):
            calcul += ponderation[j] * U[i][j]
        C.append(Constraint(calcul - f[i], ub=0, lb=0))

    # constraints (16) to (19): ranking constraints between consecutive products,
    # skipped for the relaxed positions
    for i in range(len(result) - 1):
        if i in ignores:
            continue
        if result[i][1] == result[i + 1][1]:
            C.append(Constraint(f[i] - f[i + 1], ub=0, lb=0))
        else:
            # the earlier product must lead by at least 0.1
            C.append(Constraint(f[i] - f[i + 1] - 0.1, lb=0))

    # constraints (20) to (29): utilities bounded by the interval of their grade
    for i in range(nbelements):
        for j in range(nbcriteres):
            bornes = intervalle.get(data[i][1][j])
            if (i, j) in libres:
                C.append(Constraint(U[i][j], ub=bornes[1]))
            else:
                C.append(Constraint(U[i][j], ub=bornes[1], lb=bornes[0]))

    obj = Objective(f[0], direction='max')
    model = Model(name='Modele')
    model.objective = obj

    model.add(C)
    return model

In [None]:
def createModel4(ponderation, data, result, intervalle):
    """
    Build the additive-model program without any ranking constraint.

    Only the weighted-sum equalities and the grade-interval bounds are kept;
    the objective maximises f_1, the score of the first product.

    :param ponderation: criterion weights
    :param data: list of [name, grades]; each grade is a key of ``intervalle``
    :param result: unused here, kept so the signature matches the other builders
    :param intervalle: dict grade -> [lower bound, upper bound]
    :return: the optlang Model, not yet optimised
    """
    nb_produits = len(data)
    nb_criteres = len(ponderation)

    # variables: f_i first, then the row U_i1 .. U_in
    scores, utilites = [], []
    for i in range(nb_produits):
        prefixe = str(i + 1)
        scores.append(Variable("f" + prefixe, lb=0))
        utilites.append([Variable("U" + prefixe + str(j + 1), lb=0)
                         for j in range(nb_criteres)])

    contraintes = []

    # constraints (1) to (15): f_i equals the weighted sum of the product's utilities
    for i in range(nb_produits):
        somme = 0
        for poids_j, utilite in zip(ponderation, utilites[i]):
            somme += poids_j * utilite
        contraintes.append(Constraint(somme - scores[i], ub=0, lb=0))

    # constraints (20) to (29): each utility stays inside the interval of its grade
    for i in range(nb_produits):
        for j in range(nb_criteres):
            note = data[i][1][j]
            contraintes.append(Constraint(utilites[i][j],
                                          ub=intervalle.get(note)[1],
                                          lb=intervalle.get(note)[0]))

    objectif = Objective(scores[0], direction='max')
    model = Model(name='Modele')
    model.objective = objectif
    model.add(contraintes)
    return model

In [None]:
def CheckAdditiveModel(fileData, fileInt, question):
    """
    Load both CSV files, build the model of the requested question, solve it and print the scores.

    Prints the solver status, the objective value and the primal value of every
    variable whose name starts with "f". Prints "Existe pas" and returns None
    when the question is not one of "2.1", "2.2", "3", "4".

    NOTE(review): "2.1" builds createModel2 with fixed=False and "2.2" with
    fixed=True, which is the opposite of what createModel2's docstring states --
    confirm which one is right.

    :param fileData: path of the criteria CSV (weights, grades, scores)
    :param fileInt: path of the interval CSV
    :param question: question identifier, as a string
    """
    ponderation, data, result = readFileCSVData(fileData)
    intervalle = readFileCSVInterval(fileInt)

    if question in ("2.1", "2.2"):
        model = createModel2(ponderation, data, result, intervalle, question == "2.2")
    elif question == "3":
        model = createModel3(ponderation, data, result, intervalle)
    elif question == "4":
        model = createModel4(ponderation, data, result, intervalle)
    else:
        print("Existe pas")
        return

    model.optimize()
    print("status:", model.status)
    print("objective value:", model.objective.value)
    print("----------")
    for nom, variable in model.variables.items():
        if not nom.startswith("f"):
            continue
        print(nom, "=", variable.primal)


Question
1: CheckAdditiveModel

In [None]:
# Solve the question 2.1 model on criteres.csv / intervalles.csv and print the scores.
print("Question 2.1")
CheckAdditiveModel('criteres.csv', 'intervalles.csv', "2.1")

Question 2.2

In [None]:
# Solve the question 2.2 model on criteres.csv / intervalles.csv and print the scores.
print("Question 2.2")
CheckAdditiveModel('criteres.csv', 'intervalles.csv', "2.2")

Question 3

In [None]:
# Solve the question 4 model on criteres.csv / intervalles.csv and print the scores.
# NOTE(review): the markdown heading preceding this cell reads "Question 3", but the
# cell runs question 4 -- confirm which one was intended.
print("Question 4")
CheckAdditiveModel('criteres.csv', 'intervalles.csv', "4")

Méthode 2 : ELECTRE TRI

Question 5

In [None]:
def concordance_partiel(type, h, bi, j):
    """
    Partial concordance index of h against profile bi on criterion j.

    Prints a trace of the compared values (and, for 'max' criteria, of the outcome).

    :param type: 'max' when higher values are better, 'min' when lower values are better
    :param h: one alternative, indexable by criterion position
    :param bi: one boundary profile, indexed the same way as h
    :param j: position of the criterion inside h and bi
    :return: 1 if h is at least as good as bi on criterion j, 0 otherwise,
             -1 when type is neither 'max' nor 'min'
    """
    print('concord partielle')
    print("h[j] : ", h[j])
    print("bi[j] : ", bi[j])

    if type == 'max':
        verdict = 1 if h[j] >= bi[j] else 0
        print("h[j] >= bi[j]" if verdict else "h[j] < bi[j]")
        return verdict
    if type == 'min':
        return 1 if h[j] <= bi[j] else 0
    # unknown criterion direction
    return -1

In [None]:
def concordanceGlobal(h, bi, poids, type):
    """
    Global concordance index C(h, bi): weighted share of the criteria on which
    h is at least as good as bi.

    Position 0 of h and bi is skipped (it holds the label in this notebook's data);
    criterion j >= 1 uses poids[j - 1] and type[j - 1], so poids and type need at
    least len(h) - 1 entries. Prints a step-by-step trace.

    :param h: one alternative (list: label followed by criterion values)
    :param bi: one boundary profile, laid out like h
    :param poids: weight of each criterion
    :param type: 'min' / 'max' direction of each criterion
    :return: sum of the weights of the concordant criteria divided by the sum of all
             weights, or -1 on error (unknown criterion type, or weights summing to 0)
    """
    print(" ~~~~~~~ ")
    print('concordanceGlobal')
    numerateur = 0.0
    denom = 0.0
    for j in range(1, len(h)):
        print("critère ", j)
        partiel = concordance_partiel(type[j - 1], h, bi, j)
        if partiel == -1:
            # unknown criterion type: report the error instead of silently
            # subtracting the weight from the numerator
            return -1
        numerateur += poids[j - 1] * partiel
        print('numerateur', numerateur)
        denom += poids[j - 1]
        print('denom', denom)
        if denom != 0:
            # the running ratio is only a trace; skip it while the weights sum to 0
            print('Ctemp(h,bi)', numerateur / denom)
        print("")
    if denom == 0:
        # no criterion or zero total weight: the index is undefined
        return -1
    print('C(h,bi)', numerateur / denom)
    print(" ~~~~~~~ ")
    return numerateur / denom

In [None]:
def Surclasse(seuilMajorite, h, bi, poids, type):
    """
    Tell whether h outranks bi, i.e. whether their global concordance index
    reaches the majority threshold. Prints both operands and the verdict.

    :param seuilMajorite: majority threshold the concordance index must reach
    :param h: left-hand side of the comparison (label followed by criterion values)
    :param bi: right-hand side of the comparison, laid out like h
    :param poids: weight of each criterion
    :param type: vector of 'min' / 'max', one per criterion
    :return: True when concordanceGlobal(h, bi, ...) >= seuilMajorite, else False
    """
    print("h ", h)
    print("bi ", bi)
    print("Question : h surclasse t'il bi ?")
    surclasse = bool(concordanceGlobal(h, bi, poids, type) >= seuilMajorite)
    print('Oui, H surclasse bi' if surclasse else 'Non, H ne surclasse pas bi')
    print("")
    return surclasse

In [None]:
def AffectationOptimiste(h, classement, poids, type, seuil):
    """
    Optimistic assignment: scan the profiles from the last one towards index 0 and
    stop at the first profile that outranks h without being outranked by h.

    :param h: the alternative to assign (label followed by criterion values)
    :param classement: list of boundary profiles, laid out like h
    :param poids: weight of each criterion
    :param type: vector of 'min' / 'max', one per criterion
    :param seuil: majority threshold passed to Surclasse
    :return: index in ``classement`` of the profile the scan stopped at
    """
    # possible optimisation noted by the authors: start at len - 2
    idx = len(classement) - 1
    print('PROFIL ', idx)
    print("###############")
    while True:
        borne = classement[idx]
        # "borne strictly preferred to h": borne outranks h and h does not outrank
        # borne (the second test only runs when the first one succeeds)
        if Surclasse(seuil, borne, h, poids, type) and not Surclasse(seuil, h, borne, poids, type):
            return idx
        print("Bilan : On passe au profil supérieur")
        print("###############")
        idx -= 1

In [None]:
def AffectationPessimiste(h, classement, poids, type, seuil):
    """
    Pessimistic assignment: scan the profiles from index 0 upwards and stop at the
    first profile that h outranks.

    :param h: the alternative to assign (label followed by criterion values)
    :param classement: list of boundary profiles, laid out like h
    :param poids: weight of each criterion
    :param type: vector of 'min' / 'max', one per criterion
    :param seuil: majority threshold passed to Surclasse
    :return: index in ``classement`` of the first profile outranked by h
    """
    idx = 0
    while True:
        if Surclasse(seuil, h, classement[idx], poids, type):
            return idx
        idx += 1

In [None]:
def EvalOptimiste(lesCouches, classement, poids, type, seuil):
    """
    Assign every alternative to a category with the optimistic procedure.

    The category label is read from the notebook-level list ``categorie`` at the
    index returned by AffectationOptimiste.

    :param lesCouches: list of alternatives (label followed by criterion values)
    :param classement: list of boundary profiles
    :param poids: weight of each criterion
    :param type: vector of 'min' / 'max', one per criterion
    :param seuil: majority threshold
    :return: dict mapping "Couche <label>" to its category label
    """
    print("Les couches", lesCouches)
    affectations = {}
    for couche in lesCouches:
        rang = AffectationOptimiste(couche, classement, poids, type, seuil)
        affectations["Couche " + str(couche[0])] = categorie[rang]
    return affectations

In [None]:
def EvalPessimiste(lesCouches, classement, poids, type, seuil):
    """
    Assign every alternative to a category with the pessimistic procedure.

    The category label is read from the notebook-level list ``categorie`` at the
    index returned by AffectationPessimiste minus one.

    :param lesCouches: list of alternatives (label followed by criterion values)
    :param classement: list of boundary profiles
    :param poids: weight of each criterion
    :param type: vector of 'min' / 'max', one per criterion
    :param seuil: majority threshold
    :return: dict mapping "Couche <label>" to its category label
    """
    affectations = {}
    for couche in lesCouches:
        rang = AffectationPessimiste(couche, classement, poids, type, seuil)
        affectations["Couche " + str(couche[0])] = categorie[rang - 1]
    return affectations


In [None]:
import pandas as pd

# ELECTRE TRI configuration: two criteria, both to be maximised.
poids = [3 / 5, 2 / 5]  # one weight per criterion
types = ['max', 'max']  # direction of each criterion
# Majority threshold. NOTE(review): the evaluation cells of this notebook pass
# literal thresholds (0.55, 0.75) instead of this constant -- confirm it is needed.
SEUIL = 0.55
mat = []  # placeholder; replaced by the rows read from mat.csv
# Boundary profiles as [label, value on criterion 1, value on criterion 2];
# going by their labels they are listed from the best (index 0) to the worst.
matrice_profils = [
    ['Profil 6 : Frontiere Le meilleur, impossible', 100, 100],
    ['Profil 5 : Dans les premiers', 3, 3],
    ['Profil 4 : Moyen +', 2, 2],
    ['Profil 3 : Moyen -', 1, 1],
    ['Profil 2 : Dans les derniers', -1, -1],
    ['Profil 1 : Frontiere Le pire,impossible', -100, -100]
]

# Category labels, indexed by EvalOptimiste / EvalPessimiste from the profile index.
categorie = ['Très bon', 'Bon', 'Acceptable', 'Insuffisant', 'Inacceptable']


In [None]:
# Load the alternatives from mat.csv (';'-separated): label, then two integer columns.
# NOTE(review): unlike the other CSV readers of this notebook, no encoding or
# newline argument is given here -- confirm the platform default is acceptable.
with open('mat.csv', 'r') as f:
    reader = csv.reader(f, delimiter=';')
    # parse the file, casting each column to its type
    mat = [[row[0], int(row[1]), int(row[2])] for row in reader]

In [None]:
# ELECTRE TRI assignments (optimistic and pessimistic) with a majority threshold of 0.55.
seuil_majorite = 0.55
print("optimiste")
op = EvalOptimiste(mat, matrice_profils, poids, types, seuil_majorite)
print("pessimiste")
pe = EvalPessimiste(mat, matrice_profils, poids, types, seuil_majorite)
# one row per alternative: [optimistic category, pessimistic category]
final_results = {key: [op[key], pe[key]] for key in op}
df = pd.DataFrame.from_records(final_results).T
df.columns = ['Electre Optimiste', 'Electre Pessimiste']
# last expression of the cell, so the table is actually displayed
df

In [None]:
# ELECTRE TRI assignments (optimistic and pessimistic) with a majority threshold of 0.75.
seuil_majorite = 0.75
print("optimiste")
op = EvalOptimiste(mat, matrice_profils, poids, types, seuil_majorite)
print("pessimiste")
pe = EvalPessimiste(mat, matrice_profils, poids, types, seuil_majorite)
# one row per alternative: [optimistic category, pessimistic category]
final_results = {key: [op[key], pe[key]] for key in op}
df = pd.DataFrame.from_records(final_results).T
df.columns = ['Electre Optimiste', 'Electre Pessimiste']
# last expression of the cell, so the table is actually displayed
df

Question 7

In [None]:
# TODO

Question 8

In [None]:
class decisionnode:
    """One node of a binary decision tree: either a test on a column or a leaf."""

    def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
        # test carried by the node: column index and the value that makes it true
        self.col, self.value = col, value
        # subtrees followed when the test is true / false
        self.tb, self.fb = tb, fb
        # dict of result counts on a leaf, None on every other node
        self.results = results

# Divides a set on a specific column. Can handle numeric or nominal values

def divideset(rows, column, value):
    """
    Split rows in two groups according to the content of one column.

    For an int or float ``value`` a row goes to the first group when
    row[column] >= value; for any other value when row[column] == value.

    :param rows: the rows to split
    :param column: index of the column to test
    :param value: threshold (numeric) or expected value (nominal)
    :return: (rows passing the test, rows failing it), each in input order
    """
    numerique = isinstance(value, (int, float))

    def dans_premier_groupe(row):
        if numerique:
            return row[column] >= value
        return row[column] == value

    set1 = list(filter(dans_premier_groupe, rows))
    set2 = [row for row in rows if not dans_premier_groupe(row)]
    return (set1, set2)

# Create counts of possible results (last column of each row is the result)
def uniquecounts(rows):
    """
    Count how many rows carry each result, the result being the last column of a row.

    :param rows: the rows to tally
    :return: dict mapping each result to its number of occurrences,
             in order of first appearance
    """
    results = {}
    for row in rows:
        etiquette = row[len(row) - 1]
        results[etiquette] = results.get(etiquette, 0) + 1
    return results

from collections import defaultdict

def uniquecounts_dd(rows):
    """
    Same tally as uniquecounts, built on a defaultdict.

    :param rows: the rows to tally; the last column of each row is the result
    :return: plain dict mapping each result to its number of occurrences
    """
    compteur = defaultdict(int)  # missing results start at 0
    for row in rows:
        compteur[row[len(row) - 1]] += 1
    return dict(compteur)

# Entropy is the sum of p(x)log(p(x)) across all the different possible results
def entropy(rows):
    """
    Shannon entropy, in bits, of the results found in the last column of rows.

    :param rows: the rows to score
    :return: -sum(p * log2(p)) over the distinct results, 0.0 when rows is empty
    """
    from math import log

    effectifs = uniquecounts(rows)
    ent = 0.0
    for effectif in effectifs.values():
        # probability of the current result
        p = float(effectif) / len(rows)
        ent -= p * (log(p) / log(2))
    return ent

def buildtree(rows, scorefun=entropy):
    """
    Recursively build a decision tree by picking, at each node, the split with
    the highest information gain.

    Every column except the last is a candidate, and every distinct value found in
    a column is tried as the split value (see divideset). A split is only kept when
    both sides are non-empty and the gain is strictly positive; otherwise the node
    becomes a leaf holding the result counts.

    :param rows: training rows; the last column of each row is the result
    :param scorefun: impurity measure applied to a list of rows (default: entropy);
                     it is now forwarded to the recursive calls, which previously
                     fell back to the default silently
    :return: the root decisionnode (an empty decisionnode when rows is empty)
    """
    if len(rows) == 0:
        return decisionnode()
    current_score = scorefun(rows)

    best_gain = 0.0
    best_criteria = None
    best_sets = None

    column_count = len(rows[0]) - 1  # last column is the result
    for col in range(0, column_count):
        # distinct values present in this column
        column_values = {row[col] for row in rows}

        # try to split on each of them
        for value in column_values:
            set1, set2 = divideset(rows, col, value)

            # information gain: impurity before minus weighted impurity after
            p = float(len(set1)) / len(rows)
            gain = current_score - p * scorefun(set1) - (1 - p) * scorefun(set2)
            if gain > best_gain and len(set1) > 0 and len(set2) > 0:
                best_gain = gain
                best_criteria = (col, value)
                best_sets = (set1, set2)

    if best_gain > 0:
        # keep using the caller's scoring function in the subtrees
        trueBranch = buildtree(best_sets[0], scorefun)
        falseBranch = buildtree(best_sets[1], scorefun)
        return decisionnode(col=best_criteria[0], value=best_criteria[1],
                            tb=trueBranch, fb=falseBranch)
    else:
        return decisionnode(results=uniquecounts(rows))


def printtree(tree, indent=''):
    """
    Print a decision tree as indented text.

    A leaf prints its result counts. Any other node prints its test
    ("Column <col> : <value>? ") followed by the true and false subtrees, each
    introduced by "True->" / "False->" on the same line and indented two more
    spaces per level.

    :param tree: node to print (needs results, col, value, tb and fb attributes)
    :param indent: prefix written before the branch labels of this node
    """
    # leaf node?
    if tree.results is not None:
        print(str(tree.results))
    else:
        # the test carried by the node
        print('Column ' + str(tree.col) + ' : ' + str(tree.value) + '? ')

        # the branches; end=' ' keeps the subtree's first line next to the label
        print(indent + 'True->', end=' ')
        printtree(tree.tb, indent + '  ')
        print(indent + 'False->', end=' ')
        printtree(tree.fb, indent + '  ')

In [None]:
# Rows fed to buildtree: two integer columns followed by the result in the last
# column (buildtree treats the last column of each row as the result).
my_data = [
    [3, 3, 5],
    [2, 2, 4],
    [1, 3, 3],
    [1, 3, 3],
    [1, 1, 3],
    [2, 1, 3],
    [2, -1, 3],
    [1, -1, 3],
    [2, -1, 2],
    [2, -2, 2],
    [2, -2, 2],
    [1, -2, 1]
]

In [None]:
# Build the decision tree from my_data and print it.
printtree(buildtree(my_data))

## 

### Partie 1

In [None]:
# Run questions 2.1, 2.2, 3 and 4 on the aerosol criteria file, with the same
# interval file as before, printing a separator after each one.
for question in ("2.1", "2.2", "3", "4"):
    print("Question " + question)
    CheckAdditiveModel('criteresAEROSOL.csv', 'intervalles.csv', question)
    print("----------------------------------------")

## Partie 2

## Conclusion

Notre conclusion du dossier est ici