### Módulo para generar las relaciones entre los cursos

In [9]:
# Leer los majors desde el archivo
import json
import pandas as pd

# Load the majors definition. json.load parses directly from the open file
# handle, so no intermediate string is kept around at module level.
with open('majors.json', 'r', encoding='utf-8') as json_majors:
    majors = json.load(json_majors)

In [10]:
# Calcular las relaciones entre los cursos. Son 6 tipos.
def relation(course1, course2, df):
    """Return the share of students showing each timing relation of two courses.

    Rows of ``df`` are grouped by ``ID`` (one group per student). For every
    student that has at least one row for both courses, the ``DURACIÓN`` value
    of the first row of each course is compared and exactly one counter is
    incremented:

    * concurrency: both values are equal.
    * succ_1: ``course1`` value is exactly ``course2`` value + 1.
    * succ_2: ``course1`` value is exactly ``course2`` value + 2.
    * succ: ``course1`` value is more than ``course2`` value + 2.
    * predecessor: every remaining case (``course1`` value is lower).

    Students missing either course fall in none of the returned categories
    but still count in the denominator.

    Args:
        course1: Value looked up in the ``SIGLA`` column for the first course.
        course2: Value looked up in the ``SIGLA`` column for the second course.
        df: DataFrame with at least the columns ``ID``, ``SIGLA`` and
            ``DURACIÓN``.

    Returns:
        A list ``[concurrency, succ_1, succ_2, succ, predecessor]`` where each
        entry is the counter divided by the number of students in ``df``.
        When ``df`` contains no students, every entry is ``0.0`` instead of
        raising ``ZeroDivisionError``.
    """
    concurrency = succ_1 = succ_2 = succ = predecessor = 0
    students = 0
    # Grouping by the bare column name gives the same groups as ['ID'].
    for _, rows in df.groupby('ID'):
        students += 1
        rows1 = rows[rows['SIGLA'] == course1]
        rows2 = rows[rows['SIGLA'] == course2]
        if rows1.empty or rows2.empty:
            # The student did not take both courses: no relation to record.
            continue
        # Only the first recorded row of each course is considered.
        when1 = rows1['DURACIÓN'].iloc[0]
        when2 = rows2['DURACIÓN'].iloc[0]
        if when1 == when2:
            concurrency += 1
        elif when1 == when2 + 1:
            succ_1 += 1
        elif when1 == when2 + 2:
            succ_2 += 1
        elif when1 > when2 + 2:
            succ += 1
        else:
            predecessor += 1
    counters = [concurrency, succ_1, succ_2, succ, predecessor]
    if students == 0:
        # Nothing to normalise by; report "no relation observed".
        return [0.0] * len(counters)
    return [counter / students for counter in counters]

In [11]:
# Calcular la matriz de relaciones
def all_relations(courses, df):
    """Build one row per ordered pair of courses with their relation values.

    Each row starts with the second course of the pair. For pairs of distinct
    courses the values returned by ``relation`` are appended after it; a course
    paired with itself contributes a row holding only its own name.

    Args:
        courses: Iterable of course identifiers, traversed once per course.
        df: DataFrame handed unchanged to ``relation``.

    Returns:
        A flat list of rows, ordered by first course and then second course.
    """
    rows = []
    for origin in courses:
        for target in courses:
            row = [target]
            if origin != target:
                row += relation(origin, target, df)
            rows.append(row)
    return rows

# g = all_relations(io_courses, df_io)
# b = all_relations(io_courses, bad)
# df_io_resg = pd.DataFrame(g)
# df_io_resg.to_excel("io_bajo.xlsx")

In [12]:
# Dado una lista de relaciones, seleccionar las que son más representativas.
def define_relations_dict(rel, cota, rel_list, strong_threshold=0.7):
    """Select the most representative relations from a list of values.

    The largest value is always selected. If it reaches ``strong_threshold``
    it is returned alone. Otherwise the remaining values are visited from
    largest to smallest and each one is also selected as long as it is at most
    ``cota`` below the largest value; the scan stops at the first value that is
    further away. Equal values keep their original relative order.

    Args:
        rel: Sequence of numeric relation values.
        cota: Maximum allowed distance to the largest value for a value to be
            kept alongside it.
        rel_list: Names for the values in ``rel``, position by position.
        strong_threshold: Value from which the largest relation is considered
            dominant and returned on its own. Defaults to 0.7.

    Returns:
        A dict mapping each selected name to its value, inserted from largest
        to smallest. Empty when ``rel`` is empty (previously this raised
        ``ValueError``). Neither input sequence is modified.
    """
    if not rel:
        return {}
    # A stable descending sort visits the values in the same order as
    # repeatedly extracting the first maximum.
    ranked = sorted(range(len(rel)), key=lambda pos: rel[pos], reverse=True)
    top = rel[ranked[0]]
    selected = {rel_list[ranked[0]]: top}
    if top >= strong_threshold:
        return selected
    for pos in ranked[1:]:
        # Distances are always measured against the overall maximum.
        if top - rel[pos] > cota:
            break
        selected[rel_list[pos]] = rel[pos]
    return selected

In [13]:
# Dado una lista de relaciones, seleccionar las que son más representativas.
def define_relations_array(relations, cota, rel_names):
    """Return the relation values with the non-representative ones zeroed.

    The selection is delegated to ``define_relations_dict``; the result is
    laid out in the order of ``rel_names``.

    Args:
        relations: Sequence of relation values.
        cota: Tolerance forwarded to ``define_relations_dict``.
        rel_names: Names of the relations, position by position.

    Returns:
        A list with one entry per name in ``rel_names``: the selected value at
        the position of each selected name and ``0`` everywhere else.
    """
    selected = define_relations_dict(relations, cota, rel_names)
    filtered = [0] * len(rel_names)
    for name, value in selected.items():
        filtered[rel_names.index(name)] = value
    return filtered

In [14]:
# Calcula las relaciones más fuertes, ya no se utiliza.
def strong_rel(courses, df, cota, rel_names=None):
    """Map every ordered pair of courses to its most representative relations.

    The comment above this function in the notebook marks it as no longer
    used. It previously called ``define_relations_dict`` without the relation
    names, which raised ``TypeError`` for any pair of distinct courses; the
    names are now passed explicitly.

    Args:
        courses: Iterable of course identifiers, traversed once per course.
        df: DataFrame handed unchanged to ``relation``.
        cota: Tolerance forwarded to ``define_relations_dict``.
        rel_names: Names for the values returned by ``relation``, in order.
            Defaults to the five labels used elsewhere in this notebook.

    Returns:
        A dict of dicts ``{course1: {course2: selection}}`` where ``selection``
        is the dict returned by ``define_relations_dict`` for distinct courses
        and ``None`` for a course paired with itself.
    """
    if rel_names is None:
        rel_names = ['and', 'suc 1', 'suc 2', 'successor', 'predecessor']
    info_dict = {}
    for course1 in courses:
        info_dict[course1] = {}
        for course2 in courses:
            def_rel = None
            if course1 != course2:
                rel = relation(course1, course2, df)
                # Pass a fresh list so the helper may freely consume it.
                def_rel = define_relations_dict(rel, cota, list(rel_names))
            info_dict[course1][course2] = def_rel
    return info_dict

In [15]:
# Define un diccionario con las relaciones como key y el value es una matriz con el valor de las relaciones.
# Se marcaron como 0 las rel no significativas en la matriz, para ver todas eliminar el filter_rel y cambiarlo en el append
def get_all_rel_by_rel(courses, relations, df, cota):
    """Build one course-by-course matrix per relation name.

    For every ordered pair of distinct courses the values from ``relation``
    are filtered with ``define_relations_array``, so non-representative values
    become ``0``. A course paired with itself gets ``0`` in every matrix.

    Args:
        courses: Iterable of course identifiers, traversed several times.
        relations: Relation names, in the order of the values returned by
            ``relation``.
        df: DataFrame handed unchanged to ``relation``.
        cota: Tolerance forwarded to ``define_relations_array``.

    Returns:
        A dict mapping each relation name to a list of rows (one per first
        course), each row holding one value per second course.
    """
    matrices = {name: [[] for _ in courses] for name in relations}
    for row, origin in enumerate(courses):
        for target in courses:
            if origin != target:
                rates = relation(origin, target, df)
                kept = define_relations_array(rates, cota, relations)
                # Positions follow the list returned by relation().
                for pos in range(len(rates)):
                    matrices[relations[pos]][row].append(kept[pos])
            else:
                for matrix in matrices.values():
                    matrix[row].append(0)
    return matrices

#### Calculamos las relaciones para cada major

In [16]:
# Relation labels, position by position, for the values returned by relation().
relations = ['and', 'suc 1', 'suc 2', 'successor', 'predecessor']
# Intended flow: for every major, read its CSV from data/, compute the relation
# matrices and dump them to "data/<major> - relations.json".
for major in majors:
    df_major = pd.read_csv("data/{}.csv".format(major))
    # `display` is not defined in this file; presumably the IPython/Jupyter
    # built-in — confirm before running outside a notebook.
    display(df_major)
    # NOTE(review): this unconditional break leaves the loop after showing the
    # first major, so the computation and the JSON dump below never execute.
    # It looks like a debugging leftover; remove display/break to actually
    # generate the files.
    break
    rel_dict = get_all_rel_by_rel(majors[major], relations, df_major, 0.2)
    with open("data/{} - relations.json".format(major), 'w') as json_file:
        json.dump(rel_dict, json_file)

Unnamed: 0.1,Unnamed: 0,ID,SIGLA,NOTA FINAL,AÑO,DURACIÓN,SEMESTRE,PPA GLOBAL
0,0,5,ICS1113,4.4,2015,3,1,4.82
1,1,5,ICS2523,4.7,2015,4,2,4.82
2,2,5,ICS2123,4.1,2016,6,2,4.82
3,3,5,ICS2562,5.7,2016,6,2,4.82
4,4,5,ICS2121,4.8,2017,7,1,4.82
5,5,5,ICS2613,4.4,2018,9,1,4.82
6,6,5,ICS2813,5.2,2018,9,1,4.82
7,7,5,ICS2122,5.9,2018,10,2,4.82
8,8,14,ICS1113,4.8,2015,4,2,5.28
9,9,14,ICS2523,5.7,2016,5,1,5.28


##### Para calcular las relaciones de un solo major (dividido en los archivos `c0`–`c3`), utilizar las siguientes variables:

In [18]:
# Major to process; used both as key into `majors` and in the file names.
MAJOR = 'Computación 2013'
# Relation labels, position by position, for the values returned by relation().
RELACIONES = ['and', 'suc 1', 'suc 2', 'successor', 'predecessor']
# Intended flow: for each of the four files "data/<MAJOR> c<i>.csv", compute the
# relation matrices and dump them to "data/<MAJOR> - relations c<i>.json".
for i in range(4):
    df_major = pd.read_csv("data/{} c{}.csv".format(MAJOR, i))
    # `display` is not defined in this file; presumably the IPython/Jupyter
    # built-in — confirm before running outside a notebook.
    display(df_major)
    # NOTE(review): this unconditional break leaves the loop after showing the
    # first file, so the computation and the JSON dump below never execute.
    # It looks like a debugging leftover; remove display/break to actually
    # generate the files.
    break
    rel_dict = get_all_rel_by_rel(majors[MAJOR], RELACIONES, df_major, 0.2)

    with open("data/{} - relations c{}.json".format(MAJOR, i), 'w') as json_file:
            json.dump(rel_dict, json_file) 

Unnamed: 0.1,Unnamed: 0,ID,SIGLA,NOTA FINAL,AÑO,DURACIÓN,SEMESTRE,PPA GLOBAL
0,0,4,IIC2233,6.2,2016,4,2,6.26
1,1,4,IIC1253,5.3,2017,5,1,6.26
2,2,4,IIC2343,6.2,2017,5,1,6.26
3,3,4,IIC2413,6.6,2017,5,1,6.26
4,4,4,IIC2133,6.8,2017,6,2,6.26
5,5,4,IIC2333,6.5,2017,6,2,6.26
6,6,4,IIC2143,6.6,2018,7,1,6.26
7,7,4,IIC2613,5.6,2018,7,1,6.26
8,8,4,IIC2154,6.6,2018,8,2,6.26
9,9,73,IIC2233,4.3,2013,2,2,4.90


-----