In [1]:
import skfuzzy as fuzz
import pandas as pd
import numpy as np
import re
# Path of the input CSV, relative to the notebook's working directory.
DATASET_PATH = 'fuzzy_dataset.csv'
# Raw dataset; later cells read its 'id' and 'integralizacao' columns.
df = pd.read_csv(DATASET_PATH)

In [2]:
def transform_value(value):
    """Strip trailing commas from ``value`` and return an int when possible.

    Parameters
    ----------
    value : object
        Any label; it is converted with ``str()`` before stripping.

    Returns
    -------
    int or str
        ``int`` when the stripped text is an integer literal (e.g. ``'3,'``
        becomes ``3``), otherwise the stripped text itself.
    """
    text = str(value).rstrip(',')
    try:
        return int(text)
    except ValueError:
        # Not an integer literal (e.g. 'level_0'): keep the stripped text.
        return text
    
# Summary table: for every 'integralizacao' group the min, mean and max of
# each column are computed, rendered as text and concatenated, so each cell
# ends up holding "<min>,<mean>,<max>," for one source column.
medidas = (
    df
    .groupby(by=['integralizacao'])
    .agg([np.min, np.mean, np.max])
    .transpose()
    .reset_index()
    .applymap(str)
    .apply(lambda coluna: coluna + ',')
    .groupby('level_0')
    # Summing strings concatenates the three per-aggregate rows of a group.
    .apply(lambda grupo: grupo.sum())
    .rename(columns=transform_value)
)

# The concatenation also repeated the source column name inside 'level_0'
# ("name,name,name,"); keep only the first occurrence.
medidas['level_0'] = medidas['level_0'].apply(lambda nome: nome.split(',')[0])
# Rows for these source columns are excluded from both `medidas` and `valores`.
drop = ['id', 'Unnamed: 0']
medidas = medidas[~medidas['level_0'].isin(drop)]


# Raw-value table: within each 'integralizacao' group, every value of every
# column is rendered as text and concatenated into one comma-terminated
# string ("v1,v2,...,").
valores = (
    df
    .applymap(str)
    .apply(lambda coluna: coluna + ',')
    .groupby(by=['integralizacao'])
    .agg([np.sum])
    .transpose()
    .reset_index()
    # Group labels were turned into text with a trailing comma above;
    # transform_value strips it and restores ints where possible.
    .rename(columns=transform_value)
)

valores = valores[~valores['level_0'].isin(drop)]

def gen_dict(table):
    """Convert a table of comma-joined value strings into nested dicts of numbers.

    Parameters
    ----------
    table : pandas.DataFrame
        Must contain a 'level_0' column. The first two columns are skipped and
        every remaining cell must be a comma-separated string.

    Returns
    -------
    dict
        ``{column_label: {level_0_value: values}}`` where ``values`` is the
        sorted list of unique numbers found in the cell, rounded to 2 decimals.
        Tokens that are not plain decimal literals (e.g. ``'nan'``, ``''``,
        ``'1e5'``) are discarded.

    Notes
    -----
    The index of ``table`` is replaced IN PLACE by its 'level_0' column, so the
    caller's frame is modified. This side effect is kept on purpose: a later
    cell reads ``valores`` after it has been passed through this function and
    appears to depend on the 'level_0'-keyed index.
    """
    # Plain decimal literal: optional sign, digits, optional fraction. At least
    # one digit is required so a lone '+' or '-' never reaches float().
    numeric = re.compile(r"^[+-]?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)$")

    table.index = table['level_0']
    raw = table.iloc[:, 2:].to_dict()

    return {
        outer: {
            inner: sorted({
                round(float(token), 2)
                for token in text.split(',')
                if numeric.match(token)
            })
            for inner, text in cells.items()
        }
        for outer, cells in raw.items()
    }

In [3]:
import seaborn as sns

In [4]:
def flatten_dict(df):
    """Flatten the nested dict built by ``gen_dict`` into a long-format table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table handed to ``gen_dict`` (which replaces its index in place).

    Returns
    -------
    pandas.DataFrame
        One row per value with the columns 'integralizacao' (outer key),
        'campo' (inner key) and 'valor'. Rows are ordered by sorted outer key,
        then sorted inner key, then the order of the values in each list.
    """
    nested = gen_dict(df)
    records = [
        {"integralizacao": outer, "campo": inner, "valor": number}
        for outer in sorted(nested)
        for inner in sorted(nested[outer])
        for number in nested[outer][inner]
    ]
    return pd.DataFrame(records)

# Long-format table of every distinct value per (integralizacao, campo).
dataplot = flatten_dict(valores)
# Keep only the fields whose name contains 'media', then average their values
# per (integralizacao, campo) pair.
dataplot = (
    dataplot[dataplot['campo'].str.contains('media')]
    .groupby(by=['integralizacao', 'campo'])
    .agg([np.mean])
    .reset_index()
)
# agg([...]) produced two-level column labels; keep the first level only.
dataplot.columns = dataplot.columns.get_level_values(0)
# Binarise the averaged value: 1 when it is at least 6.0, otherwise 0.
dataplot['valor'] = dataplot['valor'].apply(lambda media: 1 if media >= 6.0 else 0)
dataplot.to_csv('medias_por_integralizacao.csv')

In [5]:
import pandas as pd  
import numpy as np
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics

def regressao(df, idx):
    """Rank column pairs of one 'integralizacao' group by linear-fit error.

    For every ordered pair of distinct columns (a, b) a one-variable linear
    regression b ~ a is fitted on an 80/20 train/test split (random_state=0)
    and scored on the test part.

    Parameters
    ----------
    df : pandas.DataFrame
        Full dataset; must contain an 'integralizacao' column. Only columns
        from position 3 onwards are used as regression variables.
    idx : scalar
        Value of 'integralizacao' selecting the rows to analyse.

    Returns
    -------
    pandas.DataFrame
        At most 5 rows, sorted by ascending error, with the columns 'mse' (the
        error) and 'a' (the pair rendered as "a,b"). Rows whose error is
        exactly 0 are dropped. Empty when no pair has enough data.
    """
    rows = []
    # NOTE(review): variables start at position 3 -- presumably the first three
    # columns are index/id/'integralizacao'; confirm against the CSV layout.
    grupo = df[df['integralizacao'] == idx].iloc[:, 3:]
    for preditor in grupo.columns:
        for alvo in grupo.columns:
            if preditor == alvo:
                continue
            par = grupo[[preditor, alvo]].dropna()
            # A 0.2 test split needs at least one row on each side; with
            # fewer than 2 rows train_test_split raises, so skip the pair.
            if len(par) < 2:
                continue
            X = par[preditor].values.reshape(-1, 1)
            y = par[alvo].values.reshape(-1, 1)
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=0
            )
            regressor = LinearRegression()
            regressor.fit(X_train, y_train)
            y_pred = regressor.predict(X_test)
            rows.append({
                # NOTE(review): the key says 'mse' but the value is the mean
                # ABSOLUTE error. The key is kept because later cells read
                # result['mse']; confirm which metric is intended.
                'mse': metrics.mean_absolute_error(y_test, y_pred),
                'a': preditor,
                'b': alvo,
            })
    # Explicit columns keep the frame well-formed when no pair was scored.
    tmp_df = pd.DataFrame(rows, columns=['mse', 'a', 'b']).sort_values(by='mse')
    tmp_df['a'] = tmp_df['a'] + ',' + tmp_df['b']
    tmp_df = tmp_df[['mse', 'a']]
    tmp_df = tmp_df[tmp_df['mse'] != 0]
    return tmp_df.head(5)

In [6]:
# Run the pairwise regression for every 'integralizacao' value (0 first, then
# the remaining values in set-iteration order) and stack the per-group tables.
# The tables are collected in a list and concatenated once: DataFrame.append
# was removed in pandas 2.0 and re-copied the whole frame on every iteration.
parciais = []
for item in [0] + [valor for valor in set(df['integralizacao'].values) if valor != 0]:
    tmp = regressao(df, item)
    tmp['integralizacao'] = item
    parciais.append(tmp)
result = pd.concat(parciais)

In [7]:
# This is the table produced by the linear regression step; the error column
# is rounded to two decimals before the table is written to disk.
result = result.assign(mse=result['mse'].round(2))
result.to_csv('regressao_linear.csv')

In [8]:
# Drop the two label columns of `medidas` and bring its index back as a
# 'level_0' column; the index labels still carry a trailing comma.
tmp_medidas = medidas.iloc[:, 2:].reset_index()
tmp_medidas['level_0'] = tmp_medidas['level_0'].map(lambda nome: nome.rstrip(','))
# {integralizacao: {field: "<min>,<mean>,<max>,"}}
tmp_medidas = tmp_medidas.set_index('level_0').to_dict()

# Parse the first three comma-separated tokens of every cell into floats
# rounded to two decimals.
tmp_medidas = {
    key: {
        key2: [round(float(x), 2) for x in texto.split(',')[0:3]]
        for key2, texto in campos.items()
    }
    for key, campos in tmp_medidas.items()
}

def get_num(x):
    """Parse ``x`` as a float rounded to two decimals, or return -1.

    Parameters
    ----------
    x : object
        Usually a text token; anything ``float()`` accepts is converted.

    Returns
    -------
    float or int
        The rounded number, or the sentinel ``-1`` when ``x`` is the text
        ``'nan'`` or cannot be converted. Callers therefore cannot tell a
        genuine -1 apart from a rejected token.
    """
    try:
        # Explicit check instead of `assert`, which is removed under `python -O`.
        if x == 'nan':
            return -1
        return round(float(x), 2)
    except (TypeError, ValueError, OverflowError):
        return -1
        
# Key the rows by field name ('level_0') explicitly. Previously this only
# worked because gen_dict() had replaced the index of `valores` in place in an
# earlier cell; without that side effect the inner keys were positional
# integers and no field ever matched the keys of `tmp_medidas`.
# Result: {integralizacao: {field: "v1,v2,...,"}}
tmp_valores = valores.set_index('level_0').iloc[:, 1:].to_dict()
for key in tmp_valores.keys():
    for key2 in tmp_valores[key]:
        # get_num() returns -1 for tokens it rejects ('nan', '', non-numeric).
        numeros = (get_num(token) for token in tmp_valores[key][key2].split(','))
        # Sorted unique values: used later as the universe of the field.
        tmp_valores[key][key2] = sorted({numero for numero in numeros if numero != -1})

# Triangular membership functions per (integralizacao, field), evaluated over
# the observed values of that field. Entries stay None when `tmp_medidas` has
# no [min, mean, max] summary for the pair.
pertinencias = {}
for key in tmp_valores.keys():
    pertinencias[key] = {}
    for key2 in tmp_valores[key]:
        pertinencias[key][key2] = None
        # Explicit lookup instead of a bare `except:` so that only a missing
        # summary is skipped and real errors are no longer hidden.
        medidas_campo = tmp_medidas.get(key, {}).get(key2)
        if medidas_campo is None:
            continue
        arr = sorted(medidas_campo)
        # 'bom' (good) peaks at the largest summary value, 'medio' (medium) at
        # the middle one and 'ruim' (bad) at the smallest one.
        bom = [arr[1], arr[2], arr[2]]
        medio = [arr[0], arr[1], arr[2]]
        ruim = [arr[0], arr[0], arr[1]]
        universo = np.array(tmp_valores[key][key2])
        pertinencias[key][key2] = {
            'bom': fuzz.trimf(universo, np.array(bom)),
            'medio': fuzz.trimf(universo, np.array(medio)),
            'ruim': fuzz.trimf(universo, np.array(ruim))
        }

In [None]:
# Output classes as triangular membership functions over the integer universe
# 0..9: 'bom' (good), 'medio' (medium) and 'ruim' (bad).
# NOTE(review): np.arange(0, 10, 1) stops at 9, so the peak of 'bom' at 10 is
# never sampled -- confirm whether the universe should include 10.
classificacao = {
    rotulo: fuzz.trimf(np.arange(0, 10, 1), abc)
    for rotulo, abc in (
        ("bom", [5, 10, 10]),
        ("medio", [0, 5, 10]),
        ("ruim", [0, 0, 5]),
    )
}

In [21]:
def get_user_data(df, idx):
    """Return the rows of one user as ``{integralizacao: {column: value}}``.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset with an 'id' column; the first two columns are discarded and
        'integralizacao' must be among the remaining ones.
    idx : scalar
        Value of 'id' selecting the user.

    Returns
    -------
    dict
        Outer keys are the user's 'integralizacao' values, inner keys are the
        remaining column names. Empty when no row matches ``idx``.
    """
    return (
        df[df['id'] == idx]
        .iloc[:, 2:]
        .set_index('integralizacao')
        .transpose()
        .to_dict()
    )

In [None]:
def activate(user, tmp_valores, pertinencias):
    """Evaluate a user's values against the membership functions of each field.

    Parameters
    ----------
    user : dict
        ``{integralizacao: {field: value}}``.
    tmp_valores : dict
        ``{integralizacao: {field: universe}}`` where ``universe`` is the
        sequence of points the membership functions were evaluated on.
    pertinencias : dict
        ``{integralizacao: {field: memberships}}`` where ``memberships`` is
        either ``None``, a single membership array, or a dict mapping a label
        to a membership array.

    Returns
    -------
    dict
        ``{integralizacao: {field: activation}}``. ``activation`` is ``None``
        when the user has no value for the field, no membership function
        exists or the universe is empty. For a dict of membership arrays it
        is ``{label: degree}``; for a single array it is the degree itself.
    """
    ativacao = {}
    for key, campos in tmp_valores.items():
        ativacao[key] = {}
        for key2, universe in campos.items():
            ativacao[key][key2] = None
            function = pertinencias.get(key, {}).get(key2)
            if function is None or len(universe) == 0:
                continue
            try:
                value = user[key][key2]
            except KeyError:
                # The user has no record for this integralizacao/field.
                continue
            if isinstance(function, dict):
                # interp_membership expects ONE membership array; passing the
                # whole dict used to fail inside a bare `except:` and left
                # every activation as None. Interpolate each label on its own.
                ativacao[key][key2] = {
                    rotulo: fuzz.interp_membership(universe, mf, value)
                    for rotulo, mf in function.items()
                }
            else:
                ativacao[key][key2] = fuzz.interp_membership(universe, function, value)
    return ativacao

# Activation degrees for user 30; activate() requires the universes and the
# membership functions as its second and third arguments.
activate(get_user_data(df, 30), tmp_valores, pertinencias)