In [1]:
import pandas as pd
import numpy as np

In [2]:
# 0 - Female / 1 - Male

data = np.array([
    [182.88, 81.64, 45.72, 1],
    [180.44, 86.17, 41.91, 1],
    [170.07, 77.10, 45.72, 1],
    [180.44, 65.76, 38.10, 1],
    [152.40, 45.35, 34.29, 0],
    [167.64, 58.96, 36.19, 0],
    [179.83, 68.03, 37.33, 0],
    [175.26, 63.49, 38.10, 0],
])

df = pd.DataFrame(data, columns=["height", "weight", "foot", "gender"])
display(df)

Unnamed: 0,height,weight,foot,gender
0,182.88,81.64,45.72,1.0
1,180.44,86.17,41.91,1.0
2,170.07,77.1,45.72,1.0
3,180.44,65.76,38.1,1.0
4,152.4,45.35,34.29,0.0
5,167.64,58.96,36.19,0.0
6,179.83,68.03,37.33,0.0
7,175.26,63.49,38.1,0.0


In [3]:
def means_stds(df, columns, target):
    means_std = {}

    mean_0 = []
    std_0 = []
    mean_1 = []
    std_1 = []
    
    for i in columns:
        mean_0.append(df[df[target] == 0][i].mean())
        mean_1.append(df[df[target] == 1][i].mean())

        std_0.append(df[df[target] == 0][i].std())
        std_1.append(df[df[target] == 1][i].std())

    means_std["mean_0"] = mean_0
    means_std["mean_1"] = mean_1

    means_std["std_0"] = std_0
    means_std["std_1"] = std_1
    
    return means_std

In [4]:
from scipy.stats import norm

def normpdf(x, mean, std):
    '''var = std ** 2
    denom = std * ((2 * np.pi) ** 0.5)
    num = np.exp(-(x - mean) ** 2 / (2 * var))
    return num / denom'''
    
    return norm.pdf(x, mean, std)

In [5]:
def continuous_data(x, mean, std):
    aux = []
    
    for i in range(len(x)):
        print(i, normpdf(x[i], mean[i], std[i]))
        aux.append(normpdf(x[i], mean[i], std[i]))
        
    aux.append(0.5)
        
    return np.prod(aux)

In [6]:
x = np.array([175, 75, 38])

columns = ["height", "weight", "foot"]

means_std = means_stds(df, columns, "gender")
print(means_std)

female = continuous_data(x, means_std["mean_0"], means_std["std_0"])
male = continuous_data(x, means_std["mean_1"], means_std["std_1"])

print(male, female)
print("Male") if male > female else print("Female")

{'mean_0': [168.7825, 58.9575, 36.4775], 'mean_1': [178.45749999999998, 77.6675, 42.8625], 'std_0': [12.023544610471571, 9.798263026339583, 1.6560067431424714], 'std_1': [5.708744024155695, 8.75944965927274, 3.647797280551647]}
0 0.0290276371964104
1 0.01065755183638047
2 0.1578708883635401
0 0.058172447535766794
1 0.043480603201713694
2 0.04498161176706322
5.688763959198938e-05 2.4419749083584127e-05
Male


## Discrete

In [7]:
data = np.array([
    ["youth", "high", "no", "fair", "no"],
    ["youth", "high", "no", "excellent", "no"],
    ["middle-aged", "high", "no", "fair", "yes"],
    ["senior", "medium", "no", "fair", "yes"],
    ["senior", "low", "yes", "fair", "yes"],
    ["senior", "low", "yes", "excellent", "no"],
    ["middle-aged", "low", "yes", "excellent", "yes"],
    ["youth", "medium", "no", "fair", "no"],
    ["youth", "low", "yes", "fair", "yes"],
    ["senior", "medium", "yes", "fair", "yes"],
    ["youth", "medium", "yes", "excellent", "yes"],
    ["middle-aged", "medium", "no", "excellent", "yes"],
    ["middle-aged", "high", "yes", "fair", "yes"],
    ["senior", "medium", "no", "excellent", "no"],
])

df = pd.DataFrame(data, columns=["age", "income", "student", "credit", "buy"])
display(df)

Unnamed: 0,age,income,student,credit,buy
0,youth,high,no,fair,no
1,youth,high,no,excellent,no
2,middle-aged,high,no,fair,yes
3,senior,medium,no,fair,yes
4,senior,low,yes,fair,yes
5,senior,low,yes,excellent,no
6,middle-aged,low,yes,excellent,yes
7,youth,medium,no,fair,no
8,youth,low,yes,fair,yes
9,senior,medium,yes,fair,yes


In [8]:
p_buy_yes = len(df[df["buy"] == "yes"]) / len(data)
p_buy_no = len(df[df["buy"] == "no"]) / len(data)
print(p_buy_yes, p_buy_no)

0.6428571428571429 0.35714285714285715


In [9]:
def likelihood(x, df, columns, target):
    res = {}
    uniques = df[target].unique()
    for i in range(len(uniques)):
        calc = np.zeros(len(columns))
        for j in range(len(columns)):
            print(target, uniques[i], j)
            calc[j] = len(df[(df[target] == uniques[i]) & (df[columns[j]] == x[j])]) / len(df[df[target] == uniques[i]])
        res[uniques[i]] = np.prod(calc)
    return res

In [10]:
x = np.array([df["age"], df["income"], df["student"], df["credit"]])
columns = ["age", "income", "student", "credit"]

likelihoods = likelihood(x, df, columns, "buy")
print(likelihoods)

buy no 0
buy no 1
buy no 2
buy no 3
buy yes 0
buy yes 1
buy yes 2
buy yes 3
{'no': 1.0, 'yes': 1.0}


In [11]:
final_no = p_buy_no * likelihoods["no"]
final_yes = p_buy_yes * likelihoods["yes"]

print("No:", final_no)
print("Yes:", final_yes)
print("-------")
print("Buy") if final_yes > final_no else print("Not buy")

No: 0.35714285714285715
Yes: 0.6428571428571429
-------
Buy


In [26]:
dados = pd.read_csv('DATA.txt', sep="   ", header=None, engine='python')
dados.columns = ["gender", "age", "risk_factors", "systolic_bp", "s_hr", "st_segment", "ecg_hr", "creatinine", "killp", "event"]
display(dados)

Unnamed: 0,gender,age,risk_factors,systolic_bp,s_hr,st_segment,ecg_hr,creatinine,killp,event
0,1.0,33.0,0.0,132.0,92.266205,1.0,90.0,0.8,1.0,0.0
1,1.0,69.0,0.0,147.0,54.178624,0.0,52.0,1.4,1.0,0.0
2,1.0,63.0,0.0,142.0,41.364843,1.0,44.0,1.1,3.0,1.0
3,0.0,79.0,0.0,147.0,107.379000,1.0,110.0,0.9,1.0,1.0
4,0.0,61.0,0.0,107.0,83.224808,0.0,80.0,1.1,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...
452,1.0,69.0,0.0,95.0,100.960080,0.0,103.0,1.6,3.0,1.0
453,1.0,51.0,1.0,140.0,87.259367,1.0,90.0,1.0,3.0,1.0
454,0.0,57.0,0.0,120.0,114.649890,0.0,117.0,1.0,3.0,1.0
455,1.0,87.0,0.0,149.0,48.788823,1.0,53.0,1.4,1.0,1.0


In [28]:
def fuse_same_sensors(df):
    s_hr = df["s_hr"]
    ecg_hr = df["ecg_hr"]

    s_hr_std = 2
    ecg_hr_std = 0.5

    hr_final = ((np.power(s_hr_std, 2) / (np.power(s_hr_std, 2) + np.power(ecg_hr_std, 2))) * ecg_hr) + ((np.power(ecg_hr_std, 2) / (np.power(s_hr_std, 2) + np.power(ecg_hr_std, 2))) * s_hr)
    print(hr_final)

    df["hr_final"] = hr_final

In [29]:
fuse_same_sensors(dados)
display(dados)

0       90.133306
1       52.128154
2       43.844991
3      109.845824
4       80.189695
          ...    
452    102.880005
453     89.838786
454    116.861758
455     52.752284
456     65.047416
Length: 457, dtype: float64


Unnamed: 0,gender,age,risk_factors,systolic_bp,s_hr,st_segment,ecg_hr,creatinine,killp,event,hr_final
0,1.0,33.0,0.0,132.0,92.266205,1.0,90.0,0.8,1.0,0.0,90.133306
1,1.0,69.0,0.0,147.0,54.178624,0.0,52.0,1.4,1.0,0.0,52.128154
2,1.0,63.0,0.0,142.0,41.364843,1.0,44.0,1.1,3.0,1.0,43.844991
3,0.0,79.0,0.0,147.0,107.379000,1.0,110.0,0.9,1.0,1.0,109.845824
4,0.0,61.0,0.0,107.0,83.224808,0.0,80.0,1.1,1.0,0.0,80.189695
...,...,...,...,...,...,...,...,...,...,...,...
452,1.0,69.0,0.0,95.0,100.960080,0.0,103.0,1.6,3.0,1.0,102.880005
453,1.0,51.0,1.0,140.0,87.259367,1.0,90.0,1.0,3.0,1.0,89.838786
454,0.0,57.0,0.0,120.0,114.649890,0.0,117.0,1.0,3.0,1.0,116.861758
455,1.0,87.0,0.0,149.0,48.788823,1.0,53.0,1.4,1.0,1.0,52.752284


In [30]:
measurements = np.array([dados["s_hr"], dados["ecg_hr"]])
dp = np.array([2, 0.5])

In [32]:
tau = np.sum(1 / np.power(dp, 2))
s = 1 / tau
print(tau)

4.25


In [33]:
# mean = np.sum(measurements[:, 0] / (tau * np.power(dp, 2)))
valores = np.zeros(len(dados.s_hr))
for i in range(len(dados["s_hr"])):
    valores[i] = (dados['s_hr'][i] / (tau * np.power(dp[0], 2))) + (dados['ecg_hr'][i] / (tau * np.power(dp[1], 2)))
    
print(valores)
dados["hr_final_mult"] = valores
# print("Mean:", mean, " / std:", s)

[ 90.13330618  52.12815435  43.84499076 109.84582353  80.18969459
  70.06480118  89.04626494  73.76289947  73.21040512  70.87472729
  75.08032847  76.17818747  80.03978053  57.05206935  87.92001212
  69.75257371  66.76241718  64.80209012  83.207072    85.78580724
  54.84630547 108.83824412  60.16123576  72.10743676  66.204744
  59.88801935  61.18546076  75.11193376  48.74798912 139.89481176
  66.18791165  90.16937112  68.20588988  66.19011247  63.14689212
  57.1317      73.06878235  81.78984224  97.76991606 100.14761176
  81.87362829  74.80680735  77.05217135  77.12498     68.12165371
  48.20210818  52.04364688  60.79957524  90.16276924  56.07818176
  43.76570218  69.06878294  61.11102512  56.78983582  54.17368229
  70.10630476  89.85417759  87.13122312  66.19836176  75.20038676
  73.103834    69.87285535  72.07067918  60.17814388  67.08605782
  86.79541994  90.07438512 108.09586118  61.078465    84.04070347
  69.80883247  71.19702835  95.08264224  57.11183171  77.04732335
 105.7564064

In [39]:
display(dados)
print(sum(dados["hr_final"] == dados["hr_final_mult"]))

Unnamed: 0,gender,age,risk_factors,systolic_bp,s_hr,st_segment,ecg_hr,creatinine,killp,event,hr_final,hr_final_mult
0,1.0,33.0,0.0,132.0,92.266205,1.0,90.0,0.8,1.0,0.0,90.133306,90.133306
1,1.0,69.0,0.0,147.0,54.178624,0.0,52.0,1.4,1.0,0.0,52.128154,52.128154
2,1.0,63.0,0.0,142.0,41.364843,1.0,44.0,1.1,3.0,1.0,43.844991,43.844991
3,0.0,79.0,0.0,147.0,107.379000,1.0,110.0,0.9,1.0,1.0,109.845824,109.845824
4,0.0,61.0,0.0,107.0,83.224808,0.0,80.0,1.1,1.0,0.0,80.189695,80.189695
...,...,...,...,...,...,...,...,...,...,...,...,...
452,1.0,69.0,0.0,95.0,100.960080,0.0,103.0,1.6,3.0,1.0,102.880005,102.880005
453,1.0,51.0,1.0,140.0,87.259367,1.0,90.0,1.0,3.0,1.0,89.838786,89.838786
454,0.0,57.0,0.0,120.0,114.649890,0.0,117.0,1.0,3.0,1.0,116.861758,116.861758
455,1.0,87.0,0.0,149.0,48.788823,1.0,53.0,1.4,1.0,1.0,52.752284,52.752284


403
