# Blood donation dataset

This is a dataset from UCI repository: https://archive.ics.uci.edu/ml/datasets/Blood+Transfusion+Service+Center

The dataset contains data related to blood donation, and the objective is to predict if a person donated blood or not based on his donation history.

For more information access the link above and/or the file <i>transfusion.names</i>

In the end, we will compare the results with other classification algorithms as well: KNN, Multi-Layer Perceptron, Decision Trees and SVC.

## Importing libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.model_selection import KFold

In [2]:
# Since the modules files are located in another directory,
# we need to first append this directory in the Python path
# before importing
import sys
sys.path.append("../..")

import membership
import rule_base
import mamdani
import wangmendel
import defuzz

## Loading the dataset

In [3]:
df = pd.read_csv('transfusion.data', header=None, names=['recency', 'frequency', 'monetary', 'time', 'donated'])
#sort the datase items
df = df.sample(frac=1).reset_index(drop=True)

In [4]:
df.head()

Unnamed: 0,recency,frequency,monetary,time,donated
0,16,8,2000,28,0
1,4,14,3500,86,0
2,14,2,500,14,0
3,16,8,2000,76,0
4,4,10,2500,28,1


In [5]:
df[['recency','frequency','monetary','time', 'donated']] = df[['recency','frequency','monetary','time', 'donated']].apply(pd.to_numeric, errors='coerce')
df.dropna(inplace=True)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 748 entries, 0 to 748
Data columns (total 5 columns):
recency      748 non-null float64
frequency    748 non-null float64
monetary     748 non-null float64
time         748 non-null float64
donated      748 non-null float64
dtypes: float64(5)
memory usage: 35.1 KB


In [7]:
df.describe()

Unnamed: 0,recency,frequency,monetary,time,donated
count,748.0,748.0,748.0,748.0,748.0
mean,9.506684,5.514706,1378.676471,34.282086,0.237968
std,8.095396,5.839307,1459.826781,24.376714,0.426124
min,0.0,1.0,250.0,2.0,0.0
25%,2.75,2.0,500.0,16.0,0.0
50%,7.0,4.0,1000.0,28.0,0.0
75%,14.0,7.0,1750.0,50.0,0.0
max,74.0,50.0,12500.0,98.0,1.0


## Performing the inference and evaluating results

To evaluate the results, we will perform a K-fold cross-validation.

In [30]:
kf = KFold(n_splits=10)

X = df.iloc[:,:-1].as_matrix()
y = df.iloc[:,-1].to_numpy()

score_fuzzy = 0
score_knn = 0
score_nnet = 0
score_tree = 0
score_svm = 0

for train_index, test_index in kf.split(X):    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]


    rule_base = wangmendel.learn_fuzzy_rules(X_train, y_train,
                                        n_regions_inputs=[4,4,4,4],
                                        n_regions_output=2,
                                        name_preffix_inputs=['recency','frequency','monetary','time'],
                                        name_preffix_output='donated')
    
    rule_base = wangmendel.clean_rule_base(rule_base)
    
    #Inference
    results = []
    for data in X_test:

        # If the defuzzified result is above .5 then it is a Yes (1)
        # otherwise it is a No (0)
        z, f_z = mamdani.predict_crisp(data, rule_base)
        if defuzz.centroid(z, f_z)>0.5:
            result = 1
        else:
            result = 0
        
        results.append(result)
        
    #Accuracy fuzzy   
    score_fuzzy += accuracy_score(y_test,results)
    
    
    #KNN
    neigh = KNeighborsClassifier(n_neighbors=5)
    neigh.fit(X_train,y_train)
    neigh_results = neigh.predict(X_test)
    score_knn += accuracy_score(y_test,neigh_results)
    
    
    #NNet
    nnet = MLPClassifier()
    nnet.fit(X_train,y_train)
    nnet_results = nnet.predict(X_test)
    score_nnet += accuracy_score(y_test,nnet_results)
    
    
    #Tree
    tree = DecisionTreeClassifier()
    tree.fit(X_train,y_train)
    tree_results = tree.predict(X_test)
    score_tree += accuracy_score(y_test,tree_results)
    
    
    #SVM
    svm = SVC()
    svm.fit(X_train,y_train)
    svm_results = svm.predict(X_test)
    score_svm += accuracy_score(y_test,svm_results)
    
    
score_fuzzy = score_fuzzy/10
score_knn = score_knn/10
score_nnet = score_nnet/10
score_tree = score_tree/10
score_svm = score_svm/10

print("Score wang-mendel:",score_fuzzy)
print("Score knn:",score_knn)
print("Score nnet:",score_nnet)
print("Score tree:",score_tree)
print("Score svm:",score_svm)

  This is separate from the ipykernel package so we can avoid doing imports until


Score wang-mendel: 0.7645765765765766
Score knn: 0.753945945945946
Score nnet: 0.714954954954955
Score tree: 0.7138198198198198
Score svm: 0.7646306306306307
