In [79]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
cirrhosis_patient_survival_prediction = fetch_ucirepo(id=878) 
  
# data (as pandas dataframes) 
X = cirrhosis_patient_survival_prediction.data.features 
y = cirrhosis_patient_survival_prediction.data.targets 
  
# metadata 
print(cirrhosis_patient_survival_prediction.metadata) 
  
# variable information 
print(cirrhosis_patient_survival_prediction.variables) 


{'uci_id': 878, 'name': 'Cirrhosis Patient Survival Prediction', 'repository_url': 'https://archive.ics.uci.edu/dataset/878/cirrhosis+patient+survival+prediction+dataset-1', 'data_url': 'https://archive.ics.uci.edu/static/public/878/data.csv', 'abstract': 'Utilize 17 clinical features for predicting survival state of patients with liver cirrhosis. The survival states include 0 = D (death), 1 = C (censored), 2 = CL (censored due to liver transplantation).', 'area': 'Health and Medicine', 'tasks': ['Classification'], 'characteristics': ['Tabular'], 'num_instances': 418, 'num_features': 17, 'feature_types': ['Real', 'Categorical'], 'demographics': ['Age', 'Sex'], 'target_col': ['Status'], 'index_col': ['ID'], 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1989, 'last_updated': 'Fri Nov 03 2023', 'dataset_doi': '10.24432/C5R02G', 'creators': ['E. Dickson', 'P. Grambsch', 'T. Fleming', 'L. Fisher', 'A. Langworthy'], 'intro_paper': {'title': 'Prognos

In [80]:
cirrhosis_patient_survival_prediction.variables

Unnamed: 0,name,role,type,demographic,description,units,missing_values
0,ID,ID,Integer,,unique identifier,,no
1,N_Days,Other,Integer,,number of days between registration and the ea...,,no
2,Status,Target,Categorical,,"status of the patient C (censored), CL (censor...",,no
3,Drug,Feature,Categorical,,type of drug D-penicillamine or placebo,,yes
4,Age,Feature,Integer,Age,age,days,no
5,Sex,Feature,Categorical,Sex,M (male) or F (female),,no
6,Ascites,Feature,Categorical,,presence of ascites N (No) or Y (Yes),,yes
7,Hepatomegaly,Feature,Categorical,,presence of hepatomegaly N (No) or Y (Yes),,yes
8,Spiders,Feature,Categorical,,presence of spiders N (No) or Y (Yes),,yes
9,Edema,Feature,Categorical,,presence of edema N (no edema and no diuretic ...,,no


In [81]:
most_common_hepatomegaly = X['Hepatomegaly'].value_counts().idxmax()
(X['Hepatomegaly'] == 'NA').sum()
X = X[X['Hepatomegaly'] != "NaNN"]

In [82]:
X.isnull().sum()

Drug             105
Age                0
Sex                0
Ascites          105
Hepatomegaly     105
Spiders          105
Edema              0
Bilirubin          0
Cholesterol      105
Albumin            0
Copper           105
Alk_Phos         105
SGOT             105
Tryglicerides    105
Platelets          7
Prothrombin        2
Stage              5
dtype: int64

In [83]:
X = X[X['Cholesterol'] != 'NaNN']
X['Cholesterol'].astype(float).mean()

369.51056338028167

In [84]:
X['Cholesterol'] = X['Cholesterol'].fillna(X['Cholesterol'].astype(float).mean())
X['Cholesterol'] = X['Cholesterol'].astype(float)
X['Cholesterol'].isnull().sum()

0

In [85]:
# get all distinct values of the column Hepatomegaly
X['Hepatomegaly'].value_counts()

Hepatomegaly
Y    147
N    137
Name: count, dtype: int64

In [86]:
# get all distinct values of the column Spiders
X['Spiders'].value_counts()

Spiders
N    202
Y     82
Name: count, dtype: int64

In [87]:
# get all distinct values of the column Ascites
X['Ascites'].value_counts()

Ascites
N    263
Y     21
Name: count, dtype: int64

In [88]:
# get all distinct values of Sex
X['Sex'].value_counts()

Sex
F    346
M     43
Name: count, dtype: int64

In [89]:
# get all distinct values of Drug
X['Drug'].value_counts()

Drug
Placebo            144
D-penicillamine    140
Name: count, dtype: int64

In [90]:
X = X.drop('Drug', axis=1)

In [91]:
most_common_hepatomegaly = X['Hepatomegaly'].value_counts().idxmax()
most_common_hepatomegaly

'Y'

In [92]:
X['Hepatomegaly'] = X['Hepatomegaly'].fillna(most_common_hepatomegaly)

In [93]:
X['Hepatomegaly'].isnull().sum()

0

In [94]:
X['Spiders'] = X['Spiders'].fillna(X['Spiders'].value_counts().idxmax())
X['Spiders'].isnull().sum()

0

In [95]:
X['Ascites'] = X['Ascites'].fillna(X['Ascites'].value_counts().idxmax())
X['Ascites'].isnull().sum()

0

In [96]:
X['Sex'] = X['Sex'].fillna(X['Sex'].value_counts().idxmax())
X['Sex'].isnull().sum()

0

In [97]:
X['Hepatomegaly'] = X['Hepatomegaly'].replace({'Y': 1, 'N': 0}).astype(int)
X['Hepatomegaly'].value_counts()

  X['Hepatomegaly'] = X['Hepatomegaly'].replace({'Y': 1, 'N': 0}).astype(int)


Hepatomegaly
1    252
0    137
Name: count, dtype: int64

In [98]:
X['Spiders'] = X['Spiders'].replace({'Y': 1, 'N': 0}).astype(int)
X['Spiders'].value_counts()

  X['Spiders'] = X['Spiders'].replace({'Y': 1, 'N': 0}).astype(int)


Spiders
0    307
1     82
Name: count, dtype: int64

In [99]:
X['Ascites'] = X['Ascites'].replace({'Y': 1, 'N': 0}).astype(int)
X['Ascites'].value_counts()

  X['Ascites'] = X['Ascites'].replace({'Y': 1, 'N': 0}).astype(int)


Ascites
0    368
1     21
Name: count, dtype: int64

In [100]:
X['Sex'] = X['Sex'].replace({'M': 0, 'F': 1}).astype(int)
X['Sex'].value_counts()

  X['Sex'] = X['Sex'].replace({'M': 0, 'F': 1}).astype(int)


Sex
1    346
0     43
Name: count, dtype: int64

In [101]:
X['Edema'] = X['Edema'].replace({'N': 0, 'S': 1, "Y": 2}).astype(int)
X['Edema'].value_counts()

  X['Edema'] = X['Edema'].replace({'N': 0, 'S': 1, "Y": 2}).astype(int)


Edema
0    331
1     41
2     17
Name: count, dtype: int64

In [102]:
# fill the missing values of the columns that have missing values
X = X[X['Copper'] != "NaNN"]
X['Copper'] = X['Copper'].fillna(X['Copper'].astype(float).mean())
X['Alk_Phos'] = X['Alk_Phos'].fillna(X['Alk_Phos'].astype(float).mean())
X['SGOT'] = X['SGOT'].fillna(X['SGOT'].astype(float).mean())
X = X[X['Tryglicerides'] != "NaNN"]
X['Tryglicerides'] = X['Tryglicerides'].fillna(X['Tryglicerides'].astype(float).mean())
X = X[X['Platelets'] != "NaNN"]
X['Platelets'] = X['Platelets'].fillna(X['Platelets'].astype(float).mean())
X['Prothrombin'] = X['Prothrombin'].fillna(X['Prothrombin'].astype(float).mean())
X['Stage'] = X['Stage'].fillna(X['Stage'].astype(float).mean())

In [103]:
y.value_counts()

Status
C         232
D         161
CL         25
Name: count, dtype: int64

In [104]:
y['Status'] = y['Status'].replace({'C': 0, 'D': 1, 'CL': 2}).astype(int)

  y['Status'] = y['Status'].replace({'C': 0, 'D': 1, 'CL': 2}).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y['Status'] = y['Status'].replace({'C': 0, 'D': 1, 'CL': 2}).astype(int)


In [105]:
import numpy as np
import matplotlib.pyplot as pl
%matplotlib inline

#
# function to compute the sigmoid
sigmoid = lambda x: 1/(1 + np.exp(-x));

def backprop(W1, W2, X, D):
    alpha = 0.9 # Learning rate
    N = X.shape[0]
    for k in range(0, N):
        x = X[k, :].reshape(-1, 1)
        d = D[k].reshape(-1, 1)
        ##############################
        # forward propogation stop
        ##############################
        # calculate the weighted sum of hidden node
        v1 = np.dot(W1, x)
        #pass the weighted sum to the activation function, this gives the output from hidden layer
        y1 = sigmoid(v1);
        v2 = np.dot(W2, y1)
        y = sigmoid(v2)
        #calculate the error, difference between correct output and computed output
        e = d - y
        #calculate delta, derivative of the activation function times the error
        delta = y*(1-y)*e
        ###########################
        # Backward propagation step
        ###########################
        # propagate the output node delta, δ, backward, and calculate the deltas of the hidden layer
        e1 = np.dot(W2.T, delta)
        delta1 = y1*(1-y1)*e1
    
        # Adjust the weights according to the learning rule
        # delta1.shape=(6,1) # column vector of deltas for the hidden layer
        # x.shape=(1,16) # row vector of the current input
        dW1 = alpha*np.dot(delta1,x.T)
        W1 = W1 + dW1
    
        # y1.shape = (1, 3)
        dW2 = alpha*np.dot(delta,y1.T)
        W2 = W2 + dW2
        
    return W1, W2



In [106]:
X = X.astype(float)

In [107]:
#################################
# Testing backprop
#################################
# inputs array
npX = np.array(X)
npy = np.array(y)
print(X.shape)
print(y.shape)
# initialize the weights between input layer and hidden layer
W1 = 2*np.random.rand(6, 16) - 1
# initialize the weights between hidden layer and output layer
W2 = 2*np.random.rand(3, 6) - 1

for epoch in range(1, 10000): #train
    W1, W2 = backprop(W1, W2, npX, npy)


  sigmoid = lambda x: 1/(1 + np.exp(-x));


(381, 16)
(418, 1)


In [108]:
X[0]

KeyError: 0