In [None]:
import numpy as np
import os.path as osp
import pandas as pd
import statsmodels.api as sm

# Read the BNP dataset from its CSV file into a DataFrame.
# The path is resolved through abspath before being handed to pandas.
BNPdata_path = osp.abspath('/content/BNPdata.csv')
BNPData = pd.read_csv(filepath_or_buffer=BNPdata_path)

# Summarise the frame: column names, non-null counts and dtypes
BNPData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   ID       35 non-null     int64 
 1   BNP      35 non-null     int64 
 2   Outcome  35 non-null     object
dtypes: int64(2), object(1)
memory usage: 968.0+ bytes


In [None]:
# Preview the loaded frame as the cell's output to eyeball the raw
# ID / BNP / Outcome values before any encoding is applied.
BNPData

Unnamed: 0,ID,BNP,Outcome
0,1,655,S
1,2,660,S
2,3,660,S
3,4,662,S
4,5,662,S
5,6,674,S
6,7,676,S
7,8,680,S
8,9,680,S
9,10,682,S


In [None]:
# Inspect how pandas one-hot encodes 'Outcome' when every option is left
# at its default: one indicator column is produced per distinct label.
outcome_labels = BNPData['Outcome']
dummy_default = pd.get_dummies(outcome_labels)
print(dummy_default)

    D  S
0   0  1
1   0  1
2   0  1
3   0  1
4   0  1
5   0  1
6   0  1
7   0  1
8   0  1
9   0  1
10  0  1
11  1  0
12  0  1
13  0  1
14  0  1
15  1  0
16  1  0
17  0  1
18  0  1
19  1  0
20  0  1
21  1  0
22  0  1
23  1  0
24  1  0
25  0  1
26  1  0
27  1  0
28  1  0
29  1  0
30  1  0
31  1  0
32  0  1
33  1  0
34  1  0


In [None]:
# Change the dummy variable coding by specifying the reference category ('S').
# Listing the categories explicitly fixes the level order to S, D, so the
# indicator columns come out as [S, D] instead of the default [D, S].
BNPData['Outcome'] = pd.Categorical(BNPData['Outcome'], categories=['S', 'D'], ordered=True)

# Ask for plain signed-integer 0/1 indicators instead of the pandas default
# (uint8 on older pandas, bool on pandas >= 2). The 'D' column is later used
# as a regression target, and unsigned 8-bit values wrap around in arithmetic
# such as 2*y - 1 (0 becomes 255), which corrupts the statsmodels
# log-likelihood. The printed 0/1 table is unchanged.
dummy_changed = pd.get_dummies(BNPData['Outcome'], dtype=int)
print(dummy_changed)

    S  D
0   1  0
1   1  0
2   1  0
3   1  0
4   1  0
5   1  0
6   1  0
7   1  0
8   1  0
9   1  0
10  1  0
11  0  1
12  1  0
13  1  0
14  1  0
15  0  1
16  0  1
17  1  0
18  1  0
19  0  1
20  1  0
21  0  1
22  1  0
23  0  1
24  0  1
25  1  0
26  0  1
27  0  1
28  0  1
29  0  1
30  0  1
31  0  1
32  1  0
33  0  1
34  0  1


In [None]:
# Attach the S/D indicator columns to the right of the original columns.
# Both frames share the same row index, so rows line up one-to-one.
BNPData_encoded = pd.concat(
    objs=[BNPData, dummy_changed],
    axis="columns",
)

# Show the first rows of the combined frame as the cell's output
BNPData_encoded.head()

Unnamed: 0,ID,BNP,Outcome,S,D
0,1,655,S,1,0
1,2,660,S,1,0
2,3,660,S,1,0
3,4,662,S,1,0
4,5,662,S,1,0


In [None]:
# Keep only the predictor (BNP) and the death indicator (D):
#   - 'ID' is a row identifier, not a predictor
#   - 'Outcome' is the original label that 'D' already encodes
#   - 'S' is the reference-level indicator (redundant with 'D')
columns_to_drop = ['ID', 'Outcome', 'S']

# This cell overwrites its own input, so a second run would otherwise raise
# KeyError because the columns are already gone. errors='ignore' makes the
# cell safe to re-run while producing the same BNP/D frame.
BNPData_encoded = BNPData_encoded.drop(columns=columns_to_drop, errors='ignore')
print(BNPData_encoded)

    BNP  D
0   655  0
1   660  0
2   660  0
3   662  0
4   662  0
5   674  0
6   676  0
7   680  0
8   680  0
9   682  0
10  683  0
11  687  1
12  687  0
13  689  0
14  692  0
15  696  1
16  700  1
17  701  0
18  703  0
19  708  1
20  708  0
21  710  1
22  719  0
23  719  1
24  725  1
25  727  0
26  728  1
27  728  1
28  731  1
29  731  1
30  737  1
31  738  1
32  741  0
33  747  1
34  747  1


Create Logistic Regression Model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Split the DataFrame into training and testing sets
X = BNPData_encoded['BNP']  # Independent variable (single predictor)

# Cast the target to a signed integer before modelling. The dummy column can
# arrive as uint8, and the Logit log-likelihood evaluates 2*y - 1: with an
# unsigned 8-bit target, y == 0 wraps around to 255 instead of -1. That is
# what produced "Current function value: inf", "Log-Likelihood: -inf" and
# the exp-overflow / log-of-zero warnings in the previously recorded output.
y = BNPData_encoded['D'].astype(int)  # Target variable (1 = death)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a logistic regression model.
# statsmodels does not add an intercept on its own, so a constant column is
# prepended to the design matrix explicitly.
model = sm.Logit(y_train, sm.add_constant(X_train))

# Fit the model by maximum likelihood
result = model.fit()

# Display the summary (coefficients, standard errors, fit statistics)
print(result.summary())

Optimization terminated successfully.
         Current function value: inf
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                      D   No. Observations:                   28
Model:                          Logit   Df Residuals:                       26
Method:                           MLE   Df Model:                            1
Date:                Wed, 06 Sep 2023   Pseudo R-squ.:                     inf
Time:                        18:19:40   Log-Likelihood:                   -inf
converged:                       True   LL-Null:                        0.0000
Covariance Type:            nonrobust   LLR p-value:                     1.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        -46.7626     16.827     -2.779      0.005     -79.743     -13.783
BNP            0.0650      0.024  

  return 1/(1+np.exp(-X))
  return np.sum(np.log(self.cdf(q * linpred)))


In [None]:
# Assemble the modelling inputs: the target is the 'D' indicator and the
# feature matrix is every remaining column of the encoded frame.
y = BNPData_encoded['D']  # Target variable
X = BNPData_encoded.drop(columns=['D'])   # Independent variables (excluding the target 'D')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# scikit-learn fits an intercept by default, so no constant column is added
model = LogisticRegression()
model.fit(X_train, y_train)

# Class predictions for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the true held-out labels
accuracy = accuracy_score(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Report the evaluation metrics
print(f"Accuracy: {accuracy}")
print(f"Confusion Matrix:\n {confusion}")
print(f"Classification Report:\n {classification_rep}")


Accuracy: 0.5714285714285714
Confusion Matrix:
 [[1 0]
 [3 3]]
Classification Report:
               precision    recall  f1-score   support

           0       0.25      1.00      0.40         1
           1       1.00      0.50      0.67         6

    accuracy                           0.57         7
   macro avg       0.62      0.75      0.53         7
weighted avg       0.89      0.57      0.63         7



Predict the probability of death for a given BNP level

In [None]:
# Create a new data point with BNP=700.
# The model was fitted on a DataFrame whose only feature column is 'BNP', so
# the query is passed as a one-row DataFrame with the same column name.
# A bare ndarray carries no feature names and makes scikit-learn warn that
# X does not have valid feature names.
new_data_point = pd.DataFrame({'BNP': [700]})

# Make a prediction
predicted_probabilities = model.predict_proba(new_data_point)

# predicted_probabilities has one row per query point and one column per
# class, ordered as in model.classes_
print(predicted_probabilities)

[[0.77343084 0.22656916]]




In [None]:
# Binary classification: the single returned row has two entries, and the one
# at column index 1 is the probability of the positive class (class 1).
probability_of_positive_class = predicted_probabilities[0, 1]

print(f"Predicted Probability of Class 1 (Positive Class): {probability_of_positive_class}")

Predicted Probability of Class 1 (Positive Class): 0.22656916073683905
