In [31]:
import pandas as pd

# Read the raw study export into a DataFrame; the path is relative to the
# notebook's working directory.
file_path = 'antiplatelet.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows to check the column names and raw value formats.
data.head(n=5)

Unnamed: 0,Sex,Dob,Age,ADP,ASPI,asp,clop,asp_resist,clop_resist,either,one,both,none,graft_thrombosis
0,F,19267.0,72.0,137.0,32.0,Y,Y,N,Y,Y,Y,N,N,Y
1,M,51.0,51.0,60.0,48.0,Y,Y,Y,Y,Y,N,Y,N,N
2,M,61.0,61.0,63.0,41.0,Y,N,Y,N,Y,Y,N,N,Y
3,F,66.0,66.0,60.0,18.0,Y,Y,N,Y,Y,Y,N,N,N
4,M,68.0,68.0,27.0,27.0,Y,Y,N,N,N,N,N,Y,N


In [32]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Normalise column names to lower case so later cells use a single spelling.
data.columns = data.columns.str.lower()

# 'dob' is discarded; 'age' is the column kept for the analysis.
data = data.drop(columns='dob')

# Impute missing ages with the median. The filled Series is assigned back to
# the column instead of calling fillna(inplace=True) on the selection: the
# chained inplace form emits a FutureWarning in recent pandas and stops
# updating `data` altogether once copy-on-write is enabled.
data['age'] = data['age'].fillna(data['age'].median())

# Encode categorical variables as 0/1. Series.map turns any value that is not
# a key of the mapping (including values that are already encoded) into NaN,
# so this cell must only be run once on freshly loaded data.
data['sex'] = data['sex'].map({'F': 0, 'M': 1})

yes_no_codes = {'N': 0, 'Y': 1}
yes_no_columns = [
    'asp', 'clop',
    'asp_resist', 'clop_resist',
    'either', 'one', 'both', 'none',
]
for column in yes_no_columns:
    data[column] = data[column].map(yes_no_codes)

# NOTE(review): 'Died' is coded as 0, i.e. counted together with
# "no thrombosis". Confirm this is the intended handling of deaths rather
# than excluding those patients from the outcome analysis.
data['graft_thrombosis'] = data['graft_thrombosis'].map({'N': 0, 'Y': 1, 'Died': 0})

# Disabled draft of derived resistance features, kept for reference only.
# NOTE(review): in the previewed rows asp_resist follows ASPI and clop_resist
# follows ADP (e.g. row 0: ADP 137, ASPI 32 -> asp_resist N, clop_resist Y),
# so the ADP/ASPI columns below look swapped. The draft also uses the
# pre-lowercase column names. Confirm both points before re-enabling.
# # Adding new features
# data['ASP_resist'] = ((data['ADP'] > 32) & (data['Asp'] == 1)).astype(int)
# data['Clop_resist'] = ((data['ASPI'] > 42) & (data['Clop'] == 1)).astype(int)

# Adjusting the approach for resistance features
# data['Resist_Either'] = (((data['ASP_resist'] == 1) & (data['Clop_resist'] == 0)) | ((data['ASP_resist'] == 0) & (data['Clop_resist'] == 1))).astype(int)
# data['Resist_Both'] = ((data['ASP_resist'] == 1) & (data['Clop_resist'] == 1)).astype(int)
# data['Resist_Neither'] = ((data['ASP_resist'] == 0) & (data['Clop_resist'] == 0)).astype(int)


In [33]:
# Summary statistics of the encoded frame; the `count` row shows how many
# non-null values each column holds, which exposes remaining gaps.
data.describe()

Unnamed: 0,sex,age,adp,aspi,asp,clop,asp_resist,clop_resist,either,one,both,none,graft_thrombosis
count,163.0,164.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0
mean,0.742331,71.810976,57.214724,41.736196,0.91411,0.754601,0.447853,0.404908,0.631902,0.423313,0.208589,0.355828,0.190184
std,0.438698,10.668784,31.19171,28.230495,0.281064,0.431649,0.498806,0.492387,0.483774,0.495607,0.407552,0.480239,0.393656
min,0.0,38.0,5.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,65.0,35.0,24.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,72.0,52.0,33.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
75%,1.0,79.0,75.5,54.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0
max,1.0,96.0,171.0,197.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [34]:
# Impute missing 'sex' with the most frequent encoded value.
# The result is assigned back to the column: fillna(inplace=True) on a column
# selection is chained assignment, which warns in recent pandas and has no
# effect on `data` under copy-on-write.
# NOTE(review): exactly one row lacked 'sex' in the recorded run. If that is
# the otherwise-empty row that is later dropped for having no outcome, this
# imputation does not influence the analysis - confirm.
sex_mode = data['sex'].mode()[0]
data['sex'] = data['sex'].fillna(sex_mode)

In [9]:
# Mean-impute missing 'adp' and 'aspi' values.
# Assign the filled Series back instead of using fillna(inplace=True) on a
# column selection (chained assignment: warns in recent pandas, no-op under
# copy-on-write).
# NOTE(review): this cell carries an out-of-sequence execution count and the
# following describe() output still reports 163 non-null values for both
# columns, so the fill was not applied in the recorded run. Re-run the
# notebook top to bottom to make the outputs match the code.
data['adp'] = data['adp'].fillna(data['adp'].mean())
data['aspi'] = data['aspi'].fillna(data['aspi'].mean())

In [36]:
# Re-check the summary statistics after the imputation cells; compare the
# `count` row across columns to see which ones still contain missing values.
data.describe()

Unnamed: 0,sex,age,adp,aspi,asp,clop,asp_resist,clop_resist,either,one,both,none,graft_thrombosis
count,164.0,164.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0
mean,0.743902,71.810976,57.214724,41.736196,0.91411,0.754601,0.447853,0.404908,0.631902,0.423313,0.208589,0.355828,0.190184
std,0.437813,10.668784,31.19171,28.230495,0.281064,0.431649,0.498806,0.492387,0.483774,0.495607,0.407552,0.480239,0.393656
min,0.0,38.0,5.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,65.0,35.0,24.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,72.0,52.0,33.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
75%,1.0,79.0,75.5,54.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0
max,1.0,96.0,171.0,197.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [37]:
# Pull out the records that have no graft_thrombosis value so they can be
# inspected before being removed from the analysis set.
outcome_missing = data['graft_thrombosis'].isna()
rows_with_graft_thrombosis_null = data.loc[outcome_missing]

rows_with_graft_thrombosis_null

Unnamed: 0,sex,age,adp,aspi,asp,clop,asp_resist,clop_resist,either,one,both,none,graft_thrombosis
163,1.0,72.0,,,,,,,,,,,


In [40]:
# Keep only the records with a recorded graft_thrombosis outcome, then
# re-summarise to confirm every column now has the same non-null count.
has_outcome = data['graft_thrombosis'].notna()
data = data[has_outcome]
data.describe()

Unnamed: 0,sex,age,adp,aspi,asp,clop,asp_resist,clop_resist,either,one,both,none,graft_thrombosis
count,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0,163.0
mean,0.742331,71.809816,57.214724,41.736196,0.91411,0.754601,0.447853,0.404908,0.631902,0.423313,0.208589,0.355828,0.190184
std,0.438698,10.701651,31.19171,28.230495,0.281064,0.431649,0.498806,0.492387,0.483774,0.495607,0.407552,0.480239,0.393656
min,0.0,38.0,5.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,65.0,35.0,24.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,72.0,52.0,33.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
75%,1.0,79.0,75.5,54.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0
max,1.0,96.0,171.0,197.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [45]:
# Remove the pre-computed resistance summary flags and persist the cleaned
# frame without the index column.
summary_flag_columns = ['either', 'one', 'both', 'none']
data = data.drop(columns=summary_flag_columns)
data.to_csv('cleaned_antiplatelet.csv', index=False)

In [None]:
from scipy.stats import chi2_contingency

# Flag patients whose asp_resist or clop_resist indicator (or both) is set.
asp_flag_set = data['asp_resist'].eq(1)
clop_flag_set = data['clop_resist'].eq(1)
data['either_resist'] = asp_flag_set | clop_flag_set

# Cross-tabulate the flag (rows) against the outcome (columns).
contingency_table_either_resist = pd.crosstab(
    index=data['either_resist'],
    columns=data['graft_thrombosis'],
)

# Chi-square test of independence. For a 2x2 table SciPy applies Yates'
# continuity correction by default, which is what the reported statistic reflects.
chi2, p_val, dof, expected = chi2_contingency(contingency_table_either_resist)

contingency_table_either_resist, chi2, p_val


(graft_thrombosis  0.0  1.0
 either_resist             
 False              53    5
 True               79   26,
 5.315769596088006,
 0.02113325726279668)

Contingency table (resistant to either drug vs. graft thrombosis):

| | 0.0 (No Thrombosis) | 1.0 (Thrombosis) |
|---|---|---|
| Not Resistant | 53 | 5 |
| Resistant | 79 | 26 |

In [47]:
# Patients with neither resistance indicator set.
# With no missing resistance flags this is the exact negation of the
# "resistant to either" grouping, so the test below yields the same statistic
# and p-value with the table rows swapped.
asp_flag_clear = data['asp_resist'].eq(0)
clop_flag_clear = data['clop_resist'].eq(0)
data['neither_resist'] = asp_flag_clear & clop_flag_clear

# Cross-tabulate the flag (rows) against the outcome (columns).
contingency_table_neither_resist = pd.crosstab(
    index=data['neither_resist'],
    columns=data['graft_thrombosis'],
)

# Chi-square test of independence on the 2x2 table.
(
    chi2_neither_resist,
    p_neither_resist,
    dof_neither_resist,
    expected_neither_resist,
) = chi2_contingency(contingency_table_neither_resist)

contingency_table_neither_resist, chi2_neither_resist, p_neither_resist


(graft_thrombosis  0.0  1.0
 neither_resist            
 False              79   26
 True               53    5,
 5.315769596088005,
 0.021133257262796742)

Contingency table (resistant to neither drug vs. graft thrombosis):

| | 0.0 (No Thrombosis) | 1.0 (Thrombosis) |
|---|---|---|
| Resistant to Either | 79 | 26 |
| Not Resistant to Either | 53 | 5 |

In [48]:
# Patients with both resistance indicators set, compared with everyone else.
asp_flag_set = data['asp_resist'].eq(1)
clop_flag_set = data['clop_resist'].eq(1)
data['both_resist'] = asp_flag_set & clop_flag_set

# Cross-tabulate the flag (rows) against the outcome (columns).
contingency_table_both_resist = pd.crosstab(
    index=data['both_resist'],
    columns=data['graft_thrombosis'],
)

# Chi-square test of independence on the 2x2 table.
(
    chi2_both_resist,
    p_both_resist,
    dof_both_resist,
    expected_both_resist,
) = chi2_contingency(contingency_table_both_resist)

contingency_table_both_resist, chi2_both_resist, p_both_resist


(graft_thrombosis  0.0  1.0
 both_resist               
 False             109   20
 True               23   11,
 3.9262221276130083,
 0.047538625323607135)

Contingency table (resistant to both drugs vs. graft thrombosis):

| | 0.0 (No Thrombosis) | 1.0 (Thrombosis) |
|---|---|---|
| Resistant to at most one drug | 109 | 20 |
| Resistant to both drugs | 23 | 11 |