In [47]:
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# **01). Import the dataset and explore its data types and quantity**




In [48]:
# Read the heart-disease CSV into a DataFrame, then preview its first five rows.
data = pd.read_csv(filepath_or_buffer='/content/heart_disease.csv')
data.head(n=5)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40.0,M,ATA,140,289.0,0.0,Normal,172.0,N,0.0,Up,0.0
1,49.0,F,NAP,160,180.0,0.0,Normal,156.0,N,1.0,Flat,1.0
2,37.0,M,ATA,130,283.0,0.0,ST,98.0,N,0.0,Up,0.0
3,48.0,F,,138,214.0,0.0,Normal,108.0,Y,1.5,Flat,1.0
4,54.0,M,NAP,150,195.0,0.0,Normal,122.0,N,0.0,Up,0.0


In [51]:
# Report the frame's dimensions as a (rows, columns) tuple.
print('Shape of the data is ', data.shape)

Shape of the data is  (918, 12)


In [52]:
# Inspect the dtype pandas inferred for each column; `object` columns hold
# text labels and cannot be passed to the model as-is.
data.dtypes

Unnamed: 0,0
Age,float64
Sex,object
ChestPainType,object
RestingBP,int64
Cholesterol,float64
FastingBS,float64
RestingECG,object
MaxHR,float64
ExerciseAngina,object
Oldpeak,float64


# **02). Dataset Preprocessing**

In [53]:
# Count the missing entries in every column
print(data.isna().sum(axis=0))

Age                5
Sex                6
ChestPainType      5
RestingBP          0
Cholesterol        6
FastingBS          1
RestingECG        13
MaxHR              2
ExerciseAngina     2
Oldpeak            0
ST_Slope           5
HeartDisease       2
dtype: int64


In [54]:
# Discard every row that has at least one missing value
data = data.dropna(axis=0, how='any')

In [55]:
# Confirm that no missing values remain and show how many rows survived.
print(data.isna().sum(), data.shape, sep='\n')

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64
(875, 12)


In [56]:
# Print the distinct labels found in each text-valued column
for text_column in ('Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope'):
    print(data[text_column].unique())

['M' 'F']
['ATA' 'NAP' 'ASY' 'TA']
['Normal' 'ST' 'LVH']
['N' 'Y']
['Up' 'Flat' 'Down']


In [57]:
# Text-valued columns that must be converted to numbers before modelling.
# NOTE: despite the variable name, only 'Sex' and 'ExerciseAngina' are binary;
# the other three columns have three or four categories each.
binary_columns = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# One flat lookup (category label -> integer code) shared by all columns above.
# Sharing a single dict works only because no label appears in two columns.
# NOTE(review): integer codes impose an arbitrary numeric order on categories
# such as ChestPainType and RestingECG; consider one-hot encoding them instead.
binary_mapping = {
    'M': 1, 'F': 0,                           # Sex
    'ATA': 0, 'NAP': 1, 'ASY': 2, 'TA': 3,    # ChestPainType
    'Normal': 0, 'ST': 1, 'LVH': 2,           # RestingECG
    'N': 0, 'Y': 1,                           # ExerciseAngina
    'Up': 0, 'Flat': 1, 'Down': 2             # ST_Slope
}

for col in binary_columns:
    # Skip columns that are already numeric so re-running this cell does not
    # push the integer codes through the string-keyed mapping, which would
    # replace every value with NaN.
    if pd.api.types.is_numeric_dtype(data[col]):
        continue

    encoded = data[col].map(binary_mapping)

    # Series.map yields NaN for any label missing from the mapping. Fail loudly
    # on such labels instead of silently injecting NaN into the features.
    # Values that were already missing before mapping are not treated as errors.
    unmapped = encoded.isna() & data[col].notna()
    if unmapped.any():
        unknown_labels = sorted(data.loc[unmapped, col].astype(str).unique())
        raise ValueError(
            f"Column {col!r} contains values with no encoding: {unknown_labels}"
        )

    data[col] = encoded

# Check encoding results
print(data.head())

    Age  Sex  ChestPainType  RestingBP  Cholesterol  FastingBS  RestingECG  \
0  40.0    1              0        140        289.0        0.0           0   
1  49.0    0              1        160        180.0        0.0           0   
2  37.0    1              0        130        283.0        0.0           1   
4  54.0    1              1        150        195.0        0.0           0   
5  39.0    1              1        120        339.0        0.0           0   

   MaxHR  ExerciseAngina  Oldpeak  ST_Slope  HeartDisease  
0  172.0               0      0.0         0           0.0  
1  156.0               0      1.0         1           1.0  
2   98.0               0      0.0         0           0.0  
4  122.0               0      0.0         0           0.0  
5  170.0               0      0.0         0           0.0  


In [58]:
# Re-inspect the column dtypes after the label-to-integer mapping in the
# previous cell; the mapped columns should no longer be `object`.
data.dtypes

Unnamed: 0,0
Age,float64
Sex,int64
ChestPainType,int64
RestingBP,int64
Cholesterol,float64
FastingBS,float64
RestingECG,int64
MaxHR,float64
ExerciseAngina,int64
Oldpeak,float64


In [59]:
# Print the encoded frame. pandas abbreviates long frames, so only the leading
# and trailing rows are shown.
print(data)

      Age  Sex  ChestPainType  RestingBP  Cholesterol  FastingBS  RestingECG  \
0    40.0    1              0        140        289.0        0.0           0   
1    49.0    0              1        160        180.0        0.0           0   
2    37.0    1              0        130        283.0        0.0           1   
4    54.0    1              1        150        195.0        0.0           0   
5    39.0    1              1        120        339.0        0.0           0   
..    ...  ...            ...        ...          ...        ...         ...   
913  45.0    1              3        110        264.0        0.0           0   
914  68.0    1              2        144        193.0        1.0           0   
915  57.0    1              2        130        131.0        0.0           0   
916  57.0    0              0        130        236.0        0.0           2   
917  38.0    1              1        138        175.0        0.0           0   

     MaxHR  ExerciseAngina  Oldpeak  ST

# **03). Model Training and Evaluation**

In [60]:
# Split the frame into the label to predict and the model inputs
y = data['HeartDisease']                   # Target
X = data.drop(columns=['HeartDisease'])    # Features


In [61]:
# Print the feature matrix (every column except 'HeartDisease'); pandas shows
# only the leading and trailing rows of a long frame.
print(X)

      Age  Sex  ChestPainType  RestingBP  Cholesterol  FastingBS  RestingECG  \
0    40.0    1              0        140        289.0        0.0           0   
1    49.0    0              1        160        180.0        0.0           0   
2    37.0    1              0        130        283.0        0.0           1   
4    54.0    1              1        150        195.0        0.0           0   
5    39.0    1              1        120        339.0        0.0           0   
..    ...  ...            ...        ...          ...        ...         ...   
913  45.0    1              3        110        264.0        0.0           0   
914  68.0    1              2        144        193.0        1.0           0   
915  57.0    1              2        130        131.0        0.0           0   
916  57.0    0              0        130        236.0        0.0           2   
917  38.0    1              1        138        175.0        0.0           0   

     MaxHR  ExerciseAngina  Oldpeak  ST

In [62]:
# Print the target Series taken from the 'HeartDisease' column.
print(y)

0      0.0
1      1.0
2      0.0
4      0.0
5      0.0
      ... 
913    1.0
914    1.0
915    1.0
916    1.0
917    0.0
Name: HeartDisease, Length: 875, dtype: float64


In [63]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on y — consider `stratify=y`.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

In [64]:
# Train the SVM model.
# The RBF kernel works on distances between samples, so features with large
# numeric ranges (e.g. Cholesterol, RestingBP) would dominate the 0/1 flags if
# left unscaled. Standardising inside a pipeline puts all features on a
# comparable scale, and the scaler is fit on the training split only, so no
# test-set statistics leak into the model.
# `svm_model` still exposes fit/predict, so downstream cells are unaffected.
svm_model = make_pipeline(
    StandardScaler(),
    SVC(kernel='rbf', C=1, gamma='scale', random_state=42),
)
svm_model.fit(X_train, y_train)

In [65]:
# Evaluate the model on the held-out test split.
# Predictions have to be generated first: the metric functions below compare
# them with the true labels, and no earlier cell shown here defines `y_pred`.
y_pred = svm_model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

NameError: name 'accuracy_score' is not defined