####  Data Dictionary 
`age` - Age of the patient

`sex` - Sex of the patient

`cp` - Chest pain type ~ 0 = Typical Angina, 1 = Atypical Angina, 2 = Non-anginal Pain, 3 = Asymptomatic

`trtbps` - Resting blood pressure (in mm Hg)

`chol` - Cholestoral in mg/dl fetched via BMI sensor

`fbs` - (fasting blood sugar > 120 mg/dl) ~ 1 = True, 0 = False

`restecg` - Resting electrocardiographic results ~ 0 = Normal, 1 = ST-T wave normality, 2 = Left ventricular hypertrophy

`thalachh`  - Maximum heart rate achieved

`oldpeak` - Previous peak

`slp` - Slope

`caa` - Number of major vessels 

`thall` - Thalium Stress Test result ~ (0,3)

`exng` - Exercise induced angina ~ 1 = Yes, 0 = No

`output` - Target variable

<h2>Importing The Modules</h2>

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 

%matplotlib inline 
pd.options.mode.chained_assignment = None

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv('cleveland.csv')
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0.0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2.0
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1.0
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0.0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,45.0,1.0,1.0,110.0,264.0,0.0,0.0,132.0,0.0,1.2,2.0,0.0,7.0,1.0
299,68.0,1.0,4.0,144.0,193.0,1.0,0.0,141.0,0.0,3.4,2.0,2.0,7.0,2.0
300,57.0,1.0,4.0,130.0,131.0,0.0,0.0,115.0,1.0,1.2,2.0,1.0,7.0,3.0
301,57.0,0.0,2.0,130.0,236.0,0.0,2.0,174.0,0.0,0.0,2.0,1.0,3.0,1.0


In [4]:
df1 = pd.read_csv('va.csv')
df1

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,4,140,260,0,1,112,1,3,2,?,?,2
1,44,1,4,130,209,0,1,127,0,0,?,?,?,0
2,60,1,4,132,218,0,1,140,1,1.5,3,?,?,2
3,55,1,4,142,228,0,1,149,1,2.5,1,?,?,1
4,66,1,3,110,213,1,2,99,1,1.3,2,?,?,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,54,0,4,127,333,1,1,154,0,0,?,?,?,1
196,62,1,1,?,139,0,1,?,?,?,?,?,?,0
197,55,1,4,122,223,1,1,100,0,0,?,?,6,2
198,58,1,4,?,385,1,2,?,?,?,?,?,?,0


In [5]:
df2= pd.read_csv('switzerland.csv')
df2

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,32,1,1,95,0,?,0,127,0,.7,1,?,?,1
1,34,1,4,115,0,?,?,154,0,.2,1,?,?,1
2,35,1,4,?,0,?,0,130,1,?,?,?,7,3
3,36,1,4,110,0,?,0,125,1,1,2,?,6,1
4,38,0,4,105,0,?,0,166,0,2.8,1,?,?,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,70,1,4,115,0,0,1,92,1,0,2,?,7,1
119,70,1,4,140,0,1,0,157,1,2,2,?,7,3
120,72,1,3,160,0,?,2,114,0,1.6,2,2,?,0
121,73,0,3,160,0,0,1,121,0,0,1,?,3,1


In [6]:
df3= pd.read_csv('hungarian.csv')
df3

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0
3,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0
4,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289,52,1,4,160,331,0,0,94,1,2.5,?,?,?,1
290,54,0,3,130,294,0,1,100,1,0.0,2,?,?,1
291,56,1,4,155,342,1,0,150,1,3.0,2,?,?,1
292,58,0,2,180,393,0,0,110,1,1.0,2,?,7,1


<h3> Inspecting The Forth Dataset Separately </h3>

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    float64
 1   sex       303 non-null    float64
 2   cp        303 non-null    float64
 3   trestbps  303 non-null    float64
 4   chol      303 non-null    float64
 5   fbs       303 non-null    float64
 6   restecg   303 non-null    float64
 7   thalach   303 non-null    float64
 8   exang     303 non-null    float64
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    float64
 11  ca        303 non-null    object 
 12  thal      303 non-null    object 
 13  target    287 non-null    float64
dtypes: float64(12), object(2)
memory usage: 33.3+ KB


In [8]:
df.describe(include='object')

Unnamed: 0,ca,thal
count,303.0,303.0
unique,7.0,8.0
top,0.0,3.0
freq,173.0,160.0


In [9]:
df.duplicated().sum()

0

In [10]:
df.isna().sum()

age          0
sex          0
cp           0
trestbps     0
chol         0
fbs          0
restecg      0
thalach      0
exang        0
oldpeak      0
slope        0
ca           0
thal         0
target      16
dtype: int64

In [11]:
df['target'].isna().sum()

16

In [12]:
df[df['target'].isna()]

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
72,62.0,1.0,4.0,120.0,267.0,0.0,0.0,99.0,0.0,0.8,0.0,.0,.0,
87,53.0,0.0,3.0,128.0,216.0,0.0,2.0,115.0,0.0,0.0,1.0,0.0,"?,0",
114,62.0,0.0,3.0,130.0,263.0,0.0,0.0,97.0,0.0,0.2,0.0,.0,.0,
131,51.0,1.0,3.0,94.0,27.0,0.0,0.0,54.0,0.0,0.0,0.0,.0,.0,
154,64.0,1.0,4.0,120.0,246.0,0.0,2.0,96.0,0.0,0.2,0.0,.0,.0,
166,52.0,1.0,3.0,138.0,223.0,0.0,0.0,169.0,0.0,0.0,1.0,"?,3",00,
175,57.0,1.0,4.0,152.0,274.0,0.0,0.0,88.0,0.0,0.2,0.0,.0,.0,
192,43.0,1.0,4.0,132.0,247.0,1.0,2.0,143.0,1.0,0.1,2.0,"?,7",01,
222,39.0,0.0,3.0,94.0,99.0,0.0,0.0,79.0,0.0,0.0,0.0,.0,.0,
223,53.0,1.0,4.0,123.0,282.0,0.0,0.0,95.0,0.0,0.0,0.0,.0,.0,


In [13]:
df['ca'].unique()

array(['0.0', '3.0', '2.0', '1.0', '.0', '?,3', '?,7'], dtype=object)

In [14]:
df['thal'].unique()

array(['6.0', '3.0', '7.0', '.0', '?,0', '0,0', '0,1', '?,2'],
      dtype=object)

In [15]:
df[df['ca'] == '?,3']

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
166,52.0,1.0,3.0,138.0,223.0,0.0,0.0,169.0,0.0,0.0,1.0,"?,3",0,
302,38.0,1.0,3.0,138.0,175.0,0.0,0.0,173.0,0.0,0.0,1.0,"?,3",0,


In [16]:
df[df['ca'] == '?,7']

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
192,43.0,1.0,4.0,132.0,247.0,1.0,2.0,143.0,1.0,0.1,2.0,"?,7",1,
287,58.0,1.0,2.0,125.0,220.0,0.0,0.0,144.0,0.0,0.4,2.0,"?,7",0,


In [17]:
df[df['thal'] == '?,2']

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
266,52.0,1.0,4.0,128.0,204.0,1.0,0.0,156.0,1.0,1.0,2.0,0.0,"?,2",


In [18]:
df = df.dropna()

In [19]:
df.describe(include='object')

Unnamed: 0,ca,thal
count,287.0,287.0
unique,4.0,3.0
top,0.0,3.0
freq,171.0,160.0


In [20]:
df['ca'].unique()

array(['0.0', '3.0', '2.0', '1.0'], dtype=object)

In [21]:
df['thal'].unique()

array(['6.0', '3.0', '7.0'], dtype=object)

In [22]:
df = df.apply(pd.to_numeric)

In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 287 entries, 0 to 301
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       287 non-null    float64
 1   sex       287 non-null    float64
 2   cp        287 non-null    float64
 3   trestbps  287 non-null    float64
 4   chol      287 non-null    float64
 5   fbs       287 non-null    float64
 6   restecg   287 non-null    float64
 7   thalach   287 non-null    float64
 8   exang     287 non-null    float64
 9   oldpeak   287 non-null    float64
 10  slope     287 non-null    float64
 11  ca        287 non-null    float64
 12  thal      287 non-null    float64
 13  target    287 non-null    float64
dtypes: float64(14)
memory usage: 33.6 KB


--------------------------------------

In [24]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   age       200 non-null    int64 
 1   sex       200 non-null    int64 
 2   cp        200 non-null    int64 
 3   trestbps  200 non-null    object
 4   chol      200 non-null    object
 5   fbs       200 non-null    object
 6   restecg   200 non-null    int64 
 7   thalach   200 non-null    object
 8   exang     200 non-null    object
 9   oldpeak   200 non-null    object
 10  slope     200 non-null    object
 11  ca        200 non-null    object
 12  thal      200 non-null    object
 13  target    200 non-null    int64 
dtypes: int64(5), object(9)
memory usage: 22.0+ KB


In [25]:
df1.describe(include='object')

Unnamed: 0,trestbps,chol,fbs,thalach,exang,oldpeak,slope,ca,thal
count,200,200,200,200,200,200,200,200,200
unique,41,100,3,60,3,15,4,2,4
top,?,0,0,?,1,?,?,?,?
freq,56,49,125,53,95,56,102,198,166


In [26]:
df1['chol'].unique()

array(['260', '209', '218', '228', '213', '0', '236', '267', '166', '220',
       '177', '186', '100', '171', '230', '281', '203', '277', '233',
       '240', '153', '224', '316', '311', '270', '217', '214', '252',
       '339', '216', '276', '458', '241', '384', '297', '248', '308',
       '208', '227', '210', '245', '225', '198', '195', '161', '258',
       '235', '305', '223', '282', '349', '?', '160', '312', '283', '142',
       '211', '306', '222', '202', '197', '204', '274', '192', '298',
       '272', '200', '261', '181', '221', '175', '219', '310', '232',
       '273', '182', '292', '289', '193', '170', '369', '173', '271',
       '244', '285', '243', '237', '165', '287', '256', '264', '226',
       '207', '284', '337', '254', '300', '333', '139', '385'],
      dtype=object)

In [27]:
df1[df1['chol'] == '?']

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
107,59,1,4,124,?,0,0,117,1,1,2,?,?,1
125,76,1,3,104,?,0,2,120,0,3.5,3,?,?,4
155,69,1,4,?,?,1,0,?,?,?,?,?,?,2
157,72,1,4,160,?,1,2,130,0,1.5,?,?,?,2
164,51,1,4,?,?,1,2,?,?,?,?,?,7,1
165,48,1,4,140,?,0,0,159,1,1.5,1,?,?,3
181,55,1,3,?,?,0,1,?,?,?,?,?,?,0


In [28]:
df1['chol'] = df1['chol'].astype('str')

df1['chol'] = df1['chol'].str.strip()

df1['chol'] = df1['chol'].apply(pd.to_numeric, errors='coerce')

In [29]:
df1['chol'] = df1['chol'].astype('float')

In [30]:
df1['chol'].unique()

array([260., 209., 218., 228., 213.,   0., 236., 267., 166., 220., 177.,
       186., 100., 171., 230., 281., 203., 277., 233., 240., 153., 224.,
       316., 311., 270., 217., 214., 252., 339., 216., 276., 458., 241.,
       384., 297., 248., 308., 208., 227., 210., 245., 225., 198., 195.,
       161., 258., 235., 305., 223., 282., 349.,  nan, 160., 312., 283.,
       142., 211., 306., 222., 202., 197., 204., 274., 192., 298., 272.,
       200., 261., 181., 221., 175., 219., 310., 232., 273., 182., 292.,
       289., 193., 170., 369., 173., 271., 244., 285., 243., 237., 165.,
       287., 256., 264., 226., 207., 284., 337., 254., 300., 333., 139.,
       385.])

In [31]:
df1['trestbps'].unique()

array(['140', '130', '132', '142', '110', '120', '150', '180', '160',
       '126', '?', '128', '170', '152', '116', '124', '0', '122', '144',
       '154', '125', '104', '136', '134', '138', '178', '146', '135',
       '158', '106', '112', '102', '96', '172', '155', '156', '118',
       '100', '190', '114', '127'], dtype=object)

In [32]:
df1[df1 == '?']

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,,,,,,,,,,,,?,?,
1,,,,,,,,,,,?,?,?,
2,,,,,,,,,,,,?,?,
3,,,,,,,,,,,,?,?,
4,,,,,,,,,,,,?,?,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,,,,,,,,,,,?,?,?,
196,,,,?,,,,?,?,?,?,?,?,
197,,,,,,,,,,,?,?,,
198,,,,?,,,,?,?,?,?,?,?,


In [33]:
#df1.drop(df1[df1 == '?'],inplace=True)


In [34]:
df1['trestbps'] = df1['trestbps'].astype('str')

df1['trestbps'] = df1['trestbps'].str.strip()

df1['trestbps'] = df1['trestbps'].apply(pd.to_numeric, errors='coerce')

In [35]:
df1[df1['trestbps'] == '?']

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target


In [36]:
df1['trestbps'].unique()

array([140., 130., 132., 142., 110., 120., 150., 180., 160., 126.,  nan,
       128., 170., 152., 116., 124.,   0., 122., 144., 154., 125., 104.,
       136., 134., 138., 178., 146., 135., 158., 106., 112., 102.,  96.,
       172., 155., 156., 118., 100., 190., 114., 127.])

In [37]:
df1['trestbps'] = df1['trestbps'].astype('float')

In [38]:
df1['fbs'].unique()

array(['0', '1', '?'], dtype=object)

In [39]:
df1['fbs'] = df1['fbs'].astype('str')

df1['fbs'] = df1['fbs'].str.strip()

df1['fbs'] = df1['fbs'].apply(pd.to_numeric, errors='coerce')

In [40]:
df1['fbs'] = df1['fbs'].astype('float')

In [41]:
df1['fbs'].unique()

array([ 0.,  1., nan])

In [42]:
df1['fbs'] = df1['fbs'].dropna()

In [43]:
df1[df1['fbs'] == 'nan']

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target


In [44]:
df1['restecg'].unique()

array([1, 2, 0], dtype=int64)

In [45]:
df1['restecg'] = df1['restecg'].astype('str')

df1['restecg'] = df1['restecg'].str.strip()

df1['restecg'] = df1['restecg'].apply(pd.to_numeric, errors='coerce')

In [46]:
df1['restecg'] = df1['restecg'].astype('float')

In [47]:
df1['restecg'].unique()

array([1., 2., 0.])

In [48]:
df1['restecg'] = df1['restecg'].astype('float')

In [49]:
df1['thalach'].unique()

array(['112', '127', '140', '149', '99', '120', '105', '141', '157',
       '117', '?', '148', '86', '84', '125', '118', '124', '106', '111',
       '180', '129', '110', '155', '122', '133', '131', '80', '165',
       '107', '128', '160', '97', '161', '130', '108', '123', '144',
       '102', '145', '69', '138', '150', '88', '132', '121', '135', '100',
       '162', '73', '154', '115', '119', '159', '94', '113', '98', '96',
       '151', '126', '93'], dtype=object)

In [50]:
df1['thalach'] = df1['thalach'].astype('str')

df1['thalach'] = df1['thalach'].str.strip()

df1['thalach'] = df1['thalach'].apply(pd.to_numeric, errors='coerce')

df1['thalach'] = df1['thalach'].astype('float')

In [51]:
df1['exang'].unique()

array(['1', '0', '?'], dtype=object)

In [52]:
df1['exang'] = df1['exang'].astype('str')

df1['exang'] = df1['exang'].str.strip()

df1['exang'] = df1['exang'].apply(pd.to_numeric, errors='coerce')

In [53]:
df1['exang'] = df1['exang'].astype('float')

In [54]:
df1['oldpeak'].unique()

array(['3', '0', '1.5', '2.5', '1.3', '-0.5', '2', '0.5', '1', '?', '1.6',
       '4', '3.5', '0.8', '1.7'], dtype=object)

In [55]:
df1['oldpeak'] = df1['oldpeak'].astype('str')

df1['oldpeak'] = df1['oldpeak'].str.strip()

df1['oldpeak'] = df1['oldpeak'].apply(pd.to_numeric, errors='coerce')

df1['oldpeak'] = df1['oldpeak'].astype('float')

In [56]:
df1['slope'].unique()

array(['2', '?', '3', '1'], dtype=object)

In [57]:
df1['slope'] = df1['slope'].astype('str')

df1['slope'] = df1['slope'].str.strip()

df1['slope'] = df1['slope'].apply(pd.to_numeric, errors='coerce')

df1['slope'] = df1['slope'].astype('float')

In [58]:
df1['ca'].unique()

array(['?', '0'], dtype=object)

In [59]:
df1['ca'] = df1['ca'].astype('str')

df1['ca'] = df1['ca'].str.strip()

df1['ca'] = df1['ca'].apply(pd.to_numeric, errors='coerce')

df1['ca'] = df1['ca'].astype('float')

In [60]:
df1['thal'].unique()

array(['?', '3', '7', '6'], dtype=object)

In [61]:
df1['thal'] = df1['thal'].astype('str')

df1['thal'] = df1['thal'].str.strip()

df1['thal'] = df1['thal'].apply(pd.to_numeric, errors='coerce')

df1['thal'] = df1['thal'].astype('float')

In [62]:
df1[df1 == '?']

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,,,,,,,,,,,,,,
196,,,,,,,,,,,,,,
197,,,,,,,,,,,,,,
198,,,,,,,,,,,,,,


In [63]:
df1.isna().sum()

age           0
sex           0
cp            0
trestbps     56
chol          7
fbs           7
restecg       0
thalach      53
exang        53
oldpeak      56
slope       102
ca          198
thal        166
target        0
dtype: int64

In [64]:
df1[df1.isna()]

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,,,,,,,,,,,,,,
196,,,,,,,,,,,,,,
197,,,,,,,,,,,,,,
198,,,,,,,,,,,,,,


In [65]:
df1 = df1.dropna()

In [66]:
df1

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
28,56,1,4,120.0,100.0,0.0,0.0,120.0,1.0,1.5,2.0,0.0,7.0,1


---------------------

In [67]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 123 entries, 0 to 122
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   age       123 non-null    int64 
 1   sex       123 non-null    int64 
 2   cp        123 non-null    int64 
 3   trestbps  123 non-null    object
 4   chol      123 non-null    int64 
 5   fbs       123 non-null    object
 6   restecg   123 non-null    object
 7   thalach   123 non-null    object
 8   exang     123 non-null    object
 9   oldpeak   123 non-null    object
 10  slope     123 non-null    object
 11  ca        123 non-null    object
 12  thal      123 non-null    object
 13  target    123 non-null    int64 
dtypes: int64(5), object(9)
memory usage: 13.6+ KB


In [68]:
df2.describe(include='object')

Unnamed: 0,trestbps,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
count,123,123,123,123,123,123,123,123,123
unique,21,3,4,68,3,36,4,3,4
top,115,?,0,120,0,0,2,?,?
freq,14,75,85,9,68,42,61,118,52


In [89]:
df2[df2 == '?']

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,,,,,,?,,,,,,?,?,
1,,,,,,?,?,,,,,?,?,
2,,,,?,,?,,,,?,?,?,,
3,,,,,,?,,,,,,?,,
4,,,,,,?,,,,,,?,?,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,,,,,,,,,,,,?,,
119,,,,,,,,,,,,?,,
120,,,,,,?,,,,,,,?,
121,,,,,,,,,,,,?,,


In [69]:
df2['trestbps'].unique()

array(['95', '115', '?', '110', '105', '100', '135', '150', '125', '145',
       '140', '155', '160', '120', '130', '165', '80', '180', '170',
       '200', '185'], dtype=object)

In [90]:
df2[df2['trestbps'] == '?']

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
2,35,1,4,?,0,?,0,130,1,?,?,?,7,3
117,69,1,4,?,0,0,1,?,?,?,?,?,7,3


In [None]:
df2.drop([2,117], axis=0, inplace=True)

In [70]:
df2['fbs'].unique()

array(['?', '0', '1'], dtype=object)

In [71]:
df2['restecg'].unique()

array(['0', '?', '1', '2'], dtype=object)

In [72]:
df2['thalach'].unique()

array(['127', '154', '130', '125', '166', '156', '179', '128', '150',
       '120', '144', '176', '99', '122', '145', '140', '138', '133',
       '113', '118', '149', '124', '110', '139', '92', '104', '170',
       '163', '60', '126', '82', '95', '115', '135', '141', '155', '83',
       '97', '98', '100', '148', '103', '121', '131', '182', '105', '175',
       '94', '119', '143', '63', '70', '77', '117', '123', '134', '72',
       '78', '109', '86', '114', '93', '67', '90', '108', '136', '?',
       '157'], dtype=object)

In [73]:
df2['exang'].unique()

array(['0', '1', '?'], dtype=object)

In [74]:
df2['oldpeak'].unique()

array(['.7', '.2', '?', '1', '2.8', '0', '-1.1', '1.6', '-1.5', '1.5',
       '2', '.5', '-.1', '-2.6', '2.1', '-.7', '2.2', '3', '.1', '.3',
       '-2', '-1', '1.8', '1.4', '2.6', '.9', '2.4', '1.1', '.4', '2.5',
       '1.7', '-.8', '-.5', '-.9', '3.7', '1.3'], dtype=object)

In [75]:
df2['slope'].unique()

array(['1', '?', '2', '3'], dtype=object)

In [76]:
df2['ca'].unique()

array(['?', '1', '2'], dtype=object)

In [77]:
df2['thal'].unique()

array(['?', '7', '6', '3'], dtype=object)

--------------

In [78]:
df3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294 entries, 0 to 293
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       294 non-null    int64  
 1   sex       294 non-null    int64  
 2   cp        294 non-null    int64  
 3   trestbps  294 non-null    object 
 4   chol      294 non-null    object 
 5   fbs       294 non-null    object 
 6   restecg   294 non-null    object 
 7   thalach   294 non-null    object 
 8   exang     294 non-null    object 
 9   oldpeak   294 non-null    float64
 10  slope     294 non-null    object 
 11  ca        294 non-null    object 
 12  thal      294 non-null    object 
 13  target    294 non-null    int64  
dtypes: float64(1), int64(4), object(9)
memory usage: 32.3+ KB


In [79]:
df3.describe(include='object')

Unnamed: 0,trestbps,chol,fbs,restecg,thalach,exang,slope,ca,thal
count,294,294,294,294,294,294,294,294,294
unique,32,154,3,4,72,3,4,2,4
top,120,?,0,0,150,0,?,?,?
freq,65,23,266,235,29,204,190,291,266


In [80]:
df3['trestbps'].unique()

array(['130', '120', '140', '170', '100', '105', '110', '125', '150',
       '98', '112', '145', '190', '160', '115', '142', '180', '132',
       '135', '?', '108', '124', '113', '122', '92', '118', '106', '200',
       '138', '136', '128', '155'], dtype=object)

In [81]:
df3['chol'].unique()

array(['132', '243', '?', '237', '219', '198', '225', '254', '298', '161',
       '214', '220', '160', '167', '308', '264', '166', '340', '209',
       '260', '211', '173', '283', '194', '223', '315', '275', '297',
       '292', '182', '200', '204', '241', '339', '147', '273', '307',
       '289', '215', '281', '250', '184', '245', '291', '295', '269',
       '196', '268', '228', '358', '201', '249', '266', '186', '207',
       '218', '412', '224', '238', '230', '163', '240', '280', '257',
       '263', '276', '284', '195', '227', '253', '187', '202', '328',
       '168', '216', '129', '190', '188', '179', '210', '272', '180',
       '100', '259', '468', '274', '320', '221', '309', '312', '171',
       '208', '246', '305', '217', '365', '344', '394', '256', '326',
       '277', '270', '229', '85', '347', '251', '222', '287', '318',
       '213', '294', '193', '271', '156', '267', '282', '117', '466',
       '247', '226', '265', '206', '288', '303', '338', '248', '306',
       '529', '3

In [82]:
df3['fbs'].unique()

array(['0', '?', '1'], dtype=object)

In [83]:
df3['restecg'].unique()

array(['2', '0', '1', '?'], dtype=object)

In [84]:
df3['thalach'].unique()

array(['185', '160', '170', '150', '165', '184', '155', '190', '168',
       '180', '178', '172', '130', '142', '98', '158', '129', '146',
       '145', '120', '106', '132', '140', '138', '167', '188', '144',
       '137', '136', '152', '175', '176', '118', '154', '115', '135',
       '122', '110', '90', '116', '174', '125', '?', '148', '100', '164',
       '139', '127', '162', '112', '134', '114', '128', '126', '124',
       '153', '166', '103', '156', '87', '102', '92', '99', '121', '91',
       '108', '96', '82', '105', '143', '119', '94'], dtype=object)

In [85]:
df3['exang'].unique()

array(['0', '1', '?'], dtype=object)

In [86]:
df3['slope'].unique()

array(['?', '2', '1', '3'], dtype=object)

In [87]:
df3['ca'].unique()

array(['?', '0'], dtype=object)

In [88]:
df3['thal'].unique()

array(['?', '6', '3', '7'], dtype=object)