# Project 1: Heart diseases

## Imports

In [1]:
import pandas as pd
from tabula import read_pdf
import matplotlib.pyplot as plt
import numpy as np

## Contents
- [0. Load data](#0.-Load-data)
- [1. Columns](#1.-Columns)
- [2. Clean Up](#2.-Clean-Up)

## 0. Load data

In [20]:
df_H = pd.read_excel('datos\\Hungarian_Switzerland.xlsx', sheet_name=0)
df_S = pd.read_excel('datos\\Hungarian_Switzerland.xlsx', sheet_name=1)
df_Clev = pd.read_csv('datos\\processed.cleveland.data', header = None)
df_LB = read_pdf('datos\\LongBeachData.pdf', pages ='all')

In [21]:
df_Clev.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0


In [22]:
df_H.head()

Unnamed: 0.1,Unnamed: 0,age,sex,Chest pain type,resting blood pressure,serum cholestoral,fasting blood sugar,Resting EEG,maximum heart rate,exercise induced angina,ST depression,slope ST,number of major vessels,thal,diagnosis of heart disease
0,Patient 1,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,Patient 2,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,Patient 3,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0
3,Patient 4,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0
4,Patient 5,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0


In [23]:
df_S.head()

Unnamed: 0.1,Unnamed: 0,age,sex,Chest pain type,resting blood pressure,serum cholestoral,fasting blood sugar,Resting EEG,maximum heart rate,exercise induced angina,ST depression,slope ST,number of major vessels,thal,diagnosis of heart disease
0,Patient 1,32,1,1,95,0,?,0,127,0,.7,1,?,?,1
1,Patient 2,34,1,4,115,0,?,?,154,0,.2,1,?,?,1
2,Patient 3,35,1,4,?,0,?,0,130,1,?,?,?,7,3
3,Patient 4,36,1,4,110,0,?,0,125,1,1,2,?,6,1
4,Patient 5,38,0,4,105,0,?,0,166,0,2.8,1,?,?,2


In [24]:
df_LB[0]

Unnamed: 0.1,Unnamed: 0,age,sex,cp,rbtrestbpsp,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,Patient 1,63,1,4,140,260,0,1,112,1,3,2,?,?,2
1,Patient 2,44,1,4,130,209,0,1,127,0,0,?,?,?,0
2,Patient 3,60,1,4,132,218,0,1,140,1,1.5,3,?,?,2
3,Patient 4,55,1,4,142,228,0,1,149,1,2.5,1,?,?,1
4,Patient 5,66,1,3,110,213,1,2,99,1,1.3,2,?,?,0
5,Patient 6,66,1,3,120,0,0,1,120,0,-0.5,1,?,?,0
6,Patient 7,65,1,4,150,236,1,1,105,1,0,?,?,?,3
7,Patient 8,60,1,3,180,0,0,1,140,1,1.5,2,?,?,0
8,Patient 9,60,1,3,120,0,?,0,141,1,2,1,?,?,3
9,Patient 10,60,1,2,160,267,1,1,157,0,0.5,2,?,?,1


In [25]:
df_LB[1]

Unnamed: 0,Patient 32,77,1,4,124,171,0,1.1,110,1.2,2,1.3,?,?.1,3
0,Patient 33,63,1,4,160,230,1,0,105,1,1,2,?,?,3
1,Patient 34,55,1,3,0,0,0,0,155,0,1.5,2,?,?,3
2,Patient 35,52,1,3,122,0,0,0,110,1,2,3,?,?,2
3,Patient 36,64,1,4,144,0,0,1,122,1,1,2,?,?,3
4,Patient 37,60,1,4,?,281,0,1,?,?,?,?,?,?,2
5,Patient 38,60,1,4,120,0,0,0,133,1,2,1,?,7,0
6,Patient 39,58,1,4,?,203,1,0,?,?,?,?,?,?,1
7,Patient 40,59,1,4,154,0,0,1,131,1,1.5,?,0,?,0
8,Patient 41,61,1,3,120,0,0,0,80,1,0,2,?,?,3
9,Patient 42,40,1,4,125,0,1,0,165,0,0,?,?,7,1


## 1. Columns

### Complete attribute documentation:
- 1 id: patient identification number
- 3 age: age in years
- 4 sex: sex (1 = male; 0 = female)
- 9 cp: chest pain type
    - Value 1: typical angina
    - Value 2: atypical angina
    - Value 3: non-anginal pain
    - Value 4: asymptomatic
- 10 trestbps: resting blood pressure (in mm Hg on admission to the 
        hospital)
- 12 chol: serum cholestoral in mg/dl
- 16 fbs: (fasting blood sugar > 120 mg/dl)  (1 = true; 0 = false)
- 19 restecg: resting electrocardiographic results
    - Value 0: normal
    - Value 1: having ST-T wave abnormality (T wave inversions and/or ST 
            elevation or depression of > 0.05 mV)
    - Value 2: showing probable or definite left ventricular hypertrophy
            by Estes' criteria
- 32 thalach: maximum heart rate achieved
- 38 exang: exercise induced angina (1 = yes; 0 = no)
- 40 oldpeak = ST depression induced by exercise relative to rest
- 41 slope: the slope of the peak exercise ST segment
    - Value 1: upsloping
    - Value 2: flat
    - Value 3: downsloping
- 44 ca: number of major vessels (0-3) colored by flourosopy
- 51 thal: 3 = normal; 6 = fixed defect; 7 = reversable defect
     
- predicted

In [26]:
column_names = ['id',' age', 'sex', 'cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal','predicted']  
print(len(column_names))

15


In [27]:
new_column_name = 'id'
new_column_value = [f'Patient {i+1}' for i in range(len(df_Clev))]
df_Clev.insert(0, new_column_name, new_column_value)

In [28]:
df_LB_nohead = {}
for i, df in enumerate(df_LB[1:7]):
    header_row = [df.columns.tolist()]
    header_df = pd.DataFrame(header_row, columns=df.columns)
    df_with_header = pd.concat([header_df, df], ignore_index=True)
    df_LB_nohead[i] = df_with_header

In [29]:
type(df_LB_nohead)

dict

In [30]:
df_S.columns = column_names
df_H.columns = column_names
df_Clev.columns = column_names
df_LB[0].columns = column_names
for _, df in df_LB_nohead.items():
    df.columns = column_names

In [31]:
df_LB_nohead[0]

Unnamed: 0,id,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,predicted
0,Patient 32,77,1,4,124,171,0,1.1,110,1.2,2,1.3,?,?.1,3
1,Patient 33,63,1,4,160,230,1,0.0,105,1,1,2,?,?,3
2,Patient 34,55,1,3,0,0,0,0.0,155,0,1.5,2,?,?,3
3,Patient 35,52,1,3,122,0,0,0.0,110,1,2,3,?,?,2
4,Patient 36,64,1,4,144,0,0,1.0,122,1,1,2,?,?,3
5,Patient 37,60,1,4,?,281,0,1.0,?,?,?,?,?,?,2
6,Patient 38,60,1,4,120,0,0,0.0,133,1,2,1,?,7,0
7,Patient 39,58,1,4,?,203,1,0.0,?,?,?,?,?,?,1
8,Patient 40,59,1,4,154,0,0,1.0,131,1,1.5,?,0,?,0
9,Patient 41,61,1,3,120,0,0,0.0,80,1,0,2,?,?,3


In [32]:
df_with_header = pd.concat([df_S, df_H, df_Clev, df_LB[0], df_LB_nohead[0], df_LB_nohead[1], df_LB_nohead[2], df_LB_nohead[3], df_LB_nohead[4], df_LB_nohead[5]], ignore_index=True)

In [33]:
df_with_header

Unnamed: 0,id,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,predicted
0,Patient 1,32,1,1,95,0,?,0,127,0,.7,1,?,?,1
1,Patient 2,34,1,4,115,0,?,?,154,0,.2,1,?,?,1
2,Patient 3,35,1,4,?,0,?,0,130,1,?,?,?,7,3
3,Patient 4,36,1,4,110,0,?,0,125,1,1,2,?,6,1
4,Patient 5,38,0,4,105,0,?,0,166,0,2.8,1,?,?,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
915,Patient 196,54,0,4,127,333,1,1,154,0,0,?,?,?,1
916,Patient 197,62,1,1,?,139,0,1,?,?,?,?,?,?,0
917,Patient 198,55,1,4,122,223,1,1,100,0,0,?,?,6,2
918,Patient 199,58,1,4,?,385,1,2,?,?,?,?,?,?,0


## 2. Clean Up