## importation des blibliotheque

In [37]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.pyplot as plt



# 1 chargement des données

In [38]:
data=pd.read_csv('data/heart_dataset.csv')

# 2 analyse des données

In [39]:
data.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168.0,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155.0,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125.0,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161.0,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106.0,0,1.9,1,3,2,0


In [40]:
data.shape

(1025, 14)

In [41]:
data.info()

<class 'pandas.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      1025 non-null   str    
 5   fbs       1025 non-null   int64  
 6   restecg   1025 non-null   int64  
 7   thalach   978 non-null    float64
 8   exang     1025 non-null   int64  
 9   oldpeak   1025 non-null   float64
 10  slope     1025 non-null   int64  
 11  ca        1025 non-null   int64  
 12  thal      1025 non-null   int64  
 13  target    1025 non-null   int64  
dtypes: float64(2), int64(11), str(1)
memory usage: 112.2 KB


In [42]:
data.isnull().sum()

age          0
sex          0
cp           0
trestbps     0
chol         0
fbs          0
restecg      0
thalach     47
exang        0
oldpeak      0
slope        0
ca           0
thal         0
target       0
dtype: int64

# 3 pretraitement des données

In [43]:
# Gestion des valeurs manquantes
data['thalach']=data['thalach'].fillna(data['thalach'].mean())

data.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [44]:
data['chol'].unique()

<StringArray>
['212', '203', '174', '294', '248', '318', '289', '249', '286', '149',
 ...
 '305', '168', '407', '290', '277', '262', '195', '166', '178', '141']
Length: 153, dtype: str

In [45]:
data['chol'] = pd.to_numeric(data['chol'], errors='coerce')


In [46]:
data.info()

<class 'pandas.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      1024 non-null   float64
 5   fbs       1025 non-null   int64  
 6   restecg   1025 non-null   int64  
 7   thalach   1025 non-null   float64
 8   exang     1025 non-null   int64  
 9   oldpeak   1025 non-null   float64
 10  slope     1025 non-null   int64  
 11  ca        1025 non-null   int64  
 12  thal      1025 non-null   int64  
 13  target    1025 non-null   int64  
dtypes: float64(3), int64(11)
memory usage: 112.2 KB


In [47]:
data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212.0,0,1,168.0,0,1.0,2,2,3,0
1,53,1,0,140,203.0,1,0,155.0,1,3.1,0,0,3,0
2,70,1,0,145,174.0,0,1,125.0,1,2.6,0,0,3,0
3,61,1,0,148,203.0,0,1,161.0,0,0.0,2,1,3,0
4,62,0,0,138,294.0,1,1,106.0,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221.0,0,1,164.0,1,0.0,2,0,2,1
1021,60,1,0,125,258.0,0,0,141.0,1,2.8,1,1,3,0
1022,47,1,0,110,275.0,0,0,118.0,1,1.0,1,1,2,0
1023,50,0,0,110,254.0,0,0,159.0,0,0.0,2,0,2,1


# 4 Diviser les données

In [48]:
data.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='str')

In [49]:
x = data.iloc[ : , :-1]
y = data.iloc[ : , -1]

In [50]:
x

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,52,1,0,125,212.0,0,1,168.0,0,1.0,2,2,3
1,53,1,0,140,203.0,1,0,155.0,1,3.1,0,0,3
2,70,1,0,145,174.0,0,1,125.0,1,2.6,0,0,3
3,61,1,0,148,203.0,0,1,161.0,0,0.0,2,1,3
4,62,0,0,138,294.0,1,1,106.0,0,1.9,1,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221.0,0,1,164.0,1,0.0,2,0,2
1021,60,1,0,125,258.0,0,0,141.0,1,2.8,1,1,3
1022,47,1,0,110,275.0,0,0,118.0,1,1.0,1,1,2
1023,50,0,0,110,254.0,0,0,159.0,0,0.0,2,0,2


In [51]:
y

0       0
1       0
2       0
3       0
4       0
       ..
1020    1
1021    0
1022    0
1023    1
1024    0
Name: target, Length: 1025, dtype: int64

In [52]:
x_tensor=torch.tensor(x.values, dtype=torch.float32)
y_tensor=torch.tensor(y.values,dtype=torch.float32)