In [None]:
# Colab only: uncomment the two lines below to mount Google Drive.
# Kept as comments (not a bare string) so running this cell prints nothing.
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Colab only: uncomment to switch into the dataset folder on the mounted Drive.
# Kept as a comment (not a bare string) so running this cell prints nothing.
# %cd /content/drive/MyDrive/AI & Machine Learning/Deep Learning/ANN-Projects/cardio_dataset

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

## **Reading the dataset**

'''.values: Once the CSV file has been read with pd.read_csv(),
the .values attribute returns the contents of the DataFrame as a NumPy array.
In other words, it converts the data from the DataFrame format to a NumPy array (NumPy is a versatile numerical computing library for Python).'''

In [None]:
# Read the CSV and keep only the underlying NumPy array (no column labels).
# The path is a raw string so the Windows backslashes are taken literally;
# in a normal string, sequences such as "\M" or "\A" are invalid escapes and
# raise a SyntaxWarning on recent Python versions.
# NOTE(review): this is an absolute, machine-specific path - adjust it to
# wherever cardio_dataset.csv lives on your machine.
dataset=pd.read_csv(r'H:\My Drive\ANN\Regression\Heart risk level prediction\cardio_dataset.csv').values
print(dataset.shape)

# **Data and Target**

In [None]:
# Columns 0-6 are the model inputs; column 7 is the value to predict.
data, target = dataset[:, :7], dataset[:, 7]

# **Scaling / Normalization**



In [None]:
# Scale the numerical features to values between 0 and 1. The range does not have to be exactly 0-1; any small range is enough.
# Formula: (x - min) / (max - min)
# Check the note.
# This is applied to both the data and the targets (targets are scaled only in regression problems).

In [None]:
from sklearn.preprocessing import MinMaxScaler

# MinMaxScaler rescales numerical data into a fixed range, typically 0 to 1,
# using (x - min) / (max - min).

# The scaler needs 2-D input, so turn the 1-D target into a single column.
# The -1 lets NumPy work out the number of rows from the array's size.
target = target.reshape(-1, 1)

# One scaler per array: the feature scaler states its (0, 1) range explicitly,
# the target scaler relies on the default.
scaler_data = MinMaxScaler(feature_range=(0, 1))
scaler_target = MinMaxScaler()

# Fit and transform in a single step.
# NOTE(review): both scalers are fitted on the full dataset, before the
# train/test split further down - confirm this is intended.
data_scaled = scaler_data.fit_transform(data)
target_scaled = scaler_target.fit_transform(target)

In [None]:
# Peek at the first ten scaled feature rows.
print(data_scaled[0:10])

## **Why Scaling?**

In [None]:
# Distribution of feature column 2 before any scaling.
fig, ax = plt.subplots()
ax.hist(data[:, 2], bins=100)
ax.set_title('Original TC')
ax.set_xlabel('TC level')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Distribution of the same feature column (index 2) after min-max scaling.
# The title and x-label previously said "Original TC", copied from the plot
# of the unscaled data.
plt.hist(data_scaled[:,2],bins=100)
plt.title('Min-Max Scaled TC')
plt.xlabel('Scaled TC level')
plt.ylabel('Frequency')
plt.show()

## **Other Scaling Methods**


### 1. Standard Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column statistics from the raw features first...
standard_scaler = StandardScaler().fit(data)
# ...then apply them to the same features.
standard_scaled_data = standard_scaler.transform(data)

In [None]:
# Distribution of feature column 2 after standard scaling.
# The title previously said "Quantiled TC", but this plot shows the
# StandardScaler output, not the quantile-transformed data.
plt.hist(standard_scaled_data[:,2],bins=100)
plt.title('Standard Scaled TC')
plt.xlabel('TC Level')
plt.ylabel('Frequency')
plt.show()

This type of scaling removes the mean and scales the data to unit variance. It is defined by the formula
![image.png](attachment:image.png)
$$z = \frac{x - \text{mean}}{\text{std}}$$
where mean is the mean of the training samples and std is their standard deviation. The best way to understand it is to see it in practice; for that we use the `StandardScaler` class from scikit-learn.

## **Quantile Transformation**

As we mentioned, some machine learning algorithms require the data to follow a uniform or normal distribution. We can achieve that with the `QuantileTransformer` class from scikit-learn. Here is what it looks like when we transform our data to a normal distribution (`output_distribution='normal'`):

In [None]:
from sklearn.preprocessing import QuantileTransformer  # can be used to centre the values around 0

# Fit the quantile mapping on the raw features, with a fixed seed so the
# result is repeatable, targeting a normal output distribution.
qtrans = QuantileTransformer(random_state=0, output_distribution='normal')
qtrans.fit(data)
quantile_transformed_data = qtrans.transform(data)

In [None]:
# Distribution of feature column 2 after the quantile transformation.
# Title and axis labels added so the figure can be read on its own, like the
# other histograms in this notebook.
plt.hist(quantile_transformed_data[:,2],bins=100)
plt.title('Quantile Transformed TC')
plt.xlabel('TC Level')
plt.ylabel('Frequency')
plt.show()

# **Train Test Split**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the scaled samples for evaluation.
# random_state is fixed so the split - and therefore every metric reported
# below - is the same on each Restart & Run All.
train_data, test_data, train_target, test_target = train_test_split(
    data_scaled, target_scaled, test_size=0.2, random_state=0
)

# **FFNN Architecture**

In [None]:
from keras.models import Sequential
from keras.layers import Dense,Dropout
import numpy as np

# Feed-forward regressor: 7 inputs -> 128 -> 64 -> 10 -> 1.
# Hidden layers use sigmoid activations, with 50% dropout after the first two;
# the single output unit is linear because this is a regression task.
model = Sequential([
    Dense(128, input_dim=7, activation='sigmoid', kernel_initializer='normal'),
    Dropout(0.5),
    Dense(64, activation='sigmoid'),
    Dropout(0.5),
    Dense(10, activation='sigmoid'),
    Dense(1, activation='linear'),
])

# Optimise mean squared error; also report MSE and MAE as metrics.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mse', 'mae'],
)

model.summary()

# **R2 Score**

In [None]:
# Accuracy cannot be computed for a regression problem, so calculate the R2 score to get an idea of how good the model is.
# Just as an accuracy close to 1 is good, an R2 score close to 1 is also good.

In [None]:
from sklearn.metrics import r2_score
import keras


# R2 plays the role that accuracy plays in classification problems.
class CustomCallback(keras.callbacks.Callback):
    """Print the R2 score on held-out data at the end of every epoch.

    Args:
        eval_data: Features to predict on. When omitted, the notebook-level
            ``test_data`` is looked up at the end of each epoch.
        eval_target: True targets matching ``eval_data``. When omitted, the
            notebook-level ``test_target`` is used.
    """

    def __init__(self, eval_data=None, eval_target=None):
        super().__init__()
        self.eval_data = eval_data
        self.eval_target = eval_target

    def on_epoch_end(self, epoch, logs=None):  # hook name is fixed by Keras
        """Predict on the evaluation data and print the epoch's R2 score."""
        # Fall back to the notebook globals so `CustomCallback()` keeps working.
        eval_data = test_data if self.eval_data is None else self.eval_data
        eval_target = test_target if self.eval_target is None else self.eval_target

        # Use the model Keras attached to this callback rather than a global,
        # and silence predict's own progress bar so the log shows one line per epoch.
        predicted_result = self.model.predict(eval_data, verbose=0)

        r2 = r2_score(eval_target, predicted_result)
        print('epoch ', epoch, '- r2 score:', r2)

# **Save Best Model**

[Full details at keras official site](https://www.tensorflow.org/guide/keras/custom_callback)

In [None]:
from keras.callbacks import ModelCheckpoint  # ModelCheckpoint = saves the model during training

# Write a checkpoint only when val_loss improves; the epoch number is
# formatted into the file name.
# The file now ends in ".h5" instead of ".model": Keras 3 rejects any
# whole-model checkpoint path that does not end in ".keras" or ".h5", and
# ".h5" matches the format used for the final model saved in this notebook.
# NOTE(review): anything that loads 'models/model-XXX.model' must be updated
# to the new suffix.
checkpoit = ModelCheckpoint('models/model-{epoch:03d}.h5', monitor='val_loss', save_best_only=True, mode='auto')

# **Train The Model**

In [None]:
# Train for 200 epochs, checkpointing the best model and printing R2 per epoch.
# NOTE(review): the test split is also used as validation data here - confirm
# this is intended.
history = model.fit(
    train_data,
    train_target,
    epochs=200,
    validation_data=(test_data, test_target),
    callbacks=[checkpoit, CustomCallback()],
)

In [None]:
from matplotlib import pyplot as plt

# Training vs. validation loss per epoch. Each curve is labelled and a legend
# is drawn so the two lines can be told apart; the values are read from the
# `history` object returned by model.fit.
plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['val_loss'], label='validation loss')
plt.xlabel('# epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

# **Problem of Overfitting**

## Solutions,

1. Early Stopping
2. Dropout Layers
3. Batch Normalization
4. Regularization
5. Reduce the Complexity
6. Data Augmentation

In [None]:
from sklearn.metrics import r2_score

# Score the trained model on the held-out split (values are still scaled).
predicted_result = model.predict(test_data)

r2 = r2_score(y_true=test_target, y_pred=predicted_result)
print('r2 score:', r2)

In [None]:
# First ten scaled targets and predictions, transposed so each prints as a row.
print('actual:', np.transpose(test_target[:10]))
print('predicted:', np.transpose(predicted_result[:10]))

In [None]:
# Same comparison, mapped back through the target scaler's inverse transform.
print('actual inverse scaled:', np.transpose(scaler_target.inverse_transform(test_target[:10])))
print('predicted inverse scaled:', np.transpose(scaler_target.inverse_transform(predicted_result[:10])))

In [None]:
# One hand-entered record of seven feature values, wrapped in an outer list so
# the scaler receives a 2-D input with a single row.
my_test_data = scaler_data.transform([[1, 56, 156, 42, 0, 1, 0]])

# Predict in scaled space, then map back through the target scaler.
result = model.predict(my_test_data)
predicted = scaler_target.inverse_transform(result)
actual = 7  # known value for this record

print('Actual Result:', actual)
print('Predicted Result:', predicted)

In [None]:
# A second hand-entered record, shaped as a single row for the scaler.
# It gets its own name: assigning it to `test_data` would overwrite the
# held-out test split, so re-running the evaluation cells (or the R2
# callback) afterwards would silently use this one sample instead.
new_sample = np.array([0,63,186,46,1,1,0]).reshape(1,-1)
scaled_test_data = scaler_data.transform(new_sample)
result = model.predict(scaled_test_data)
print(result)

In [None]:
# Map the scaled prediction in `result` back through the target scaler's
# inverse transform; as the cell's last expression, the value is displayed.
scaler_target.inverse_transform(result)

# **Save all the models**

In [None]:
import joblib

# Persist both fitted scalers so the same scaling can be reapplied to new
# inputs and predictions can be inverse-transformed later.
joblib.dump(value=scaler_data, filename='scaler_data.sav')
joblib.dump(value=scaler_target, filename='scaler_target.sav')

In [None]:
# Save the trained model to disk (the .h5 extension requests Keras's HDF5 format).
model.save('Heart Risk Level Prediction model.h5')