##### Importing required modules


In [29]:
import numpy as np
import pandas as pd

Loading the dataset into a dataframe


In [30]:
# Read the CSV that sits next to the notebook into a DataFrame.
data_farame = pd.read_csv(filepath_or_buffer="diabetes.csv")

Checking the first rows of the dataframe


In [None]:
# Preview the first five rows.
data_farame.head(n=5)

View the number of rows and columns of the dataframe


In [None]:
# Tuple of (number of rows, number of columns).
data_farame.shape

Observe the number of healthy and sick individuals in the dataframe


In [None]:
# Number of rows for each distinct value of the label column.
data_farame.loc[:, "outcome"].value_counts()

##### Display the data as a chart


In [None]:
import matplotlib.pyplot as plt

# --- Distribution of the label column ---------------------------------
# Explicit bin edges at -0.5 / 0.5 / 1.5 centre the two bars on the values
# 0 and 1, so the "Healthy" / "Patient" tick labels sit directly under their
# bars. (With bins=2 the edges are 0 / 0.5 / 1 and the bars end up centred
# on 0.25 and 0.75, away from the ticks.)
# Assumes "outcome" is coded 0/1, as the tick positions below already imply.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(
    data_farame["outcome"],
    bins=[-0.5, 0.5, 1.5],
    rwidth=0.8,
    color="#66b3ff",
    edgecolor="black",
    linewidth=1.5,
)
ax.set_title("Distribution of Outcome", fontsize=16)
ax.set_xlabel("Outcome", fontsize=14)
ax.set_ylabel("Count", fontsize=14)
ax.set_xticks([0, 1])
ax.set_xticklabels(["Healthy", "Patient"], fontsize=12)
ax.tick_params(axis="y", labelsize=12)
plt.show()

# --- Correlation matrix heat map ----------------------------------------
fig, ax = plt.subplots(figsize=(12, 10))
corr_matrix = data_farame.corr()
labels = corr_matrix.columns
positions = list(range(len(labels)))

image = ax.imshow(corr_matrix, cmap="viridis", interpolation="nearest")
fig.colorbar(image, ax=ax)
ax.set_xticks(positions)
ax.set_xticklabels(labels, rotation=45, fontsize=12, ha="right")
ax.set_yticks(positions)
ax.set_yticklabels(labels, fontsize=12)

# Write each coefficient into its cell. The low end of viridis is dark, so
# values in the lower half of the plotted range get white text and the rest
# get black text; this keeps every number legible.
midpoint = (corr_matrix.min().min() + corr_matrix.max().max()) / 2
for i in positions:
    for j in positions:
        value = corr_matrix.iloc[i, j]
        ax.text(
            j,
            i,
            round(value, 2),
            ha="center",
            va="center",
            color="white" if value < midpoint else "black",
            fontsize=10,
        )

ax.set_title("Correlation Matrix", fontsize=16)
fig.tight_layout()
plt.show()

##### Separating the label data from the features


In [35]:
# Feature table: every column except the label.
x = data_farame.drop(columns="outcome")
# Label column on its own.
y = data_farame.loc[:, "outcome"]

Converting the features and labels to NumPy arrays for the model


In [36]:
# Replace the pandas objects with plain NumPy arrays of the same contents.
x, y = np.array(x), np.array(y)

##### Standardizing the feature data


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the per-column scaling statistics from `x`, then apply them to `x`.
# NOTE(review): this fit sees every row, including rows that later land in
# the test split.
scaler = StandardScaler()
scaler.fit(x)
X = scaler.transform(x)

# Show the standardised feature matrix.
X

##### Splitting the data into training and test sets


In [38]:
from sklearn.model_selection import train_test_split

# Split the *unscaled* features so the test rows stay unseen by every fitted
# step. A fixed random_state makes the split (and every metric computed from
# it) identical on each Restart & Run All; stratify=y keeps the proportion of
# each outcome value the same in both splits.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

# Refit the scaler on the training rows only (fit() discards the statistics
# from any earlier fit), then apply those statistics to both splits. This
# avoids leaking test-set means/variances into training, and it is the
# scaler state that goes with the trained model when both are saved.
x_train = scaler.fit_transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

In [None]:
# Shapes of the training features and training labels.
x_train.shape, y_train.shape

In [None]:
# Shapes of the test features and test labels.
x_test.shape, y_test.shape

##### Building the SVM model


In [41]:
from sklearn import svm

# Support vector classifier with a linear kernel, fitted on the training
# split. fit() returns the estimator itself, so the chained call binds the
# fitted model.
svm_model = svm.SVC(kernel="linear").fit(x_train, y_train)

# Predicted labels for both splits, used by the evaluation cells.
y_pred_train, y_pred_test = (
    svm_model.predict(features) for features in (x_train, x_test)
)

##### Evaluating the model's accuracy, precision, and recall


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Accuracy on the training split and on the test split; a large gap between
# the two would point to over-fitting.
acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)

(acc_train, acc_test)

In [None]:
# Precision and recall of the test-split predictions.
prec = precision_score(y_true=y_test, y_pred=y_pred_test)
reca = recall_score(y_true=y_test, y_pred=y_pred_test)

(prec, reca)

##### Saving the model using joblib


In [None]:
import os

import joblib

# joblib.dump does not create missing parent directories, so make sure the
# target folder exists; on a fresh checkout the dump would otherwise raise
# FileNotFoundError. exist_ok=True makes re-running the cell harmless.
os.makedirs("./models", exist_ok=True)

# Persist the scaler together with the model: new inputs must go through
# the same scaler object before being passed to the classifier.
joblib.dump(scaler, './models/scaler.pkl')
joblib.dump(svm_model, './models/model.pkl')