In [None]:
# %% [markdown]
# Importing Dependencies

# %% [code]
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# %% [markdown]
# Data Collection and Analysis

# %% [code]
# Load the records from diabetes.csv (a path relative to the working directory)
# and preview the first five rows.
diabetes_dataset = pd.read_csv("diabetes.csv")
diabetes_dataset.head(n=5)

# %% [code]
# Dataset dimensions as a (rows, columns) tuple
diabetes_dataset.shape

# %% [code]
# Descriptive statistics of the dataset
diabetes_dataset.describe()

# %% [code]
# How many records carry each Outcome label
diabetes_dataset.loc[:, 'Outcome'].value_counts()

# %% [markdown]
# 0 --> Non-Diabetic
# 
# 1 --> Diabetic

# %% [code]
# Mean of every column within each Outcome class
diabetes_dataset.groupby(by='Outcome').mean()

# %% [code]
# Feature matrix: every column except the Outcome label
X = diabetes_dataset.drop(columns=['Outcome'])
# Target vector: the Outcome label on its own
y = diabetes_dataset['Outcome']

# %% [code]
print(X, y, sep="\n")

# %% [markdown]
# Standardizing Feature Variables

# %% [code]
# Hold out the test rows BEFORE fitting the scaler: fitting it on the full
# dataset would leak the test set's mean and variance into training and make
# the test score optimistic. The split itself is unchanged (same test_size,
# stratification and random_state), so the same rows land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=1111
)

# %% [code]
# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both splits. X itself is left untouched so this cell
# can be re-run without compounding transformations.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
print(X_train[:5])
print(y_train.head())

# %% [code]
# Shapes of the full feature matrix, the training split and the test split
print(*(split.shape for split in (X, X_train, X_test)))

# %% [markdown]
# Training the Model

# %% [code]
# Fit a linear-kernel support vector classifier on the training split.
# The fit call is the cell's last expression, so the fitted estimator is displayed.
classifier = SVC(kernel="linear")
classifier.fit(X=X_train, y=y_train)

# %% [markdown]
# Evaluating the Model

# %% [code]
# Accuracy score on training data
X_train_predictions = classifier.predict(X_train)
# accuracy_score's signature is (y_true, y_pred): the ground-truth labels go
# first and the model's predictions second.
X_train_accuracy = accuracy_score(y_train, X_train_predictions)

# %% [code]
print("Accuracy score on training data: ", X_train_accuracy)

# %% [code]
# Accuracy score on test data
X_test_predictions = classifier.predict(X_test)
# accuracy_score's signature is (y_true, y_pred): the ground-truth labels go
# first and the model's predictions second.
X_test_accuracy = accuracy_score(y_test, X_test_predictions)

# %% [code]
print("Accuracy score on test data: ", X_test_accuracy)

# %% [markdown]
# Saving Model

# %% [code]
import pickle

# %% [code]
filename = 'diabetes_model.sav'
# Write the trained classifier to disk. The context manager closes the file
# handle even if pickling raises, instead of leaving it open until garbage
# collection.
# NOTE(review): only the classifier is persisted here. The prediction cell
# standardizes inputs with `scaler` before calling predict, so a consumer of
# this file presumably needs the fitted scaler as well -- confirm how the
# saved model is loaded and used.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

# %% [markdown]
# Making a Predictive System

# %% [code]
input_data = (3,107,62,13,48,22.9,0.678,23)

# Label the single observation with the feature column names (every dataset
# column except the Outcome label). The scaler was fitted on a DataFrame, so
# handing it a bare array relies purely on positional order, and recent
# scikit-learn versions warn about the missing feature names. Building the
# frame also fails fast if input_data has the wrong number of values.
feature_names = [column for column in diabetes_dataset.columns if column != 'Outcome']
input_frame = pd.DataFrame([input_data], columns=feature_names)

# Standardize the input data with the statistics learned by the fitted scaler
std_data = scaler.transform(input_frame)
print(std_data)

# predict returns one label per row; there is exactly one row here
prediction = classifier.predict(std_data)
print(prediction)

# Label 0 is Non-Diabetic and 1 is Diabetic
if prediction[0] == 0:
    print("The female is Non-diabetic.")
else:
    print("The female is diabetic.")