#### Import Library

In [1]:
import numpy as np 
import pandas as pd 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm 
from sklearn.metrics import accuracy_score

#### Read Dataset

In [2]:
# Load the diabetes dataset from a CSV file expected in the current working directory.
dataset = pd.read_csv('diabetes.csv')

In [4]:
# Preview the first rows to sanity-check the columns and their value ranges.
dataset.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
# Dimensions of the loaded frame as (rows, columns).
dataset.shape

(768, 9)

In [7]:
# Count the samples per class in the 'Outcome' label column to gauge class balance.
dataset['Outcome'].value_counts()

0    500
1    268
Name: Outcome, dtype: int64

#### Mengecek Missing Value

In [8]:
# Count NaN entries per column.
# NOTE(review): isna() only flags NaN. The preview of the first rows shows 0 for
# SkinThickness and Insulin, which may be placeholders for missing measurements
# rather than real readings — confirm against the dataset description.
dataset.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

#### Split Data & Label

In [11]:
# Separate the predictors from the label by column name instead of by position,
# so the split stays correct if columns are added to or reordered in the CSV.
x = dataset.drop(columns='Outcome')
# Double brackets keep y as a one-column DataFrame, matching the original slice.
y = dataset[['Outcome']]

In [12]:
# Display the feature frame (the rendered output elides the middle rows).
x

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47


In [13]:
# Display the label frame (the rendered output elides the middle rows).
y

Unnamed: 0,Outcome
0,1
1,0
2,1
3,0
4,1
...,...
763,0
764,0
765,0
766,1


#### Standardisasi Data

In [14]:
# Create the scaler used to standardize the features (library default settings).
scaler = StandardScaler()

In [15]:
# Learn the per-feature scaling statistics from the feature frame.
# NOTE(review): this fits on every row before the train/test split performed
# later, so test-set statistics influence the scaling. Consider fitting on the
# training rows only to avoid leakage.
scaler.fit(x)

In [16]:
# Apply the fitted scaling to all rows of the feature frame.
standarized_data = scaler.transform(x)

In [17]:
# Inspect the standardized feature values.
standarized_data

array([[ 0.63994726,  0.84832379,  0.14964075, ...,  0.20401277,
         0.46849198,  1.4259954 ],
       [-0.84488505, -1.12339636, -0.16054575, ..., -0.68442195,
        -0.36506078, -0.19067191],
       [ 1.23388019,  1.94372388, -0.26394125, ..., -1.10325546,
         0.60439732, -0.10558415],
       ...,
       [ 0.3429808 ,  0.00330087,  0.14964075, ..., -0.73518964,
        -0.68519336, -0.27575966],
       [-0.84488505,  0.1597866 , -0.47073225, ..., -0.24020459,
        -0.37110101,  1.17073215],
       [-0.84488505, -0.8730192 ,  0.04624525, ..., -0.20212881,
        -0.47378505, -0.87137393]])

In [18]:
# Rebind the working names: the features become the standardized array and the
# label becomes the 'Outcome' column selected on its own.
x, y = standarized_data, dataset['Outcome']

#### Split Data Training / Data Testing

In [19]:
# Hold out 20% of the rows for testing. Stratifying on the label keeps the class
# ratio consistent across both subsets, and the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=0,
)

In [20]:
# Report the shapes of the full, test and training feature arrays, in that order.
print(*(features.shape for features in (x, x_test, x_train)))

(768, 8) (154, 8) (614, 8)


#### Melatih Model dengan Algoritma SVM

In [21]:
# Support vector classifier with a linear kernel; every other setting is left at its default.
classifier = svm.SVC(kernel='linear')

In [22]:
# Train the classifier on the training split.
classifier.fit(x_train,y_train)

#### Evaluasi Model

In [23]:
# Predict labels for the same rows the model was trained on.
x_train_prediction = classifier.predict(x_train)
# accuracy_score expects (y_true, y_pred), so the ground truth goes first.
# Accuracy is symmetric, so the value is unchanged, but the correct order matters
# as soon as this call is adapted to an asymmetric metric (precision, recall, ...).
training_accuracy_prediction = accuracy_score(y_train, x_train_prediction)

In [24]:
# Report the accuracy measured on the training split.
print(f"Akurasi Data Training : {training_accuracy_prediction}")

Akurasi Data Training : 0.7801302931596091


In [25]:
# Predict labels for the held-out rows the model has not seen during training.
x_test_prediction = classifier.predict(x_test)
# accuracy_score expects (y_true, y_pred), so the ground truth goes first.
# Accuracy is symmetric, so the value is unchanged, but the correct order matters
# as soon as this call is adapted to an asymmetric metric (precision, recall, ...).
testing_accuracy_prediction = accuracy_score(y_test, x_test_prediction)

In [26]:
# Report the accuracy measured on the held-out test split.
print(f"Akurasi Data Testing : {testing_accuracy_prediction}")

Akurasi Data Testing : 0.7792207792207793


#### Membuat Model Prediksi

In [27]:
# Feature values for a single patient, in the same order as the dataset's
# feature columns (every column except 'Outcome').
sample_input_data = (6, 148, 72, 35, 0, 33.6, 0.627, 50)

# Shape the record as a single-row 2-D array, the layout scikit-learn expects.
sample_input_data_array = np.array(sample_input_data)
input_data_reshape = sample_input_data_array.reshape(1, -1)

# The scaler was fitted on a DataFrame, so attach the same column names before
# transforming. Passing a bare array makes recent scikit-learn versions warn
# that X does not have valid feature names, and leaves the feature order implicit.
input_frame = pd.DataFrame(
    input_data_reshape,
    columns=dataset.columns.drop('Outcome'),
)

# Standardize the record with the statistics learned from the dataset.
std_data = scaler.transform(input_frame)
print(std_data)

prediction = classifier.predict(std_data)
print(prediction)

# Class 0 is reported as "no diabetes"; any other predicted class as "diabetes".
if prediction[0] == 0:
    print('Pasien Tidak Terkena Diabetes')
else:
    print('Pasien Terkena Diabetes')

[[ 0.63994726  0.84832379  0.14964075  0.90726993 -0.69289057  0.20401277
   0.46849198  1.4259954 ]]
[1]
Pasien Terkena Diabetes




#### Simpan Model & Upload Streamlit

In [28]:
import pickle

In [29]:
filename = 'diabetes_model.sav'
# Use a context manager so the file handle is flushed and closed even if
# pickling fails; the original left the handle open.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)
# NOTE(review): only the classifier is persisted. It was trained on standardized
# features, so any consumer of this file also needs the fitted `scaler` (or the
# same scaling statistics) to prepare raw inputs — confirm how the app handles this.