In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [2]:
df = pd.read_excel("data/diabetes_pregnant_1000_records.xlsx")
df.head()


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,Age,Outcome
0,6,94.544379,70.12028,44.653415,86.284348,23.543743,39,0
1,3,91.912649,63.488812,30.397803,129.288558,31.294917,25,0
2,12,129.995199,60.770617,15.422918,137.314897,28.919243,43,0
3,14,112.262935,42.737369,31.499616,22.290931,33.194468,45,0
4,10,60.0,57.080069,35.594805,45.638898,41.575985,22,0


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Pregnancies    1000 non-null   int64  
 1   Glucose        1000 non-null   float64
 2   BloodPressure  1000 non-null   float64
 3   SkinThickness  1000 non-null   float64
 4   Insulin        1000 non-null   float64
 5   BMI            1000 non-null   float64
 6   Age            1000 non-null   int64  
 7   Outcome        1000 non-null   int64  
dtypes: float64(5), int64(3)
memory usage: 62.6 KB


In [4]:
df.shape

(1000, 8)

In [5]:
df.isnull().sum()

Pregnancies      0
Glucose          0
BloodPressure    0
SkinThickness    0
Insulin          0
BMI              0
Age              0
Outcome          0
dtype: int64

In [6]:
df.duplicated().sum()

np.int64(0)

In [7]:
df.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'Age', 'Outcome'],
      dtype='object')

In [8]:
X = df.drop("Outcome", axis=1)
y = df["Outcome"]


In [9]:
X.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,Age
0,6,94.544379,70.12028,44.653415,86.284348,23.543743,39
1,3,91.912649,63.488812,30.397803,129.288558,31.294917,25
2,12,129.995199,60.770617,15.422918,137.314897,28.919243,43
3,14,112.262935,42.737369,31.499616,22.290931,33.194468,45
4,10,60.0,57.080069,35.594805,45.638898,41.575985,22


In [10]:
y.head()

0    0
1    0
2    0
3    0
4    0
Name: Outcome, dtype: int64

In [11]:
y.tail()

995    0
996    1
997    0
998    0
999    0
Name: Outcome, dtype: int64

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)


In [13]:
X_train.shape

(800, 7)

In [14]:
X_test.shape

(200, 7)

In [15]:
y_train.shape

(800,)

In [16]:
y_test.shape

(200,)

In [17]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [18]:
model = SVC(kernel='rbf', C=1, gamma='scale')
model.fit(X_train, y_train)


0,1,2
,C,1
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [19]:
y_pred = model.predict(X_test)


In [20]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))



Accuracy: 0.88

Confusion Matrix:
 [[141   8]
 [ 16  35]]


In [21]:
def predict_new(preg, glu, bp, skin, ins, bmi, age):
    point = np.array([[preg, glu, bp, skin, ins, bmi, age]])
    point_scaled = scaler.transform(point)
    result = model.predict(point_scaled)
    return "Diabetic" if result[0] == 1 else "Not Diabetic"


In [22]:
predict_new(4, 165, 82, 35, 130, 36.5, 40)




'Diabetic'

In [23]:
predict_new(2, 130, 70, 28, 80, 30.0, 29)




'Not Diabetic'

In [24]:
predict_new(1, 92, 60, 22, 65, 24.8, 22)




'Not Diabetic'