# **Necessary Imports**

In [81]:
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pickle

# **Kaggle Setup (API Key .json via Upload)**

In [None]:
! pip install kaggle --quiet

from google.colab import files

files.upload()

! mkdir ~/.kaggle

! cp kaggle.json ~/.kaggle/

! chmod 600 ~/.kaggle/kaggle.json

# **Kaggle Setup (API Key .json from G-Drive)**

In [None]:
! pip install kaggle --quiet

kaggle_creds_path = "drive/MyDrive/kaggle.json"

from google.colab import drive
drive.mount("/content/drive")

! mkdir ~/.kaggle

! cp {kaggle_creds_path} ~/.kaggle/

! chmod 600 ~/.kaggle/kaggle.json

drive.flush_and_unmount()

# **Download Dataset**

In [None]:
dataset = "uciml/pima-indians-diabetes-database"

! kaggle datasets download -d {dataset}

! unzip {dataset.split("/")[1] + ".zip"} -d {dataset.split("/")[1]}

# **Loading Dataset**

In [37]:
# Location of the CSV inside the folder the download step extracted the archive into.
csv_path = 'pima-indians-diabetes-database/diabetes.csv'
df = pd.read_csv(csv_path)

In [None]:
# Show five randomly chosen rows as a quick look at the data.
df.sample(n=5)

# **Exploratory Data Analysis (EDA)**

In [None]:
# Column names, non-null counts and dtypes: a quick check for missing values
# and unexpected column types.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

In [None]:
# Pairwise correlations between the columns of df, drawn as a heatmap with the
# coefficient written into every cell.
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True)

# **Feature Scaling**

In [None]:
# NOTE(review): the scaler is fitted on the whole frame here, before the
# train/test split further down, so test rows contribute to the fitted
# statistics. Consider fitting on the training split only.
scaler = StandardScaler()

# Feature columns to standardise.
columns = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]

# Fit on the feature columns, then overwrite them in df with the scaled values.
scaler.fit(df[columns])
df[columns] = scaler.transform(df[columns])

df.head()

# **Getting Inputs & Labels**

In [39]:
# Pick the label by name and treat every other column as a feature. Selecting
# the features by position (all but the last column) would put the label into X
# if 'Outcome' were ever not the last column.
X = df.drop(columns='Outcome')
y = df['Outcome']

In [None]:
# Sanity check: X and y should have the same number of rows.
print(X.shape, y.shape)

# **Splitting Dataset Into Train & Test**

In [73]:
# Hold out 20% of the rows for testing.
# random_state fixes the shuffle so the split, and every score computed from
# it, is the same on each run; stratify=y keeps the class proportions of the
# label equal in the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Report the feature and label shapes of each part of the split.
for label, features, target in (
    ("Train Set: ", X_train, y_train),
    ("Test Set: ", X_test, y_test),
):
    print(label, features.shape, target.shape)

# **Model**

In [54]:
# k-nearest-neighbours classifier; n_neighbors=5 is scikit-learn's default,
# written out here so the choice of k is visible.
model = KNeighborsClassifier(n_neighbors=5)

# **Model Training**

In [None]:
# Fit the classifier on the training part of the split.
model.fit(X=X_train, y=y_train)

# **Accuracy Of The Model**

In [None]:
# Predict the held-out rows and report the share of correct predictions in percent.
y_pred = model.predict(X_test)
accuracy_pct = accuracy_score(y_test, y_pred) * 100
print(accuracy_pct)

# **Exporting The Model**

In [82]:
# The context manager closes the file handle even if dump raises; the bare
# open(...) passed straight to pickle.dump was never closed explicitly.
with open("diabetes.pkl", 'wb') as model_file:
    pickle.dump(model, model_file)

# The model was trained on features standardised by `scaler`, so raw inputs
# must go through the same fitted scaler before prediction. Save it next to the
# model; diabetes.pkl itself still contains only the model, as before.
with open("diabetes_scaler.pkl", 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)