In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import pickle

def get_clean_data():
    data = pd.read_csv("diabetes.csv")
    return data

def create_model():
    data = get_clean_data()
    X = data.drop(["Outcome"], axis=1)
    y = data["Outcome"]
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.30, random_state=42)
    
    model = LogisticRegression()
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    print("Accuracy of our model is:", accuracy_score(y_test, y_pred))
    print("Classification report:\n", classification_report(y_test, y_pred))
    
    return model, scaler

def main():
    model, scaler = create_model()
    with open("model.pkl", "wb") as pickle_out:
        pickle.dump(model, pickle_out)
    with open("scaler.pkl", "wb") as scaler_out:
        pickle.dump(scaler, scaler_out)

if __name__ == "__main__":
    main()

Accuracy of our model is: 0.7359307359307359
Classification report:
               precision    recall  f1-score   support

           0       0.80      0.79      0.80       151
           1       0.62      0.62      0.62        80

    accuracy                           0.74       231
   macro avg       0.71      0.71      0.71       231
weighted avg       0.74      0.74      0.74       231

