

---

#**Logistic Regression with MinMaxScaler on a CSV file (Diabetes)**

---



In [45]:
import pandas as pd

In [26]:
# Load the dataset
df = pd.read_csv('diabetes.csv', header=None, names=['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'Diabetes', 'Age', 'Class'])

In [27]:
# Display the first few rows of the dataset
df.head(2)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,Diabetes,Age,Class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0


In [28]:
# Display random samples from the dataset
df.sample(2)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,Diabetes,Age,Class
237,0,179,90,27,0,44.1,0.686,23,1
249,1,111,86,19,0,30.1,0.143,23,0


In [29]:
# Check the shape of the dataset
df.shape

(768, 9)

In [30]:
# Get information about the dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Pregnancies    768 non-null    int64  
 1   Glucose        768 non-null    int64  
 2   BloodPressure  768 non-null    int64  
 3   SkinThickness  768 non-null    int64  
 4   Insulin        768 non-null    int64  
 5   BMI            768 non-null    float64
 6   Diabetes       768 non-null    float64
 7   Age            768 non-null    int64  
 8   Class          768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [31]:
# Select features (X) and target (y)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [32]:
print(X)

     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0              6      148             72             35        0  33.6   
1              1       85             66             29        0  26.6   
2              8      183             64              0        0  23.3   
3              1       89             66             23       94  28.1   
4              0      137             40             35      168  43.1   
..           ...      ...            ...            ...      ...   ...   
763           10      101             76             48      180  32.9   
764            2      122             70             27        0  36.8   
765            5      121             72             23      112  26.2   
766            1      126             60              0        0  30.1   
767            1       93             70             31        0  30.4   

     Diabetes  Age  
0       0.627   50  
1       0.351   31  
2       0.672   32  
3       0.167   21  
4     

In [33]:
print(y)

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Class, Length: 768, dtype: int64


In [34]:
# Scale the features using MinMaxScaler
from sklearn.preprocessing import MinMaxScaler

ms = MinMaxScaler()
Xscaled = ms.fit_transform(X)

In [35]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(X, y, random_state=1, test_size=0.25)
xstrain, xstest, ystrain, ystest = train_test_split(Xscaled, y, random_state=1, test_size=0.25)

In [36]:
# Train the Logistic Regression model
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model2 = LogisticRegression()

In [37]:
model.fit(xtrain, ytrain)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [38]:
model2.fit(xstrain, ystrain)

In [39]:
# Make predictions on the test data
predictions = model.predict(xtest)
predictions2 = model2.predict(xstest)

In [40]:
# Print prediction probabilities
print(model.predict_proba(xtest)[0:10])

[[0.5782672  0.4217328 ]
 [0.6767115  0.3232885 ]
 [0.84994822 0.15005178]
 [0.95110289 0.04889711]
 [0.79044472 0.20955528]
 [0.71895071 0.28104929]
 [0.62547503 0.37452497]
 [0.89725634 0.10274366]
 [0.84148183 0.15851817]
 [0.79090089 0.20909911]]


In [41]:
print(model2.predict_proba(xstest)[0:10])

[[0.51866849 0.48133151]
 [0.69543093 0.30456907]
 [0.7909034  0.2090966 ]
 [0.88822612 0.11177388]
 [0.7443152  0.2556848 ]
 [0.74527594 0.25472406]
 [0.66944059 0.33055941]
 [0.83451907 0.16548093]
 [0.80770663 0.19229337]
 [0.77810069 0.22189931]]


In [42]:
# Evaluate the model
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print("Accuracy without scaling:", accuracy_score(ytest, predictions))
print("Accuracy with scaling:", accuracy_score(ystest, predictions2))

Accuracy without scaling: 0.7760416666666666
Accuracy with scaling: 0.7708333333333334


In [43]:
print("Confusion Matrix without scaling:\n", confusion_matrix(ytest, predictions))
print("Confusion Matrix with scaling:\n", confusion_matrix(ystest, predictions2))

Confusion Matrix without scaling:
 [[109  14]
 [ 29  40]]
Confusion Matrix with scaling:
 [[110  13]
 [ 31  38]]


In [44]:
print("Classification Report without scaling:\n", classification_report(ytest, predictions))
print("Classification Report with scaling:\n", classification_report(ystest, predictions2))

Classification Report without scaling:
               precision    recall  f1-score   support

           0       0.79      0.89      0.84       123
           1       0.74      0.58      0.65        69

    accuracy                           0.78       192
   macro avg       0.77      0.73      0.74       192
weighted avg       0.77      0.78      0.77       192

Classification Report with scaling:
               precision    recall  f1-score   support

           0       0.78      0.89      0.83       123
           1       0.75      0.55      0.63        69

    accuracy                           0.77       192
   macro avg       0.76      0.72      0.73       192
weighted avg       0.77      0.77      0.76       192

