In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


In [2]:
# Load the anemia dataset; the relative path is resolved against the kernel's working directory.
df = pd.read_csv("anemia.csv")

In [3]:
# Preview the first rows; a bare last-line expression lets the notebook use
# the DataFrame's rich display instead of plain printed text.
df.head()

   Gender  Hemoglobin   MCH  MCHC   MCV  Result
0       1        14.9  22.7  29.1  83.7       0
1       0        15.9  25.4  28.3  72.0       0
2       0         9.0  21.5  29.6  71.2       1
3       0        14.9  16.0  31.4  87.5       0
4       1        14.7  22.0  28.2  99.5       0


In [4]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(1421, 6)

In [5]:
# Column labels available for choosing the features and the target.
df.columns

Index(['Gender', 'Hemoglobin', 'MCH', 'MCHC', 'MCV', 'Result'], dtype='object')

In [6]:
# Separate the predictor columns from the label column.
feature_columns = ['Gender', 'Hemoglobin', 'MCH', 'MCHC', 'MCV']
target_column = 'Result'

X = df[feature_columns]  # independent variables
y = df[target_column]    # target variable: 1 = Anaemic, 0 = Not Anaemic

In [7]:
# Display the feature frame to confirm the column selection.
X

Unnamed: 0,Gender,Hemoglobin,MCH,MCHC,MCV
0,1,14.9,22.7,29.1,83.7
1,0,15.9,25.4,28.3,72.0
2,0,9.0,21.5,29.6,71.2
3,0,14.9,16.0,31.4,87.5
4,1,14.7,22.0,28.2,99.5
...,...,...,...,...,...
1416,0,10.6,25.4,28.2,82.9
1417,1,12.1,28.3,30.4,86.9
1418,1,13.1,17.7,28.1,80.7
1419,0,14.3,16.2,29.5,95.2


In [8]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,    # hold out 20% of the rows for evaluation
    random_state=42,  # fixed seed so the split is reproducible
)

In [9]:
# Standardize the features: learn the scaling statistics from the training
# split only, then apply that same transformation to both splits.
# NOTE(review): X_train / X_test are overwritten below, so re-running this cell
# would fit a new scaler on already-scaled data; re-run the split cell first.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Train a Logistic Regression model on the scaled training split.
# max_iter=1000 sets the solver's iteration limit.
logistic = LogisticRegression(max_iter=1000)
# As the cell's last expression, the value returned by fit() is what the notebook displays.
logistic.fit(X_train, y_train)

In [11]:
# Score the fitted classifier on the held-out split.
prediction = logistic.predict(X_test)
accuracy_scorelr = accuracy_score(y_true=y_test, y_pred=prediction)
print("Accuracy of Logistic Regression:", accuracy_scorelr)

Accuracy of Logistic Regression: 0.9894736842105263


In [2]:
# Function to make predictions on a new input row
def predict_anemia(gender, hemoglobin, mch, mchc, mcv, model=None, feature_scaler=None):
    """Classify a single blood-test record as anaemic or not.

    Parameters
    ----------
    gender : int
        Value for the ``Gender`` column (the input prompt uses 0 = Female, 1 = Male).
    hemoglobin, mch, mchc, mcv : float
        Values for the ``Hemoglobin``, ``MCH``, ``MCHC`` and ``MCV`` columns.
    model : object, optional
        Fitted classifier exposing ``predict``. When omitted, the notebook-level
        ``logistic`` is used.
    feature_scaler : object, optional
        Fitted transformer exposing ``transform``. When omitted, the
        notebook-level ``scaler`` is used.

    Returns
    -------
    str
        ``'Anaemic as prediction 1'`` when the predicted label equals 1,
        otherwise ``'Not Anaemic as prediction is 0'``.

    Raises
    ------
    NameError
        If ``model`` / ``feature_scaler`` are omitted and the corresponding
        notebook global has not been defined yet (e.g. the cell was run before
        the training cells).
    """
    # Fall back to the notebook globals only when nothing is injected, so the
    # function can also be used with objects reloaded from disk.
    if model is None:
        model = logistic
    if feature_scaler is None:
        feature_scaler = scaler

    # Build a one-row frame using the same column names and order as the
    # feature frame used for training.
    feature_names = ['Gender', 'Hemoglobin', 'MCH', 'MCHC', 'MCV']
    input_data = pd.DataFrame([[gender, hemoglobin, mch, mchc, mcv]], columns=feature_names)

    # Apply the training-time scaling before asking the model for a label.
    input_data_scaled = feature_scaler.transform(input_data)
    predicted_label = model.predict(input_data_scaled)[0]

    # 1 = Anaemic, 0 = Not Anaemic
    return 'Anaemic as prediction 1' if predicted_label == 1 else 'Not Anaemic as prediction is 0'

# Taking input from the user
def _prompt_number(prompt, cast, allowed=None):
    """Ask until the reply converts with ``cast``, is finite, and is in ``allowed`` (if given)."""
    import math  # local import keeps this cell self-contained

    while True:
        reply = input(prompt)
        try:
            value = cast(reply.strip())
        except ValueError:
            # Re-prompt instead of aborting the whole cell on a typo.
            print(f"'{reply}' is not a valid number, please try again.")
            continue
        if not math.isfinite(value):
            # float() accepts 'nan' / 'inf', which are not usable measurements.
            print(f"'{reply}' is not a finite number, please try again.")
            continue
        if allowed is not None and value not in allowed:
            choices = ', '.join(str(choice) for choice in sorted(allowed))
            print(f"Please enter one of: {choices}.")
            continue
        return value


def get_user_input():
    """Interactively collect one record, classify it, and print the result.

    Prompts on stdin for gender (restricted to 0 or 1), hemoglobin, MCH, MCHC
    and MCV. Invalid replies are rejected with a message and asked again
    rather than raising ``ValueError``. The collected values are passed to
    ``predict_anemia`` and its return value is printed.

    Returns
    -------
    None
    """
    print("Please enter the following details:")

    # Collecting input from the user (each prompt repeats until the reply is valid)
    gender = _prompt_number("Gender (0 for Female, 1 for Male): ", int, allowed={0, 1})
    hemoglobin = _prompt_number("Hemoglobin level: ", float)
    mch = _prompt_number("MCH: ", float)
    mchc = _prompt_number("MCHC: ", float)
    mcv = _prompt_number("MCV: ", float)

    # Predict whether the person is anaemic or not
    result = predict_anemia(gender, hemoglobin, mch, mchc, mcv)

    # Print the result
    print(f"The person is: {result}")

# Call the function to get user input and predict.
# Note: get_user_input() reads from input(), so this cell waits for keyboard
# entry each time it is executed.
get_user_input()

Please enter the following details:


In [12]:
import pickle

# Serialize the trained model and the fitted scaler to disk, one pickle file each.
for target_path, fitted_object in (('model.pkl', logistic), ('scaler.pkl', scaler)):
    with open(target_path, 'wb') as sink:
        pickle.dump(fitted_object, sink)
