In [22]:
# Install Pyspark  if not already present
!pip install pyspark



In [24]:
from google.colab import drive

# Location inside the Colab VM where Google Drive is attached; the saved
# model paths used later in the notebook live underneath it.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [25]:
from pyspark.ml.classification import LogisticRegression, RandomForestClassifier, LinearSVC, GBTClassifier
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.sql import SparkSession

# Create the Spark session for this notebook, or reuse the one that is
# already running in this kernel (getOrCreate).
spark = (
    SparkSession.builder
    .appName("DiabetesClassification")
    .getOrCreate()
)

In [27]:
from pyspark.ml.classification import LogisticRegressionModel, RandomForestClassificationModel, LinearSVCModel, GBTClassificationModel
from pyspark.sql import Row

# Root folder on the mounted Drive that holds one sub-folder per saved model.
MODELS_DIR = "/content/drive/MyDrive/Colab_Saved_ML_Models"

# Define paths to the saved models.
# NOTE: the lr_/rf_/lsvc_/gbt_ prefixes do NOT describe the algorithm. Every
# path points at an "lr_model_*" directory and every model is loaded as a
# LogisticRegressionModel; what differs is the condition named in the path.
# The variable names are kept because later cells refer to them.
lr_model_path = f"{MODELS_DIR}/Diabetes_Model/lr_model_diabetes"          # diabetes
rf_model_path = f"{MODELS_DIR}/Asthma_Model/lr_model_asthma"              # asthma
lsvc_model_path = f"{MODELS_DIR}/Arthritis_Model/lr_model_arthritis"      # arthritis
gbt_model_path = f"{MODELS_DIR}/HeartAttack_Model/lr_model_heartattack"   # heart attack

# Load pretrained models (all four are logistic regression models).
lr_model = LogisticRegressionModel.load(lr_model_path)
rf_model = LogisticRegressionModel.load(rf_model_path)
lsvc_model = LogisticRegressionModel.load(lsvc_model_path)
gbt_model = LogisticRegressionModel.load(gbt_model_path)



In [29]:
def get_user_input():
    """Interactively collect the health questionnaire answers from stdin.

    Every question is repeated until a valid answer is entered. Besides the
    raw answers, the result contains a 'BMI' entry computed from the entered
    weight and height, and 'LastCheckupTimeIndex' holds the value that
    ``map_last_checkup`` assigns to the chosen option (not the option number).

    Returns:
        dict: feature name -> entered (or derived) numeric value, inserted in
        the order the questions are asked.
    """
    import math  # local import so the function does not depend on other cells

    def get_numeric_input(prompt):
        """Prompt until the user enters a finite number greater than zero."""
        while True:
            try:
                value = float(input(prompt))
            except ValueError:
                print("Invalid input. Please enter a numeric value!")
                continue
            # float() also parses "inf" and "nan"; neither is a usable
            # measurement (an infinite weight or height would yield an
            # inf or 0 BMI), so only finite positive values are accepted.
            if math.isfinite(value) and value > 0:
                return value
            print("Invalid input. Please enter a positive numeric value!")

    def get_indexed_input(prompt, options):
        """Prompt until the user enters an integer contained in ``options``."""
        while True:
            try:
                value = int(input(prompt))
            except ValueError:
                print(f"Invalid input! Please enter a number from the options: {options}")
                continue
            if value in options:
                return value
            print(f"Invalid input. Please enter one of the following options: {options}")

    def map_last_checkup(value):
        """Translate a last-check-up option number into its numeric feature value.

        Returns None when ``value`` is not one of the known options.
        """
        # NOTE(review): values presumably approximate years since the last
        # check-up -- confirm against the preprocessing used for training.
        mapping = {
            0: 0.5,
            1: 1.5,
            2: 3.5,
            3: 5
        }
        return mapping.get(value, None)

    # Two-option (0/1) questions, in the order they are asked.
    binary_questions = [
        ('SexIndexed',
         "What is your gender? (0 for Female, 1 for Male): "),
        ('PhysicalActivitiesIndexed',
         "Do you engage in physical exercise weekly? (0 for No, 1 for Yes): "),
        ('HadAnginaIndexed',
         "Have you ever experienced angina (chest pain)? (0 for No, 1 for Yes): "),
        ('HadStrokeIndexed',
         "Have you ever had a stroke? (0 for No, 1 for Yes): "),
        ('HadSkinCancerIndexed',
         "Have you ever been diagnosed with skin cancer? (0 for No, 1 for Yes): "),
        ('HadCOPDIndexed',
         "Do you often have a cough with mucus that has lasted for over three months in the last two years? (0 for No, 1 for Yes): "),
        ('HadDepressiveDisorderIndexed',
         "Have you ever been diagnosed with a depressive disorder? (0 for No, 1 for Yes): "),
        ('HadKidneyDiseaseIndexed',
         "Have you ever been diagnosed with any kind of kidney disease? (0 for No, 1 for Yes): "),
        ('DeafOrHardOfHearingIndexed',
         "Do you have any difficulties with your hearing? (0 for No, 1 for Yes): "),
        ('BlindOrVisionDifficultyIndexed',
         "Do you have any blindness or vision difficulties? (0 for No, 1 for Yes): "),
        ('DifficultyConcentratingIndexed',
         "Do you have difficulty concentrating, remembering, or making decisions? (0 for No, 1 for Yes): "),
        ('DifficultyWalkingIndexed',
         "Do you have difficulty walking or climbing stairs? (0 for No, 1 for Yes): "),
        ('DifficultyDressingBathingIndexed',
         "Do you have difficulty dressing or bathing? (0 for No, 1 for Yes): "),
        ('DifficultyErrandsIndexed',
         "Do you face any challenges in managing your own regular tasks? (0 for No, 1 for Yes): "),
        ('ChestScanIndexed',
         "Have you had a chest scan in the past? (0 for No, 1 for Yes): "),
        ('AlcoholDrinkersIndexed',
         "Do you drink alcohol? (0 for No, 1 for Yes): "),
        ('HIVTestingIndexed',
         "Have you ever been tested for HIV? (0 for No, 1 for Yes): "),
        ('FluVaxLast12Indexed',
         "Have you received a flu vaccine in the last 12 months? (0 for No, 1 for Yes): "),
        ('PneumoVaxEverIndexed',
         "Have you ever received a pneumonia vaccine? (0 for No, 1 for Yes): "),
    ]

    user_input = {}

    user_input['SleepHours'] = get_numeric_input("How many hours of sleep do you get on average each night? ")
    user_input['WeightInKilograms'] = get_numeric_input("What is your current weight in kilograms? ")

    # Height is asked in centimetres; BMI = kg / m^2, hence the division by 100.
    Height = get_numeric_input("What is your height in centimeters? ")
    BMI = user_input['WeightInKilograms'] / ((Height / 100) ** 2)

    user_input['BMI'] = BMI
    user_input['HeightInCentimeters'] = Height

    for feature_name, prompt in binary_questions:
        user_input[feature_name] = get_indexed_input(prompt, [0, 1])

    user_input['GeneralHealthIndex'] = get_indexed_input(
        "How would you rate your general health? (0 for Excellent, 1 for Very good, 2 for Good, 3 for Fair, 4 for Poor): ", [0, 1, 2, 3, 4]
    )

    last_checkup_index = get_indexed_input(
        "When was your last health check-up? (0 for Within past year, 1 for Within past 2 years, 2 for Within past 5 years, 3 for 5 or more years ago): ", [0, 1, 2, 3]
    )
    # Store the mapped numeric value rather than the option number itself.
    user_input['LastCheckupTimeIndex'] = map_last_checkup(last_checkup_index)

    user_input['SmokerStatusIndex'] = get_indexed_input(
        "Do you Smoke? (0 for Never smoked, 1 for Former smoker, 2 for Current smoker - some days, 3 for Current smoker - every day): ", [0, 1, 2, 3]
    )
    user_input['AgeCategory'] = get_indexed_input(
        "Which age category do you fall into? (0 for Age 18 to 24, 1 for Age 25 to 29, 2 for Age 30 to 34, 3 for Age 35 to 39, 4 for Age 40 to 44, "
        "5 for Age 45 to 49, 6 for Age 50 to 54, 7 for Age 55 to 59, 8 for Age 60 to 64, 9 for Age 65 to 69, 10 for Age 70 to 74, "
        "11 for Age 75 to 79, 12 for Age 80 or older): ", list(range(13))
    )

    return user_input

# Collect the questionnaire answers interactively
user_input = get_user_input()

# Wrap the answers in a single-row Spark DataFrame
user_row = Row(**user_input)
user_df = spark.createDataFrame([user_row])

# Feature columns, listed in the order they are assembled into the vector
feature_columns = [
    'SleepHours',
    'WeightInKilograms',
    'BMI',
    'HeightInCentimeters',
    'SexIndexed',
    'PhysicalActivitiesIndexed',
    'HadAnginaIndexed',
    'HadStrokeIndexed',
    'HadSkinCancerIndexed',
    'HadCOPDIndexed',
    'HadDepressiveDisorderIndexed',
    'HadKidneyDiseaseIndexed',
    'DeafOrHardOfHearingIndexed',
    'BlindOrVisionDifficultyIndexed',
    'DifficultyConcentratingIndexed',
    'DifficultyWalkingIndexed',
    'DifficultyDressingBathingIndexed',
    'DifficultyErrandsIndexed',
    'ChestScanIndexed',
    'AlcoholDrinkersIndexed',
    'HIVTestingIndexed',
    'FluVaxLast12Indexed',
    'PneumoVaxEverIndexed',
    'GeneralHealthIndex',
    'LastCheckupTimeIndex',
    'SmokerStatusIndex',
    'AgeCategory',
]

# Report how many features are being used
print(len(feature_columns))

# Reorder the columns to match feature_columns, then display the row
user_df = user_df.select(feature_columns)
user_df.show()

# Pack the individual columns into one "features" vector column and display
# the row again, now including that column
assembler = VectorAssembler(inputCols=feature_columns, outputCol="features")
user_df = assembler.transform(user_df)
user_df.show()


How many hours of sleep do you get on average each night? 5
What is your current weight in kilograms? 50
What is your height in centimeters? 178
What is your gender? (0 for Female, 1 for Male): 1
Do you engage in physical exercise weekly? (0 for No, 1 for Yes): 0
Have you ever experienced angina (chest pain)? (0 for No, 1 for Yes): 0
Have you ever had a stroke? (0 for No, 1 for Yes): 0
Have you ever been diagnosed with skin cancer? (0 for No, 1 for Yes): 0
Do you often have a cough with mucus that has lasted for over three months in the last two years? (0 for No, 1 for Yes): 1
Have you ever been diagnosed with a depressive disorder? (0 for No, 1 for Yes): 1
Have you ever been diagnosed with any kind of kidney disease? (0 for No, 1 for Yes): 0
Do you have any difficulties with your hearing? (0 for No, 1 for Yes): 0
Do you have any blindness or vision difficulties? (0 for No, 1 for Yes): 0
Do you have difficulty concentrating, remembering, or making decisions? (0 for No, 1 for Yes): 0
Do

In [30]:
# Make predictions with each model.
# The lr_/rf_/lsvc_/gbt_ prefixes are historical: per the model paths in the
# loading cell, each model is a logistic regression for one condition
# (diabetes, asthma, arthritis, heart attack respectively).
lr_predictions = lr_model.transform(user_df)
rf_predictions = rf_model.transform(user_df)
lsvc_predictions = lsvc_model.transform(user_df)
gbt_predictions = gbt_model.transform(user_df)

# Show the predicted class per condition, labelled by what each model
# actually predicts rather than by an algorithm name.
for condition, predictions in [
    ("Diabetes", lr_predictions),
    ("Asthma", rf_predictions),
    ("Arthritis", lsvc_predictions),
    ("Heart Attack", gbt_predictions),
]:
    print(f"{condition} Prediction:")
    predictions.select("prediction").show()

Logistic Regression Prediction:
+----------+
|prediction|
+----------+
|       0.0|
+----------+

Random Forest Prediction:
+----------+
|prediction|
+----------+
|       0.0|
+----------+

Linear SVM Prediction:
+----------+
|prediction|
+----------+
|       0.0|
+----------+

Gradient-Boosted Trees Prediction:
+----------+
|prediction|
+----------+
|       0.0|
+----------+



In [31]:
# Show the class probabilities produced by each model.
# Labels name the condition each model predicts (taken from the model paths
# in the loading cell); all four models are logistic regressions, so the
# variable prefixes do not indicate the algorithm.
for condition, predictions in [
    ("Diabetes", lr_predictions),
    ("Asthma", rf_predictions),
    ("Arthritis", lsvc_predictions),
    ("Heart Attack", gbt_predictions),
]:
    print(f"{condition} - Probabilities:")
    predictions.select("probability").show(10, truncate=False)


Logistic Regression - Probabilities:
+------------------------------------------+
|probability                               |
+------------------------------------------+
|[0.9957157367798635,0.0042842632201365305]|
+------------------------------------------+

Random Forest - Probabilities:
+----------------------------------------+
|probability                             |
+----------------------------------------+
|[0.7687432923710725,0.23125670762892747]|
+----------------------------------------+

Arthritis - Probabilities:
+----------------------------------------+
|probability                             |
+----------------------------------------+
|[0.8518575893107174,0.14814241068928258]|
+----------------------------------------+

Gradient-Boosted Trees - Probabilities:
+-----------------------------------------+
|probability                              |
+-----------------------------------------+
|[0.9882155744137042,0.011784425586295777]|
+------------------------------