In [18]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler


In [19]:
# Function to generate synthetic data
def generate_synthetic_data(num_samples=5000, seed=42):
    """
    Generate a synthetic knee-osteoarthritis (OA) questionnaire dataset.

    Every row holds a random age, a random gender, six random yes/no
    questionnaire answers, and a binary label derived from those answers.

    Args:
        num_samples: Number of rows to generate.
        seed: Seed for the private random generator. The default (42)
            reproduces the data the function has always produced.

    Returns:
        pandas.DataFrame with the columns 'age', 'gender', the six
        questionnaire columns, and 'oa_probability'. Despite its name,
        'oa_probability' holds the binary label (1 = high, 0 = low), not a
        probability.

    Notes:
        The label is a deterministic function of the six answers only; age
        and gender do not influence it, so a sufficiently flexible
        classifier can reproduce the labels exactly.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.* calls, but does not reset NumPy's global RNG
    # for the rest of the notebook.
    rng = np.random.RandomState(seed)

    # Simulate user data (age and gender); the upper age bound is exclusive
    ages = rng.randint(30, 85, num_samples)
    genders = rng.choice([0, 1], num_samples)  # 0: Female, 1: Male

    # Probability of answering "yes" (1) to each question. The dict order
    # fixes both the order of the random draws and the column order.
    yes_probability = {
        'knee_pain': 0.7,
        'frequent_pain': 0.5,
        'stiffness_after_rest': 0.6,
        'pain_affecting_daily_activities': 0.5,
        'swelling': 0.7,
        'previous_injuries': 0.6,
    }
    questions = {
        name: rng.choice([0, 1], num_samples, p=[1 - p_yes, p_yes])
        for name, p_yes in yes_probability.items()
    }

    # Weighted symptom score in [0, 1]; the weights sum to 1.0.
    # NOTE(review): combinations whose exact sum is 0.5 are decided by
    # floating-point rounding of this expression, so the term order is kept
    # as-is to leave the generated labels unchanged.
    prob_oa = (
        0.2 * questions['knee_pain'] +
        0.15 * questions['frequent_pain'] +
        0.15 * questions['stiffness_after_rest'] +
        0.2 * questions['pain_affecting_daily_activities'] +
        0.15 * questions['swelling'] +
        0.15 * questions['previous_injuries']
    )

    # Classify the score: High (1) when strictly above 0.5, otherwise Low (0)
    oa_labels = (prob_oa > 0.5).astype(int)

    # Create DataFrame with all the features and the label
    df = pd.DataFrame({
        'age': ages,
        'gender': genders,
        **questions,
        'oa_probability': oa_labels
    })
    return df


In [20]:
# Generate synthetic data (left unmodified so the raw frame stays inspectable)
df = generate_synthetic_data()

# Split data into features and labels; X keeps the raw, unscaled age
X = df.drop(columns=['oa_probability'])
y = df['oa_probability']

# Split into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the 'age' column using statistics from the training rows only,
# so that no information about the test set leaks into preprocessing.
# The same fitted scaler is then applied to the test rows.
scaler = StandardScaler()
X_train = X_train.assign(age=scaler.fit_transform(X_train[['age']]).ravel())
X_test = X_test.assign(age=scaler.transform(X_test[['age']]).ravel())


In [21]:
# Fit a 100-tree random forest on the training partition
# (fit() returns the fitted estimator itself)
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score the held-out partition and report the share of correct predictions
accuracy = accuracy_score(y_true=y_test, y_pred=model.predict(X_test))
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 1.00


In [22]:
# Function to predict OA probability based on user input
def predict_oa(age, gender, responses):
    """
    Predict the OA class for one user with the trained model.

    Relies on the notebook-level `scaler` and `model` objects, which must
    already be fitted when this function is called.

    Args:
        age: Age of the user (unscaled; it is standardized here).
        gender: Gender of the user (0: Female, 1: Male).
        responses: Sequence of six questionnaire answers (0 or 1) in the
            order knee_pain, frequent_pain, stiffness_after_rest,
            pain_affecting_daily_activities, swelling, previous_injuries.
            Any iterable is accepted (list, tuple, NumPy array, ...).
    Returns:
        "High Probability of OA" if the model predicts class 1,
        otherwise "Low Probability of OA".
    Raises:
        ValueError: If `responses` does not contain exactly six answers.
    """
    question_columns = [
        'knee_pain',
        'frequent_pain',
        'stiffness_after_rest',
        'pain_affecting_daily_activities',
        'swelling',
        'previous_injuries',
    ]

    # Normalise to a list: list + tuple raises TypeError, and list + ndarray
    # attempts element-wise addition instead of concatenation.
    answers = list(responses)
    if len(answers) != len(question_columns):
        raise ValueError(
            f"Expected {len(question_columns)} questionnaire responses, got {len(answers)}"
        )

    # Standardize the input age using the same scaler; a named column
    # matches the 'age' column the scaler was fitted on.
    age_scaled = scaler.transform(pd.DataFrame({'age': [age]}))[0][0]

    # Prepare a single-row frame whose columns mirror the training features
    input_data = pd.DataFrame(
        [[age_scaled, gender] + answers],
        columns=['age', 'gender'] + question_columns,
    )

    # Make prediction using the trained model
    prediction = model.predict(input_data)

    # Return prediction result
    return "High Probability of OA" if prediction[0] == 1 else "Low Probability of OA"


In [23]:
# Demonstrate predict_oa for a 65-year-old male who answers "yes" to every
# question except the fifth (swelling)
example_prediction = predict_oa(age=65, gender=1, responses=[1, 1, 1, 1, 0, 1])
print("Prediction:", example_prediction)


Prediction: High Probability of OA




In [25]:
# Persist the trained model
import pickle

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# A scikit-learn RandomForestClassifier cannot be stored in Keras' .h5 format,
# so the fitted estimator is pickled together with the scaler that
# predict_oa() needs to standardize the age.
# SECURITY: only unpickle this file from a trusted source; pickle.load can
# execute arbitrary code.
with open('oa_questionnaire_model.pkl', 'wb') as model_file:
    pickle.dump({'model': model, 'scaler': scaler}, model_file)
print("Trained RandomForest and scaler saved as 'oa_questionnaire_model.pkl'")

# Keras placeholder, kept so that 'oa_questionnaire_model.h5' is still
# produced. This network is freshly initialised and never trained: it shares
# no parameters with the RandomForest and its outputs are not meaningful
# predictions.
model_structure = Sequential([
    Dense(64, activation='relu', input_dim=X_train.shape[1]),  # one input per feature column
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Save the placeholder architecture using Keras' `model.save` method
model_structure.save('oa_questionnaire_model.h5')
print("Untrained Keras placeholder saved as 'oa_questionnaire_model.h5' (contains no learned weights)")




Model saved successfully as 'oa_questionnaire_model.h5'
