# Gradio Demo: animeganv2
### Recreate the viral AnimeGAN image transformation demo.
        

In [5]:
!pip install pandas scikit-learn flask



In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

data = pd.read_csv("Disease_symptom_and_patient_profile_dataset.csv")
X = data.drop("Disease", axis=1)
y = data["Outcome Variable"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Check accuracy
accuracy = model.score(X_test, y_test)
print(f"Model trained with accuracy: {accuracy * 100:.2f}%")

# Prediction function
def predict_disease(symptoms):
    input_data = pd.DataFrame([symptoms])  # Symptom data as input format
    probabilities = model.predict_proba(input_data)
    predicted_disease = model.predict(input_data)[0]
    confidence = max(probabilities[0])

    if confidence < 0.9:
        return "Low confidence in prediction. Please consult a doctor."
    return f"Predicted Disease: {predicted_disease} (Confidence: {confidence:.2f})"


ValueError: could not convert string to float: 'Yes'

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder  # Import OneHotEncoder
import numpy as np # Import numpy for array manipulation


data = pd.read_csv("Disease_symptom_and_patient_profile_dataset.csv")
X = data.drop("Disease", axis=1)
y = data["Outcome Variable"]

# Identify categorical columns
categorical_cols = X.select_dtypes(include=['object']).columns

# Create OneHotEncoder
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore') # sparse=False for dense output

# Fit and transform the categorical features
encoded_data = encoder.fit_transform(X[categorical_cols])

# Create a DataFrame from the encoded data
encoded_df = pd.DataFrame(encoded_data, columns=encoder.get_feature_names_out(categorical_cols))

# Drop original categorical columns and concatenate encoded columns
X = X.drop(categorical_cols, axis=1)
X = pd.concat([X, encoded_df], axis=1)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Check accuracy
accuracy = model.score(X_test, y_test)
print(f"Model trained with accuracy: {accuracy * 100:.2f}%")

# Prediction function
def predict_disease(symptoms):
    # Ensure symptoms are in the same format as training data (with encoding)
    input_data = pd.DataFrame([symptoms], columns=X.columns)

    # Apply one-hot encoding to the input data
    categorical_input = input_data[categorical_cols]
    encoded_input = encoder.transform(categorical_input)
    encoded_input_df = pd.DataFrame(encoded_input, columns=encoder.get_feature_names_out(categorical_cols))

    # Drop original categorical columns and concatenate encoded columns
    input_data = input_data.drop(categorical_cols, axis=1)
    input_data = pd.concat([input_data, encoded_input_df], axis=1)


    probabilities = model.predict_proba(input_data)
    predicted_disease = model.predict(input_data)[0]
    confidence = max(probabilities[0])

    if confidence < 0.9:
        return "Low confidence in prediction. Please consult a doctor."
    return f"Predicted Disease: {predicted_disease} (Confidence: {confidence:.2f})"


Model trained with accuracy: 100.00%
