In [1]:
# ml_dashboard.py
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

# Streamlit dashboard script: upload a CSV, preview it, train a random-forest
# model on a user-chosen target column, show a test metric and feature
# importances, then predict from manually entered feature values.
#
# NOTE: Streamlit re-executes this whole script on every widget interaction,
# so everything below (including model training) runs again on each click.

# App Title
st.title("ML-Powered Data Dashboard")
st.write("Upload a dataset, visualize it, and build machine learning models!")

# File Upload
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

if uploaded_file is not None:
    # Load dataset
    df = pd.read_csv(uploaded_file)
    st.subheader("Dataset Preview")
    st.dataframe(df.head())

    # Display dataset info
    st.subheader("Dataset Info")
    st.write("Shape:", df.shape)
    st.write("Columns:", list(df.columns))
    st.write("Data Types:")
    st.write(df.dtypes)

    # Handle missing values (simple drop for now)
    if st.checkbox("Drop rows with missing values"):
        df = df.dropna()
        st.write("Updated Shape (after dropping rows):", df.shape)

    # ML Section
    st.subheader("Machine Learning Model Builder")

    # train_test_split cannot produce a non-empty train and test set from
    # fewer than two rows, so bail out with a message instead of a traceback.
    if len(df) < 2:
        st.warning("At least two rows are required to train a model.")
        st.stop()

    # Select Target Variable
    target = st.selectbox("Select the target variable", df.columns)

    # Exclude target variable from features
    features = df.drop(columns=[target])
    if features.shape[1] == 0:
        st.warning("The dataset needs at least one feature column besides the target.")
        st.stop()

    # The estimators reject missing values in the target.
    if df[target].isna().any():
        st.warning(
            "The target column contains missing values. "
            "Tick 'Drop rows with missing values' or choose another target."
        )
        st.stop()

    # Encode categorical variables if any
    if features.select_dtypes(include="object").shape[1] > 0:
        features = pd.get_dummies(features)
        st.write("Encoded Features:", features.columns)

    # Capture the column names AFTER encoding: these are the columns the model
    # is actually trained on, so the feature-importance index and the
    # prediction inputs must use them (the pre-encoding names have a different
    # length whenever a categorical column was expanded).
    feature_columns = features.columns

    # Split Data
    X_train, X_test, y_train, y_test = train_test_split(
        features, df[target], test_size=0.2, random_state=42
    )

    # Select Task Type
    task = st.radio("Select Task", ["Regression", "Classification"])

    if task == "Regression" and not pd.api.types.is_numeric_dtype(df[target]):
        st.error(
            f"Target '{target}' is not numeric, so it cannot be used for "
            "regression. Choose 'Classification' or another target."
        )
        st.stop()

    # Fixed seed so the metric and the model used for prediction stay the
    # same across Streamlit reruns of this script.
    if task == "Classification":
        model = RandomForestClassifier(random_state=42)
    else:
        model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train)

    # Evaluate Model
    y_pred = model.predict(X_test)
    if task == "Classification":
        accuracy = accuracy_score(y_test, y_pred)
        st.write(f"Model Accuracy: {accuracy:.2f}")
    else:
        # Take the root of the MSE directly instead of passing
        # `squared=False`, which newer scikit-learn releases no longer accept.
        rmse = mean_squared_error(y_test, y_pred) ** 0.5
        st.write(f"Model RMSE: {rmse:.2f}")

    # Feature Importance
    st.subheader("Feature Importance")
    feature_importance = pd.Series(model.feature_importances_, index=feature_columns)
    st.bar_chart(feature_importance)

    # Prediction Section
    st.subheader("Make Predictions")
    st.write("Input data for prediction:")
    # One numeric input per trained (post-encoding) feature column, so the
    # single-row frame lines up with what the model was fitted on.
    user_input = {
        col: st.number_input(f"Value for {col}", value=0.0)
        for col in feature_columns
    }
    input_df = pd.DataFrame([user_input])

    if st.button("Predict"):
        prediction = model.predict(input_df)
        st.write(f"Prediction: {prediction[0]}")

else:
    st.write("Upload a CSV file to get started.")

2024-11-19 13:31:43.194 
  command:

    streamlit run /Users/steven/.pyenv/versions/okc/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]
