In [12]:
# Import necessary libraries
import streamlit as st
import pandas as pd
import numpy as np
from pycaret.classification import *
from pycaret.regression import *

# Define functions for data loading, EDA, and model training
def load_data(file_path):
    # Function to load data from file_path
    data = pd.read_csv(file_path)  # Example: For CSV files
    return data

def perform_eda(data):
    """Performs exploratory data analysis."""
    st.write(data.head())  # Display first few rows
    st.subheader("Data Types")
    st.write(data.dtypes)

    # Add more EDA functionalities
    import matplotlib.pyplot as plt

    # Distribution of numerical features (optional)
    for col in data.select_dtypes(include=[np.number]):
        data[col].hist(figsize=(5, 4))
        plt.xlabel(col)
        plt.ylabel("Count")
        plt.title(f"Distribution of {col}")
        plt.xticks(rotation=45)
        plt.tight_layout()
        st.pyplot()

    # Missing value analysis
    missing_values = data.isnull().sum()
    st.subheader("Missing Values")
    st.write(missing_values)

def train_model(data, target_variable, selected_models):
    # Function to train machine learning models using PyCaret
    if data[target_variable].dtype == 'object':
        task = 'classification'
    else:
        task = 'regression'
        
    if task == 'classification':
        clf = setup(data, target=target_variable)
        best_model = compare_models(include=selected_models)
    elif task == 'regression':
        reg = setup(data, target=target_variable)
        best_model = compare_models(include=selected_models)
    return best_model

# Create Streamlit web app
st.title('Machine Learning App with PyCaret and Streamlit')
st.sidebar.title('Options')
st.set_option('deprecation.showPyplotGlobalUse', False)

# Sidebar options
file_uploaded = st.sidebar.file_uploader('Upload Dataset (CSV format)', type=['csv'])

if file_uploaded is not None:
    data = load_data(file_uploaded)
    st.success('Data successfully loaded!')

    st.subheader('Exploratory Data Analysis')
    perform_eda(data)
    target_variable = st.selectbox('Select Target Variable', data.columns)
    experiment = setup(data, target=target_variable)
    table_df = experiment.pull() 
    st.dataframe(table_df)
    st.subheader('Machine Learning Model Training')
    selected_models = st.multiselect('Select Models to Train', ['lr', 'knn', 'dt', 'rf', 'xgboost'])

    if st.button('Train Model'):
        st.write('Best Model:')
        best_model = train_model(data, target_variable, selected_models)
        st.write(best_model)

2024-04-24 01:51:17.182 
════════════════════════════════════════════════
deprecation.showPyplotGlobalUse IS NO LONGER SUPPORTED.

The support for global pyplot instances is planned to be removed soon.

Please update <user defined>.
════════════════════════════════════════════════

