# Set Up the Environment

In [None]:
!pip install pandas scikit-learn matplotlib seaborn shap joblib streamlit


Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Downloading streamlit-1.39.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.3/79.3 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[

# Load and Preprocess the Dataset

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset
data = pd.read_csv('/content/user_behavior_dataset.csv')

# Encode categorical variables.
# Every column gets its own LabelEncoder, stored in `label_encoders`, so the
# string <-> integer mapping of each column is still available after this cell
# (re-fitting one shared encoder keeps only the mapping of the last column).
categorical_columns = ['Device Model', 'Operating System', 'Gender']
label_encoders = {}
for column in categorical_columns:
    column_encoder = LabelEncoder()
    data[column] = column_encoder.fit_transform(data[column])
    label_encoders[column] = column_encoder

# Backward-compatible name: the previously shared encoder ended up fitted on 'Gender'.
label_encoder = label_encoders['Gender']

# Split features and target variable ('User ID' is an identifier, not a feature)
X = data.drop(['User ID', 'User Behavior Class'], axis=1)
y = data['User Behavior Class']

# Train-test split (80% training, 20% testing); fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Train a RandomForest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Fit a 100-tree random forest on the training split (seeded for repeatability).
# `fit` returns the estimator itself, so construction and fitting can be chained.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the behavior class of every held-out row
y_pred = rf_model.predict(X_test)

# Report overall accuracy first, then the per-class precision/recall/F1 table
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)


Model Accuracy: 100.00%
Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00        27
           2       1.00      1.00      1.00        29
           3       1.00      1.00      1.00        34
           4       1.00      1.00      1.00        27
           5       1.00      1.00      1.00        23

    accuracy                           1.00       140
   macro avg       1.00      1.00      1.00       140
weighted avg       1.00      1.00      1.00       140



# Explainable AI (XAI) Using SHAP

In [None]:
import shap

# Initialize SHAP Explainer
explainer = shap.TreeExplainer(rf_model)

# Calculate SHAP values
shap_values = explainer.shap_values(X_test)

# Select the SHAP matrix of ONE class (position 1 in rf_model.classes_).
# Depending on the SHAP version, a multiclass model yields either
#   * a list with one (n_samples, n_features) array per class, or
#   * a single (n_samples, n_features, n_classes) array.
# Indexing the 3-D array with [1] would pick a *sample*, not a class, and
# summary_plot would then fail with a shape-mismatch AssertionError.
class_index = 1
if isinstance(shap_values, list):
    class_shap_values = shap_values[class_index]
elif getattr(shap_values, "ndim", 2) == 3:
    class_shap_values = shap_values[:, :, class_index]
else:
    # Already a 2-D (n_samples, n_features) matrix: nothing to select.
    class_shap_values = shap_values

# Plot SHAP summary plot for the selected class
shap.summary_plot(class_shap_values, X_test, feature_names=X.columns)


AssertionError: The shape of the shap_values matrix does not match the shape of the provided data matrix.

# Save the Model Using joblib

In [None]:
import joblib

# Serialize the fitted random forest to a file in the current working directory
joblib.dump(value=rf_model, filename='user_behavior_model.pkl')
print("Model saved as 'user_behavior_model.pkl'")


Model saved as 'user_behavior_model.pkl'


# Build a Streamlit Dashboard

In [None]:
!pip install streamlit
import streamlit as st
import pandas as pd
import joblib

# Load the trained model
@st.cache_resource
def load_model(path='user_behavior_model.pkl'):
    """Load the persisted classifier once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction;
    `st.cache_resource` keeps the unpickled model in memory instead of
    reading the file again each time.

    Args:
        path: Location of the joblib file to load.

    Returns:
        The object stored in the joblib file.
    """
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load model files from a trusted source.
    return joblib.load(path)


model = load_model()

# Define a function for app recommendations
def recommend_apps(user_class):
    """Return the list of suggested apps for a user behavior class.

    Classes 1-4 have a dedicated list; any other value (including class 5,
    which has no entry) yields the single-item fallback list.

    Args:
        user_class: Behavior class used as the lookup key.

    Returns:
        A list of app names, or ['No recommendations available'] when the
        class has no entry.
    """
    apps_by_class = dict([
        (1, ['Instagram', 'YouTube', 'WhatsApp']),
        (2, ['LinkedIn', 'Outlook', 'Zoom']),
        (3, ['Netflix', 'Spotify', 'Reddit']),
        (4, ['Google Maps', 'Uber', 'TripAdvisor']),
    ])
    if user_class in apps_by_class:
        return apps_by_class[user_class]
    return ['No recommendations available']

# Streamlit app UI
st.title("User Behavior Prediction and App Recommendation")

uploaded_file = st.file_uploader("Upload a CSV File", type="csv")

# file_uploader returns None until the user has provided a file
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)

    # Display uploaded data
    st.write("Uploaded Data:", data.head())

    # The model must receive exactly the columns it was fitted on, in the same
    # order. scikit-learn records them in `feature_names_in_` when the model is
    # fitted on a DataFrame; fall back to the uploaded columns if it is absent.
    expected_columns = list(getattr(model, "feature_names_in_", data.columns))
    missing_columns = [col for col in expected_columns if col not in data.columns]
    non_numeric_columns = [
        col for col in expected_columns
        if col in data.columns and not pd.api.types.is_numeric_dtype(data[col])
    ]

    if missing_columns:
        st.error(f"The uploaded file is missing required columns: {missing_columns}")
    elif non_numeric_columns:
        # Training used integer-encoded categorical columns; raw text values
        # cannot be passed to the model as they are.
        st.error(
            "These columns must be numerically encoded the same way as in "
            f"training before prediction: {non_numeric_columns}"
        )
    else:
        # Extra columns (IDs, an existing label column, ...) are ignored
        features = data[expected_columns]

        # Make predictions
        predictions = model.predict(features)
        data['Predicted Class'] = predictions

        # Display predictions
        st.write("Predictions:", data[['Predicted Class']])

        # Provide app recommendations
        # NOTE(review): only classes 1-4 are offered here although the training
        # report lists classes 1-5; confirm whether class 5 should be selectable.
        class_selection = st.selectbox("Select User Class for Recommendations", [1, 2, 3, 4])
        apps = recommend_apps(class_selection)
        st.write(f"Recommended Apps for Class {class_selection}: {apps}")




2024-10-26 07:23:41.065 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
