In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l1_l2
import numpy as np

# Load the dataset
data = pd.read_csv('social_engagement_balanced.csv')

# Define features and target
X = data.drop(['text_data', 'overloaded'], axis=1)
y = data['overloaded'].astype(int)

# Apply one-hot encoding to categorical features
X = pd.get_dummies(X, columns=['event_type', 'location', 'time_of_day'], drop_first=True)

# Standardize numerical features
scaler = StandardScaler()
numerical_features = ['duration', 'engagement_level']
X[numerical_features] = scaler.fit_transform(X[numerical_features])

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Apply SMOTE to balance the training data
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Build the improved neural network model
model = Sequential([
    Input(shape=(X_train_smote.shape[1],)),
    Dense(256, activation='relu', kernel_regularizer=l1_l2(l1=0.0001, l2=0.001)),
    BatchNormalization(),
    Dropout(0.4),
    Dense(128, activation='relu', kernel_regularizer=l1_l2(l1=0.0001, l2=0.001)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu', kernel_regularizer=l1_l2(l1=0.0001, l2=0.001)),
    BatchNormalization(),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005), loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
model.fit(X_train_smote, y_train_smote, epochs=20, batch_size=16, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')


Epoch 1/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 6ms/step - accuracy: 0.8992 - loss: 0.7673 - val_accuracy: 0.9936 - val_loss: 0.4320
Epoch 2/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - accuracy: 0.9655 - loss: 0.4646 - val_accuracy: 0.9994 - val_loss: 0.2700
Epoch 3/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.9831 - loss: 0.2867 - val_accuracy: 1.0000 - val_loss: 0.1654
Epoch 4/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.9905 - loss: 0.1782 - val_accuracy: 1.0000 - val_loss: 0.1088
Epoch 5/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.9924 - loss: 0.1253 - val_accuracy: 1.0000 - val_loss: 0.0803
Epoch 6/20
[1m1251/1251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - accuracy: 0.9947 - loss: 0.0945 - val_accuracy: 1.0000 - val_loss: 0.0714
Epoch 7/20
[1m

In [5]:
model.save('cognitive_engagement_1_model.h5')



In [6]:

# Example 1: Likely Overloaded Person
overloaded_input = pd.DataFrame({
    'duration': [120],          # 2-hour long activity
    'engagement_level': [9],    # High engagement required
    'event_type': ['meeting'],  # Complex meeting
    'location': ['office'],     # Office environment
    'time_of_day': ['afternoon']# Afternoon (when fatigue might be higher)
})

# Example 2: Likely Not Overloaded Person
underloaded_input = pd.DataFrame({
    'duration': [30],           # Short 30-minute activity
    'engagement_level': [3],    # Low engagement required
    'event_type': ['break'],    # Taking a break
    'location': ['home'],       # Comfortable home environment
    'time_of_day': ['morning']  # Fresh morning hours
})

model = tf.keras.models.load_model('cognitive_engagement_1_model.h5')
# Process and predict for both examples
def predict_cognitive_load(input_data):
    # One-hot encode categorical features
    processed_input = pd.get_dummies(input_data, columns=['event_type', 'location', 'time_of_day'])
    
    # Ensure all columns from training data are present
    missing_cols = set(X.columns) - set(processed_input.columns)
    for col in missing_cols:
        processed_input[col] = 0
    processed_input = processed_input[X.columns]
    
    # Scale numerical features
    processed_input[numerical_features] = scaler.transform(processed_input[numerical_features])
    
    # Make prediction
    prediction = model.predict(processed_input)
    probability = prediction[0][0]
    
    return probability

# Test both scenarios
print("=== Testing Overloaded Scenario ===")
overload_prob = predict_cognitive_load(overloaded_input)
print(f"Input parameters:")
print(overloaded_input.to_string())
print(f"Probability of cognitive overload: {overload_prob:.2f}")
print(f"Is person likely to be overloaded? {'Yes' if overload_prob >= 0.5 else 'No'}\n")

print("=== Testing Underloaded Scenario ===")
underload_prob = predict_cognitive_load(underloaded_input)
print(f"Input parameters:")
print(underloaded_input.to_string())
print(f"Probability of cognitive overload: {underload_prob:.2f}")
print(f"Is person likely to be overloaded? {'Yes' if underload_prob >= 0.5 else 'No'}")



=== Testing Overloaded Scenario ===
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 199ms/step
Input parameters:
   duration  engagement_level event_type location time_of_day
0       120                 9    meeting   office   afternoon
Probability of cognitive overload: 1.00
Is person likely to be overloaded? Yes

=== Testing Underloaded Scenario ===
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Input parameters:
   duration  engagement_level event_type location time_of_day
0        30                 3      break     home     morning
Probability of cognitive overload: 0.00
Is person likely to be overloaded? No


In [7]:
import joblib

# Save the model (you've already done this)
model.save('cognitive_engagement_model.h5')

# Save the scaler
joblib.dump(scaler, 'scaler.joblib')

# Save the column names
joblib.dump(X.columns.tolist(), 'feature_columns.joblib')



['feature_columns.joblib']

In [None]:
import shap
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from imblearn.over_sampling import SMOTE
import tensorflow as tf
import numpy as np

model = "cognitive_engagement_model.h5"

def analyze_feature_importance(model, X_test):
    """Analyze and visualize feature importance using SHAP values"""
    # Create SHAP explainer
    explainer = shap.KernelExplainer(model.predict, X_test[:100])
    shap_values = explainer.shap_values(X_test[:100])
    
    # Calculate feature importance
    feature_importance = {}
    for i, col in enumerate(X_test.columns):
        feature_importance[col] = np.abs(shap_values[0][:, i]).mean()
    
    # Visualize SHAP values
    plt.figure(figsize=(10, 6))
    shap.summary_plot(shap_values[0], X_test[:100], plot_type="bar")
    plt.title("Feature Importance Analysis")
    plt.tight_layout()
    plt.show()
    
    return feature_importance

def analyze_cognitive_patterns(data):
    """Analyze patterns in cognitive load based on different factors"""
    patterns = {
        'time_patterns': data.groupby('time_of_day')['overloaded'].mean(),
        'location_patterns': data.groupby('location')['overloaded'].mean(),
        'event_patterns': data.groupby('event_type')['overloaded'].mean(),
        'duration_correlation': data['duration'].corr(data['overloaded']),
        'engagement_correlation': data['engagement_level'].corr(data['overloaded'])
    }
    return patterns

def generate_recommendations(input_data, prediction_prob, feature_importance):
    """Generate detailed recommendations based on input factors"""
    recommendations = []
    risk_factors = []
    mitigation_strategies = []
    
    # Duration analysis
    duration = input_data['duration'].iloc[0]
    if duration > 60:
        risk_level = 'high' if duration > 90 else 'moderate'
        risk_factors.append({
            'factor': 'Duration',
            'value': f"{duration} minutes",
            'risk_level': risk_level,
            'impact': 'Extended duration increases cognitive load'
        })
        recommendations.extend([
            "Break down the activity into 45-minute segments",
            f"Include {5 if duration <= 90 else 10} minute breaks between segments",
            "Use Pomodoro technique (25 min work + 5 min break)"
        ])

    # Engagement level analysis
    engagement = input_data['engagement_level'].iloc[0]
    if engagement > 7:
        risk_factors.append({
            'factor': 'Engagement Level',
            'value': f"{engagement}/10",
            'risk_level': 'high' if engagement > 8 else 'moderate',
            'impact': 'High mental engagement can lead to faster fatigue'
        })
        recommendations.extend([
            "Incorporate micro-breaks every 20 minutes",
            "Use active rest techniques (brief stretching, eye exercises)",
            "Alternate between high and low engagement tasks"
        ])

    # Time of day analysis
    time_recommendations = {
        'morning': ["Utilize peak alertness for complex tasks",
                   "Schedule high-engagement activities"],
        'afternoon': ["Take a short walk after lunch",
                     "Use active engagement techniques to maintain focus"],
        'evening': ["Reduce screen brightness",
                   "Focus on wrap-up and planning tasks"]
    }
    
    time_of_day = input_data['time_of_day'].iloc[0]
    recommendations.extend(time_recommendations.get(time_of_day, []))

    # Event type specific recommendations
    event_type = input_data['event_type'].iloc[0]
    event_recommendations = {
        'meeting': [
            "Use structured agenda to maintain focus",
            "Implement 5-minute breaks every 45 minutes",
            "Encourage active participation to maintain engagement"
        ],
        'training': [
            "Include practical exercises",
            "Use varied learning methods",
            "Schedule regular reflection periods"
        ],
        'focus_work': [
            "Use time-blocking technique",
            "Minimize distractions",
            "Set clear milestones"
        ]
    }
    recommendations.extend(event_recommendations.get(event_type, []))

    # Location-based recommendations
    location = input_data['location'].iloc[0]
    location_recommendations = {
        'office': [
            "Use noise-canceling headphones if needed",
            "Find quiet spaces for high-focus work",
            "Adjust environmental factors (lighting, temperature)"
        ],
        'home': [
            "Maintain a dedicated workspace",
            "Establish clear work boundaries",
            "Ensure proper ergonomic setup"
        ],
        'remote': [
            "Take regular screen breaks",
            "Maintain virtual social connections",
            "Set up proper lighting for video calls"
        ]
    }
    recommendations.extend(location_recommendations.get(location, []))

    # Calculate overall risk score (0-100)
    risk_score = int(prediction_prob * 100)
    risk_level = 'High' if risk_score > 75 else 'Moderate' if risk_score > 50 else 'Low'

    return {
        'overload_probability': prediction_prob,
        'risk_score': risk_score,
        'risk_level': risk_level,
        'risk_factors': risk_factors,
        'recommendations': list(set(recommendations)),  # Remove duplicates
        'primary_contributors': sorted(
            feature_importance.items(),
            key=lambda x: abs(x[1]),
            reverse=True
        )[:3]  # Top 3 contributing factors
    }

def predict_and_analyze(input_data, model, scaler, X_test):
    """Main function to predict and provide detailed analysis"""
    # Preprocess input
    processed_input = pd.get_dummies(input_data, columns=['event_type', 'location', 'time_of_day'])
    
    # Ensure all columns from training data are present
    missing_cols = set(X.columns) - set(processed_input.columns)
    for col in missing_cols:
        processed_input[col] = 0
    processed_input = processed_input[X.columns]
    
    # Scale numerical features
    processed_input[numerical_features] = scaler.transform(processed_input[numerical_features])
    
    # Get prediction and feature importance
    prediction = model.predict(processed_input)
    probability = prediction[0][0]
    feature_importance = analyze_feature_importance(model, X_test)
    
    # Generate detailed analysis
    analysis = generate_recommendations(input_data, probability, feature_importance)
    
    # Create detailed report
    report = f"""
    Cognitive Load Analysis Report
    ============================
    
    Overall Assessment:
    ------------------
    Risk Level: {analysis['risk_level']}
    Overload Probability: {analysis['overload_probability']:.2f}
    Risk Score: {analysis['risk_score']}/100
    
    Key Risk Factors:
    ----------------
    """
    
    for factor in analysis['risk_factors']:
        report += f"\n• {factor['factor']}: {factor['value']}"
        report += f"\n  Impact: {factor['impact']}"
        report += f"\n  Risk Level: {factor['risk_level'].title()}"
    
    report += "\n\nTop Contributing Factors:"
    report += "\n----------------------"
    for factor, importance in analysis['primary_contributors']:
        report += f"\n• {factor}: {abs(importance):.3f}"
    
    report += "\n\nRecommendations:"
    report += "\n---------------"
    for i, rec in enumerate(analysis['recommendations'], 1):
        report += f"\n{i}. {rec}"
    
    return analysis, report

# Example usage:
test_input = pd.DataFrame({
    'duration': [120],
    'engagement_level': [8],
    'event_type': ['meeting'],
    'location': ['office'],
    'time_of_day': ['afternoon']
})

analysis, report = predict_and_analyze(test_input, model, scaler, X_test)
print(report)

# Visualize the analysis
def visualize_analysis(analysis):
    """Create visualizations for the analysis"""
    # Risk score gauge
    plt.figure(figsize=(15, 5))
    
    # Plot 1: Risk Score Gauge
    plt.subplot(131)
    plt.pie([analysis['risk_score'], 100-analysis['risk_score']], 
            colors=['red' if analysis['risk_score'] > 75 else 'orange' if analysis['risk_score'] > 50 else 'green', 'lightgray'],
            startangle=90)
    plt.title(f"Risk Score: {analysis['risk_score']}%")
    
    # Plot 2: Top Contributors
    plt.subplot(132)
    contributors = analysis['primary_contributors']
    plt.bar([x[0] for x in contributors], [abs(x[1]) for x in contributors])
    plt.xticks(rotation=45)
    plt.title("Top Contributing Factors")
    
    plt.tight_layout()
    plt.show()

# Visualize the analysis
visualize_analysis(analysis)

NameError: name 'scaler' is not defined