In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the Adult dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = [
    "age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
    "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
    "hours-per-week", "native-country", "income"
]
data = pd.read_csv(url, names=column_names, skipinitialspace=True, na_values="?")

# Data preprocessing
data = data.dropna()
data['income'] = (data['income'] == '>50K').astype(int)

# One-hot encode categorical variables
data_encoded = pd.get_dummies(data, columns=[
    "workclass", "education", "marital-status", "occupation",
    "relationship", "race", "sex", "native-country"
])

# Split features and target
X = data_encoded.drop('income', axis=1)
y = data_encoded['income']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Data preparation completed.")

Data preparation completed.


In [2]:
class RecursiveRewardModel:
    def __init__(self):
        self.subtask_models = {}
        self.feature_groups = {
            'demographics': ['age', 'race', 'sex', 'native-country'],
            'education': ['education', 'education-num'],
            'employment': ['workclass', 'occupation', 'hours-per-week'],
            'financial': ['capital-gain', 'capital-loss', 'fnlwgt']
        }

    def train_subtask_model(self, feature_group, X, y):
        subtask_features = [col for col in X.columns if any(feat in col for feat in self.feature_groups[feature_group])]
        subtask_X = X[subtask_features]
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(subtask_X, y)
        self.subtask_models[feature_group] = model

    def predict_with_oversight(self, X, overseer):
        subtask_predictions = {}
        for group, model in self.subtask_models.items():
            subtask_features = [col for col in X.columns if any(feat in col for feat in self.feature_groups[group])]
            subtask_X = X[subtask_features]
            subtask_pred = model.predict_proba(subtask_X)[:, 1]
            subtask_predictions[group] = subtask_pred
            
            # Overseer reviews subtask predictions
            if not overseer.review_subtask(group, subtask_pred):
                print(f"Overseer intervened in {group} subtask.")
                subtask_predictions[group] = overseer.adjust_prediction(subtask_pred)

        # Combine subtask predictions (simple average for demonstration)
        final_prediction = np.mean(list(subtask_predictions.values()), axis=0)
        return (final_prediction > 0.5).astype(int)

class HumanOverseer:
    def review_subtask(self, subtask_name, predictions):
        # Simulate human oversight (in reality, this would involve human judgment)
        return np.random.random() > 0.2  # 20% chance of intervention

    def adjust_prediction(self, predictions):
        # Simulate human adjustment (in reality, this would involve human decision-making)
        return predictions + np.random.normal(0, 0.1, predictions.shape)

# Usage
rrm = RecursiveRewardModel()
overseer = HumanOverseer()

# Train subtask models
for group in rrm.feature_groups:
    rrm.train_subtask_model(group, X_train, y_train)

# Make predictions with oversight
y_pred = rrm.predict_with_oversight(X_test, overseer)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy with Recursive Reward Modeling and oversight: {accuracy:.4f}")

Overseer intervened in demographics subtask.
Accuracy with Recursive Reward Modeling and oversight: 0.8071


In [4]:
import random
import numpy as np
from sklearn.inspection import permutation_importance

class DebateAgent:
    def __init__(self, name, model, X, y, feature_names):
        self.name = name
        self.model = model
        self.feature_names = feature_names
        self.feature_importance = self.calculate_feature_importance(X, y)

    def calculate_feature_importance(self, X, y):
        perm_importance = permutation_importance(self.model, X, y, n_repeats=10, random_state=42)
        return {feature: importance for feature, importance in zip(self.feature_names, perm_importance.importances_mean)}

    def generate_argument(self):
        feature, importance = max(self.feature_importance.items(), key=lambda x: x[1])
        return f"{self.name} argues that '{feature}' is the most important feature with an importance score of {importance:.4f}"

    def cross_examine(self, argument):
        feature = argument.split("'")[1]
        own_importance = self.feature_importance[feature]
        return f"{self.name} questions: Is '{feature}' really that important? In my analysis, it has an importance of only {own_importance:.4f}"

def debate_round(agent1, agent2):
    arg1 = agent1.generate_argument()
    arg2 = agent2.generate_argument()
    cross1 = agent1.cross_examine(arg2)
    cross2 = agent2.cross_examine(arg1)
    return [arg1, arg2, cross1, cross2]

def human_judge(debate_transcript):
    # In a real scenario, this would involve human judgment
    return random.choice(["Agent 1 wins", "Agent 2 wins"])

# Assuming X_train and X_train_scaled are available from previous preprocessing steps
feature_names = X_train.columns.tolist()

# Train two different models for debate
model1 = RandomForestClassifier(n_estimators=100, random_state=42)
model2 = RandomForestClassifier(n_estimators=100, random_state=24)
model1.fit(X_train_scaled, y_train)
model2.fit(X_train_scaled, y_train)

# Create debate agents
agent1 = DebateAgent("Agent 1", model1, X_train_scaled, y_train, feature_names)
agent2 = DebateAgent("Agent 2", model2, X_train_scaled, y_train, feature_names)

# Conduct debate
debate_transcript = debate_round(agent1, agent2)
for argument in debate_transcript:
    print(argument)

result = human_judge(debate_transcript)
print(f"\nJudgement: {result}")

KeyboardInterrupt: 

In [None]:
class CognitiveSubtask:
    def __init__(self, name, model, features):
        self.name = name
        self.model = model
        self.features = features

    def execute(self, X):
        return self.model.predict_proba(X[self.features])[:, 1]

def train_subtask_model(X, y, features):
    model = RandomForestClassifier(n_estimators=50, random_state=42)
    model.fit(X[features], y)
    return model

def human_oversight(subtask_name, predictions):
    # Simulate human oversight (in reality, this would involve human judgment)
    print(f"Human reviewing {subtask_name} predictions...")
    return predictions  # In this simple example, we're not modifying the predictions

# Define subtasks
subtasks = [
    CognitiveSubtask("Demographics", train_subtask_model(X_train, y_train, ['age', 'race_White', 'sex_Male']), 
                     ['age', 'race_White', 'sex_Male']),
    CognitiveSubtask("Education", train_subtask_model(X_train, y_train, ['education-num', 'education_Bachelors']), 
                     ['education-num', 'education_Bachelors']),
    CognitiveSubtask("Employment", train_subtask_model(X_train, y_train, ['hours-per-week', 'workclass_Private']), 
                     ['hours-per-week', 'workclass_Private'])
]

# Execute factored cognition process
subtask_results = []
for subtask in subtasks:
    result = subtask.execute(X_test)
    overseen_result = human_oversight(subtask.name, result)
    subtask_results.append(overseen_result)

# Combine subtask results (simple average for demonstration)
final_prediction = np.mean(subtask_results, axis=0)
y_pred = (final_prediction > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy with Factored Cognition approach: {accuracy:.4f}")

In [None]:
class AdaptiveOversightProtocol:
    def __init__(self, model, confidence_threshold):
        self.model = model
        self.confidence_threshold = confidence_threshold
        self.total_predictions = 0
        self.human_reviewed_predictions = 0

    def predict_with_oversight(self, X):
        self.total_predictions += len(X)
        probabilities = self.model.predict_proba(X)
        confidence = np.max(probabilities, axis=1)
        predictions = np.argmax(probabilities, axis=1)

        for i, (pred, conf) in enumerate(zip(predictions, confidence)):
            if conf < self.confidence_threshold:
                self.human_reviewed_predictions += 1
                # Simulate human review (in reality, this would involve human judgment)
                print(f"Low confidence prediction ({conf:.4f}). Human review required.")
                # For simplicity, we're not changing the prediction here
            else:
                print(f"High confidence prediction ({conf:.4f}). No human review required.")

        return predictions

    def get_oversight_stats(self):
        return f"Total predictions: {self.total_predictions}, Human-reviewed predictions: {self.human_reviewed_predictions}"

# Train a model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Create adaptive oversight protocol
protocol = AdaptiveOversightProtocol(model, confidence_threshold=0.8)

# Make predictions with adaptive oversight
y_pred = protocol.predict_with_oversight(X_test_scaled[:20])  # Using only first 20 samples for brevity

accuracy = accuracy_score(y_test[:20], y_pred)
print(f"\nAccuracy with Adaptive Oversight: {accuracy:.4f}")
print(protocol.get_oversight_stats())