# Process Human Data

In [1]:
import pandas as pd

# Read the CSV into a DataFrame
data = pd.read_csv('../simulation/data_w_simulation.csv')

# Shape before filtering
print(data.shape)

# Keep only the rows whose 'Finished' value equals 1
data = data.loc[data['Finished'].eq(1)]

# Shape after filtering
print(data.shape)

(356, 240)
(350, 240)


In [2]:
# Keep only the rows where the English reading/comprehension item equals 5;
# the shape is printed before and after so the number of dropped rows is visible.
print(data.shape)
data = data.loc[data['8) English language reading/comprehension ability:'].eq(5)]
print(data.shape)

(350, 240)
(290, 240)


In [3]:
# Drop the rows that have a missing value in the 18th column (position 17);
# the column itself is kept. Shapes are printed before and after.
print(data.shape)
data = data.dropna(axis='index', subset=[data.columns[17]])
print(data.shape)

(290, 240)
(276, 240)


In [4]:
import pandas as pd
import statsmodels.api as sm

# Columns used as the single predictor in each regression
predictors = [
    'openness',
    'conscientiousness',
    'extraversion',
    'agreeableness',
    'neuroticism',
]

# Columns used as the response in each regression.
# Alternative (aggregate) targets: ['ethic_sum', 'risk_sum']
targets = [
    'ethic_1_1',
    'ethic_2_1',
    'ethic_3_1',
    'ethic_4_1',
    'ethic_5_1',
]
# Function to perform simple linear regression and print results
def simple_linear_regression(X, y, predictor_name, target_name):
    """Fit an OLS model of ``y`` on ``X`` plus a constant and print a report.

    The report consists of a heading naming the predictor and target, the
    coefficient, p-value, standard error and t-statistic stored at position 1
    of the fitted results, the model's R-squared and adjusted R-squared, and
    the coefficient table (``tables[1]``) of the model summary.

    Parameters
    ----------
    X : predictor values; passed to ``sm.add_constant`` before fitting.
    y : response values; passed to ``sm.OLS`` as the dependent variable.
    predictor_name : label for the predictor, used only in the heading.
    target_name : label for the response, used only in the heading.

    Returns
    -------
    None. Everything is written to standard output.
    """
    design = sm.add_constant(X)
    fitted = sm.OLS(y, design).fit()

    # Position 1 is read as the predictor's entry: the printed coefficient
    # tables list the added 'const' term first and the predictor second.
    slope_pos = 1

    print(f"\nRegression Results for {predictor_name} predicting {target_name}")
    print(f"Coefficient: {fitted.params.iloc[slope_pos]:.4f}")
    print(f"P-value: {fitted.pvalues.iloc[slope_pos]:.4f}")
    print(f"R-squared: {fitted.rsquared:.4f}")
    print(f"Adjusted R-squared: {fitted.rsquared_adj:.4f}")
    print(f"Standard Error: {fitted.bse.iloc[slope_pos]:.4f}")
    print(f"T-statistic: {fitted.tvalues.iloc[slope_pos]:.4f}")
    print("\nModel Summary:")
    coefficient_table = fitted.summary().tables[1]
    print(coefficient_table)

# Run one single-predictor regression for every (target, predictor) pair
for target in targets:
    # Banner naming the target whose regressions follow
    print(f"\n{'='*50}", f"Analysis for {target}", f"{'='*50}", sep="\n")
    for predictor in predictors:
        # Pull the two columns out of the filtered DataFrame
        X = data[predictor]
        y = data[target]
        simple_linear_regression(X, y, predictor, target)

print("\nRegression analysis complete.")


Analysis for ethic_1_1

Regression Results for openness predicting ethic_1_1
Coefficient: -0.0566
P-value: 0.0055
R-squared: 0.0278
Adjusted R-squared: 0.0242
Standard Error: 0.0202
T-statistic: -2.7976

Model Summary:
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          8.2910      0.893      9.284      0.000       6.533      10.049
openness      -0.0566      0.020     -2.798      0.006      -0.096      -0.017

Regression Results for conscientiousness predicting ethic_1_1
Coefficient: 0.0307
P-value: 0.1842
R-squared: 0.0064
Adjusted R-squared: 0.0028
Standard Error: 0.0230
T-statistic: 1.3314

Model Summary:
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 4.4917      1.017      4.417      0.000       2.490       6.494
con

## Examine the specific risk-taking columns

In [5]:
# import pandas as pd
import statsmodels.api as sm

# Columns used as the single predictor in each regression
predictors = [
    'openness',
    'conscientiousness',
    'extraversion',
    'agreeableness',
    'neuroticism',
]

# Columns used as the response in each regression
targets = [
    'ethic_sum',
    'risk_sum',
]

# Function to perform simple linear regression and print results
def simple_linear_regression(X, y, predictor_name, target_name):
    """Fit an OLS model of ``y`` on ``X`` plus a constant and print a report.

    The report consists of a heading naming the predictor and target, the
    coefficient, p-value, standard error and t-statistic stored at position 1
    of the fitted results, the model's R-squared and adjusted R-squared, and
    the coefficient table (``tables[1]``) of the model summary.

    Note: a helper with the same name and the same behaviour is defined in an
    earlier cell of this notebook; running this cell rebinds the name.

    Parameters
    ----------
    X : predictor values; passed to ``sm.add_constant`` before fitting.
    y : response values; passed to ``sm.OLS`` as the dependent variable.
    predictor_name : label for the predictor, used only in the heading.
    target_name : label for the response, used only in the heading.

    Returns
    -------
    None. Everything is written to standard output.
    """
    design = sm.add_constant(X)
    fitted = sm.OLS(y, design).fit()

    # Position 1 is read as the predictor's entry: the printed coefficient
    # tables list the added 'const' term first and the predictor second.
    slope_pos = 1

    print(f"\nRegression Results for {predictor_name} predicting {target_name}")
    print(f"Coefficient: {fitted.params.iloc[slope_pos]:.4f}")
    print(f"P-value: {fitted.pvalues.iloc[slope_pos]:.4f}")
    print(f"R-squared: {fitted.rsquared:.4f}")
    print(f"Adjusted R-squared: {fitted.rsquared_adj:.4f}")
    print(f"Standard Error: {fitted.bse.iloc[slope_pos]:.4f}")
    print(f"T-statistic: {fitted.tvalues.iloc[slope_pos]:.4f}")
    print("\nModel Summary:")
    coefficient_table = fitted.summary().tables[1]
    print(coefficient_table)

# Run one single-predictor regression for every (target, predictor) pair
for target in targets:
    # Banner naming the target whose regressions follow
    print(f"\n{'='*50}", f"Analysis for {target}", f"{'='*50}", sep="\n")
    for predictor in predictors:
        # Pull the two columns out of the filtered DataFrame
        X = data[predictor]
        y = data[target]
        simple_linear_regression(X, y, predictor, target)

print("\nRegression analysis complete.")


Analysis for ethic_sum

Regression Results for openness predicting ethic_sum
Coefficient: -0.0402
P-value: 0.4486
R-squared: 0.0021
Adjusted R-squared: -0.0015
Standard Error: 0.0530
T-statistic: -0.7589

Model Summary:
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         28.0792      2.340     11.999      0.000      23.472      32.686
openness      -0.0402      0.053     -0.759      0.449      -0.145       0.064

Regression Results for conscientiousness predicting ethic_sum
Coefficient: 0.2294
P-value: 0.0001
R-squared: 0.0537
Adjusted R-squared: 0.0503
Standard Error: 0.0581
T-statistic: 3.9445

Model Summary:
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                16.3242      2.567      6.360      0.000      11.271      21.377
co

# Process Simulation Data

In [6]:
# Threshold values embedded in the target column names
thresholds = [0]

# Prefixes of the target column names
base_targets = ['moral', 'risk']

# One "<base>_<threshold>_sum" name per base/threshold pair;
# the base varies slowest, the threshold fastest.
targets = [
    f"{base}_{threshold}_sum"
    for base in base_targets
    for threshold in thresholds
]

# Run one single-predictor regression for every (target, predictor) pair
for target in targets:
    # Banner naming the target whose regressions follow
    print(f"\n{'='*50}", f"Analysis for {target}", f"{'='*50}", sep="\n")
    for predictor in predictors:
        # Pull the two columns out of the filtered DataFrame
        X = data[predictor]
        y = data[target]
        simple_linear_regression(X, y, predictor, target)

print("\nRegression analysis complete.")


Analysis for moral_0_sum

Regression Results for openness predicting moral_0_sum
Coefficient: -0.1376
P-value: 0.0050
R-squared: 0.0283
Adjusted R-squared: 0.0248
Standard Error: 0.0487
T-statistic: -2.8268

Model Summary:
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         24.8678      2.149     11.574      0.000      20.638      29.098
openness      -0.1376      0.049     -2.827      0.005      -0.233      -0.042

Regression Results for conscientiousness predicting moral_0_sum
Coefficient: 0.1399
P-value: 0.0115
R-squared: 0.0231
Adjusted R-squared: 0.0195
Standard Error: 0.0550
T-statistic: 2.5441

Model Summary:
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                12.7824      2.427      5.267      0.000       8.005      17.5

In [7]:
# Threshold values embedded in the target column names
thresholds = [1]

# Prefixes of the target column names (not referenced when building
# the target list below, which uses the fixed 'moral' prefix)
base_targets = ['moral', 'risk']

# Scenario suffixes of the per-scenario moral columns
moral_scenarios = [
    "Confidential_Info",
    "Underage_Drinking",
    "Exam_Cheating",
    "Honest_Feedback",
    "Workplace_Theft",
]

# One "moral_<threshold>_<scenario>" name per scenario/threshold pair;
# the scenario varies slowest, the threshold fastest.
targets = [
    f"moral_{threshold}_{scenario}"
    for scenario in moral_scenarios
    for threshold in thresholds
]

# Run one single-predictor regression for every (target, predictor) pair
for target in targets:
    # Banner naming the target whose regressions follow
    print(f"\n{'='*50}", f"Analysis for {target}", f"{'='*50}", sep="\n")
    for predictor in predictors:
        # Pull the two columns out of the filtered DataFrame
        X = data[predictor]
        y = data[target]
        simple_linear_regression(X, y, predictor, target)

print("\nRegression analysis complete.")


Analysis for moral_1_Confidential_Info

Regression Results for openness predicting moral_1_Confidential_Info
Coefficient: -0.0319
P-value: 0.0178
R-squared: 0.0203
Adjusted R-squared: 0.0168
Standard Error: 0.0134
T-statistic: -2.3847

Model Summary:
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.4857      0.591      5.900      0.000       2.323       4.649
openness      -0.0319      0.013     -2.385      0.018      -0.058      -0.006

Regression Results for conscientiousness predicting moral_1_Confidential_Info
Coefficient: 0.0017
P-value: 0.9115
R-squared: 0.0000
Adjusted R-squared: -0.0036
Standard Error: 0.0152
T-statistic: 0.1112

Model Summary:
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 2.0239      0.672 