In [1]:
import pandas as pd
import numpy as np

# Read the source employee records; later statements rely on its 'Performance' column.
# NOTE(review): the rendered output shows an 'Unnamed: 0' column, which suggests the
# CSV was written together with its index -- confirm whether that column is wanted.
file_path = 'Employee_testdata_with_performance.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Row count of the source data (one synthetic sample is generated per source row)
num_samples = len(df)

# Agent-based simulation setup: pin NumPy's global random state so that every
# run of the notebook produces the same synthetic values
np.random.seed(seed=42)

def agent_behavior(performance, noise_std=0.1):
    """Simulate three behaviour scores for one agent from its performance score.

    The performance is divided by 100 and used as the mean of three independent
    normal draws taken, in order, for engagement, collaboration and flexibility.
    The draws come from NumPy's global random state (``np.random``), so results
    are reproducible only if the caller seeds it. Each draw is clipped to
    [0, 1] and then rescaled by 100.

    Parameters
    ----------
    performance : float
        Performance score. The /100 normalisation only yields a mean inside
        [0, 1] when the score is on a 0-100 scale; values outside that range
        are still accepted and simply end up clipped.
    noise_std : float, default 0.1
        Standard deviation of the noise on the normalised (0-1) scale. The
        default reproduces the previously hard-coded value; 0 disables noise.

    Returns
    -------
    tuple
        ``(engagement, collaboration, flexibility)``, each within [0, 100]
        (a NaN performance propagates as NaN).
    """
    base_value = performance / 100

    # Draw sequentially (not as one size-3 draw after reordering) so the
    # random stream is consumed in the same order as before:
    # engagement, then collaboration, then flexibility.
    scores = []
    for _ in range(3):
        draw = np.random.normal(base_value, noise_std)
        # Keep the normalised value inside [0, 1] before scaling back to 0-100
        scores.append(np.clip(draw, 0, 1) * 100)

    engagement, collaboration, flexibility = scores
    return engagement, collaboration, flexibility

# Simulate one agent per employee, visiting the Performance scores in row order
# (the order matters: each call consumes the seeded global random stream)
synthetic_data = list(map(agent_behavior, df['Performance']))
synthetic_df = pd.DataFrame(
    data=synthetic_data,
    columns=['TeamEngagement', 'Collaboration', 'Flexibility'],
)

# Attach the three simulated columns to the right of the original columns
df_updated = pd.concat(objs=[df, synthetic_df], axis='columns')

# Persist the combined table without writing the row index
updated_file_path = 'Employee_testdata_with_synthetic_behavior_agent_based.csv'
df_updated.to_csv(path_or_buf=updated_file_path, index=False)

# Preview the first five rows of the combined table
df_updated.head(5)


Unnamed: 0,EmployeeID,Education,JoiningYear,City,Country,Region,PaymentTier,Age,Gender,ExperienceInCurrentDomain,Performance,TeamEngagement,Collaboration,Flexibility
0,1,Bachelors,2017,Toronto,Canada,North America,3,34,Male,0,74.967142,79.934283,73.584499,81.444027
1,2,Bachelors,2013,Pune,India,Asia,1,28,Female,3,68.617357,83.847656,66.275823,66.275987
2,3,Bachelors,2014,New Delhi,India,Asia,3,38,Female,2,76.476885,92.269014,84.151233,71.782142
3,4,Masters,2016,Vancouver,Canada,North America,3,27,Male,5,85.230299,90.655899,80.596122,80.573001
4,5,Masters,2017,Berlin,Germany,Europe,3,24,Male,2,67.658466,70.078089,48.525664,50.409288


In [3]:
import seaborn as sns
import matplotlib.pyplot as plt

# Summarise each synthetic behaviour column by its mean and standard deviation.
# NOTE: `synthetic_df` (and `pd`) are created by the data-generation cell; running
# this cell on a fresh kernel raises NameError, so run the notebook top-to-bottom.
mean_values = synthetic_df.mean()
std_values = synthetic_df.std()

# One row per variable, ready for plotting
plot_df = pd.DataFrame({
    'Variable': mean_values.index,
    'Mean': mean_values.values,
    'Standard Deviation': std_values.values
})

# Use an explicit figure/axes pair instead of the implicit pyplot state
fig, ax = plt.subplots(figsize=(10, 6))
bar_plot = ax  # name kept so it still refers to the Axes holding the bar chart

positions = list(range(len(plot_df)))
means = plot_df['Mean'].values
stds = plot_df['Standard Deviation'].values

# The data are already aggregated (one value per bar), so draw the bars directly;
# this avoids seaborn's deprecated `ci=` / `palette`-without-`hue` arguments while
# keeping the same viridis colours (desat=0.75 mirrors barplot's default saturation).
bar_colors = sns.color_palette('viridis', n_colors=len(plot_df), desat=0.75)
ax.bar(positions, means, color=bar_colors)
ax.set_xticks(positions)
ax.set_xticklabels(plot_df['Variable'], rotation=45)  # Rotate x labels for better readability

# Error bars of +/- one standard deviation, drawn for all bars in a single call
ax.errorbar(positions, means, yerr=stds, fmt='o', color='black', capsize=5)

# Annotate each bar with its mean, placed above the upper error-bar cap so the
# text does not sit on top of the marker and error line
for position, mean, std in zip(positions, means, stds):
    spread = 0 if pd.isna(std) else std  # std is NaN when there is a single row
    ax.text(position, mean + spread + 1, f'{mean:.2f}', ha='center', va='bottom')

# Set titles and labels
ax.set_title('Mean Values of Synthetic Data Variables with Standard Deviation')
ax.set_xlabel('Variables')
ax.set_ylabel('Mean Value')
fig.tight_layout()

plt.show()


NameError: name 'synthetic_df' is not defined