# Code: Data Simulation
## Sasha Lawson

### Section 1: Setup

In [1]:
#Import statements needed.
import pandas as pd
import numpy as np
import random

In [2]:
#Create a function to simulate data across various columns. "rows" being the number of rows.
def simulate_data(rows):
    """Simulate employee-satisfaction survey data.

    Each row gets a sequential 1-based 'Index', a randomly chosen company,
    role, location and company size, an industry picked from the industries
    associated with the chosen role, and one random integer score per
    satisfaction category.

    Args:
        rows: Number of rows to generate. Zero or a negative number yields an
            empty dataframe that still carries every column.

    Returns:
        A pandas DataFrame with the columns 'Index', 'Company', 'Role',
        'Industry', 'Location', 'Company_Size', 'Work_Life_Balance',
        'Career_Growth', 'Compensation', 'Leadership' and 'Colleagues'.
    """

    #Define the various fake companies.
    companies = ['Company A', 'Company B', 'Company C', 'Company D', 'Company E', 'Company F', 'Company G', 'Company H']

    #Define the various roles and the industries each role can appear in.
    roles_to_industries = {
        'Software Engineer': ['Tech', 'Telecommunications', 'Finance'],
        'Project Manager': ['Tech', 'Finance', 'Manufacturing', 'Healthcare'],
        'HR Specialist': ['Tech', 'Healthcare', 'Finance', 'Retail', 'Education'],
        'Accountant': ['Finance', 'Retail', 'Manufacturing', 'Education'],
        'Marketing Manager': ['Tech', 'Retail', 'Education', 'Telecommunications'],
        'Customer Support': ['Tech', 'Retail', 'Telecommunications'],
        'Operations Manager': ['Manufacturing', 'Energy', 'Retail'],
        'Product Designer': ['Tech', 'Manufacturing', 'Retail'],
        'Business Analyst': ['Tech', 'Finance', 'Healthcare', 'Retail'],
        'Data Scientist': ['Tech', 'Healthcare', 'Finance', 'Energy'],
        'Data Analyst': ['Tech', 'Healthcare', 'Finance', 'Education'],
        'Machine Learning Engineer': ['Tech', 'Healthcare', 'Finance'],
        'Sales Executive': ['Retail', 'Telecommunications', 'Finance'],
        'Legal Advisor': ['Tech', 'Finance', 'Healthcare'],
        'Doctor': ['Healthcare'],
        'Nurse': ['Healthcare'],
        'Teacher': ['Education'],
        'Construction Worker': ['Manufacturing', 'Energy'],
        'Electrician': ['Energy', 'Manufacturing'],
        'Mechanical Engineer': ['Manufacturing', 'Energy']
    }

    #Define the various locations that should be included.
    locations = ['New York', 'San Francisco', 'Austin', 'Remote', 'Seattle', 'Chicago', 'Boston', 'London']

    #Define the various company sizes that should be included.
    company_sizes = ['1-10', '11-50', '51-250', '251-500', '501+']

    #Define the inclusive (lowest, highest) score for each satisfaction category.
    score_ranges = {
        'Work_Life_Balance': (40, 100),
        'Career_Growth': (30, 90),
        'Compensation': (50, 100),
        'Leadership': (40, 90),
        'Colleagues': (60, 100)
    }

    #Fix the column order up front so an empty result (rows <= 0) keeps the full schema.
    columns = ['Index', 'Company', 'Role', 'Industry', 'Location', 'Company_Size'] + list(score_ranges)

    #Build the list of roles once instead of on every iteration.
    roles = list(roles_to_industries)

    #Create the dataset.
    data = []

    #Loop through the number of defined "rows".
    for index in range(1, rows + 1):

        #Randomly select a role, then one of the industries associated with that role.
        role = random.choice(roles)
        industry = random.choice(roles_to_industries[role])

        #Get the randomized satisfaction scores.
        satisfaction = {
            category: random.randint(lowest, highest)
            for category, (lowest, highest) in score_ranges.items()
        }

        #Get the randomly assigned data for the current "row".
        row = {
            'Index': index,
            'Company': random.choice(companies),
            'Role': role,
            'Industry': industry,
            'Location': random.choice(locations),
            'Company_Size': random.choice(company_sizes),
            **satisfaction
        }

        #Add the current "row".
        data.append(row)

    return pd.DataFrame(data, columns = columns)

In [3]:
#Create a function to pivot/melt the simulated data.
def simulate_data_pivot(df):
    """Reshape the simulated data from wide to long format.

    Args:
        df: Dataframe holding an 'Index' column and the five satisfaction
            score columns.

    Returns:
        A dataframe with the columns 'Index_Pivot', 'Category_Pivot' (the
        display name of the category) and 'Score_Pivot', one row per
        Index/category combination.
    """

    #Score columns to unpivot, and the display name for each one.
    score_columns = ['Work_Life_Balance', 'Career_Growth', 'Compensation', 'Leadership', 'Colleagues']
    display_names = {
        'Work_Life_Balance': 'Work Life Balance',
        'Career_Growth': 'Career Growth',
        'Compensation': 'Compensation',
        'Leadership': 'Leadership',
        'Colleagues': 'Colleagues'
    }

    #Melt the dataframe and rename the id column in one chain.
    long_df = (
        df.melt(
            id_vars = ['Index'],
            value_vars = score_columns,
            var_name = 'Category_Pivot',
            value_name = 'Score_Pivot'
        )
        .rename(columns = {'Index': 'Index_Pivot'})
    )

    #Swap the raw column names for their display names.
    long_df['Category_Pivot'] = long_df['Category_Pivot'].replace(display_names)

    return long_df

In [4]:
#Create a function to define role descriptions.
def role_descriptions():
    """Build the lookup table of role titles and their descriptions.

    Returns:
        A dataframe with one row per role and the columns 'Role_Title' and
        'Role_Description', in the order the roles are defined below.
    """

    #Map each role title to its description.
    descriptions_by_role = {
        'Software Engineer': 'A Software Engineer designs, develops, tests, and maintains software applications or systems. They work with various programming languages, frameworks, and tools to ensure the software functions correctly, is scalable, and is user-friendly. They are also responsible for debugging and improving existing code, collaborating with other developers, and following industry best practices and coding standards.',
        'Project Manager': 'A Project Manager oversees the planning, execution, and completion of projects within a set timeframe and budget. They define project scope, manage resources, set milestones, and ensure effective communication among teams. They also identify potential risks and develop mitigation strategies, ensuring the project stays within the agreed-upon parameters. Project Managers often act as the primary point of contact between stakeholders and the project team.',
        'HR Specialist': 'An HR Specialist is responsible for managing the human resources functions of an organization, including recruitment, employee relations, performance management, training and development, and compliance with labor laws. They ensure that the organization has the right talent to meet its goals while fostering a positive workplace culture. HR specialists also handle administrative tasks such as payroll processing, benefits administration, and maintaining employee records.',
        'Accountant': 'An Accountant is responsible for managing and maintaining financial records, ensuring compliance with regulatory requirements, and preparing financial statements. They handle budgeting, forecasting, tax preparation, and financial reporting. Accountants work to ensure that a company’s financial activities are conducted in accordance with accounting standards and help provide accurate data to support decision-making. They may also assist in auditing processes and provide recommendations for financial improvements.',
        'Marketing Manager': 'A Marketing Manager develops and implements marketing strategies to promote products or services and drive business growth. They oversee market research, identify customer needs, and manage promotional campaigns across various channels, including digital marketing, advertising, public relations, and events. The Marketing Manager works closely with sales, product development, and creative teams to ensure alignment with business objectives and effectively reach target audiences.',
        'Customer Support': 'Customer Support professionals assist customers by providing answers to inquiries, resolving issues, and guiding them through product or service usage. They are responsible for ensuring a positive customer experience by addressing concerns, troubleshooting problems, and offering solutions. Customer Support agents often handle customer feedback, provide product knowledge, and escalate more complex issues to other departments as needed. They play a critical role in customer retention and satisfaction.',
        'Operations Manager': 'An Operations Manager ensures that the day-to-day operations of an organization run efficiently and effectively. They oversee operational processes, manage logistics, coordinate with other departments, and improve operational systems to optimize productivity and reduce costs. They also manage the supply chain, handle resource allocation, and ensure that quality standards are met. The Operations Manager works to streamline workflows, improve processes, and ensure the organization’s goals are achieved on time.',
        'Product Designer': 'A Product Designer focuses on creating user-centered designs for products, ensuring that both functionality and aesthetics are balanced. They conduct user research, create wireframes, prototypes, and visual designs, and work closely with engineering teams to ensure that the design is feasible and meets user needs. Product Designers also test prototypes, gather feedback, and iterate on designs to enhance user experiences. They strive to make products intuitive, visually appealing, and accessible.',
        'Business Analyst': 'A Business Analyst analyzes business processes, identifies opportunities for improvement, and provides data-driven insights to support decision-making. They work with stakeholders to gather requirements, analyze data, and develop solutions that help streamline operations, reduce costs, or enhance customer satisfaction. Business Analysts also help create strategies for business growth, define key performance indicators (KPIs), and monitor progress toward business objectives.',
        'Data Scientist': 'A Data Scientist analyzes large sets of complex data to uncover patterns, trends, and insights that can inform business decisions. They apply statistical models, machine learning algorithms, and advanced analytics techniques to interpret data and make predictions. Data Scientists work with cross-functional teams to define business challenges, clean and preprocess data, build models, and communicate insights. They also create automated systems for data analysis and ensure that the models are scalable and efficient.',
        'Data Analyst': 'A Data Analyst interprets data and provides actionable insights that help businesses make informed decisions. They collect, process, and analyze data from various sources, using statistical methods and data visualization techniques to identify trends, patterns, and correlations. Data Analysts often work with stakeholders to define data needs, generate reports, and present findings that support decision-making processes. They play a key role in improving business operations and strategies by providing data-driven recommendations.',
        'Machine Learning Engineer': 'A Machine Learning Engineer builds and deploys machine learning models that enable systems to learn from data and improve over time. They design algorithms, train models, and develop systems that can automate processes or make predictions. Machine Learning Engineers work with large datasets, clean and prepare data, and optimize models for efficiency. They collaborate with data scientists and software engineers to integrate machine learning models into applications or systems, and ensure that the models perform well in production environments.',
        'Sales Executive': 'A Sales Executive is responsible for driving sales and revenue growth by identifying business opportunities, building relationships with clients, and closing deals. They work to understand customer needs, present products or services, negotiate terms, and close sales. Sales Executives often work with marketing teams to generate leads, develop sales strategies, and meet sales targets. They maintain relationships with existing customers, follow up on leads, and continuously identify new sales opportunities.',
        'Legal Advisor': 'A Legal Advisor provides legal counsel to organizations or individuals, ensuring compliance with laws and regulations. They review contracts, negotiate terms, and provide advice on legal risks and business decisions. Legal Advisors help organizations navigate complex legal frameworks, handle disputes, and represent the company in legal matters. They ensure that the business operates within the boundaries of the law and may also assist with intellectual property rights, corporate governance, and regulatory compliance.',
        'Doctor': 'A Doctor diagnoses and treats illnesses and injuries, providing medical care to patients in various healthcare settings such as hospitals, clinics, and private practices. They perform physical examinations, order and interpret diagnostic tests, and develop treatment plans. Doctors also educate patients on preventive care, manage chronic conditions, and collaborate with other healthcare professionals to deliver comprehensive care. They play a crucial role in improving public health and saving lives.',
        'Nurse': 'A Nurse provides care and support to patients, assisting in their recovery and well-being. They administer medications, monitor patient health, and coordinate care with other healthcare professionals. Nurses also educate patients and families on health management and prevention, perform medical procedures, and maintain detailed medical records. They work in diverse settings, including hospitals, clinics, schools, and community centers, contributing to the overall health of the community.',
        'Teacher': 'A Teacher educates students in various subjects, preparing them for future academic or professional success. They develop lesson plans, deliver engaging instruction, and assess student performance. Teachers foster a positive and inclusive learning environment, adapting teaching methods to meet diverse student needs. Beyond academics, they mentor students, encourage critical thinking, and promote personal growth. Teachers play a vital role in shaping the next generation and building stronger communities.',
        'Construction Worker': 'A Construction Worker participates in building and infrastructure projects, performing tasks such as site preparation, material handling, and equipment operation. They follow blueprints and instructions from supervisors to ensure structures are built safely and efficiently. Construction Workers may specialize in areas such as carpentry, masonry, or concrete work, contributing their skills to residential, commercial, or industrial projects. Their work is essential for creating and maintaining the built environment.',
        'Electrician': 'An Electrician installs, maintains, and repairs electrical systems and equipment in residential, commercial, and industrial settings. They ensure wiring, lighting, and control systems are functioning safely and efficiently. Electricians interpret blueprints, follow building codes, and troubleshoot electrical issues. They play a critical role in maintaining power infrastructure and ensuring the safety and reliability of electrical systems.',
        'Mechanical Engineer': 'A Mechanical Engineer designs, develops, and tests mechanical systems and devices, working on projects that range from small components to large machinery. They use engineering principles and computer-aided design (CAD) software to create efficient and functional systems. Mechanical Engineers analyze materials, optimize designs for performance and cost, and oversee manufacturing processes. Their expertise is crucial in industries like manufacturing, energy, automotive, and aerospace.'
    }

    #Split the dictionary into one column of titles and one column of descriptions.
    titles = list(descriptions_by_role.keys())
    descriptions = list(descriptions_by_role.values())

    return pd.DataFrame({'Role_Title': titles, 'Role_Description': descriptions})

In [5]:
#Create a function to simulate benchmark data based on the passed-in dataframe.
def simulate_benchmark(df):
    """Simulate per-role benchmark scores from the simulated data.

    For every role, the mean of each satisfaction category is computed and
    then shifted by a random amount drawn uniformly from [-50, 50). No
    clipping is applied, so a benchmark value can fall outside the range of
    the underlying scores.

    Args:
        df: Dataframe holding a 'Role' column and the five satisfaction
            score columns; any other columns are ignored.

    Returns:
        A dataframe with one row per role (sorted by role) and the columns
        'Index_Benchmark' (1-based), 'Role_Benchmark' and one
        '[category]_Benchmark' column per satisfaction category.
    """

    category_columns = ['Work_Life_Balance', 'Career_Growth', 'Compensation', 'Leadership', 'Colleagues']

    #Calculate the mean score of each category for each role.
    role_means = df[['Role'] + category_columns].groupby(['Role']).mean()

    #Add a variance of -50/+50 to every mean. The noise is sized from the table of
    #means itself (the original lambda referenced an undefined name and raised a NameError).
    noise = np.random.uniform(-50, 50, size = role_means.shape)
    benchmark = (role_means + noise).reset_index()

    #Rename the benchmark columns as "[column]_Benchmark".
    benchmark = benchmark.add_suffix('_Benchmark')

    #Create the 1-based index column at the front of the dataframe.
    benchmark.insert(0, 'Index_Benchmark', benchmark.index + 1)

    return benchmark

In [6]:
#Create a function to pivot/melt the benchmark.
def simulate_benchmark_pivot(df):
    """Reshape the benchmark data from wide to long format.

    Args:
        df: Dataframe holding an 'Index_Benchmark' column and the five
            '[category]_Benchmark' score columns.

    Returns:
        A dataframe with the columns 'Index_Benchmark_Pivot',
        'Category_Benchmark_Pivot' (the display name of the category) and
        'Score_Benchmark_Pivot', one row per index/category combination.
    """

    #Benchmark columns to unpivot, and the display name for each one.
    benchmark_columns = ['Work_Life_Balance_Benchmark', 'Career_Growth_Benchmark', 'Compensation_Benchmark',
                         'Leadership_Benchmark', 'Colleagues_Benchmark']
    display_names = {
        'Work_Life_Balance_Benchmark': 'Work Life Balance',
        'Career_Growth_Benchmark': 'Career Growth',
        'Compensation_Benchmark': 'Compensation',
        'Leadership_Benchmark': 'Leadership',
        'Colleagues_Benchmark': 'Colleagues'
    }

    #Melt the dataframe and rename the id column in one chain.
    long_df = (
        df.melt(
            id_vars = ['Index_Benchmark'],
            value_vars = benchmark_columns,
            var_name = 'Category_Benchmark_Pivot',
            value_name = 'Score_Benchmark_Pivot'
        )
        .rename(columns = {'Index_Benchmark': 'Index_Benchmark_Pivot'})
    )

    #Swap the raw column names for their display names.
    long_df['Category_Benchmark_Pivot'] = long_df['Category_Benchmark_Pivot'].replace(display_names)

    return long_df

### Section 2: Implementation

In [7]:
#Implement methods to create the datasets.
role_data = role_descriptions()

#Simulate the survey data, then derive its long-format version.
simulated_data = simulate_data(rows = 5000)
simulated_data_pivot = simulate_data_pivot(simulated_data)

#Derive the benchmark from the simulated data, then its long-format version.
benchmark_data = simulate_benchmark(simulated_data)
benchmark_data_pivot = simulate_benchmark_pivot(benchmark_data)

#Pair every output file with the dataset it should contain:
#simulated data first, then role data, then benchmark data.
exports = [
    ('Simulated_Data.xlsx', simulated_data),
    ('Simulated_Data_Pivot.xlsx', simulated_data_pivot),
    ('Role_Data.xlsx', role_data),
    ('Benchmark_Data.xlsx', benchmark_data),
    ('Benchmark_Data_Pivot.xlsx', benchmark_data_pivot)
]

#Export each dataset to its own Excel file, without the dataframe index.
for file_name, dataset in exports:
    dataset.to_excel(file_name, index = False, engine = 'openpyxl')