In [22]:
import random, csv
from evaluator import *
from skills_generator import skills_generator

In [23]:
def generate_random_answers():
    """Generate a random dict of answers for the test
    
    Returns:
        random_dict: a dictionary object of randomised answers
    """
    random_dict = {}
    sex = ['Male', 'Female']
    random_dict['Sex'] = sex[random.randint(0,1)]
    random_dict['Looking for'] = skills_generator()
    random_dict['Skills'] = skills_generator()
    random_dict['Age'] = random.randint(18,55)
    # TODO: Generate a random name or ID
    random_dict['Name'] = 'X'
    # TODO: Generate random country
    random_dict['Country'] = 'GBR'
    # We have 120 questions in our test.
    for q in range(1,121):
        random_dict[f"Q{q}"] = random.randint(1,5)
    print(random_dict)
    return random_dict

In [24]:
def generate_random_results(n):
    """Generate IPIP-NEO results from rand data
    
    Args:
        n(int): Number of rand results to generate
    
    Returns:
        results: yields results. Therefore we have a generator
            which is useful if you're creating a large dataset
    """
    for i in range(0,n):
        rand_data = generate_random_answers()
        results = evaluator(rand_data)
        yield results

In [25]:
# Generate 20,000 rows and dump it into a CSV = 1.8mb
# The results are more detail than what I dump
# I am only dumping the scores from each trait
with open('sample_data.csv', 'w') as f:
    results_generator = generate_random_results(1000)
    once = False
    for i in results_generator:
        if(once==False):
            w = csv.DictWriter(f, i['scores'].keys())
            w.writeheader()
            once = True
        w.writerow(i['scores'])

{'Sex': 'Male', 'Looking for': ['medicine', 'medical research', 'board certified', 'healthcare', 'healthcare consulting', 'revenue cycle', 'hipaa', 'health information management', 'icd10', 'icd9cm', 'cms1500', 'eandm coding', 'icd9', 'hcfa', 'ub04', 'cpt'], 'Skills': ['innovation', 'disruptive technologies', 'open innovation', 'randd', 'polymers', 'resin', 'polymer chemistry', 'coatings', 'pigments'], 'Age': 29, 'Name': 'X', 'Country': 'GBR', 'Q1': 5, 'Q2': 4, 'Q3': 2, 'Q4': 5, 'Q5': 3, 'Q6': 3, 'Q7': 2, 'Q8': 4, 'Q9': 5, 'Q10': 5, 'Q11': 3, 'Q12': 1, 'Q13': 1, 'Q14': 3, 'Q15': 2, 'Q16': 2, 'Q17': 5, 'Q18': 4, 'Q19': 2, 'Q20': 3, 'Q21': 1, 'Q22': 2, 'Q23': 5, 'Q24': 3, 'Q25': 2, 'Q26': 2, 'Q27': 5, 'Q28': 2, 'Q29': 2, 'Q30': 5, 'Q31': 1, 'Q32': 5, 'Q33': 1, 'Q34': 3, 'Q35': 4, 'Q36': 3, 'Q37': 4, 'Q38': 5, 'Q39': 1, 'Q40': 5, 'Q41': 2, 'Q42': 3, 'Q43': 5, 'Q44': 1, 'Q45': 5, 'Q46': 4, 'Q47': 3, 'Q48': 1, 'Q49': 4, 'Q50': 2, 'Q51': 3, 'Q52': 2, 'Q53': 2, 'Q54': 4, 'Q55': 2, 'Q56': 3, '

{'Sex': 'Female', 'Looking for': ['finance', 'strategic financial planning', 'financial advisory', 'financial advice', 'savings', 'wealth management', 'investments', 'investment advisory', 'retirement', 'profit sharing', 'retirement planning', 'roth ira', 'long term care insurance', 'disability insurance'], 'Skills': ['analytics', 'predictive modeling', 'sas programming', 'cdisc', 'edc', 'cro', 'clinical development', 'ichgcp', 'cro', 'clinical monitoring', 'edc'], 'Age': 52, 'Name': 'X', 'Country': 'GBR', 'Q1': 2, 'Q2': 2, 'Q3': 3, 'Q4': 5, 'Q5': 4, 'Q6': 5, 'Q7': 3, 'Q8': 1, 'Q9': 3, 'Q10': 5, 'Q11': 1, 'Q12': 4, 'Q13': 1, 'Q14': 3, 'Q15': 3, 'Q16': 5, 'Q17': 3, 'Q18': 2, 'Q19': 4, 'Q20': 3, 'Q21': 3, 'Q22': 5, 'Q23': 2, 'Q24': 5, 'Q25': 4, 'Q26': 1, 'Q27': 1, 'Q28': 3, 'Q29': 5, 'Q30': 2, 'Q31': 4, 'Q32': 5, 'Q33': 5, 'Q34': 1, 'Q35': 4, 'Q36': 2, 'Q37': 2, 'Q38': 5, 'Q39': 2, 'Q40': 3, 'Q41': 2, 'Q42': 4, 'Q43': 1, 'Q44': 5, 'Q45': 5, 'Q46': 5, 'Q47': 1, 'Q48': 4, 'Q49': 2, 'Q50': 

{'Sex': 'Male', 'Looking for': ['engineering', 'electrical engineering', 'substation', 'transformer', 'high voltage', 'switchgear', 'power systems', 'electric power', 'power distribution', 'transformer', 'electricians', 'generators', 'turbines', 'wind turbines', 'wind resource assessment', 'wind'], 'Skills': ['civil engineering', 'transportation engineering', 'synchro', 'traffic engineering', 'traffic impact studies', 'traffic simulation', 'transportation planning', 'road traffic', 'traffic signal design'], 'Age': 38, 'Name': 'X', 'Country': 'GBR', 'Q1': 5, 'Q2': 5, 'Q3': 1, 'Q4': 3, 'Q5': 3, 'Q6': 1, 'Q7': 1, 'Q8': 3, 'Q9': 2, 'Q10': 2, 'Q11': 2, 'Q12': 1, 'Q13': 2, 'Q14': 4, 'Q15': 3, 'Q16': 1, 'Q17': 1, 'Q18': 5, 'Q19': 1, 'Q20': 5, 'Q21': 3, 'Q22': 4, 'Q23': 4, 'Q24': 2, 'Q25': 5, 'Q26': 1, 'Q27': 5, 'Q28': 5, 'Q29': 5, 'Q30': 3, 'Q31': 5, 'Q32': 4, 'Q33': 2, 'Q34': 4, 'Q35': 3, 'Q36': 2, 'Q37': 1, 'Q38': 5, 'Q39': 5, 'Q40': 4, 'Q41': 1, 'Q42': 4, 'Q43': 2, 'Q44': 1, 'Q45': 3, 'Q46

{'Sex': 'Male', 'Looking for': ['aviation', 'airports', 'commercial aviation', 'charter', 'airlines', 'airport management', 'airline management', 'aviation security', 'civil aviation', 'flight safety', 'aircraft', 'flights', 'charter', 'type rating', 'flight training'], 'Skills': ['finance', 'corporate finance', 'financial modeling', 'financial analysis'], 'Age': 41, 'Name': 'X', 'Country': 'GBR', 'Q1': 2, 'Q2': 5, 'Q3': 1, 'Q4': 2, 'Q5': 3, 'Q6': 2, 'Q7': 5, 'Q8': 1, 'Q9': 1, 'Q10': 3, 'Q11': 4, 'Q12': 5, 'Q13': 1, 'Q14': 5, 'Q15': 2, 'Q16': 3, 'Q17': 1, 'Q18': 3, 'Q19': 4, 'Q20': 5, 'Q21': 1, 'Q22': 1, 'Q23': 1, 'Q24': 1, 'Q25': 1, 'Q26': 5, 'Q27': 4, 'Q28': 5, 'Q29': 5, 'Q30': 4, 'Q31': 5, 'Q32': 1, 'Q33': 3, 'Q34': 4, 'Q35': 4, 'Q36': 2, 'Q37': 5, 'Q38': 3, 'Q39': 5, 'Q40': 4, 'Q41': 2, 'Q42': 5, 'Q43': 4, 'Q44': 4, 'Q45': 5, 'Q46': 1, 'Q47': 2, 'Q48': 2, 'Q49': 2, 'Q50': 2, 'Q51': 3, 'Q52': 2, 'Q53': 3, 'Q54': 3, 'Q55': 5, 'Q56': 4, 'Q57': 1, 'Q58': 2, 'Q59': 1, 'Q60': 1, 'Q61': 2