# Candidate Generator
Creates a list of candidates. The file is saved to `data/candidates.csv`.

In [1]:
import re
import random

UCID_PREFIX = 'DT'
PADDING_LEN = 4

CEFR_LIST = ['A1.0', 'A1.1', 'A1.2', 'A2.0', 'A2.1', 'A2.2', 'B1.0', 'B1.1', 'B1.2', 'B2.0', 'B2.1', 'B2.2', 'C1.0']

def getRandomCEFR() -> str:
    '''
    Returns a randomly chosen CEFR level.

    Returns:
        cefr (str): one entry of `CEFR_LIST`, e.g. `B1.2`
    '''
    cefr = random.choice(CEFR_LIST)
    return cefr
    

def incrementCandidate(ucid: str = None) -> str:
    '''
    Returns the next candidate ID.

    The first run of digits found in `ucid` is read as the candidate number,
    incremented by one, and re-emitted behind `UCID_PREFIX`, zero-padded to
    `PADDING_LEN` digits. Any prefix carried by `ucid` itself is discarded.

    Parameters:
        ucid (str): a unique candidate ID of the form `DT0001`; when None,
            the first ID of the sequence is returned

    Returns:
        nextUcid (str): a unique candidate ID of the form `DT0001`

    Raises:
        ValueError: if `ucid` contains no digits
    '''
    if ucid is None:
        # Build the first ID with the same padding rule as every other ID so
        # it stays consistent if PADDING_LEN is changed.
        return UCID_PREFIX + '1'.zfill(PADDING_LEN)

    match = re.search(r'\d+', ucid)
    if match is None:
        raise ValueError(
            "Unique candidate identifier (ucid) must contain a number")

    # `\d+` only matches digits (never a sign), so the parsed number cannot be
    # negative and needs no range check.
    nextNumber = int(match.group(0)) + 1

    # zfill pads but never truncates: numbers with more than PADDING_LEN
    # digits simply produce a longer ID.
    nextUcid = UCID_PREFIX + str(nextNumber).zfill(PADDING_LEN)
    return nextUcid


## Create the Randomised Set
First we create the randomised set of candidates (as a list of tuples). We decided to split the list into thirds, with $\theta$ values of -1.5, 0.0, 1.5. Estimated CEFRs are chosen at random.

In [2]:
# Generate a list of tuples containing the candidate info:
# (ucid, theta, estimated_cefr)
# ('DT0001', -1.5, 'A1.1')
#

MAX_CANDIDATES = 400
START_ID = 'DT0000'

# Boundaries that split the candidates into three (near-)equal groups.
# Integer arithmetic keeps the comparisons exact; when MAX_CANDIDATES is not
# divisible by 3 the last group absorbs the remainder.
low_group_end = MAX_CANDIDATES // 3
mid_group_end = (2 * MAX_CANDIDATES) // 3

candidates = []
ucid = START_ID
for c in range(1, MAX_CANDIDATES+1):
    ucid = incrementCandidate(ucid)
    if c <= low_group_end:
        # first third: low ability
        theta = -1.5
    elif c > mid_group_end:
        # last third: high ability
        theta = 1.5
    else:
        # middle third: average ability
        theta = 0.0
    estimated_cefr = getRandomCEFR()
    rand_cand = (ucid, theta, estimated_cefr)
    candidates.append(rand_cand)

## Write to File
Finally we write out to the CSV data file.

In [3]:
import csv

# Write a header row followed by one row per candidate tuple.
# NOTE(review): the third column header is spelled 'SeflRatedAbility' (sic).
# It is kept unchanged here because consumers of the file may rely on the
# exact column name -- confirm before renaming.
header = ('UCID', 'Theta', 'SeflRatedAbility')
with open('data/candidates.csv', 'w', newline='') as csvfile:
    candidate_writer = csv.writer(csvfile)
    candidate_writer.writerow(header)
    candidate_writer.writerows(candidates)
    

In [4]:
# Preview the first rows of the candidates file this notebook writes.
!head data/candidates.csv

UCID,Theta,SeflRatedAbility
DT0001,-1.5,C1.0
DT0002,-1.5,C1.0
DT0003,-1.5,A2.1
DT0004,-1.5,B2.2
DT0005,-1.5,B1.1
DT0006,-1.5,B1.0
DT0007,-1.5,B1.1
DT0008,-1.5,C1.0
DT0009,-1.5,A1.0
