In [26]:
import pandas as pd
import os

In [27]:
def make_code_columns(raw_coded):
    """Pivot a raw coding export into one row per elective, one column per code.

    Columns of ``raw_coded`` whose name starts with ``"[M]"`` are treated as
    metadata and columns whose name starts with ``"[C]"`` as one coder's
    answers; every other column is ignored.  Each coder cell is split on
    ``"; "`` into individual codes, and empty cells contribute nothing.

    Parameters
    ----------
    raw_coded : pandas.DataFrame
        Export with string-valued ``[M]``/``[C]`` columns.  The first two
        ``[M]`` columns become the index of the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by the first two metadata columns, with one column per
        distinct code (columns index named ``"code"``).  Each cell is the
        number of coder entries that carry that code for that row; 0 where
        the code was never used.
    """
    meta_cols = []
    coder_cols = []
    renamer = {}
    for col in raw_coded.columns:
        if col.startswith("[M]"):
            # Drops the 4-character prefix and the last 2 characters of the name.
            # NOTE(review): assumes names look like "[M] <name>" plus a 2-char
            # suffix - confirm against the export format.
            short = col[4:-2]
            meta_cols.append(short)
        elif col.startswith("[C]"):
            short = col[3:]
            coder_cols.append(short)
        else:
            short = col
        renamer[col] = short

    # first two meta columns are elective code and institution, which we use as the index
    # third meta column is title
    index_cols = meta_cols[:2]
    wide = raw_coded.rename(columns=renamer)[index_cols + coder_cols]

    # One row per (index, coder); 'value' holds that coder's "; "-joined codes.
    per_coder = wide.melt(id_vars=index_cols, var_name='coder')

    # Spread the joined codes over positional columns 0, 1, 2, ...
    split_codes = per_coder['value'].str.split('; ', expand=True)
    per_slot = per_coder.drop(columns='value').join(split_codes)

    # One row per (index, coder, single code).
    id_cols = index_cols + ['coder']
    assignments = per_slot.melt(id_vars=id_cols, value_name='code')[id_cols + ['code']]

    # Non-mutating fillna: the frame above is a column selection, and filling
    # it in place can raise SettingWithCopyWarning.
    assignments = assignments.fillna("")
    assignments = assignments[assignments['code'].str.len() > 0]

    counts = assignments.groupby(index_cols + ['code']).count().unstack().fillna(0)
    # unstack leaves a ('coder', <code>) column MultiIndex; keep only the code level.
    counts.columns = counts.columns.get_level_values(1)
    return counts



In [35]:
def has_agreement_inset_code(code_in_columns, value_set, code):
    """Flag the rows whose value in one code column belongs to ``value_set``.

    Parameters
    ----------
    code_in_columns : pandas.DataFrame
        Frame with one column per code.
    value_set : list-like
        Values that count as a match.
    code : hashable
        Label of the column to inspect.

    Returns
    -------
    pandas.Series
        Boolean Series on the same index as ``code_in_columns``.
    """
    values_for_code = code_in_columns[code]
    return values_for_code.isin(value_set)

def has_agreement_inset_any_code(code_in_columns, value_set):
    """Flag the rows where at least one column holds a value from ``value_set``.

    Parameters
    ----------
    code_in_columns : pandas.DataFrame
        Frame with one column per code.
    value_set : iterable
        Values that count as a match.

    Returns
    -------
    pandas.Series
        Boolean Series on the same index as ``code_in_columns``.  A frame
        without any columns yields an all-False Series (rather than the bare
        Python ``False`` a column-by-column loop would leave behind), so the
        result can always be negated with ``~`` and used as a row mask.
    """
    # list(): treat value_set purely as a collection of values; a dict or
    # Series passed straight to DataFrame.isin would be matched by label instead.
    membership = code_in_columns.isin(list(value_set))
    return membership.any(axis=1)

def has_agreement_4_plus_code(code_in_columns, code):
    """Flag the rows whose value for ``code`` is 4, 5, 6 or 7.

    Parameters
    ----------
    code_in_columns : pandas.DataFrame
        Frame with one column per code.
    code : hashable
        Label of the column to inspect.

    Returns
    -------
    pandas.Series
        Boolean Series on the same index as ``code_in_columns``.
    """
    # NOTE(review): the upper bound of 7 presumably reflects the number of
    # coders - confirm before reusing with a larger panel.
    return has_agreement_inset_code(code_in_columns, [4, 5, 6, 7], code)

def has_agreement_23_code(code_in_columns, code):
    """Flag the rows whose value for ``code`` is exactly 2 or 3.

    Returns a boolean Series on the same index as ``code_in_columns``.
    """
    two_or_three = [2, 3]
    return has_agreement_inset_code(code_in_columns, two_or_three, code)

def has_agreement_4_plus_any_code(code_in_columns):
    """Flag the rows where at least one code column holds 4, 5, 6 or 7.

    Returns the result of ``has_agreement_inset_any_code`` for that value set.
    """
    four_to_seven = [4, 5, 6, 7]
    return has_agreement_inset_any_code(code_in_columns, four_to_seven)

def has_agreement_23_any_code(code_in_columns):
    """Flag the rows where at least one code column holds exactly 2 or 3.

    Returns the result of ``has_agreement_inset_any_code`` for that value set.
    """
    two_or_three = [2, 3]
    return has_agreement_inset_any_code(code_in_columns, two_or_three)

def has_overall_agreement_code(code_in_columns, code):
    """Flag the rows with a 4-7 value and no 2-3 value for the single ``code``.

    Combines ``has_agreement_4_plus_code`` with the negation of
    ``has_agreement_23_code`` for the same column.
    """
    four_plus = has_agreement_4_plus_code(code_in_columns, code)
    two_or_three = has_agreement_23_code(code_in_columns, code)
    # NOTE(review): both tests read the same column and the value sets
    # [4, 5, 6, 7] and [2, 3] are disjoint, so the second term cannot change
    # the outcome here; it mirrors the any-code variant.
    return four_plus & ~two_or_three

def has_overall_agreement_any_code(code_in_columns):
    """Flag the rows that have a 4-7 value in some code column and a 2-3 value in none.

    Combines ``has_agreement_4_plus_any_code`` with the negation of
    ``has_agreement_23_any_code`` over all columns of ``code_in_columns``.
    """
    some_code_four_plus = has_agreement_4_plus_any_code(code_in_columns)
    some_code_two_or_three = has_agreement_23_any_code(code_in_columns)
    return some_code_four_plus & ~some_code_two_or_three


In [44]:
# Previously used export names, left commented out
# (presumably earlier runs of this cell - kept for reference).
#dump_file = "20230612WG6_Trial_Sample50-export-20230616-013559.csv"
#dump_file = "20230622WG6_AllElectivesPart1-export-20230702-050135.csv"
#dump_file = "20230622WG6_AllElectivesPart2-export-20230702-050210.csv"
#dump_file = "20230622WG6_AllElectivesPart1-export-20230705-123616.csv"
# The two export files combined below; both are read from the "coded" directory.
first_pass_files = ["20230622WG6_AllElectivesPart1-export-20230705-123616.csv", "20230622WG6_AllElectivesPart2-export-20230705-123639.csv"]

first_pass_contents = []
for first_pass_file in first_pass_files:
    # Every column is read as a string, and missing cells become "" so the
    # string handling in make_code_columns never meets a missing value.
    raw = pd.read_csv(os.path.join("coded", first_pass_file), dtype='string').fillna("")
    first_pass_contents.append(raw)
# Stack the parts into one frame and save the combined raw export.
# NOTE(review): concat keeps each file's own row labels, so the index column
# written to first_pass_codes_raw.csv repeats across parts - confirm that is
# intended, or pass ignore_index=True.
raw_coded = pd.concat(first_pass_contents)
raw_coded.to_csv(os.path.join("coded","first_pass_codes_raw.csv"))

# Pivot to one column per code and save; electives_with_codes() reads this file back.
code_in_columns = make_code_columns(raw_coded)
code_in_columns.to_csv(os.path.join("coded","first_pass_codes_columns.csv"))
# Display the rows that do NOT pass has_overall_agreement_any_code.
code_in_columns[~has_overall_agreement_any_code(code_in_columns)]

Unnamed: 0_level_0,code,01,02,03,04,07,08,09,10,13,14,...,MSF,NC,OS,PDC,SDF,SE,SEC,SEP,SF,SPD
elective,institution,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1DL034,Uppsala_Sweden,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1DL231,Uppsala_Sweden,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1DL251,Uppsala_Sweden,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0
1DL301,Uppsala_Sweden,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1DL311,Uppsala_Sweden,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
inf-EAeS,CAU_Germany,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
inf-EERP,CAU_Germany,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
inf-EnlOpt,CAU_Germany,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
inf-IoT,CAU_Germany,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0


In [85]:
def electives_with_codes(codes):
    """Return the rows of ``all_electives.csv`` for electives tagged with any of ``codes``.

    Reads ``coded/first_pass_codes_columns.csv`` (one column per code) and
    ``all_electives.csv`` relative to the working directory.  An elective
    qualifies when at least one of the requested code columns is greater
    than 0.  The two files are matched on the ``(institution, elective)`` pair.

    Parameters
    ----------
    codes : iterable of str
        Column labels of the codes file.  An empty iterable selects nothing.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``all_electives.csv``, in file order and with
        their original row labels.

    Raises
    ------
    KeyError
        If a requested code is not a column of the codes file.
    """
    code_in_columns = pd.read_csv(os.path.join("coded", "first_pass_codes_columns.csv"))

    wanted_codes = list(codes)
    if wanted_codes:
        has_a_code = (code_in_columns[wanted_codes] > 0).any(axis=1)
    else:
        # No codes requested: select no rows instead of indexing with a bare False.
        has_a_code = pd.Series(False, index=code_in_columns.index)

    key_cols = ['institution', 'elective']
    # dropna: a key with a missing part must never match anything (NaN != NaN),
    # whereas MultiIndex.isin would treat two missing values as equal.
    coded_keys = pd.MultiIndex.from_frame(code_in_columns.loc[has_a_code, key_cols].dropna())

    all_electives = pd.read_csv("all_electives.csv")
    # One vectorized membership test instead of re-filtering the key table per row.
    catalogue_keys = pd.MultiIndex.from_frame(all_electives[key_cols])
    return all_electives[catalogue_keys.isin(coded_keys)]

In [87]:
# Show the all_electives.csv rows whose "AR" column in the saved codes file is greater than 0.
electives_with_codes(["AR"])

Unnamed: 0,institution,elective,overview,title,summary,content,ilo
14,Durham_England,COMP3617,<h2>Durham_England COMP3617</h2><h2>title</h2>...,COMP3617: \nVIRTUAL AND AUGMENTED REALITY,\n\t\t\t<li>This course will introduce the pri...,\n\t\t\t<li>Introduction: history of VR/AR dev...,"\n\t\t\t\t<li>On completion of the module, stu..."
16,Durham_England,COMP3637,<h2>Durham_England COMP3637</h2><h2>title</h2>...,COMP3637: \nCompiler Design,\n\t\t\t<li>To understand how a compiler for a...,\n\t\t\t<li>Compiler frontend: Foundations of ...,\n\t\t\t\t<li>An understanding of the theory a...
24,Durham_England,COMP4117,<h2>Durham_England COMP4117</h2><h2>title</h2>...,COMP4117: \nQUANTUM COMPUTING,\n\t\t\t<li>To introduce students to Quantum I...,\n\t\t\t<li>Qubits and Quantum Key Distributio...,"\n\t\t\t\t<li>On completion of the module, stu..."
85,Glasgow_Scotland,COMPSCI4007,<h2>Glasgow_Scotland COMPSCI4007</h2><h2>title...,Computer Architecture (H) COMPSCI4007,"<p id=""_COMPSCI40070""><span id=""_COMPSCI40071""...","<p id=""_COMPSCI400725""><span id=""_COMPSCI40072...","<p id=""_COMPSCI400730""><span id=""_COMPSCI40073..."
88,Glasgow_Scotland,COMPSCI4082,<h2>Glasgow_Scotland COMPSCI4082</h2><h2>title...,Distributed and Parallel Technologies (H) COMP...,"<p id=""_COMPSCI40820""><span id=""_COMPSCI40821""...","<p id=""_COMPSCI408231""><span id=""_COMPSCI40823...","<p id=""_COMPSCI408256""><span id=""_COMPSCI40825..."
109,Uppsala_Sweden,1DL321,<h2>Uppsala_Sweden 1DL321</h2><h2>title</h2>Co...,Compiler Design I,,,<li>structure a compiler as a sequence of dis...
115,Uppsala_Sweden,1TD062,<h2>Uppsala_Sweden 1TD062</h2><h2>title</h2>Hi...,High Performance Programming,,,<li>implement computational algorithms to eff...
130,CAU_Germany,Inf-DigSig,<h2>CAU_Germany Inf-DigSig</h2><h2>title</h2>M...,Multimedia Signal Processing and Data Compress...,There will be basic features of audiovisual in...,\n\n1) Basics of human perception\n\nPsycho-op...,\n\nThe processing of real-world signals in th...
149,Kennesaw_USA,CS4522,<h2>Kennesaw_USA CS4522</h2><h2>title</h2>HPC ...,HPC & Parallel Programming,This course will introduce parallel programmin...,,1. Discuss the different processor and memory ...
150,Kennesaw_USA,CS4524,<h2>Kennesaw_USA CS4524</h2><h2>title</h2>Clou...,Cloud Computing,This course discusses the fundamental concepts...,,1. Explain fundamental concepts and architectu...


In [76]:
# Load the elective table from the working directory and display it
# (pandas truncates the rendering of long frames).
all_electives = pd.read_csv("all_electives.csv")
all_electives

Unnamed: 0,institution,elective,overview,title,summary,content,ilo
0,Durham_England,COMP3421,<h2>Durham_England COMP3421</h2><h2>title</h2>...,COMP3421: \nCOMPUTER SCIENCE INTO SCHOOLS,\n\t\t\t<li>To develop a range of key skills i...,\n\t\t\t<li>A competitive interview system wil...,\n\t\t\t\t<li>On successful completion of this...
1,Durham_England,COMP3477,<h2>Durham_England COMP3477</h2><h2>title</h2>...,COMP3477: \nALGORITHMIC GAME THEORY,\n\t\t\t<li> The aim of the module is to intro...,\n\t\t\t<li>Introduction to Game Theory: what ...,"\n\t\t\t\t<li>On completion of the module, stu..."
2,Durham_England,COMP3487,<h2>Durham_England COMP3487</h2><h2>title</h2>...,COMP3487: \nBIOINFORMATICS,\n\t\t\t<li>To introduce students to applicati...,\n\t\t\t<li>Dynamic programming algorithms for...,"\n\t\t\t\t<li>On completion of the module, stu..."
3,Durham_England,COMP3467,<h2>Durham_England COMP3467</h2><h2>title</h2>...,COMP3467: \nADVANCED COMPUTER SYSTEMS,\n\t\t\t<li> To provide understanding of the r...,\n\t\t\t<li> Computer Architecture including c...,"\n\t\t\t\t<li>On completion of the module, stu..."
4,Durham_England,COMP3491,<h2>Durham_England COMP3491</h2><h2>title</h2>...,COMP3491: \nCODES AND CRYPTOGRAPHY,\n\t\t\t<li>To give an introduction to the fun...,\n\t\t\t<li>Data compression: lossless compres...,"\n\t\t\t\t<li>On completion of the module, stu..."
...,...,...,...,...,...,...,...
270,VirginiaTech_USA,CS4704,<h2>VirginiaTech_USA CS4704</h2><h2>title</h2>...,Software Engineering Capstone,,Senior project course integrating software eng...,
271,VirginiaTech_USA,CS4784,<h2>VirginiaTech_USA CS4784</h2><h2>title</h2>...,Human-Computer Interaction Capstone,,"Advanced, project-based course in Human-Comput...",
272,VirginiaTech_USA,CS4804,<h2>VirginiaTech_USA CS4804</h2><h2>title</h2>...,Introduction to Artificial Intelligence,,"Overview of the areas of problem solving, game...",
273,VirginiaTech_USA,CS4824,<h2>VirginiaTech_USA CS4824</h2><h2>title</h2>...,Machine Learning,,Algorithms and principles involved in machine ...,
