In [6]:
import pandas as pd
import os

In [7]:
# Load both parts of the coded-electives export from the "coded" directory.
# Every column is read with pandas' nullable string dtype.
dump_files = [
    "20230622WG6_AllElectivesPart1-export-20230705-123616.csv",
    "20230622WG6_AllElectivesPart2-export-20230705-123639.csv",
]
dump_contents = []
for dump_file in dump_files:
    raw = pd.read_csv(os.path.join("coded", dump_file), dtype='string')
    dump_contents.append(raw)

# Replace missing values with "" only AFTER concatenating: pd.concat aligns
# columns by name, so a column that exists in just one part would be <NA> for
# the other part's rows if the fill were done per file.
# Row labels are intentionally not reset (no ignore_index); each file's own
# 0-based labels are kept, so labels can repeat across the two parts.
raw_coded = pd.concat(dump_contents).fillna("")

# Keep only the rows whose Annotations cell is non-empty.
annotated = raw_coded[raw_coded['Annotations'].str.len() > 0]
annotated.columns

Index(['Title', 'Text', '[M] elective: ', '[M] institution: ', '[M] title: ',
       '[C] Barker, Lecia', '[C] Bradley, Steven', '[C] Hooshangi, Sara',
       '[C] Kunkeler, Thom', '[C] Lennon, Ruth', '[C] Parkinson, Jack',
       '[C] Sibia, Naaz (Naaz Sibia)', 'Annotations',
       '[C] Altin, Rukiye (rukiye)', '[C] McNeill, Fiona',
       '[C] Minguillón, Julià (Julià Minguillón)',
       '[C] Peltsverger, Svetlana'],
      dtype='object')

In [8]:
# Build a column rename map. Columns whose name starts with "[M]" are
# shortened by dropping the first four characters and the last two
# (e.g. '[M] elective: ' -> 'elective') and collected in meta_cols, in column
# order. Every other column maps to its own name.
meta_cols = []
renamer = {}
for col in annotated.columns:
    if col.startswith("[M]"):
        print("[M] ")
        shortened = col[4:-2]
        meta_cols.append(shortened)
    else:
        shortened = col
    renamer[col] = shortened

renamer

[M] 
[M] 
[M] 


{'Title': 'Title',
 'Text': 'Text',
 '[M] elective: ': 'elective',
 '[M] institution: ': 'institution',
 '[M] title: ': 'title',
 '[C] Barker, Lecia': '[C] Barker, Lecia',
 '[C] Bradley, Steven': '[C] Bradley, Steven',
 '[C] Hooshangi, Sara': '[C] Hooshangi, Sara',
 '[C] Kunkeler, Thom': '[C] Kunkeler, Thom',
 '[C] Lennon, Ruth': '[C] Lennon, Ruth',
 '[C] Parkinson, Jack': '[C] Parkinson, Jack',
 '[C] Sibia, Naaz (Naaz Sibia)': '[C] Sibia, Naaz (Naaz Sibia)',
 'Annotations': 'Annotations',
 '[C] Altin, Rukiye (rukiye)': '[C] Altin, Rukiye (rukiye)',
 '[C] McNeill, Fiona': '[C] McNeill, Fiona',
 '[C] Minguillón, Julià (Julià Minguillón)': '[C] Minguillón, Julià (Julià Minguillón)',
 '[C] Peltsverger, Svetlana': '[C] Peltsverger, Svetlana'}

In [9]:
# The first two shortened meta columns are the elective code and the
# institution; the third (title) is not exported.
# NOTE(review): these two columns are only selected, never set as the frame's
# index, so the CSV's leading column is the original row label.
index_cols = meta_cols[:2]
keep_cols = [*index_cols, "Annotations"]
renamed = annotated.rename(columns=renamer)
export = renamed[keep_cols]

# Drop every Uppsala_Sweden row (original note: Uppsala electives with no
# content), then drop four specific elective codes.
not_uppsala = export["institution"] != "Uppsala_Sweden"
export = export[not_uppsala]
excluded_electives = ["cxinfr11245", "cxinfr11246", "cxinfr11248", "cxinfr11083"]
is_excluded = export["elective"].isin(excluded_electives)
export = export[~is_excluded]

# Write the result next to the notebook and display it.
export.to_csv("annotated_electives.csv")
export

Unnamed: 0,elective,institution,Annotations
1,COMP3491,Durham_England,Annotation: I think this is security because o...
2,COMP3507,Durham_England,Annotation: Low confidence on this. Complexity...
10,COMP3677,Durham_England,"Annotation: This is related to AI, but also to..."
12,COMP4117,Durham_England,Annotation: quantum computing; User: Sara Hoos...
13,COMP4127,Durham_England,Annotation: very low confidence; User: Lecia B...
16,COMP4187,Durham_England,Annotation: could be one of many sciences; Use...
18,cxinfr08033,Edinburgh_UK,Annotation: ; User: Jack Parkinson; Timestamp:...
19,cxinfr10052,Edinburgh_UK,Annotation: According to the ACM 2023 curricul...
23,cxinfr10084,Edinburgh_UK,Annotation: As with previous experiential lear...
27,cxinfr10083,Edinburgh_UK,Annotation: data science is not always about A...
