# Concept Map Analysis
Analyse a set of dynamic graphs, computing the change in [betweenness](https://en.wikipedia.org/wiki/Betweenness_centrality) centrality pre- / post-intervention.

The intervention is an interdisciplinary lesson, and students are asked to create a concept map at the beginning and at the end of the lesson. Concept maps are created from a set of given concepts. Mapping is performed in [Concept Map Creator](https://www.ddi.uni-konstanz.de/forschung/forschungsprojekte/concept-map-creator/).

The resulting concept maps can be downloaded from _Concept Map Creator_ as one zip file per class. Each zip file contains a hierarchical structure, with one folder per student that holds all of that student's graphs.

The following libraries need to be installed, preferably via conda:
  * graph_tool
  * tqdm
  * pandas
  * jupyter
  * ipython

```conda install -c conda-forge jupyter ipython tqdm graph-tool pandas```


## Import data
  * read zip file
  * find students with at least two graphs
  * pick the oldest and most recent graph
  * analyse each graph for per-concept node centrality
  * return a dictionary of {concept : change}

## Massage Graphs
The produced graphs cannot be read by either networkx or graph_tool, as they do not conform to the graphml spec.

Let's massage them a little:
   * fix graphml namespace definitions: 
     * replace `xmlns="http://graphml.graphdrawing.org/xmlns/graphml"` by `xmlns="http://graphml.graphdrawing.org/xmlns"`.
     * replace `xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns/graphml` by `xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns`
   * fix graphml attribute definitions:
     * add `attr.type="string"` to the keys with ids `d2`, `d7`, and `d8`.
     * remove the illegal `d13` key with `<key for="graphml"`
   * change graph from directed to undirected

In [186]:
def fix_graphml(xmlcontents):
    """Return `xmlcontents` patched up such that graphtool can import it.

    The patches are plain, literal text substitutions applied one after the
    other:
      * the graphml namespace / schema location URL loses its trailing
        "/graphml" segment,
      * the `d13` key declared `for="graphml"` is dropped (together with the
        newline that follows it),
      * the graph's edge default is switched from directed to undirected,
      * the `d2` (node), `d7` (edge) and `d8` (graph) keys gain
        `attr.type="string"`.

    Text that matches none of the patterns is returned unchanged.
    """
    substitutions = [
        ("http://graphml.graphdrawing.org/xmlns/graphml",
         "http://graphml.graphdrawing.org/xmlns"),
        ('<key for="graphml" id="d13" yfiles.type="resources"/>\n', ''),
        ('edgedefault="directed"', 'edgedefault="undirected"'),
    ]
    # Keys that are declared without a type: append attr.type right after the id.
    for scope, key_id in (("node", "d2"), ("edge", "d7"), ("graph", "d8")):
        untyped = f'<key for="{scope}" id="{key_id}"'
        substitutions.append((untyped, untyped + ' attr.type="string"'))

    patched = xmlcontents
    for old, new in substitutions:
        patched = patched.replace(old, new)
    return patched

def read_student_graph(zip_file, folder_info):
    """Compute per-graph vertex betweenness for one student folder of the archive.

    Every non-directory member of `zip_file` whose name starts with
    `folder_info.filename` is decoded as UTF-8, repaired with fix_graphml(),
    written beneath the local `tmp/` directory and loaded from there with
    graph_tool.

    Returns a dict {timestamp : { concept : betweenness } }, where the
    timestamp is the file name without its extension and the concept is the
    value of the vertex property 'id'. If fewer than two graphs were found, a
    notice is printed and None is returned.
    """
    from io import TextIOWrapper
    import pathlib
    import graph_tool as gt
    import os

    betweenness_by_timestamp = {}
    folder_prefix = folder_info.filename
    for member in zip_file.infolist():
        if member.is_dir() or not member.filename.startswith(folder_prefix):
            continue

        print(f'Extracting {member.filename}')
        target = pathlib.Path('tmp/' + member.filename)
        with zip_file.open(member) as raw_graph:
            repaired = fix_graphml(TextIOWrapper(raw_graph, "UTF-8").read())

            # graph_tool is handed a path below, so persist the repaired file first.
            os.makedirs(target.parent, exist_ok=True)
            with open(target, "w", encoding="UTF-8") as out:
                out.write(repaired)

        graph = gt.load_graph(str(target.absolute()))
        vertex_prop = graph.new_vertex_property("double")
        # betweenness() yields vertex and edge centrality; only the former is used.
        vertex_centrality, _edge_centrality = gt.centrality.betweenness(graph, vertex_prop)
        # target is of the form <timestamp>.graphml
        betweenness_by_timestamp[target.stem] = dict(
            zip(graph.vertex_properties['id'], vertex_centrality)
        )

    if len(betweenness_by_timestamp) <= 1:
        print(f'Ignoring {folder_info.filename} as we need at least two graphs')
        return None
    return betweenness_by_timestamp

def read_recording(filename):
    """Read the zip archive `filename` and produce a dictionary {student : betweenness_info}.

    Each directory entry of the archive is treated as one student folder and
    passed to read_student_graph(). Only students for which that call returns
    data (i.e. at least two graphs) are recorded. The student key is the
    directory entry's name without its trailing "/".

    NOTE: only explicit directory entries are considered; an archive that
    stores files without separate directory entries yields an empty result.

    Raises whatever zipfile.ZipFile raises for a missing or invalid archive.
    """
    import zipfile

    result = {}
    with zipfile.ZipFile(filename) as recording:
        for fileinfo in recording.infolist():
            if not fileinfo.is_dir():
                continue
            # record only students with valid data (>= 2 graphs)
            data = read_student_graph(recording, fileinfo)
            if data:
                # directory names end in "/": strip it to get the student name
                result[fileinfo.filename[:-1]] = data

    return result

In [None]:
# Load the two recordings (one zip file per class). Each result maps a student
# folder name to {timestamp : {concept : betweenness}}; students with fewer
# than two graphs are left out by read_recording().
rec2Ma = read_recording('data/Robotics_Acceleration 2Ma.zip')
rec2Mf = read_recording('data/Robotics_Acceleration 2Mf.zip')

## Processing
For each student:
  * compute delta pre/post
  * assemble in data table: 
    * rows: vertex names
    * columns: students
    * values: betweenness delta
    

In [216]:
import pandas as pd
import itertools
# Produce a single dataframe for all students, recording the per-student
# betweenness-centrality delta (post-intervention minus pre-intervention).
# Rows are concepts, columns are students.
student_deltas = {}
for idx, (student, data) in enumerate(itertools.chain(rec2Ma.items(), rec2Mf.items())):
    # One DataFrame per student: rows are concepts, columns are graph timestamps.
    df = pd.DataFrame.from_dict(data)
    # Select the oldest column as pre-intervention and the most recent as post-intervention.
    # Column headers are timestamps, hence min/max will do.
    # NOTE(review): the headers are strings, so this assumes the timestamps
    # sort chronologically when compared as text -- confirm.
    cols = list(df)
    # Anonymize as "student X" instead of username to not leak student emails in public.
    student_deltas["student " + str(idx)] = df[max(cols)] - df[min(cols)]

# Build the frame in one go so that its row index is the union of all
# students' concepts. Assigning the columns one by one to an empty DataFrame
# would pin the index to the first student's concepts and silently drop every
# concept that only other students used.
delta = pd.DataFrame(student_deltas)

# A concept a student did not use is NaN in that student's column; mean()
# skips NaN by default.
delta['mean betweenness delta'] = delta.mean(axis=1)
delta.sort_values(by='mean betweenness delta', ascending=False, inplace=True)
delta


Unnamed: 0,student2 0,student2 1,student2 2,student2 3,student2 4,student2 5,student2 6,student2 7,student2 8,student2 9,student2 10,student2 11,mean betweenness delta
Sensor,0.333333,-0.041667,0.583333,0.125,0.0,0.0,0.166667,0.166667,-0.166667,0.0,0.0,0.541667,0.142361
Beschleunigung,-0.25,0.083333,0.166667,0.166667,0.833333,0.0,0.0,0.166667,0.0,0.083333,0.0,0.208333,0.121528
Gravitation,0.0,0.25,0.083333,-0.083333,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.166667,0.055556
Ausrichtung,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.048611
Erdanziehung,0.083333,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.041667
Motor,0.083333,0.166667,0.166667,0.041667,0.333333,-0.333333,0.0,0.0,-0.083333,0.0,0.0,0.083333,0.038194
Aktor,0.0,0.083333,0.0,0.0,0.041667,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.024306
Vektor,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.083333,0.0,0.0,0.0,0.0,0.020833
Roboter,0.25,0.0,0.083333,0.0,0.0,0.0,0.083333,0.083333,-0.166667,0.0,0.0,-0.083333,0.020833
Schwerelosigkeit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.006944


### Other Stats

Compute the mean and maximum betweenness per concept pre- and post-intervention, together with the mean delta.

In [218]:
import pandas as pd
import itertools
# Collect, per student, the pre-intervention betweenness, the
# post-intervention betweenness and their difference (post - pre).
pre_by_student = {}
post_by_student = {}
delta_by_student = {}
for student, data in itertools.chain(rec2Ma.items(), rec2Mf.items()):
    # Rows are concepts, columns are graph timestamps.
    df = pd.DataFrame.from_dict(data)
    # Oldest timestamp is pre-intervention, most recent is post-intervention.
    # NOTE(review): the headers are strings, so this assumes the timestamps
    # sort chronologically when compared as text -- confirm.
    cols = list(df)
    pre, post = df[min(cols)], df[max(cols)]
    pre_by_student[student] = pre
    post_by_student[student] = post
    delta_by_student[student] = post - pre

# Build each frame in one go so that its row index is the union of all
# students' concepts. Assigning the columns one by one to an empty DataFrame
# would pin the index to the first student's concepts and silently drop every
# concept that only other students used.
pre_df = pd.DataFrame(pre_by_student)
post_df = pd.DataFrame(post_by_student)
delta_df = pd.DataFrame(delta_by_student)

# Per-concept summary across students; mean()/max() skip NaN by default.
main = pd.DataFrame({
    'pre_mean': pre_df.mean(axis=1),
    'pre_max': pre_df.max(axis=1),
    'post_mean': post_df.mean(axis=1),
    'post_max': post_df.max(axis=1),
    'delta_mean': delta_df.mean(axis=1),
})
main.sort_values(by='delta_mean', inplace=True, ascending=False)
main


Unnamed: 0,pre_mean,pre_max,post_mean,post_max,delta_mean
Sensor,0.024306,0.208333,0.166667,0.583333,0.142361
Beschleunigung,0.097222,0.5,0.21875,0.833333,0.121528
Gravitation,0.048611,0.25,0.104167,0.25,0.055556
Ausrichtung,0.0,0.0,0.048611,0.5,0.048611
Erdanziehung,0.0,0.0,0.041667,0.333333,0.041667
Motor,0.038194,0.333333,0.076389,0.333333,0.038194
Aktor,0.0,0.0,0.024306,0.166667,0.024306
Vektor,0.0,0.0,0.020833,0.166667,0.020833
Roboter,0.034722,0.25,0.055556,0.25,0.020833
Schwerelosigkeit,0.0,0.0,0.006944,0.083333,0.006944
