In [1]:
# This notebook extracts vectorised representations of traditions
# and motifs from the .csv file; the results are later used
# by the query-processing engine.

In [1]:
import pandas as pd
import json

In [2]:
# Load the tab-separated source table; the "idn" column becomes the row index.
berezkin = pd.read_csv(
    "berezkin_new.csv",
    sep="\t",
    index_col="idn",
)

In [43]:
def grid_representation(data, mcode):
    """Creates a vectorised binary-grid representation for a motif.

    The globe is divided into 5x5-degree cells: 180/5 = 36 rows of
    latitude by 360/5 = 72 columns of longitude. The grid is flattened
    row by row into a single vector of 36 * 72 = 2592 integers.

    Args:
        data: table-like object (e.g. a pandas DataFrame) such that
            ``data[mcode]``, ``data['latit']`` and ``data['longit']``
            are equally long, row-aligned sequences.
        mcode: name of the motif column; a value of 0 in that column
            means the row is skipped.

    Returns:
        A list of 2592 ints. The entry at ``72 * row + col`` is 1 if at
        least one row with a non-zero motif value falls into that cell,
        otherwise 0. Row 0 is the northernmost band, column 0 the
        westernmost one.
    """
    cell_deg = 5
    n_rows = 180 // cell_deg   # latitude bands
    n_cols = 360 // cell_deg   # longitude bands
    vector = [0] * (n_rows * n_cols)
    # Walk the three columns in parallel by position, so the result does
    # not depend on how the rows of `data` are labelled.
    for val, latit, longit in zip(data[mcode], data['latit'], data['longit']):
        if val == 0:
            continue
        lat = 180 - (latit + 90)  # 0 at the north pole, 180 at the south pole
        lon = longit + 180        # 0 at the western edge, 360 at the eastern edge
        # The exact south pole (lat == 180) and the eastern edge
        # (lon == 360) would land one cell past the grid; fold them into
        # the last row / column instead.
        row = min(int(lat // cell_deg), n_rows - 1)
        col = min(int(lon // cell_deg), n_cols - 1)
        # The row stride must be the number of columns (72); a stride of
        # 36 makes different cells share the same slot.
        vector[n_cols * row + col] = 1
    return vector

In [7]:
# Column labels of the table, in file order (the index column is not included)
colnames = list(berezkin.columns)

In [7]:
# Build one grid vector per motif column (columns from position 11 onwards)
motif_vectors = {
    motif: grid_representation(berezkin, motif)
    for motif in colnames[11:]
}

In [30]:
# Persist the motif grid vectors as UTF-8 JSON, keeping non-ASCII text unescaped
with open('motif_vectors.json', mode='w', encoding='utf-8') as vectors_file:
    json.dump(motif_vectors, vectors_file, ensure_ascii=False)

In [63]:
# Extract distribution across traditions for mapping:
# for every motif column, its values cast to int, in row order
motif_distributions = {
    motif: list(map(int, berezkin[motif]))
    for motif in colnames[11:]
}
with open('motif_distributions.json', mode='w', encoding='utf-8') as distributions_file:
    json.dump(motif_distributions, distributions_file, ensure_ascii=False)

In [6]:
# Extract names with lats and lons
# The three columns are walked in parallel by position, so the pairing of
# a name with its coordinates does not depend on the labels of the index.
latlons = [
    {
        "Name": name,
        "Latitude": float(latit),
        "Longitude": float(longit),
    }
    for name, latit, longit in zip(
        berezkin['groups'], berezkin['latit'], berezkin['longit']
    )
]
with open('coords.json', 'w', encoding = 'utf-8') as out:
    json.dump(latlons, out)

In [8]:
# Extract motif list (every column label from position 11 onwards)
motifs = list(colnames[11:])
with open('motif_list.json', mode='w', encoding='utf-8') as motif_list_file:
    json.dump(motifs, motif_list_file)

In [68]:
# Extract binary representations for traditions:
# key = value in the first column of the row,
# value = the row's entries from column 11 onwards, cast to int
traditions = {
    berezkin.iloc[row, 0]: list(map(int, berezkin.iloc[row, 11:]))
    for row in range(len(berezkin))
}

In [70]:
# Persist the per-tradition vectors as UTF-8 JSON, keeping non-ASCII text unescaped
with open('traditions.json', mode='w', encoding='utf-8') as traditions_file:
    json.dump(traditions, traditions_file, ensure_ascii=False)