In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
import json

# Read Data

In [46]:
# Input tables for the two regions. The south-india file has 'longitude' and
# 'latitude' columns that are dropped as soon as it is loaded.
north_india = pd.read_csv('./north-india.csv')
south_india = pd.read_csv('./south-india.csv').drop(columns=['longitude', 'latitude'])

# Metadata table; get_canto_metadata looks rows up by its 'C-id' column.
metadata = pd.read_csv('./metadata.csv')

# Codebook consulted by get_display_code through keys of the form 'line_<n>'.
with open('output.json') as codebook_file:
    codebook = json.load(codebook_file)

## Draw from Google Sheets

In [47]:
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# OAuth scope requested from Google: read-only access to spreadsheets.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']

# Reference: https://developers.google.com/sheets/api/quickstart/python
def read_google_sheets(SPREADSHEET_ID, RANGE_NAME, HEADER_RANGE):
    """Fetch a data range and a header range from one Google spreadsheet.

    Credentials are loaded from ``token.pickle`` when that file exists. If they
    are missing or not valid they are refreshed (when expired and a refresh
    token is present) or obtained through the local-server OAuth flow using
    ``credentials.json``; the resulting credentials are then written back to
    ``token.pickle``.

    Args:
        SPREADSHEET_ID: Id of the spreadsheet to read.
        RANGE_NAME: Range holding the data rows.
        HEADER_RANGE: Range holding the header row.

    Returns:
        tuple: ``(values, header_values)`` taken from the ``'values'`` entry of
        each API response; an empty list stands in when the entry is absent.
    """
    token_path = 'token.pickle'

    credentials = None
    if os.path.exists(token_path):
        # NOTE: unpickling runs code embedded in the file, so only a
        # token.pickle written by this function should ever be loaded.
        with open(token_path, 'rb') as token_file:
            credentials = pickle.load(token_file)

    must_authorize = not credentials or not credentials.valid
    if must_authorize:
        refreshable = credentials and credentials.expired and credentials.refresh_token
        if refreshable:
            credentials.refresh(Request())
        else:
            oauth_flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            credentials = oauth_flow.run_local_server(port=0)
        # Cache whatever was just obtained for the next call.
        with open(token_path, 'wb') as token_file:
            pickle.dump(credentials, token_file)

    sheet = build('sheets', 'v4', credentials=credentials).spreadsheets()

    data_response = sheet.values().get(
        spreadsheetId=SPREADSHEET_ID, range=RANGE_NAME).execute()
    header_response = sheet.values().get(
        spreadsheetId=SPREADSHEET_ID, range=HEADER_RANGE).execute()

    return data_response.get('values', []), header_response.get('values', [])

SPREADSHEET_ID = '1AjynK9mMQTw58B_B8b_ZIip3fyUm-aoV7Pp21HziBb0'
RANGE_NAME = 'canto_codings!A2:AT'
HEADER_RANGE = 'canto_codings!A1:AT1'

data, header = read_google_sheets(SPREADSHEET_ID, RANGE_NAME, HEADER_RANGE)

# An empty header response would otherwise surface as a bare IndexError below.
if not header:
    raise ValueError('No header row returned for range ' + HEADER_RANGE)
header_row = header[0]

# The Sheets API leaves trailing empty cells out of each returned row, so a
# row can be shorter than the header. Pad every row to the header width;
# otherwise DataFrame construction fails when no row reaches the full width.
padded_rows = [row + [None] * (len(header_row) - len(row)) for row in data]

canto_codings = pd.DataFrame(padded_rows, columns=header_row)

## Utility Functions

In [48]:
def find_canto_features(canto_coding_id):
    """Return the first row of ``canto_codings`` with the given coding id.

    Args:
        canto_coding_id: Identifier to look up. It is compared by its ``str()``
            form against the ``canto_coding_id`` column.

    Returns:
        pandas.Series: the first matching row, or None when no row matches.
    """
    wanted = str(canto_coding_id)
    ids = canto_codings['canto_coding_id']
    # Compare as text on both sides so a numeric id column still matches;
    # missing cells are excluded so they can never match a textual 'None'/'nan'.
    is_match = (ids.notna() & (ids.astype(str) == wanted)).values
    if not is_match.any():
        return None
    # argmax on a boolean array gives the position of the first True.
    return canto_codings.iloc[is_match.argmax()]

In [None]:
def get_canto_metadata(canto_coding_id):
    """Return the first row of ``metadata`` whose ``C-id`` matches the id.

    Args:
        canto_coding_id: Identifier to look up. It is compared by its ``str()``
            form against the ``C-id`` column.

    Returns:
        pandas.Series: the first matching row, or None when no row matches.
    """
    wanted = str(canto_coding_id)
    ids = metadata['C-id']
    # ``metadata`` comes from read_csv, which yields an integer column when
    # every id is numeric; comparing as text on both sides matches either way.
    # Missing cells are excluded so they can never match 'None'/'nan'.
    is_match = (ids.notna() & (ids.astype(str) == wanted)).values
    if not is_match.any():
        return None
    # argmax on a boolean array gives the position of the first True.
    return metadata.iloc[is_match.argmax()]

In [50]:
def get_display_code(line, binary_code):
    """Look up the display code for one coding on one codebook line.

    Args:
        line: Line number; the codebook entry read is ``'line_' + str(line)``.
        binary_code: Coding value, compared by its ``str()`` form against each
            entry's ``'code'``.

    Returns:
        The ``'display_code'`` of the first entry whose ``'code'`` matches, or
        None when no entry matches.
    """
    entries = codebook['line_' + str(line)]
    wanted = str(binary_code)
    return next(
        (entry['display_code'] for entry in entries if entry['code'] == wanted),
        None,
    )

# Prepare Output Data Structure

In [81]:
# Four identifying columns followed by cv_1 through cv_37.
columns = ["canto_coding_id", "culture", "lat", "lng"] + [
    "cv_" + str(n) for n in range(1, 38)
]

# Empty frames carrying the output schema, one per region.
north_india_full = pd.DataFrame(columns=columns)
south_india_full = pd.DataFrame(columns=columns)

# Connect Everything

In [78]:
def main(input_matrix, output_matrix):
    """Build one output row per input row and add them after ``output_matrix``.

    For every row of ``input_matrix`` this reads ``canto_coding_id`` and
    ``place``, takes ``Local_lat``/``Local_long`` from ``get_canto_metadata``,
    fetches the ``cv_1``..``cv_37`` codings with ``find_canto_features`` and
    translates each one with ``get_display_code``.

    Args:
        input_matrix: DataFrame with ``canto_coding_id`` and ``place`` columns.
        output_matrix: DataFrame the new rows are placed after. It is not
            modified; it is expected to use the module-level ``columns`` layout.

    Returns:
        DataFrame holding the rows of ``output_matrix`` followed by the new
        rows, labelled 0..n-1. ``output_matrix`` itself is returned when
        ``input_matrix`` has no rows.

    Raises:
        TypeError: when a lookup helper returns None for an id, because the
            result is subscripted directly.
    """
    n_lines = 37  # cv_1..cv_37, one codebook line per coding column

    # Collect plain row lists and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and appending inside the loop re-copied the whole
    # frame for every row.
    records = []
    for _, row in input_matrix.iterrows():
        canto_coding_id = row['canto_coding_id']
        culture = row['place']
        meta = get_canto_metadata(int(canto_coding_id))
        lat = meta['Local_lat']
        lng = meta['Local_long']
        canto = find_canto_features(canto_coding_id)
        display_codes = [
            get_display_code(line, canto["cv_" + str(line)])
            for line in range(1, n_lines + 1)
        ]
        records.append([canto_coding_id, culture, lat, lng] + display_codes)

    if not records:
        return output_matrix

    new_rows = pd.DataFrame(records, columns=columns)
    if len(output_matrix) == 0:
        # Nothing to keep from an empty frame; skipping the concat also keeps
        # its all-object dtypes from influencing the result.
        return new_rows
    # ignore_index gives unique 0..n-1 labels instead of repeating labels.
    return pd.concat([output_matrix, new_rows], ignore_index=True)

In [86]:
# Start from a fresh empty frame so that re-running this cell rebuilds the
# table instead of adding the same rows to an already-filled north_india_full.
north_india_full = main(north_india, pd.DataFrame(columns=columns))

In [82]:
# Start from a fresh empty frame so that re-running this cell rebuilds the
# table instead of adding the same rows to an already-filled south_india_full.
south_india_full = main(south_india, pd.DataFrame(columns=columns))

In [87]:
# Write both regional tables to CSV without the row index.
for csv_path, region_frame in (
    ('north_india_full.csv', north_india_full),
    ('south_india_full.csv', south_india_full),
):
    region_frame.to_csv(csv_path, index=False)

In [85]:
# Bare expression as the cell's last line: the notebook renders the frame.
south_india_full

Unnamed: 0,canto_coding_id,culture,lat,lng,cv_1,cv_2,cv_3,cv_4,cv_5,cv_6,...,cv_28,cv_29,cv_30,cv_31,cv_32,cv_33,cv_34,cv_35,cv_36,cv_37
0,756,C Indian Folk,22.97,78.65,0.46,0.38,0.46,0.54,0.31,0.54,...,0.38,1.0,0.08,1.0,0.31,0.08,0.08,0.54,0.54,0.77
0,758,Andhra Pradesh,15.91,79.74,0.15,0.62,0.62,0.31,0.08,0.08,...,0.08,0.08,0.08,1.0,0.54,0.46,0.08,0.31,0.54,0.31
0,759,Madras,13.08,80.27,0.62,0.62,0.62,0.54,0.54,0.77,...,0.69,1.0,0.54,0.54,0.54,0.23,0.31,0.31,0.54,0.31
0,760,Madras,13.08,80.27,0.15,0.62,0.54,0.31,0.08,0.08,...,0.69,0.08,0.08,1.0,0.54,0.46,0.31,0.77,0.54,0.31
0,844,Gond,21.14,79.08,0.69,0.23,0.46,0.54,0.54,0.77,...,1.0,1.0,0.54,1.0,0.54,0.35,0.42,0.42,0.54,0.31
0,845,Toda,21.14,79.08,0.69,0.23,0.38,0.54,0.77,0.77,...,1.0,1.0,0.54,1.0,0.54,0.35,0.42,0.54,0.54,0.31
0,846,Gond,21.14,79.08,0.46,0.23,0.46,0.54,0.77,0.77,...,1.0,1.0,0.54,1.0,0.54,0.35,0.54,0.42,0.54,0.31
0,847,Gond,21.14,79.08,0.69,0.23,0.46,0.54,0.77,0.77,...,1.0,1.0,0.54,1.0,0.54,0.35,0.42,0.42,0.54,0.31
0,848,Gond,21.14,79.08,0.69,0.23,0.38,0.54,0.77,0.77,...,1.0,1.0,0.54,1.0,0.54,0.46,0.54,0.54,0.54,0.31
0,849,Gond,21.14,79.08,0.69,0.23,0.38,0.54,0.77,0.77,...,1.0,0.54,0.54,1.0,0.54,0.35,0.42,0.54,0.54,0.31
