In [8]:
import csv
import pandas as pd
import json
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from collections import *
import math
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# Get Data from Google Sheets

In [2]:
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']

# Reference: https://developers.google.com/sheets/api/quickstart/python
def read_google_sheets(SPREADSHEET_ID, RANGE_NAME, HEADER_RANGE):
    """Fetch a data range and a header range from one Google spreadsheet.

    OAuth credentials are cached in ``token.pickle`` in the working
    directory. When no valid cached credentials are available they are
    either refreshed or obtained through a local-server OAuth flow driven by
    ``credentials.json``, and the result is written back to ``token.pickle``.

    Args:
        SPREADSHEET_ID: Passed to the Sheets API as ``spreadsheetId``.
        RANGE_NAME: Range (e.g. ``'sheet!A2:AT'``) holding the data rows.
        HEADER_RANGE: Range (e.g. ``'sheet!A1:AT1'``) holding the header row.

    Returns:
        A ``(values, header_values)`` tuple. Each element is the ``'values'``
        entry of the corresponding API response, or ``[]`` if that entry is
        missing.
    """
    token_path = 'token.pickle'

    # Reuse credentials cached by an earlier run, if there are any.
    # NOTE(review): unpickling runs arbitrary code from the file; only keep a
    # token.pickle that this notebook itself produced.
    credentials = None
    if os.path.exists(token_path):
        with open(token_path, 'rb') as cached:
            credentials = pickle.load(cached)

    if not (credentials and credentials.valid):
        refreshable = (
            credentials and credentials.expired and credentials.refresh_token
        )
        if refreshable:
            credentials.refresh(Request())
        else:
            # No usable credentials: run the interactive OAuth flow.
            oauth_flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            credentials = oauth_flow.run_local_server(port=0)
        # Cache whatever we ended up with for the next run.
        with open(token_path, 'wb') as cached:
            pickle.dump(credentials, cached)

    sheets_api = build('sheets', 'v4', credentials=credentials).spreadsheets()

    def fetch(cell_range):
        """Return the 'values' entry for one range, or [] when absent."""
        response = sheets_api.values().get(
            spreadsheetId=SPREADSHEET_ID, range=cell_range).execute()
        return response.get('values', [])

    # Data rows are requested first, then the header row.
    values = fetch(RANGE_NAME)
    header_values = fetch(HEADER_RANGE)

    return values, header_values

In [3]:
# Where the codings live: the spreadsheet ID plus the data and header ranges
# of its 'canto_codings' sheet.
SPREADSHEET_ID = '1AjynK9mMQTw58B_B8b_ZIip3fyUm-aoV7Pp21HziBb0'
HEADER_RANGE = 'canto_codings!A1:AT1'
RANGE_NAME = 'canto_codings!A2:AT'

data, header = read_google_sheets(
    SPREADSHEET_ID=SPREADSHEET_ID,
    RANGE_NAME=RANGE_NAME,
    HEADER_RANGE=HEADER_RANGE,
)

# `header` is a list of rows; its first (only requested) row supplies the
# column names for the data rows.
canto_data = pd.DataFrame(data=data, columns=header[0])

In [4]:
# Feature column names cv_1 ... cv_37.
features = ['cv_%d' % n for n in range(1, 38)]

# Distinct values of the 'Culture' column, in order of first appearance.
cultures = canto_data['Culture'].unique()

# Output layout: identifying columns first, then every feature column.
columns = ['culture', 'soc_id'] + features

In [5]:
# Build one modal profile per culture: for every feature column take the most
# frequent value among that culture's rows (the first mode when there are
# ties), and tag the profile with the culture name and its society id.
#
# The records are collected in a list and turned into a DataFrame once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# growing a frame row by row copies it on every iteration.
modal_records = []
for culture in cultures:
    # Filter once and reuse the slice for both the features and the id.
    culture_rows = canto_data[canto_data['Culture'] == culture]

    modal_profile = culture_rows[features].mode().iloc[0].to_dict()
    modal_profile['culture'] = culture
    # The society id is taken from the culture's first row.
    modal_profile['soc_id'] = culture_rows['C_cid'].iloc[0]
    modal_records.append(modal_profile)

# `columns=` fixes the column order (and keeps the columns when there are no
# records at all).
final_df = pd.DataFrame(modal_records, columns=columns)

In [6]:
# Write the modal profiles to disk (to_csv's default also writes the index).
modal_profiles_path = '././output/modal_profiles.csv'
final_df.to_csv(modal_profiles_path)

In [9]:
# Load the JSON written to ./output/output.json.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's default locale encoding.
# NOTE: this rebinds `data`, which earlier held the rows read from the
# spreadsheet.
with open('./output/output.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [39]:
# Every column of final_df whose name contains 'cv_'.
cv = [name for name in final_df.columns if 'cv_' in name]

# Print "<column> <value>" for each of those columns in the first row only.
for _, first_row in final_df.head(1).iterrows():
    for name in cv:
        print(name, first_row[name])

cv_1 8208
cv_2 2
cv_3 2
cv_4 8192
cv_5 1024
cv_6 1024
cv_7 2
cv_8 2
cv_9 2
cv_10 128
cv_11 64
cv_12 8192
cv_13 2
cv_14 2
cv_15 512
cv_16 32
cv_17 128
cv_18 512
cv_19 2
cv_20 1024
cv_21 8192
cv_22 8192
cv_23 8192
cv_24 512
cv_25 16
cv_26 512
cv_27 8192
cv_28 32
cv_29 128
cv_30 128
cv_31 8192
cv_32 1040
cv_33 256
cv_34 128
cv_35 8192
cv_36 8192
cv_37 1024


In [26]:
columns = list(final_df)

# For each cv_N column, look up the matching 'line_N' entry in `data` and
# print the 'code' of every item it holds.
for column_name in columns:
    if 'cv_' not in column_name:
        continue
    line = column_name.replace('cv_', 'line_')
    for entry in data[line]:
        print(entry['code'])

4
8
20
34
36
16
68
48
132
32
80
196
144
96
208
64
160
272
1028
192
288
128
1040
320
8196
576
256
8336
8464
768
1152
2112
8208
512
1280
1792
8224
2304
1536
8256
4864
1024
4352
2560
8320
8448
4608
3072
9728
2048
5120
12544
8704
12800
9216
6144
13312
4096
14336
10240
12288
8192
0
2
6
4
10
8
68
72
32
96
64
4100
256
512
8256
4352
8448
8704
4096
8192
0
2
6
4
36
66
16
68
48
132
32
80
144
64
160
192
128
256
512
2304
4352
1024
3072
8448
2048
5120
9216
4096
10240
12288
8192
0
2
18
16
130
144
128
8208
1152
1024
8320
9216
8192
0
2
18
16
130
144
128
1024
9216
8192
0
2
18
16
130
144
128
8194
1152
8208
1024
8320
9216
8192
0
2
18
16
130
144
128
1024
9216
8192
0
2
18
16
130
128
1024
8192
0
2
16
130
128
1024
8192
0
2
18
16
130
144
1026
128
8194
1040
8208
1152
1024
8320
9216
8192
0
2
8
66
72
96
64
2056
8200
512
8256
2560
2048
8704
10240
8192
0
2
10
8
34
40
32
136
160
128
8200
512
8224
8320
2048
8704
10240
8192
0
2
10
8
66
72
64
520
576
8200
512
2560
2048
8704
10240
8192
0
2
10
8
34
130
40
32
136
160
128


In [None]:
: