# From spreadsheet to Kobo — Part 2 (maybe 3?)
---

### Import whatever you need 👇

In [1]:
import io
import requests
import uuid
from datetime import datetime
from random import choice, randint, sample
from time import sleep
from xml.etree import ElementTree as ET

import pandas as pd
import pytz

### Set up some helpful constants that we'll use below 🤘

In [46]:
# You can find this by navigating to https://kc.kobotoolbox.org/token
TOKEN = '<TOKEN>'

In [3]:
# Base URLs of the two KoBoToolbox services this notebook talks to.
KC_URL = 'https://kc.kobotoolbox.org'
KF_URL = 'https://kf.kobotoolbox.org'

# UID of the source asset: its data is fetched as XML and it is the asset
# that gets cloned further down.
ASSET_UID = 'aN7gj3BsHV9pNPhnDXrKE3'

# Endpoints derived from the base URLs above.
ASSETS_URL = f'{KF_URL}/api/v2/assets/'
DATA_URL = f'{ASSETS_URL}{ASSET_UID}/data'
# Template filled in by `_get_deployment_url()` with (ASSETS_URL, asset uid).
DEPLOYMENT_URL = '{}{}/deployment/'
XML_URL = f'{DATA_URL}.xml'
# NOTE: the name is misspelled ("SUMISSION"). `submit_data()` refers to it by
# this exact name, so any rename has to change both places together.
SUMISSION_URL = f'{KC_URL}/api/v1/submissions'
FORMS_URL = f'{KC_URL}/api/v1/forms'

# HEADERS carries the API token defined above; PARAMS asks for JSON responses.
HEADERS = {
    'Authorization': f'Token {TOKEN}'
}
PARAMS = {
    'format': 'json'
}

# Spreadsheet export read with pandas, and the name shared by the repeat
# group's sheet in that export and its key in the rebuilt submissions.
FILENAME = 'repeat_groups_export.xlsx'
REPEAT_GROUP_NAME = 'group_gp3qf47'

def _get_deployment_url(cloned_asset_uid):
    """Build the deployment endpoint URL for the asset with the given uid."""
    url_parts = (ASSETS_URL, cloned_asset_uid)
    return DEPLOYMENT_URL.format(*url_parts)

In [4]:
subs = pd.read_excel(FILENAME)
subs.head()

Unnamed: 0,start,end,What_is_your_name,_id,_uuid,_submission_time,_validation_status,_notes,_status,_submitted_by,_tags,_index
0,2021-03-29T12:41:30.439-07:00,2021-03-29T12:51:46.944-07:00,David,90550160,c7c8433c-822e-48cd-9159-aa1e29b3ae45,2021-03-29T19:51:54,,[],submitted_via_web,,,1
1,2021-03-29T12:41:09.540-07:00,2021-03-29T12:41:30.426-07:00,Naomi,90549293,cfa60288-a8cc-4284-896d-daa1627c3b75,2021-03-29T19:41:37,,[],submitted_via_web,,,2
2,2021-03-29T12:40:41.303-07:00,2021-03-29T12:41:09.523-07:00,Josh,90549280,cef09794-91b2-488e-87cc-76ef53057332,2021-03-29T19:41:11,,[],submitted_via_web,,,3


In [5]:
subs.columns.to_list()

['start',
 'end',
 'What_is_your_name',
 '_id',
 '_uuid',
 '_submission_time',
 '_validation_status',
 '_notes',
 '_status',
 '_submitted_by',
 '_tags',
 '_index']

In [6]:
subs_cols = ['What_is_your_name', '_uuid']
subs_filtered = subs[subs_cols]
subs_filtered.head()

Unnamed: 0,What_is_your_name,_uuid
0,David,c7c8433c-822e-48cd-9159-aa1e29b3ae45
1,Naomi,cfa60288-a8cc-4284-896d-daa1627c3b75
2,Josh,cef09794-91b2-488e-87cc-76ef53057332


In [7]:
repeat_subs = pd.read_excel(FILENAME, sheet_name=REPEAT_GROUP_NAME)
repeat_subs.head()

Unnamed: 0,A_pizza_place_you_like,Your_favourite_toppings_there,Your_favourite_toppings_there/cheese,Your_favourite_toppings_there/pepperoni,Your_favourite_toppings_there/avo,_index,_parent_table_name,_parent_index,_submission__id,_submission__uuid,_submission__submission_time,_submission__validation_status,_submission__notes,_submission__status,_submission__submitted_by,_submission__tags
0,Place 1,cheese pepperoni,1,1,0,1,Repeat groups,1,90550160,c7c8433c-822e-48cd-9159-aa1e29b3ae45,2021-03-29T19:51:54,,[],submitted_via_web,,[]
1,Place 2,cheese avo,1,0,1,2,Repeat groups,1,90550160,c7c8433c-822e-48cd-9159-aa1e29b3ae45,2021-03-29T19:51:54,,[],submitted_via_web,,[]
2,Place 1,cheese,1,0,0,3,Repeat groups,2,90549293,cfa60288-a8cc-4284-896d-daa1627c3b75,2021-03-29T19:41:37,,[],submitted_via_web,,[]
3,Place 1,cheese pepperoni,1,1,0,4,Repeat groups,3,90549280,cef09794-91b2-488e-87cc-76ef53057332,2021-03-29T19:41:11,,[],submitted_via_web,,[]
4,Place 2,pepperoni avo,0,1,1,5,Repeat groups,3,90549280,cef09794-91b2-488e-87cc-76ef53057332,2021-03-29T19:41:11,,[],submitted_via_web,,[]


# Data cleaning

In [8]:
repeat_subs_cols = ['A_pizza_place_you_like', 'Your_favourite_toppings_there', '_submission__uuid']
repeat_subs_filtered = repeat_subs[repeat_subs_cols]
repeat_subs_filtered.head()

Unnamed: 0,A_pizza_place_you_like,Your_favourite_toppings_there,_submission__uuid
0,Place 1,cheese pepperoni,c7c8433c-822e-48cd-9159-aa1e29b3ae45
1,Place 2,cheese avo,c7c8433c-822e-48cd-9159-aa1e29b3ae45
2,Place 1,cheese,cfa60288-a8cc-4284-896d-daa1627c3b75
3,Place 1,cheese pepperoni,cef09794-91b2-488e-87cc-76ef53057332
4,Place 2,pepperoni avo,cef09794-91b2-488e-87cc-76ef53057332


In [9]:
USER = 'Josh'
user_uid = subs_filtered[subs_filtered['What_is_your_name'] == USER]['_uuid'].values[0]
user_uid

'cef09794-91b2-488e-87cc-76ef53057332'

In [10]:
repeat_subs_for_user = repeat_subs_filtered[repeat_subs_filtered['_submission__uuid'] == user_uid]
repeat_subs_for_user

Unnamed: 0,A_pizza_place_you_like,Your_favourite_toppings_there,_submission__uuid
3,Place 1,cheese pepperoni,cef09794-91b2-488e-87cc-76ef53057332
4,Place 2,pepperoni avo,cef09794-91b2-488e-87cc-76ef53057332
5,Place 3,cheese pepperoni avo,cef09794-91b2-488e-87cc-76ef53057332


In [11]:
def remove_choice(string_choices, choice):
    """
    Drop every occurrence of `choice` from a whitespace-separated string of
    choices and return the remaining ones joined by single spaces.
    """
    remaining = filter(lambda token: token != choice, string_choices.split())
    return ' '.join(remaining)

In [12]:
def map_place_names(place):
    """
    Translate a placeholder place name into its real name.

    Names without a known replacement are returned unchanged.
    """
    real_names = {
        'Place 1': 'Limoncello',
        'Place 2': 'The Backyard',
        'Place 3': 'Pizza Hut',
    }
    # Falling back to the input itself keeps unknown names as they are
    return real_names.get(place, place)

In [13]:
uuids_for_updating = [
    'cef09794-91b2-488e-87cc-76ef53057332', 
    'c7c8433c-822e-48cd-9159-aa1e29b3ae45'
]

In [14]:
# Rows yielded by `iterrows()` may be copies of the underlying data, so
# assigning to them is not guaranteed to change the frame. Update the columns
# directly instead. The explicit copy detaches the frame from the selection it
# was taken from, so the assignments below are unambiguous.
repeat_subs_filtered = repeat_subs_filtered.copy()

# Strip 'pepperoni' only from the submissions selected for updating
needs_update = repeat_subs_filtered['_submission__uuid'].isin(uuids_for_updating)
repeat_subs_filtered.loc[needs_update, 'Your_favourite_toppings_there'] = (
    repeat_subs_filtered.loc[needs_update, 'Your_favourite_toppings_there']
    .apply(remove_choice, args=('pepperoni',))
)

# Every row gets its placeholder place name replaced
repeat_subs_filtered['A_pizza_place_you_like'] = (
    repeat_subs_filtered['A_pizza_place_you_like'].map(map_place_names)
)

In [15]:
repeat_subs_filtered

Unnamed: 0,A_pizza_place_you_like,Your_favourite_toppings_there,_submission__uuid
0,Limoncello,cheese,c7c8433c-822e-48cd-9159-aa1e29b3ae45
1,The Backyard,cheese avo,c7c8433c-822e-48cd-9159-aa1e29b3ae45
2,Limoncello,cheese,cfa60288-a8cc-4284-896d-daa1627c3b75
3,Limoncello,cheese,cef09794-91b2-488e-87cc-76ef53057332
4,The Backyard,avo,cef09794-91b2-488e-87cc-76ef53057332
5,Pizza Hut,cheese avo,cef09794-91b2-488e-87cc-76ef53057332


In [16]:
# Rebuild one dict per submission, nesting its repeat-group rows under
# REPEAT_GROUP_NAME as a list of dicts.
updated_subs = []
for person, sub_uuid in zip(subs_filtered['What_is_your_name'], subs_filtered['_uuid']):
    # Repeat rows are linked to their parent submission through its uuid
    belongs_to_sub = repeat_subs_filtered['_submission__uuid'] == sub_uuid
    matching_repeats = repeat_subs_filtered[belongs_to_sub]

    repeat_group = [
        {
            'A_pizza_place_you_like': place,
            'Your_favourite_toppings_there': toppings
        }
        for place, toppings in zip(
            matching_repeats['A_pizza_place_you_like'],
            matching_repeats['Your_favourite_toppings_there'],
        )
    ]

    updated_subs.append({
        'What_is_your_name': person,
        '_uuid': sub_uuid,
        REPEAT_GROUP_NAME: repeat_group
    })

In [17]:
updated_subs

[{'What_is_your_name': 'David',
  '_uuid': 'c7c8433c-822e-48cd-9159-aa1e29b3ae45',
  'group_gp3qf47': [{'A_pizza_place_you_like': 'Limoncello',
    'Your_favourite_toppings_there': 'cheese'},
   {'A_pizza_place_you_like': 'The Backyard',
    'Your_favourite_toppings_there': 'cheese avo'}]},
 {'What_is_your_name': 'Naomi',
  '_uuid': 'cfa60288-a8cc-4284-896d-daa1627c3b75',
  'group_gp3qf47': [{'A_pizza_place_you_like': 'Limoncello',
    'Your_favourite_toppings_there': 'cheese'}]},
 {'What_is_your_name': 'Josh',
  '_uuid': 'cef09794-91b2-488e-87cc-76ef53057332',
  'group_gp3qf47': [{'A_pizza_place_you_like': 'Limoncello',
    'Your_favourite_toppings_there': 'cheese'},
   {'A_pizza_place_you_like': 'The Backyard',
    'Your_favourite_toppings_there': 'avo'},
   {'A_pizza_place_you_like': 'Pizza Hut',
    'Your_favourite_toppings_there': 'cheese avo'}]}]

# Get submission XML

In [18]:
res = requests.get(url=XML_URL, headers=HEADERS, params=PARAMS)

In [19]:
res.status_code

200

In [20]:
parsed_xml = ET.fromstring(res.text)
e = parsed_xml.find(f'results/{ASSET_UID}')
print(ET.tostring(e).decode())

<aN7gj3BsHV9pNPhnDXrKE3 id="aN7gj3BsHV9pNPhnDXrKE3" version="1 (2021-03-29 19:40:28)">
          <formhub>
            <uuid>0b21fc06052142e4bd7594e20a1e39fb</uuid>
          </formhub>
          <start>2021-03-29T12:40:41.303-07:00</start>
          <end>2021-03-29T12:41:09.523-07:00</end>
          <What_is_your_name>Josh</What_is_your_name>
          <group_gp3qf47>
            <A_pizza_place_you_like>Place 1</A_pizza_place_you_like>
            <Your_favourite_toppings_there>cheese pepperoni</Your_favourite_toppings_there>
          </group_gp3qf47><group_gp3qf47>
            <A_pizza_place_you_like>Place 2</A_pizza_place_you_like>
            <Your_favourite_toppings_there>pepperoni avo</Your_favourite_toppings_there>
          </group_gp3qf47><group_gp3qf47>
            <A_pizza_place_you_like>Place 3</A_pizza_place_you_like>
            <Your_favourite_toppings_there>cheese pepperoni avo</Your_favourite_toppings_there>
          </group_gp3qf47>
          <__version__>vuYnF2nxow

In [21]:
e.findall('group_gp3qf47')

[<Element 'group_gp3qf47' at 0x7fc0e9562db0>,
 <Element 'group_gp3qf47' at 0x7fc0e9562f90>,
 <Element 'group_gp3qf47' at 0x7fc0e95150e0>]

In [22]:
all_xml_subs = parsed_xml.findall(f'results/{ASSET_UID}')
all_xml_subs

[<Element 'aN7gj3BsHV9pNPhnDXrKE3' at 0x7fc0e9562ae0>,
 <Element 'aN7gj3BsHV9pNPhnDXrKE3' at 0x7fc0e9515400>,
 <Element 'aN7gj3BsHV9pNPhnDXrKE3' at 0x7fc0e9515950>]

In [23]:
def get_xml_for_submission_uid(submission_uid):
    """
    Return the first element of `all_xml_subs` whose `meta/instanceID` text
    is `uuid:<submission_uid>`.

    Raises IndexError when no submission matches.
    """
    wanted_instance_id = f'uuid:{submission_uid}'
    matches = []
    for candidate in all_xml_subs:
        if candidate.find('meta/instanceID').text == wanted_instance_id:
            matches.append(candidate)
    return matches[0]

In [24]:
tmp_el = get_xml_for_submission_uid('cef09794-91b2-488e-87cc-76ef53057332')
tmp_el

<Element 'aN7gj3BsHV9pNPhnDXrKE3' at 0x7fc0e9562ae0>

In [25]:
tmp_el.attrib

{'id': 'aN7gj3BsHV9pNPhnDXrKE3', 'version': '1 (2021-03-29 19:40:28)'}

In [26]:
print(ET.tostring(tmp_el).decode())

<aN7gj3BsHV9pNPhnDXrKE3 id="aN7gj3BsHV9pNPhnDXrKE3" version="1 (2021-03-29 19:40:28)">
          <formhub>
            <uuid>0b21fc06052142e4bd7594e20a1e39fb</uuid>
          </formhub>
          <start>2021-03-29T12:40:41.303-07:00</start>
          <end>2021-03-29T12:41:09.523-07:00</end>
          <What_is_your_name>Josh</What_is_your_name>
          <group_gp3qf47>
            <A_pizza_place_you_like>Place 1</A_pizza_place_you_like>
            <Your_favourite_toppings_there>cheese pepperoni</Your_favourite_toppings_there>
          </group_gp3qf47><group_gp3qf47>
            <A_pizza_place_you_like>Place 2</A_pizza_place_you_like>
            <Your_favourite_toppings_there>pepperoni avo</Your_favourite_toppings_there>
          </group_gp3qf47><group_gp3qf47>
            <A_pizza_place_you_like>Place 3</A_pizza_place_you_like>
            <Your_favourite_toppings_there>cheese pepperoni avo</Your_favourite_toppings_there>
          </group_gp3qf47>
          <__version__>vuYnF2nxow

# Clone form

In [27]:
data = {
    'clone_from': ASSET_UID,
    'name': f'Test clone from Jupyter {uuid.uuid4()}'
}
res = requests.post(url=ASSETS_URL, headers=HEADERS, params=PARAMS, data=data)

In [28]:
res.status_code

201

In [29]:
cloned_survey = res.json()

In [30]:
cloned_asset_uid = cloned_survey['uid']
cloned_asset_uid

'aCBWsvNKN85P4q38276zC8'

# Deploy new form

In [31]:
data = {
    'active': 'true'
}
res = requests.post(url=_get_deployment_url(cloned_asset_uid), headers=HEADERS, params=PARAMS, data=data)

In [32]:
res.status_code

200

# Get new formid

In [33]:
res = requests.get(url=FORMS_URL, headers=HEADERS, params=PARAMS)

In [34]:
res.status_code

200

In [35]:
all_forms = res.json()

In [36]:
latest_form = [f for f in all_forms if f['id_string'] == cloned_asset_uid][0]
latest_form

{'url': 'https://kc.kobotoolbox.org/api/v1/forms/646017?format=json',
 'formid': 646017,
 'metadata': [],
 'owner': 'joshuaberetta',
 'public': False,
 'public_data': False,
 'require_auth': False,
 'tags': [],
 'title': 'Test clone from Jupyter 35886dc5-45c9-46e1-b448-537b5e02de2e',
 'users': [{'user': 'joshuaberetta',
   'permissions': ['add_datadictionary',
    'add_xform',
    'change_datadictionary',
    'change_xform',
    'delete_data_xform',
    'delete_datadictionary',
    'delete_xform',
    'move_xform',
    'report_xform',
    'transfer_xform',
    'validate_xform',
    'view_xform']}],
 'hash': 'md5:321a1149d087f94db24790a6e4b2798a',
 'has_kpi_hooks': False,
 'description': 'Test clone from Jupyter 35886dc5-45c9-46e1-b448-537b5e02de2e',
 'downloadable': True,
 'allows_sms': False,
 'encrypted': False,
 'sms_id_string': 'aCBWsvNKN85P4q38276zC8',
 'id_string': 'aCBWsvNKN85P4q38276zC8',
 'date_created': '2021-03-29T23:54:10.235417Z',
 'date_modified': '2021-03-29T23:54:10.378

In [37]:
form_id = latest_form['formid']
form_id

646017

# Get deployment info

In [38]:
res = requests.get(f'{ASSETS_URL}{cloned_asset_uid}', headers=HEADERS, params=PARAMS)

In [39]:
res.status_code

200

In [40]:
deployed_versions = res.json()['deployed_versions']

In [41]:
count = deployed_versions['count']
count

1

In [42]:
date_deployed = deployed_versions['results'][0]['date_deployed']
date_deployed

'2021-03-29T23:54:07.528773Z'

In [43]:
def format_date_string(date_str):
    """
    Turn an ISO-style timestamp such as "2021-03-29T23:54:07.528773Z" into
    "2021-03-29 23:54:07" by dropping the "T" separator and everything from
    the first "." of the time part onwards.

    This is the timestamp portion of the eventual goal format,
    "1 (2021-03-29 19:40:28)"; the leading count and the parentheses are not
    added here.
    """
    day_part, clock_part = date_str.split('T')
    whole_seconds = clock_part.split('.')[0]
    return ' '.join((day_part, whole_seconds))

In [44]:
format_date_string(date_deployed)

'2021-03-29 23:54:07'

# 🚧 WIP 🚧

In [45]:
def submit_data(xml_sub: bytes, _uuid: str) -> int:
    """
    Send the XML to kobo!

    The serialized submission is uploaded as the `xml_submission_file` part of
    a multipart POST to the submissions endpoint, using `_uuid` as the file
    name of that part.

    Returns the HTTP status code of the response.
    """
    files = {'xml_submission_file': (_uuid, io.BytesIO(xml_sub))}
    request = requests.Request(
        method='POST', url=SUMISSION_URL, files=files, headers=HEADERS
    )
    # Use the session as a context manager so its pooled connections are
    # released once the upload is done instead of lingering per submission
    with requests.Session() as session:
        response = session.send(request.prepare())
    return response.status_code

def format_openrosa_datetime() -> str:
    """
    Return the current UTC time as an ISO 8601 string with a "T" separator
    and millisecond precision, the datetime formatting used for submissions
    """
    current_utc = datetime.now(tz=pytz.UTC)
    return current_utc.isoformat(sep='T', timespec='milliseconds')

def update_element_value(e: ET.Element, name: str, value: str) -> None:
    """
    Get or create a node and give it a value

    `name` may be a plain tag or a slash-separated path such as
    "meta/instanceID". When the node does not exist yet, every missing
    segment of the path is created in turn, so a path never ends up as a
    single element whose tag contains a slash.
    """
    el = e.find(name)
    if el is None:
        el = e
        for tag in name.split('/'):
            child = el.find(tag)
            if child is None:
                child = ET.SubElement(el, tag)
            el = child
    el.text = value

def _sync_repeat_group(parent: ET.Element, group_name: str, items: list) -> None:
    """
    Make the `group_name` children of `parent` mirror `items`, one element per dict
    """
    existing = parent.findall(group_name)
    children = list(parent)
    # New repeats go right after the last existing one so the group stays together
    insert_at = children.index(existing[-1]) + 1 if existing else len(children)

    for index, item in enumerate(items):
        if index < len(existing):
            group_el = existing[index]
        else:
            group_el = ET.Element(group_name)
            parent.insert(insert_at, group_el)
            insert_at += 1
        for question, answer in item.items():
            update_element_value(group_el, question, answer)

    # Drop repeats that no longer have a counterpart in `items`
    for leftover in existing[len(items):]:
        parent.remove(leftover)


def create_submissions(subs: list, form_id: int, asset_uid: str) -> list:
    """
    Take a bunch of submissions and send them off

    Each entry of `subs` is a dict with a `_uuid` key identifying the original
    submission, plain question values, and optionally lists of dicts holding
    the rows of a repeat group. The original submission XML is looked up by
    `_uuid`, copied, updated with the new values and submitted under a fresh
    instanceID.

    Returns the list of HTTP status codes, one per submission.

    NOTE(review): `form_id` and `asset_uid` are accepted but not used yet;
    presumably the XML still needs retargeting before it can be sent to a
    different (e.g. cloned) form -- confirm.
    """
    all_subs = []
    for sub in subs:
        # Work on a copy: the cached element must keep its original
        # instanceID, otherwise the lookup fails the next time this runs
        original_xml = get_xml_for_submission_uid(sub['_uuid'])
        parsed_xml = ET.fromstring(ET.tostring(original_xml))

        _now = format_openrosa_datetime()
        _uuid = str(uuid.uuid4())

        for k, v in sub.items():
            if k == '_uuid':
                # Only used to find the original submission, not a question
                continue
            if isinstance(v, list):
                _sync_repeat_group(parsed_xml, k, v)
            else:
                update_element_value(parsed_xml, k, v)

        # We have to update the instanceID, otherwise there'll be issues
        update_element_value(parsed_xml, 'meta/instanceID', f'uuid:{_uuid}')

        # Updating the `start` and `end` times is not really necessary, but
        # probably something you'd want to do
        update_element_value(parsed_xml, 'start', _now)
        update_element_value(parsed_xml, 'end', _now)

        all_subs.append(submit_data(ET.tostring(parsed_xml), _uuid))

        # If you are submitting a large amount of data, please be mindful that it can
        # overwhelm the servers if sent in a short span of time. Letting it sleep
        # for a short stint between each upload will be much appreciated
        sleep(0.2)

    return all_subs