# Ingest Data from Dropbox

In [1]:
import os
import re
from pathlib import Path

import pandas as pd
from dotenv import load_dotenv, find_dotenv
import dropbox

import lib.db as db

In [2]:
# Data directories, given relative to the notebook's working directory.
INTERIM_DATA = Path('..', 'data', 'interim')
PROCESSED_DATA = Path('..', 'data', 'processed')

# Read the nearest .env file into the process environment, then open the
# project database connection (presumably configured from that environment
# -- confirm in lib.db).
load_dotenv(find_dotenv())
CXN = db.connect()

In [3]:
# Credential for the Dropbox client, taken from the DROPBOX environment
# variable; this is None when the variable is not set.
DROPBOX = os.environ.get('DROPBOX')

# Dropbox client used by the download steps in this notebook.
dbx = dropbox.Dropbox(DROPBOX)

### Ingest Nitfix 1

In [4]:
file_name = 'nitfix01.csv'

# Fetch the reconciled CSV from Dropbox and save it under the interim data
# directory. The download call takes plain string paths.
dbx_path = 'id:zSBrtnqOfSAAAAAAAAAAKw/5657_Nit_Fix_I.reconcile.4.2.csv'
csv_path = str(INTERIM_DATA / file_name)
file_metadata = dbx.files_download_to_file(csv_path, dbx_path)

df = pd.read_csv(csv_path)


def _tidy_header(header):
    """Return *header* lowercased and reduced to a-z, 0-9 and underscores."""
    name = header.lower()
    # Spell out the coordinate symbols. Order matters: the doubled quote has
    # to be replaced before the single quote, or it would become "minmin".
    for symbol, word in (('⁰', 'deg'), ("''", 'sec'), ("'", 'min')):
        name = name.replace(symbol, word)
    # Collapse every run of other characters into a single underscore ...
    name = re.sub(r'[^a-z0-9_]+', '_', name)
    # ... and drop one underscore from each end, if present.
    return re.sub(r'^_|_$', '', name)


# Map each original header to its cleaned form; the QR-code column is then
# given the name 'sample_id' instead of its cleaned header.
columns = {header: _tidy_header(header) for header in df.columns}
columns['subject_qr_code'] = 'sample_id'

df = df.rename(columns=columns)

# Replace the 'reconciled' table with the renamed data.
# NOTE(review): to_sql keeps its default index handling, so the DataFrame
# index is written as well, while the CSV below leaves it out -- confirm
# that this difference is intended.
df.to_sql(name='reconciled', con=CXN, if_exists='replace')

# Also keep a copy of the renamed data in the processed data directory.
csv_path = PROCESSED_DATA / file_name
df.to_csv(csv_path, index=False)

df.head()

Unnamed: 0,subject_id,country,state_province,county,location,minimum_elevation,maximum_elevation,main_dropdown,latitude_deg,latitude_min,...,month_1,day_1,year_1,month_2,day_2,year_2,subject_image_name,subject_nybg_bar_code,subject_resolved_name,sample_id
0,16192935,United States of America,North Carolina,Wayne,"Town of Fremont, along NC Rt.222, east of Evan...",,,feet,35,32,...,9 - September,25,2011,,,,R0001220.JPG,NYBG 3196996,Senna obtusifolia,8e37959f-dfa6-44b6-a201-b94215340016
1,16192937,United States of America,Arizona,Maricopa,"Salt Rivr at 35th Avenue bridge in Phoenix, ju...",1022.0,,feet,33.411913,,...,1 - January,21,2012,,,,R0001205.JPG,NYBG 3196995,Senna artemisioides,90a9d5ee-a1c6-4dd3-b6b1-6932ea796abd
2,16192938,Gabon,Ogooué-Lolo,,"Makande surroundings, c. 65 km SSW of Booué. I...",,,,- 0,41 S,...,2 - February,11,1999,,,,R0001202.JPG,NYBG 3196994,Scorodophloeus zenkeri,90f68e06-c5cb-48dc-9de1-5c0512314486
3,16192939,"Tanzania, United Republic of",Tanga,,"Mkaramo Parish, Mkwaja Subchiefeom, Mwera Chie...",150.0,,feet,,,...,7 - July,10,1957,,,,R0001201.JPG,NYBG 3196992,Scorodophloeus fischeri,90fb8362-a4ed-407d-a8b1-32dc56506101
4,16192941,Congo (Democratic Republic of the),Kasaï-Central,,Babadi - Kasai,,,unknown,,,...,12 - December,Not Shown,1934,,,,R0001199.JPG,NYBG 3196991,Leonardoxa romii,911525c9-04f7-4213-8781-a9842216c2d8
