-
Notifications
You must be signed in to change notification settings - Fork 5
/
gsheet_parser.py
52 lines (46 loc) · 1.94 KB
/
gsheet_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import pandas as pd
from mtd.parsers.utils import BaseParser
from mtd.exceptions import UnsupportedFiletypeError
from mtd.parsers.utils import ResourceManifest
from typing import Dict, List, Tuple, Union
import gspread
class Parser(BaseParser):
    '''
    Parse the records of a Google Sheets worksheet for MTD.

    The worksheet is selected with the manifest's ``location`` value, which
    is passed straight to ``get_worksheet``; when the manifest has no
    ``location`` the worksheet at index 0 is used. When the manifest's
    ``skipheader`` is truthy, the first record returned by
    ``get_all_records`` is dropped.

    TODO: test worksheet location.

    :param ResourceManifest manifest: Manifest for parser
    :param resource: already-opened gspread spreadsheet to read records from
    :raises UnsupportedFiletypeError: if the worksheet cannot be selected or
        its records cannot be read
    '''

    # The annotation is quoted so it is not evaluated when the class is
    # defined. NOTE(review): assumes ``Spreadsheet`` is exported at the top
    # level of the installed gspread version - confirm.
    def __init__(self, manifest: ResourceManifest, resource: 'gspread.Spreadsheet'):
        self.manifest = manifest
        try:
            # Worksheet selector from the manifest, defaulting to the first sheet.
            sheet_index = self.manifest['location'] if "location" in self.manifest else 0
            work_sheet = resource.get_worksheet(sheet_index)
            # Index of the first record to keep (1 drops the first record).
            if "skipheader" in self.manifest and self.manifest['skipheader']:
                first_record = 1
            else:
                first_record = 0
            self.resource = work_sheet.get_all_records()[first_record:]
        except Exception as err:
            # Keep the exception type callers expect, but chain the real cause
            # so it is not lost; KeyboardInterrupt/SystemExit now propagate.
            raise UnsupportedFiletypeError('Google Spreadsheet') from err
        self.entry_template = self.manifest['targets']

    def getCellValue(self, entry: Dict, col: str) -> str:
        '''
        Given a gspread record dict, return the value stored under the header ``col``.

        :param entry: one record, keyed by column header
        :param col: header to look up
        :return: the stored value unchanged, or ``''`` when the record has no such header
        '''
        return entry.get(col, '')

    def resolve_targets(self) -> List[dict]:
        '''
        Fill the manifest's ``targets`` template once per record.

        Each record in ``self.resource`` is passed to ``fill_entry_template``
        (not defined in this class; presumably inherited from ``BaseParser``)
        together with ``getCellValue`` as the lookup function.

        :return: one filled entry per record, in record order
        '''
        return [
            self.fill_entry_template(self.entry_template, entry, self.getCellValue)
            for entry in self.resource
        ]

    def parse(self) -> Dict[str, Union[dict, pd.DataFrame]]:
        '''
        Resolve all records and wrap them in a DataFrame.

        :return: ``{"manifest": <manifest>, "data": <DataFrame of entries>}``;
            if resolving or building the frame raises, the exception is
            printed and ``None`` is returned instead
        '''
        try:
            data = self.resolve_targets()
            return {"manifest": self.manifest, "data": pd.DataFrame(data)}
        except Exception as e:
            # Best-effort by design: report the problem and hand back None
            # rather than propagating to the caller.
            print(e)
            return None