Skip to content

Commit

Permalink
Reading from google spreadsheet
Browse files Browse the repository at this point in the history
  • Loading branch information
pdonorio committed Apr 28, 2017
1 parent 2b47f92 commit 75aa41e
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 44 deletions.
3 changes: 2 additions & 1 deletion operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
#########################
# RETHINKDB 2 ELASTICSEARCH

r2e.make()
# r2e.make(skip_lexique=True)
r2e.make(only_xls=True)
# r2e.make(only_xls=True)

#########################
print("Conversion completed")
Expand Down
91 changes: 91 additions & 0 deletions operations/gxls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-

import gspread
from oauth2client.service_account import ServiceAccountCredentials
from restapi.resources.services.elastic import EL_INDEX3, EL_TYPE1
import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

LEXIQUE_TABLE = 'lexique'
ENCODING = 'utf-8'
keys = ['sheet', 'macro', 'micro', 'titre', 'latin', 'italien', 'français']


class GExReader(object):
    """Read the 'lexique' vocabulary from an online Google spreadsheet.

    Each data row of the first worksheet is mapped onto the module-level
    ``keys`` and stored in RethinkDB and/or Elasticsearch, depending on
    which backends were handed to the constructor.
    """

    # Spreadsheet opened when the caller does not name one.
    DEFAULT_SPREADSHEET = 'Voc typol_CH_Rd'
    # Service-account credentials used to authorize against Google.
    CREDENTIALS_FILE = './confs/endpoints/gxls_client.json'

    def __init__(self, filename=None, rethink=None, elastic=None):
        """Prepare the storage backends and open the spreadsheet.

        :param filename: title of the Google spreadsheet to open; defaults
            to ``DEFAULT_SPREADSHEET``. (Previously this argument was
            accepted but silently overwritten.)
        :param rethink: optional RethinkDB wrapper exposing ``get_query()``
            and ``get_table_query()``. When given, the lexique table is
            dropped and recreated with ``titre`` as primary key.
        :param elastic: optional Elasticsearch client used for indexing.
        """
        if filename is None:
            filename = self.DEFAULT_SPREADSHEET

        if rethink is not None:
            q = rethink.get_query()
            # Start from a clean table: drop it if it already exists
            if LEXIQUE_TABLE in q.table_list().run():
                q.table_drop(LEXIQUE_TABLE).run()
            q.table_create(LEXIQUE_TABLE, primary_key='titre').run()
            # Keep the table query around for the inserts in get_data()
            self._r = rethink.get_table_query(LEXIQUE_TABLE)
        else:
            self._r = None

        self._el = elastic

        # Use the service-account credentials to create a client that
        # talks to the Google spreadsheets feed
        scope = ['https://spreadsheets.google.com/feeds']
        creds = ServiceAccountCredentials \
            .from_json_keyfile_name(self.CREDENTIALS_FILE, scope)
        client = gspread.authorize(creds)
        self._xls = client.open(filename)

    @staticmethod
    def _row_to_term(row):
        """Map the cells of one row onto ``keys``.

        Missing trailing cells and ``None`` cells become empty strings, so
        a short row cannot raise and every key is always present.
        """
        cells = list(row) + [''] * (len(keys) - len(row))
        return {key: (cell or '').strip() for key, cell in zip(keys, cells)}

    def get_data(self):
        """Copy every data row of the first worksheet into the backends.

        Reading starts at row 2 (row 1 is presumably the header line) and
        stops at the first row whose mapped cells are all empty.

        :return: ``False`` if an empty row ended the import early,
            ``True`` if every row up to the end of the sheet was saved.
        """
        print("Getting data")
        sheet = self._xls.sheet1

        # Rows are 1-indexed, so the last row is number `row_count` itself
        for row_num in range(2, sheet.row_count + 1):

            print("ROW", row_num - 1)
            term = self._row_to_term(sheet.row_values(row_num))

            # A completely blank row marks the end of the data
            if not any(term.values()):
                return False

            ######################
            # SAVE

            # Save rethinkdb (only if a backend was configured)
            if self._r is not None:
                self._r.insert(term).run()
            # Update elastic specific index (only if a client was given)
            if self._el is not None:
                self._el.index(
                    index=EL_INDEX3, id=row_num, body=term,
                    doc_type=EL_TYPE1)

        return True
85 changes: 49 additions & 36 deletions operations/rethink2elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,13 +238,19 @@ def suggest_transcription(transcription, key, probability=0.5):
return True


def read_xls(fix_suggest=False):
# print("FIX SUGGEST", fix_suggest)
from .xls import ExReader
obj = ExReader(rethink=query, elastic=es)
if obj.check_empty():
raise BaseException("Failed to load 'Lexique'")
return obj.get_data()
def read_xls():
    """Load the 'lexique' vocabulary from the online Google spreadsheet.

    Builds a ``GExReader`` on the module-level RethinkDB ``query`` and
    Elasticsearch ``es`` handles and lets it copy the sheet rows into both
    backends. This replaces the earlier file-based ``ExReader`` import.

    :return: whatever ``GExReader.get_data()`` returns, passed through so
        callers can inspect the outcome as they could with the old reader.
    """
    # Imported here rather than at module level, as the original did
    from .gxls import GExReader
    obj = GExReader(rethink=query, elastic=es)
    return obj.get_data()


def single_update(doc):
Expand Down Expand Up @@ -523,47 +529,53 @@ def single_update(doc):
def make(only_xls=False, skip_lexique=False):

###################
q = query.get_table_query(RDB_TABLE1)
cursor = q.run()
# print("SOME", cursor)

# HTML STRIPPER
if es.indices.exists(index=EL_INDEX0):
es.indices.delete(index=EL_INDEX0)
es.indices.create(index=EL_INDEX0, body=HTML_ANALYZER)

# MULTI INDEX FILTERING
if es.indices.exists(index=EL_INDEX1):
es.indices.delete(index=EL_INDEX1)
es.indices.create(index=EL_INDEX1, body=INDEX_BODY1)
logger.info("Created index %s" % EL_INDEX1)

# SUGGESTIONS
if es.indices.exists(index=EL_INDEX2):
es.indices.delete(index=EL_INDEX2)
es.indices.create(index=EL_INDEX2, body=INDEX_BODY2)
logger.info("Created index %s" % EL_INDEX2)

# es.indices.put_mapping(
# index=EL_INDEX2, doc_type=EL_TYPE2, body=SUGGEST_MAPPINGS)
# print(es.indices.stats(index=EL_INDEX2))
# exit(1)

# print(es.indices.stats(index=EL_INDEX1))
# print(es.info())
if not only_xls:

q = query.get_table_query(RDB_TABLE1)
cursor = q.run()
# print("SOME", cursor)

# HTML STRIPPER
if es.indices.exists(index=EL_INDEX0):
es.indices.delete(index=EL_INDEX0)
es.indices.create(index=EL_INDEX0, body=HTML_ANALYZER)

# MULTI INDEX FILTERING
if es.indices.exists(index=EL_INDEX1):
es.indices.delete(index=EL_INDEX1)
es.indices.create(index=EL_INDEX1, body=INDEX_BODY1)
logger.info("Created index %s" % EL_INDEX1)

# SUGGESTIONS
if es.indices.exists(index=EL_INDEX2):
es.indices.delete(index=EL_INDEX2)
es.indices.create(index=EL_INDEX2, body=INDEX_BODY2)
logger.info("Created index %s" % EL_INDEX2)

# es.indices.put_mapping(
# index=EL_INDEX2, doc_type=EL_TYPE2, body=SUGGEST_MAPPINGS)
# print(es.indices.stats(index=EL_INDEX2))
# exit(1)

# print(es.indices.stats(index=EL_INDEX1))
# print(es.info())

##################
# LEXIQUE
if not skip_lexique:

if es.indices.exists(index=EL_INDEX3):
es.indices.delete(index=EL_INDEX3)
es.indices.create(index=EL_INDEX3, body={})
logger.info("Created index %s" % EL_INDEX3)

# READ FROM XLS FILE
read_xls(fix_suggest=(not only_xls))
read_xls()
# dictionary = read_xls(fix_suggest=(not only_xls))

if only_xls:
return False

###################
count = 0
for doc in cursor:
Expand All @@ -575,3 +587,4 @@ def make(only_xls=False, skip_lexique=False):
# print("TOTAL", es.search(index=EL_INDEX1))
print("Completed. No images:")
pp(noimages.keys())
return True
13 changes: 6 additions & 7 deletions operations/xls.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@


class ExReader(object):
"""
Reading spreadsheets from a file
"""
""" Reading spreadsheets from a file """

def __init__(self, filename=None, rethink=None, elastic=None):

if rethink is not None:
Expand Down Expand Up @@ -124,16 +123,16 @@ def save_data(self, ws, name):

total_data.append(data)

######################
# SAVE

# Save rethinkdb
self._r.insert(data).run()

# Update elastic suggest?

# Update elastic specific index
# save
self._el.index(
index=EL_INDEX3, id=counter, body=data, doc_type=EL_TYPE1)

######################
counter += 1
# exit(1)

Expand Down

0 comments on commit 75aa41e

Please sign in to comment.