In [1]:
import json
import os
from datetime import date
from datetime import datetime
from datetime import timedelta

import gspread
import gspread_pandas as gp
import pandas as pd
import requests
from oauth2client.service_account import ServiceAccountCredentials

import config as cfg

In [2]:
class DataProcessing:
    """Download YouScan mentions and reshape them into a report-ready DataFrame."""

    # (cfg.regions_dict key, macro-region label) pairs, checked in this order;
    # the first group containing the region wins.
    _REGION_LABELS = (
        ('volga', 'Волга'),
        ('volga_sever', 'Волга-Север'),
        ('moskva', 'Москва'),
        ('severniy_kavkaz', 'Северный Кавказ'),
        ('severo_zapad', 'Северо-Запад'),
        ('sibir', 'Сибирь'),
        ('ural', 'Урал'),
        ('centr', 'Центр'),
        ('yug', 'Юг'),
        ('yuzhniy_ural', 'Южный Урал'),
    )

    @staticmethod
    def _page_mentions(response):
        """Return the 'mentions' list of one API response, or [] if there is none.

        A non-OK status, a body that is not JSON, or a payload without a
        non-empty 'mentions' list all count as "no more data for this day".
        """
        if not response.ok:
            return []
        try:
            payload = json.loads(response.text)
        except ValueError:
            return []
        if not isinstance(payload, dict):
            return []
        return payload.get('mentions') or []

    @staticmethod
    def _published_day(stamp):
        """Turn a 'YYYY-MM-DDTHH:MM:SS[.fff][+offset]' string into 'YYYY-MM-DD'.

        Everything after '+' and the fractional seconds are dropped, then three
        hours are added before taking the date.
        NOTE(review): presumably a UTC -> Moscow time shift; confirm the API
        always returns UTC, since any offset in the string is ignored.
        """
        naive = stamp.split('+')[0].split('.')[0]
        shifted = datetime.strptime(naive, "%Y-%m-%dT%H:%M:%S") + timedelta(hours=3)
        return shifted.strftime("%Y-%m-%d")

    @staticmethod
    def parse_data(topic_id, apikey, start_date, tags="", exclude_tags="", delta=2, size=1000):
        """Fetch all mentions of a topic, day by day, page by page.

        Args:
            topic_id: Topic id placed in the request path.
            apikey: API key sent as the ``apiKey`` query parameter.
            start_date: First day to fetch, as an ISO ``YYYY-MM-DD`` string.
            tags: Raw query-string fragment appended verbatim
                (e.g. ``'&tags=...'``).
            exclude_tags: Raw query-string fragment appended verbatim
                (e.g. ``'&excludeTags=...'``).
            delta: Number of consecutive days to fetch, starting at start_date.
            size: Page size requested from the API; also the paging step.

        Returns:
            A list with the mention dicts of all fetched days, in request order.

        Progress (current day, running total, and the body of the response
        that ended each day's paging) is printed to stdout.
        """
        current_day = date.fromisoformat(start_date)
        collected = []
        for _ in range(delta):
            skip_size = 0
            print(current_day)
            day = current_day.isoformat()
            while True:
                out = requests.get(
                    f'https://api.youscan.io/api/external/topics/{topic_id}'
                    f'/mentions?apiKey={apikey}'
                    f'&from={day}'
                    f'&to={day}'
                    f'&processed=true'
                    f'{tags}'
                    f'{exclude_tags}'
                    f'&skip={skip_size}'
                    f'&size={size}')

                # Stop on an empty/unusable page rather than on the body length,
                # so small but valid pages are not discarded.
                mentions = DataProcessing._page_mentions(out)
                if not mentions:
                    print(out.text)
                    break
                collected += mentions
                # Advance by the requested page size, not a fixed 1000.
                skip_size += size
                print(len(collected))
            current_day += timedelta(days=1)
        return collected

    @staticmethod
    def region_labels(x):
        """Map a region name to its macro-region label.

        Looks x up in the groups of ``cfg.regions_dict`` in a fixed order and
        returns the label of the first group that contains it, or None when no
        group matches.
        """
        for group_key, label in DataProcessing._REGION_LABELS:
            if x in cfg.regions_dict[group_key]:
                return label
        return None

    def dataframe_processing(self, dataframe, hr=False):
        """Clean a mentions frame and expand its tags into separate columns.

        Args:
            dataframe: Frame with a ``tags`` column (lists of strings) and a
                ``published`` column (timestamp strings); ``region`` is also
                required when hr is True. The input frame is not modified.
            hr: When True, add a 'Macro Region' column derived from ``region``.

        Returns:
            A new frame with stripped tags, ``published`` reduced to a
            ``YYYY-MM-DD`` string, one ``tag_<n>`` column per tag position, and
            columns renamed through ``cfg.new_columns``.
        """
        # Work on a copy: callers pass column slices, and the input stays reusable.
        frame = dataframe.copy()
        # labels for hr report
        if hr:
            frame['Macro Region'] = frame['region'].apply(self.region_labels)
        # strip surrounding spaces from every tag
        frame['tags'] = frame['tags'].apply(lambda tags: [tag.strip(' ') for tag in tags])
        # publication day
        frame['published'] = frame['published'].apply(self._published_day)
        # one column per tag position; default=0 keeps an empty frame working
        widest = max((len(tags) for tags in frame['tags']), default=0)
        tag_columns = [f'tag_{position}' for position in range(widest)]
        if tag_columns:
            # Reuse the frame's index so concat aligns rows even when the
            # index is not a plain 0..n-1 range.
            tags_df = pd.DataFrame(frame['tags'].to_list(), columns=tag_columns, index=frame.index)
        else:
            tags_df = pd.DataFrame(index=frame.index)
        combined = pd.concat([frame, tags_df], axis=1)
        return combined.rename(columns=cfg.new_columns)

In [122]:
class Sheets:
    """Small helper that replaces the contents of a Google Sheets worksheet.

    Call connect() once before upload_to_sheet().
    """

    # Value returned by gp.conf.get_config(); None until connect() has run.
    c = None
    # Client returned by gspread.authorize(); None until connect() has run.
    gc = None

    def connect(self, directory, file):
        """Load the gspread_pandas config and authorize a gspread client.

        Args:
            directory: Directory that contains the key/config file.
            file: File name of the service-account JSON key inside directory.

        The same directory/file pair is used for both the gspread_pandas
        config and the service-account credentials. The loaded config is
        deliberately not printed, so it cannot end up in saved notebook output.
        """
        self.c = gp.conf.get_config(conf_dir=directory, file_name=file)
        scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
        key_path = os.path.join(directory, file)
        credentials = ServiceAccountCredentials.from_json_keyfile_name(key_path, scope)
        self.gc = gspread.authorize(credentials)

    def upload_to_sheet(self,  spread_name, sheet_name, dataframe):
        """Clear a worksheet range and write dataframe into it from cell A1.

        Args:
            spread_name: Spreadsheet name passed to gspread / gspread_pandas.
            sheet_name: Worksheet (tab) name inside that spreadsheet.
            dataframe: Frame to write; the index is not written.

        Raises:
            RuntimeError: If connect() has not been called on this instance.
        """
        if self.c is None or self.gc is None:
            raise RuntimeError("Sheets.connect() must be called before upload_to_sheet()")
        # clear old data
        # NOTE(review): only columns A..X and the first 100000 rows are cleared;
        # data outside that range stays in place.
        sht = self.gc.open(spread_name)
        sht.values_clear(f"{sheet_name}!A1:X100000")
        print(f"{sheet_name} clear data")
        # write new data
        gp.Spread(spread_name, config=self.c).df_to_sheet(dataframe,
                                                          sheet=sheet_name,
                                                          start='A1',
                                                          index=False,
                                                          replace=False)
        print(f"{sheet_name} write data")

start  


In [4]:
# Fetch mentions for topic cfg.pyaterochka_id starting at cfg.start_date
# (delta keeps its default of 2 days). tags and exclude_tags are both appended
# verbatim to the query string, so the exclusion fragment is passed by name.
d = DataProcessing.parse_data(
    topic_id=cfg.pyaterochka_id,
    apikey=cfg.key,
    start_date=cfg.start_date,
    exclude_tags=cfg.pyaterochka_exclude,
)

2022-01-31
1000
1409
{"total":1409,"lastSeq":11496200,"mentions":[]}
2022-02-01
2409
2730
{"total":1321,"lastSeq":11496200,"mentions":[]}


In [26]:
# Instance needed to call dataframe_processing, which is a regular (non-static) method.
d1 = DataProcessing()

In [44]:
# Flatten the raw mention dicts in `d` into columns, keep only cfg.columns,
# then run the tag/date clean-up and column renaming.
d2 = d1.dataframe_processing(pd.json_normalize(d)[cfg.columns])

In [40]:
# Google Sheets helper; connect() must be called on it before uploading.
x=Sheets()

In [41]:
# Load the gspread_pandas config from ./google_secret.json and authorize the gspread client.
x.connect('./', 'google_secret.json')

In [45]:
# Clear range A1:X100000 of worksheet 'test' in the named spreadsheet, then write d2 starting at A1.
x.upload_to_sheet('ALL Еженедельный Рабочий файл х5', 'test', d2)

test clear data
test write data


In [123]:
# Fresh helper instances for the fetch/clean step (worker) and the Sheets upload (loader).
worker = DataProcessing()
loader = Sheets()
# Disabled draft below: fetch mentions, reshape them, then connect to Google Sheets.
# NOTE(review): Sheets.connect has no return statement, so `connection` would be None;
# it also references cfg.pyaterochka_id / cfg.pyaterochka_exclude, while `configs`
# uses cfg.pyat_id / cfg.pyat_excl — confirm which names exist in config.
#
# pyat = worker.parse_data(cfg.pyaterochka_id, cfg.key,cfg.start_date, cfg.pyaterochka_exclude)
# pyat = worker.dataframe_processing(pd.json_normalize(pyat)[cfg.columns])
#
# connection = loader.connect('./', 'google_secret.json')


In [16]:
# Query specs for every report: outer key = report name, inner key = label of
# one query, value = three-item list. The printed samples suggest the order is
# [topic id, '&tags=...' fragment, '&excludeTags=...' fragment], i.e. the
# topic_id / tags / exclude_tags inputs of DataProcessing.parse_data.
# NOTE(review): the inner labels 'pyat_del', 'pyat' and 'pyat_x5' are reused
# under perekrestok_fast, perekrestok_ru, chizhik, x5 and perekrestok. They do
# not collide (each sits in its own inner dict) but look copy-pasted; confirm
# whether anything reads the inner keys before renaming them.
configs = {
    'carousel': {
        'car': [cfg.car_id, "", cfg.car_excl],
        'car_x5': [cfg.x5_id, cfg.car_t, cfg.x5_excl_m]
    },
    'pyaterochka_delivery': {
        'pyat_del': [cfg.pyat_id, cfg.pyat_del_t, '']
    },
    'pyaterochka': {
        'pyat': [cfg.pyat_id, "", cfg.pyat_excl],
        'pyat_x5': [cfg.x5_id, cfg.pyat_t, cfg.x5_excl_m]
    },
    'perekrestok_fast': {
        'pyat_del': [cfg.per_id, cfg.per_fast_t, ""]
    },
    'perekrestok_ru': {
        'pyat_del': [cfg.per_id, cfg.per_ru_t, ""]
    },
    'chizhik': {
        'pyat': [cfg.chizh_id, "", ""],
        'pyat_x5': [cfg.x5_id, cfg.chizh_t, cfg.x5_excl_m]
    },
    'x5': {
        'pyat_del': [cfg.x5_id, "", cfg.x5_excl]
    },
    'perekrestok': {
        'pyat': [cfg.per_id, "", cfg.per_excl],
        'pyat_x5': [cfg.x5_id, cfg.per_t, cfg.x5_excl_m]
    },
    # The same cfg.okolo tag filter applied to each of the four topics.
    'okolo': {
        'ok_x5': [cfg.x5_id, cfg.okolo, ""],
        'ok_per': [cfg.per_id, cfg.okolo, ""],
        'ok_pyat': [cfg.pyat_id, cfg.okolo, ""],
        'ok_car': [cfg.car_id, cfg.okolo, ""]
    },
    # NOTE(review): om_pyat uses cfg.omni5 while its siblings use cfg.omni —
    # presumably a topic-specific tag; confirm it is not a typo.
    'omni': {
        'om_x5': [cfg.x5_id, cfg.omni, ""],
        'om_per': [cfg.per_id, cfg.omni, ""],
        'om_pyat': [cfg.pyat_id, cfg.omni5, ""],
        'om_car': [cfg.car_id, cfg.omni, ""]
    },
    # The pyat topic is queried twice here, once with cfg.log_1 and once with cfg.log_15.
    'logistics': {
        'log_x5': [cfg.x5_id, cfg.log_15, ""],
        'log_per': [cfg.per_id, cfg.log_15, ""],
        'log_pyat1': [cfg.pyat_id, cfg.log_1, ""],
        'log_pyat15': [cfg.pyat_id, cfg.log_15, ""],
        'log_car': [cfg.car_id, cfg.log_15, ""]
    },
    'hr_x5': {
        'hr': [cfg.x5_id, cfg.hr, ""]
    },
}

In [17]:
# Print every query spec in `configs` for a visual check.
# Iterates `configs` itself: `x` is bound to a Sheets instance elsewhere in the
# notebook and has no .items().
for key, value in configs.items():
    for name, data in value.items():
        print(name)
        print(data)
        print('------------------------------------')

car
['172904', '', '&excludeTags=15_Логистика&excludeTags=19.10_Около&excludeTags=14_Omni']
------------------------------------
car_x5
['174566', '&tags=40 Карусель', '&excludeTags=15_Логистика&excludeTags=19.10_Около&excludeTags=14_Omni']
------------------------------------
pyat_del
['160358', '&tags=18.11_ДоставкаПятерочка', '']
------------------------------------
pyat
['160358', '', '&excludeTags=18.11_ДоставкаПятерочка&excludeTags=15_Логистика&excludeTags=001_Логистика &excludeTags=19.10_Около&excludeTags=001_Omni']
------------------------------------
pyat_x5
['174566', '&tags=40 Пятерочка', '&excludeTags=15_Логистика&excludeTags=19.10_Около&excludeTags=14_Omni']
------------------------------------
pyat_del
['160394', '&tags=18.12_Перекресток быстро', '']
------------------------------------
pyat_del
['160394', '&tags=18.11_Впрок', '']
------------------------------------
pyat
['233371', '', '']
------------------------------------
pyat_x5
['174566', '&tags=40_Чижик', '&exclud