In [1]:
from datetime import datetime
import pytz
import requests
import xml.etree.ElementTree as ET
import base64
import json
import argparse
import sys

import pandas as pd
import numpy as np

In [7]:
def ler_recomendacoes(arq):
    """Load the recommendation base and reshape it to one row per customer.

    The input is a tab-separated file containing at least the columns
    ``CodCliente``, ``Cod Recom``, ``e-mail`` and ``Estoque``. Only rows whose
    ``Estoque`` is greater than 5 are used. For every customer the first five
    recommended codes (in file order) become the columns ``Recom1`` ..
    ``Recom5``; shorter lists are padded with NaN, and customers left with
    fewer than three recommendations are dropped.

    Parameters
    ----------
    arq : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        One row per remaining customer, holding ``Recom1`` .. ``Recom5`` plus
        the customer columns brought in by the merge (``CodCliente`` and
        ``e-mail``), indexed 0..n-1.
    """
    max_recom = 5   # number of RecomN columns
    min_recom = 3   # a customer needs at least this many recommendations
    colunas = ['Recom1', 'Recom2', 'Recom3', 'Recom4', 'Recom5']

    # Read the recommendation base
    recom = pd.read_csv(arq, sep='\t')
    # Vectorised whitespace removal; missing e-mails stay missing instead of
    # raising on a float NaN.
    recom['e-mail'] = recom['e-mail'].str.replace(' ', '', regex=False)
    recom = recom[recom.Estoque > 5]

    # One list of codes per customer, truncated/padded to exactly max_recom.
    # A single groupby pass (sort=False keeps first-appearance order) replaces
    # re-filtering the whole frame once per customer.
    clientes, recomendacoes = [], []
    for cliente, codigos in recom.groupby('CodCliente', sort=False)['Cod Recom']:
        lista = codigos.tolist()[:max_recom]
        lista.extend([np.nan] * (max_recom - len(lista)))
        clientes.append(cliente)
        recomendacoes.append(lista)

    # Assemble the wide DataFrame; the explicit dtype keeps the merge key
    # compatible with CodCliente even when no customer is left.
    indice = pd.Index(clientes, dtype=recom['CodCliente'].dtype, name='Cliente')
    df = pd.DataFrame(data=recomendacoes, index=indice, columns=colunas)
    df = df.merge(recom.loc[:, ['CodCliente', 'e-mail']].drop_duplicates(),
                  left_on='Cliente', right_on='CodCliente')

    # A NaN in the min_recom-th column means the customer had too few codes.
    df = df.dropna(subset=[colunas[min_recom - 1]], axis=0).reset_index(drop=True)

    return df

In [3]:
def ler_db_produtos(site, conteudo_xml=None):
    """Build the product catalogue from a Google Merchant XML feed.

    Parameters
    ----------
    site : str
        ``'bol'`` selects the bemol.com.br feed; any other value selects the
        bemolfarma.com.br feed. Ignored when ``conteudo_xml`` is given.
    conteudo_xml : bytes or str, optional
        Raw feed content that was already downloaded. When provided, no HTTP
        request is made, which allows parsing a cached copy of the feed.

    Returns
    -------
    pandas.DataFrame
        Columns ``Nome``, ``Imagem``, ``Link`` and ``Preco`` (float), indexed
        by the integer product code taken from the ``mpn`` tag.

    Raises
    ------
    requests.HTTPError
        If the feed request returns an error status.
    xml.etree.ElementTree.ParseError
        If the content is not well-formed XML.
    ValueError
        If a price or product code cannot be converted to a number.

    Notes
    -----
    Fields are read item by item, so every row is built from a single
    ``<item>``. Items lacking a title, image, link, price or code are skipped
    rather than shifting the remaining rows out of alignment.
    """
    ns = "{http://base.google.com/ns/1.0}"

    if conteudo_xml is None:
        # Request the product list
        if site == 'bol':
            url_xml = "https://www.bemol.com.br/feeds/google-merchant"
        else:
            url_xml = "https://www.bemolfarma.com.br/feeds/google-merchant-farma"
        header = {"Accept": "application/xml"}
        # The timeout avoids hanging the notebook forever; raise_for_status
        # surfaces HTTP errors instead of trying to parse an error page.
        r = requests.get(url_xml, headers=header, timeout=60)
        r.raise_for_status()
        conteudo_xml = r.content

    root = ET.fromstring(conteudo_xml)

    codigos, linhas = [], []
    for item in root.iterfind("channel/item"):
        titulo = item.findtext("title")
        # Prefer the main image; fall back to the first additional image.
        imagem = (item.findtext(ns + "image_link")
                  or item.findtext(ns + "additional_image_link"))
        link = item.findtext("link")
        preco = item.findtext(ns + "price")
        codigo = item.findtext(ns + "mpn")

        # Missing or empty tag: the item cannot form a complete row.
        if not all((titulo, imagem, link, preco, codigo)):
            continue

        codigos.append(int(codigo))
        linhas.append((
            titulo.capitalize().replace(',', '.'),
            imagem,
            link,
            float(preco.replace(" BRL", "")),
        ))

    return pd.DataFrame(linhas, columns=['Nome', 'Imagem', 'Link', 'Preco'], index=codigos)

In [54]:
_NAO_ENCONTRADO = -1  # recommended code is missing or not in the catalogue
_SEM_FOTO = -2        # product exists but only has the placeholder image


def _preco_ou_sentinela(df_prod, codigo):
    """Return the catalogue price for ``codigo``, or a negative sentinel."""
    try:
        imagem = df_prod['Imagem'][codigo]
        # A non-string here means a missing image or a duplicated code
        # (the lookup returned several rows): treat it as not found.
        if not isinstance(imagem, str):
            return _NAO_ENCONTRADO
        if 'sem-foto.gif' in imagem:
            return _SEM_FOTO
        return df_prod['Preco'][codigo]
    except (KeyError, IndexError, TypeError):
        # NaN padding and codes absent from the catalogue end up here.
        return _NAO_ENCONTRADO


def linkar(df_recom, df_prod, preco_minimo=5, min_validas=3):
    """Find the customers that do not have enough usable recommendations.

    Every code in ``Recom1`` .. ``Recom5`` is replaced by its catalogue price,
    by -2 when the product image is the ``sem-foto.gif`` placeholder, or by -1
    when the code is missing from the catalogue. A recommendation counts as
    usable when that value is greater than ``preco_minimo``.

    Parameters
    ----------
    df_recom : pandas.DataFrame
        Recommendations with the columns ``Recom1`` .. ``Recom5``.
    df_prod : pandas.DataFrame
        Catalogue indexed by product code, with ``Imagem`` and ``Preco``.
    preco_minimo : float, default 5
        Values must be strictly above this to count as usable.
    min_validas : int, default 3
        Rows with fewer usable recommendations than this are excluded.

    Returns
    -------
    list of int
        Row positions (0-based) of ``df_recom`` to be excluded.
    """
    if len(df_recom) == 0:
        return []

    colunas = ['Recom1', 'Recom2', 'Recom3', 'Recom4', 'Recom5']

    # Build the whole price table once, instead of once per row.
    df_link = pd.DataFrame({
        coluna: [_preco_ou_sentinela(df_prod, codigo)
                 for codigo in df_recom[coluna].tolist()]
        for coluna in colunas
    })

    # Sentinels are negative, so they never count as usable.
    validas = (df_link > preco_minimo).sum(axis=1)
    excluidos = df_link.index[validas < min_validas].tolist()

    return excluidos

In [51]:
# Load the recommendation base and reshape it to one row per customer.
# The path is relative to the notebook's working directory.
df_recom = ler_recomendacoes('data/bases/base_farma_teste.csv')

In [52]:
# Download and parse the product feed. Any value other than 'bol' selects the
# bemolfarma.com.br feed, so 'farma' loads the pharmacy catalogue.
df_prod = ler_db_produtos('farma')

In [55]:
# Match every recommendation against the product catalogue.
# NOTE(review): the output shown below and the later cells use `df_link` as a
# DataFrame with columns Recom1..Recom5, but `linkar` as defined in this
# notebook returns the list `excluidos` — confirm which one is intended and
# re-run the notebook from a fresh kernel.
df_link =linkar(df_recom, df_prod)
df_link

Unnamed: 0,Recom1,Recom2,Recom3,Recom4,Recom5
0,38.9,1.7,-2.0,-1.0,-1.0
1,2.5,2.5,2.6,4.5,3.1
2,1.7,4.2,-2.0,-1.0,-1.0
3,30.2,26.8,25.9,-1.0,-1.0
4,16.4,17.5,9.9,-1.0,-1.0
...,...,...,...,...,...
13380,1.8,1.8,1.8,1.8,1.8
13381,1.8,1.8,1.8,1.8,-1.0
13382,1.8,1.8,1.8,1.8,1.8
13383,1.8,1.8,1.8,1.8,-1.0


In [67]:
# Mark for exclusion every row of df_link with fewer than 3 values above 5.
# The -1 / -2 entries visible in df_link are below the threshold, so they
# never count. Vectorised comparison replaces the element-wise .iloc loop.
# `excluidos` holds 0-based row positions, as before.
qtd_validas = (df_link > 5).sum(axis=1).tolist()
excluidos = [pos for pos, qtd in enumerate(qtd_validas) if qtd < 3]

In [70]:
# Preview df_link without the rows listed in `excluidos`. The result is only
# displayed: it is not assigned, so df_link itself still holds every row.
df_link.drop(excluidos, axis='index')

Unnamed: 0,Recom1,Recom2,Recom3,Recom4,Recom5
3,30.2,26.8,25.9,-1.0,-1.0
4,16.4,17.5,9.9,-1.0,-1.0
7,28.9,27.1,27.1,35.9,30.9
9,11.9,11.9,10.3,-1.0,-1.0
11,27.9,34.1,25.9,37.9,28.4
...,...,...,...,...,...
13351,5.3,1.5,8.2,8.2,13.9
13352,10.7,6.4,8.3,1.8,1.8
13366,39.4,27.3,2.8,39.4,-1.0
13375,11.9,10.3,4.2,5.3,1.5


In [79]:
# Spot check: look up the product link stored for catalogue code 6006193.
df_prod['Link'][6006193.0]

'https://www.bemolfarma.com.br/banho-creme-bioextratus-umectante-250g-p1042199?tsid=45'

In [48]:
# Write df_link to a comma-separated UTF-8 file, without the index column.
# NOTE(review): the filtered frame from `df_link.drop(excluidos, ...)` is never
# assigned, so this writes df_link as it currently is — confirm that exporting
# the unfiltered rows to 'recom_restantes.csv' is intended.
df_link.to_csv('recom_restantes.csv', sep=',', index=False, encoding='utf-8')