In [149]:
import numpy as np
import pandas as pd
import collections

## 1. Cargamos los .json a DataFrames

In [5]:
# Folder holding the three JSON Lines exports (one JSON document per line).
DATA_DIR = '../data/data'


def _read_json_lines(name):
    """Load `<DATA_DIR>/<name>.json` (JSON Lines) into a DataFrame.

    The encoding is pinned to UTF-8 so the result does not depend on any
    platform default.
    NOTE(review): the merged output further down shows mojibake
    ('ComercializaciÃ³n'). If it persists with UTF-8 pinned, the export itself
    is presumably double-encoded and needs repairing at the source.
    """
    return pd.read_json(DATA_DIR + '/' + name + '.json', lines=True, encoding='utf-8')


sucursales = _read_json_lines('sucursales')
precios = _read_json_lines('precios')
productos = _read_json_lines('productos')

## 2. Corregimos errores de carga de los datos

In [6]:
def _unwrap(column, key):
    """Replace every `{key: value}` dict cell of `column` by its bare `value`.

    Cells that are not dicts are returned untouched, which makes this cell
    safe to re-run: the original version indexed already-unwrapped strings
    with `key` on a second execution and raised.
    (The `$date` / `$oid` wrappers are presumably MongoDB Extended JSON.)
    """
    return column.map(lambda cell: cell[key] if isinstance(cell, dict) else cell)


precios["fecha"] = _unwrap(precios["fecha"], "$date")
precios["_id"] = _unwrap(precios["_id"], "$oid")
productos["_id"] = _unwrap(productos["_id"], "$oid")
sucursales["_id"] = _unwrap(sucursales["_id"], "$oid")

## 3. Desglosamos la fecha en año, mes, día y semana

In [7]:
def _iso_week_numbers(timestamps):
    """Return the ISO-8601 week number of every entry of a DatetimeIndex.

    `DatetimeIndex.week` was deprecated in pandas 1.1 and removed in 2.0;
    `isocalendar()` is its replacement. The old attribute is only used as a
    fallback on pandas versions that predate `isocalendar()`.
    """
    if not hasattr(timestamps, "isocalendar"):
        return timestamps.week
    weeks = timestamps.isocalendar()["week"]
    # `weeks` is indexed by the timestamps themselves, so hand back a plain
    # array: the caller assigns it positionally, exactly like `.week` was.
    if weeks.isna().any():
        return weeks.to_numpy(dtype="float64", na_value=np.nan)
    return weeks.to_numpy(dtype="int64")


# Parse the timestamp strings once instead of once per derived column.
fecha_index = pd.DatetimeIndex(precios["fecha"])

precios["year"] = fecha_index.year
precios["month"] = fecha_index.month
precios["day"] = fecha_index.day
precios["week"] = _iso_week_numbers(fecha_index)

## 4. Unificamos los datos en un DataFrame

In [8]:
# Attach the product attributes to every price row (inner join on the product code).
precio_producto = pd.merge(
    precios,
    productos,
    how='inner',
    left_on='producto',
    right_on='id',
)

In [9]:
# Attach the branch attributes as well (inner join on the branch id).
precio_producto_sucursal = pd.merge(
    precio_producto,
    sucursales,
    how='inner',
    left_on='sucursal',
    right_on='id',
)

In [10]:
# Columns of the prices x products join; the `_id` column exists on both sides,
# so it shows up suffixed as `_id_x` / `_id_y`.
precio_producto.columns.values

array(['_id_x', 'fecha', 'medicion', 'precio', 'producto', 'sucursal',
       'year', 'month', 'day', 'week', '_id_y', 'id', 'marca', 'nombre',
       'presentacion'], dtype=object)

In [11]:
# Columns after also joining the branch table; both `id` join keys are kept,
# suffixed as `id_x` / `id_y`.
precio_producto_sucursal.columns.values

array(['_id_x', 'fecha', 'medicion', 'precio', 'producto', 'sucursal',
       'year', 'month', 'day', 'week', '_id_y', 'id_x', 'marca', 'nombre',
       'presentacion', '_id', 'banderaDescripcion', 'banderaId',
       'comercioId', 'comercioRazonSocial', 'direccion', 'id_y', 'lat',
       'lng', 'localidad', 'provincia', 'sucursalId', 'sucursalNombre',
       'sucursalTipo'], dtype=object)

In [12]:
# Peek at the first three rows of the fully merged frame.
precio_producto_sucursal.head(3)

Unnamed: 0,_id_x,fecha,medicion,precio,producto,sucursal,year,month,day,week,...,comercioRazonSocial,direccion,id_y,lat,lng,localidad,provincia,sucursalId,sucursalNombre,sucursalTipo
0,5cba50f27af1523c35613042,2019-01-15T04:51:28.220Z,6,56.2,7790762052364,12-1-44,2019,1,15,3,...,Coto Centro Integral de ComercializaciÃ³n S.A.,Av. Monroe 3284,12-1-44,-34.563583,-58.468412,Belgrano,AR-C,44,MONROE,Supermercado
1,5cba5a537af1523c35976080,2019-02-23T09:53:40.735Z,9,56.2,7790762052364,12-1-44,2019,2,23,8,...,Coto Centro Integral de ComercializaciÃ³n S.A.,Av. Monroe 3284,12-1-44,-34.563583,-58.468412,Belgrano,AR-C,44,MONROE,Supermercado
2,5cba60367af1523c35ba838c,2018-11-08T16:35:49.030Z,1,56.2,7790762052364,12-1-44,2018,11,8,45,...,Coto Centro Integral de ComercializaciÃ³n S.A.,Av. Monroe 3284,12-1-44,-34.563583,-58.468412,Belgrano,AR-C,44,MONROE,Supermercado


In [13]:
# Number of distinct product codes present in the merged frame.
len(precio_producto_sucursal['producto'].unique())

1000

## 5. Analizamos la cantidad de datos nulos por columna

In [15]:
# Let's analyse the number of nulls

def count_missing_data(dataframe):
    """Summarise the missing values of every column of `dataframe`.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    pandas.DataFrame
        One row per column of `dataframe` with the columns `column` (name),
        `number_nulls` (count of null cells) and `percentage_nulls` (that
        count as a percentage of the number of rows). For a frame with zero
        rows the percentage is NaN instead of failing on a division by zero.
    """
    null_counts = dataframe.isnull().sum()
    total_rows = len(dataframe.index)

    if total_rows:
        percentages = null_counts.values / total_rows * 100
    else:
        # No rows: a share of nothing is undefined.
        percentages = np.full(len(null_counts), np.nan)

    return pd.DataFrame({
        'column': null_counts.index,
        'number_nulls': null_counts.values,
        'percentage_nulls': percentages,
    })

# Leave the frame as the cell's last expression so the notebook renders it as a
# table instead of printing its plain-text repr.
count_missing_data(precio_producto_sucursal)

                 column  number_nulls  percentage_nulls
0                 _id_x             0               0.0
1                 fecha             0               0.0
2              medicion             0               0.0
3                precio             0               0.0
4              producto             0               0.0
5              sucursal             0               0.0
6                  year             0               0.0
7                 month             0               0.0
8                   day             0               0.0
9                  week             0               0.0
10                _id_y             0               0.0
11                 id_x             0               0.0
12                marca             0               0.0
13               nombre             0               0.0
14         presentacion             0               0.0
15                  _id             0               0.0
16   banderaDescripcion             0           

In [176]:
# Product-identifying columns (code, brand, name, presentation) of the first rows.
precio_producto_sucursal[['producto','marca', 'nombre', 'presentacion']].head()

Unnamed: 0,producto,marca,nombre,presentacion
0,7790762052364,SANTA ANA,Vino Rosado Seleccion Especial Santa Ana 700 Ml,700.0 ml
1,7790762052364,SANTA ANA,Vino Rosado Seleccion Especial Santa Ana 700 Ml,700.0 ml
2,7790762052364,SANTA ANA,Vino Rosado Seleccion Especial Santa Ana 700 Ml,700.0 ml
3,7790762052364,SANTA ANA,Vino Rosado Seleccion Especial Santa Ana 700 Ml,700.0 ml
4,7790762052364,SANTA ANA,Vino Rosado Seleccion Especial Santa Ana 700 Ml,700.0 ml


## 6. ¿Qué cantidad de productos venden las diferentes marcas de supermercados?

In [26]:
# Distinct chain banners (`banderaDescripcion`) present in the merged frame.
precio_producto_sucursal['banderaDescripcion'].unique()

array(['COTO CICSA', 'Hipermercado Carrefour', 'Disco', 'Market',
       'Express', 'Jumbo', 'Vea', 'JOSIMAR SUPERMERCADOS',
       'Supermercados DIA', 'Walmart SuperCenter', 'Mi Changomas'],
      dtype=object)

In [44]:
def count_different_products(dataframe):
    """Count the distinct products each chain lists in every measurement.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns `banderaDescripcion`, `medicion` and
        `producto`.

    Returns
    -------
    pandas.DataFrame
        One row per distinct `banderaDescripcion` (in order of first
        appearance) and, besides that column, one column per distinct
        `medicion` value (ascending) holding the number of distinct
        `producto` values; 0 when the chain has no rows in that measurement.
    """
    supermarkets = dataframe['banderaDescripcion'].unique()
    time_frames = sorted(dataframe['medicion'].unique())

    df = pd.DataFrame(data={'banderaDescripcion': supermarkets})

    for time in time_frames:
        # Work on the frame that was passed in (the previous version silently
        # read the notebook-level `precio_producto_sucursal` here) and filter
        # the measurement once instead of once per chain.
        rows_in_time = dataframe[dataframe['medicion'] == time]

        df[time] = [
            len(rows_in_time.loc[rows_in_time['banderaDescripcion'] == market, 'producto'].unique())
            for market in supermarkets
        ]

    return df

In [45]:
# Distinct products per chain (rows) for every `medicion` value (columns).
count_different_products(precio_producto_sucursal)

Unnamed: 0,banderaDescripcion,1,2,3,4,5,6,7,8,9,10
0,COTO CICSA,993,995,996,996,996,996,997,994,994,993
1,Hipermercado Carrefour,996,997,998,998,998,998,998,998,998,998
2,Disco,996,995,996,996,996,996,996,996,996,996
3,Market,997,996,997,997,997,997,997,997,997,997
4,Express,887,887,887,891,891,893,886,884,884,885
5,Jumbo,996,996,996,996,996,994,996,996,996,996
6,Vea,995,995,996,996,996,996,996,996,996,996
7,JOSIMAR SUPERMERCADOS,806,827,813,0,833,0,827,821,823,832
8,Supermercados DIA,760,760,760,764,763,770,768,810,810,805
9,Walmart SuperCenter,0,892,897,0,902,0,895,797,897,899


## 7. ¿Qué cantidad de sucursales hay por marca?

In [173]:
def number_sucursales_by_mkt_brand(dataframe):
    """Count the distinct branches every chain has in each measurement.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns `banderaDescripcion`, `medicion` and
        `sucursalId`.

    Returns
    -------
    pandas.DataFrame
        One row per distinct `banderaDescripcion` plus one column per distinct
        `medicion` value (ascending) with the number of distinct `sucursalId`
        values. A chain with no rows in a measurement gets NaN there, because
        the per-measurement counts are left-joined onto the chain list.
    """
    chains = dataframe['banderaDescripcion'].unique()
    rounds = sorted(dataframe['medicion'].unique())

    summary = pd.DataFrame({'banderaDescripcion': chains})

    for round_id in rounds:
        round_rows = dataframe.loc[dataframe['medicion'] == round_id]

        # Distinct branch ids per chain, already labelled with the measurement.
        branches_per_chain = (
            round_rows
            .groupby('banderaDescripcion')['sucursalId']
            .nunique()
            .reset_index(name=round_id)
        )

        summary = summary.merge(branches_per_chain, on='banderaDescripcion', how='left')

    return summary

In [174]:
# Distinct `sucursalId` values per chain for every `medicion`; NaN marks a
# chain that has no rows in that measurement.
number_sucursales_by_mkt_brand(precio_producto_sucursal)

Unnamed: 0,banderaDescripcion,1,2,3,4,5,6,7,8,9,10
0,COTO CICSA,65.0,57,65,65.0,65,65.0,65,65,65,65.0
1,Hipermercado Carrefour,9.0,9,9,9.0,9,9.0,9,9,9,9.0
2,Disco,26.0,26,26,26.0,26,13.0,26,26,26,26.0
3,Market,28.0,28,28,28.0,28,28.0,28,28,28,28.0
4,Express,9.0,9,9,9.0,9,9.0,9,9,9,9.0
5,Jumbo,10.0,10,10,10.0,10,5.0,10,10,10,10.0
6,Vea,12.0,12,12,12.0,12,9.0,12,12,12,12.0
7,JOSIMAR SUPERMERCADOS,1.0,1,1,,1,,1,1,1,1.0
8,Supermercados DIA,13.0,13,13,12.0,13,13.0,13,13,13,13.0
9,Walmart SuperCenter,,1,1,,1,,1,1,1,1.0


## 8. ¿Hay sucursales dentro de una misma cadena que vendan menos/más productos que otras?

In [None]:
# TODO: I have to count the number of products per branch
# NOTE(review): this cell looks unfinished. The code below does not count
# products per branch; it computes the min/max price per (chain, product) for
# measurement 1, and nothing in this cell reads `res` afterwards.

# Aggregations requested for the `precio` column of every group.
d = {'precio': ['min', 'max']}
        
# Keep only the rows of the first measurement.
df_med = precio_producto_sucursal[precio_producto_sucursal['medicion']==1]

res = df_med.groupby(['banderaDescripcion', 'producto']).agg(d)

# Flatten the two-level column index into `precio_min` / `precio_max`.
res.columns = ['_'.join(col) for col in res.columns.values]

res = res.reset_index()

## Vamos a tratar de inferir la existencia de estrategias competitivas de las diferentes marcas de supermercados:

  ###  - ¿Adaptan los supermercados el precio de sus sucursales al lugar donde están?
  ###  - ¿Existe una cadena que esté intentando diferenciarse de las demás a través del precio?
  ###  - ¿Existen cadenas que intenten diferenciarse de las demás manteniendo el precio de ciertas categorías de productos más barato que el resto?

## 9. ¿Hay diferencias de precios entre las diferentes sucursales de una misma marca?

In [64]:
def get_diff_prices_products_sucursales(dataframe):
    """Measure, per chain and measurement, how much a product's price varies.

    For every measurement the minimum and maximum `precio` of each
    (chain, product) pair are compared; only pairs whose maximum exceeds the
    minimum are taken into account.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns `banderaDescripcion`, `medicion`, `producto`
        and `precio`.

    Returns
    -------
    pandas.DataFrame
        One row per distinct `banderaDescripcion` and, for every measurement
        `t` (ascending), two columns:
        `<t>_amount_prod_diff` - number of products with a positive max-min
        price gap inside the chain;
        `<t>_amount_mean_diff` - mean of those gaps, NaN when there is none.
    """
    supermarkets = dataframe['banderaDescripcion'].unique()
    time_frames = sorted(dataframe['medicion'].unique())

    df = pd.DataFrame(data={'banderaDescripcion': supermarkets})

    for time in time_frames:
        df_med = dataframe[dataframe['medicion'] == time]

        price_range = df_med.groupby(['banderaDescripcion', 'producto'])['precio'].agg(['min', 'max'])
        gap = price_range['max'] - price_range['min']

        # Aggregate the positive gaps per chain in one pass. Chains without any
        # positive gap are filled in by `reindex` (count 0, mean NaN), so no
        # mean is ever taken over an empty selection - that is what used to
        # trigger NumPy's "Mean of empty slice" RuntimeWarning.
        positive_gap_by_market = gap[gap > 0].groupby(level='banderaDescripcion')

        col_amount = str(time) + '_amount_prod_diff'
        col_mean = str(time) + '_amount_mean_diff'

        # `.to_numpy()` keeps the assignment positional (row i <-> supermarkets[i]).
        df[col_amount] = positive_gap_by_market.size().reindex(supermarkets, fill_value=0).to_numpy()
        df[col_mean] = positive_gap_by_market.mean().reindex(supermarkets).to_numpy()

    return df

In [65]:
# Per chain and measurement: how many products show a price gap (max - min > 0)
# inside the chain, and the mean of those gaps.
get_diff_prices_products_sucursales(precio_producto_sucursal)

Unnamed: 0,banderaDescripcion,1_amount_prod_diff,1_amount_mean_diff,2_amount_prod_diff,2_amount_mean_diff,3_amount_prod_diff,3_amount_mean_diff,4_amount_prod_diff,4_amount_mean_diff,5_amount_prod_diff,...,6_amount_prod_diff,6_amount_mean_diff,7_amount_prod_diff,7_amount_mean_diff,8_amount_prod_diff,8_amount_mean_diff,9_amount_prod_diff,9_amount_mean_diff,10_amount_prod_diff,10_amount_mean_diff
0,COTO CICSA,33,7.144242,168,8.563571,110,10.023909,53,14.805849,36,...,318,9.108082,116,11.38569,34,13.705588,91,10.905385,84,17.80131
1,Hipermercado Carrefour,767,9.355189,764,8.914869,771,9.139468,692,6.505116,737,...,800,6.5741,648,6.96412,732,4.857268,842,6.544537,759,6.049736
2,Disco,394,7.384645,432,6.528056,396,6.613157,457,5.971707,343,...,374,7.583342,369,6.749377,371,6.236981,377,6.245119,477,6.757254
3,Market,655,10.093634,656,8.775152,626,8.328642,455,5.908923,562,...,556,6.376583,509,6.491415,447,5.81217,522,8.030498,389,5.744242
4,Express,132,4.710379,13,9.388462,165,6.34303,138,8.843478,162,...,57,8.82807,144,10.703542,189,7.81963,170,10.016588,115,9.069652
5,Jumbo,464,8.688642,470,7.525702,418,8.380144,413,7.697433,426,...,364,8.044203,395,9.697367,424,9.083396,439,10.844897,515,7.564194
6,Vea,159,5.663208,160,5.305312,152,5.699474,206,5.226553,162,...,201,5.198756,113,5.176991,186,6.96629,201,7.255075,202,4.845693
7,JOSIMAR SUPERMERCADOS,0,,0,,0,,0,,0,...,0,,0,,0,,0,,0,
8,Supermercados DIA,50,6.0584,4,15.5,110,6.653273,21,8.65381,29,...,64,7.362812,5,9.39,5,15.76,38,5.711316,43,5.400465
9,Walmart SuperCenter,0,,0,,0,,0,,0,...,0,,0,,0,,0,,0,


## 9.2 ¿Qué sucursales son las que tienen los menores precios?
(¿Están asociadas a algún barrio en especial?)

## 10. ¿Qué cadena vende la mayor cantidad de productos al menor precio?

(Ya que los precios de los productos varían entre sucursales de una misma cadena, vamos a tomar el precio mínimo al que cada cadena vende un producto.)

In [91]:
def get_amount_products_with_min_price_by_mkt_brand(dataframe):
    """Count, per measurement, the products for which each chain is cheapest.

    A chain's price for a product is the lowest `precio` it shows in the
    measurement. Each product is credited to exactly one chain: the row picked
    by `idxmin`, i.e. the first one found when several chains tie.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns `banderaDescripcion`, `medicion`, `producto`
        and `precio`.

    Returns
    -------
    pandas.DataFrame
        One row per distinct `banderaDescripcion` plus one column per distinct
        `medicion` value (ascending) with the number of products credited to
        that chain.
    """
    chains = dataframe['banderaDescripcion'].unique()
    rounds = sorted(dataframe['medicion'].unique())

    summary = pd.DataFrame({'banderaDescripcion': chains})

    for round_id in rounds:
        round_rows = dataframe.loc[dataframe['medicion'] == round_id]

        # Lowest price of every product inside every chain.
        chain_floor = (
            round_rows
            .groupby(['banderaDescripcion', 'producto'])['precio']
            .min()
            .reset_index(name='precio_min')
        )

        # For every product keep the row of the chain holding the overall minimum.
        cheapest_rows = chain_floor.loc[chain_floor.groupby('producto')['precio_min'].idxmin()]
        winning_chain = cheapest_rows['banderaDescripcion']

        summary[round_id] = [int((winning_chain == chain).sum()) for chain in chains]

    return summary
        

In [92]:
# Number of products for which each chain holds the lowest price, per measurement.
get_amount_products_with_min_price_by_mkt_brand(precio_producto_sucursal)

Unnamed: 0,banderaDescripcion,1,2,3,4,5,6,7,8,9,10
0,COTO CICSA,55,68,62,120,61,127,78,58,62,74
1,Hipermercado Carrefour,269,296,328,493,318,375,318,259,316,331
2,Disco,24,28,14,18,22,18,22,17,17,25
3,Market,152,75,114,64,98,79,91,73,72,99
4,Express,91,109,99,124,80,109,102,79,79,67
5,Jumbo,11,7,10,10,11,19,16,16,33,9
6,Vea,37,39,17,18,24,52,19,46,29,40
7,JOSIMAR SUPERMERCADOS,161,168,181,0,205,0,169,208,198,225
8,Supermercados DIA,137,114,103,153,113,221,94,139,115,91
9,Walmart SuperCenter,0,40,25,0,27,0,42,52,44,39


## 11. ¿Qué tipo de productos son los que cada cadena vende a menor precio?

Mostramos los productos que siempre estuvieron más baratos en cada cadena (también podemos analizar diferentes períodos: al menos en 7 mediciones, 6, etc.).

In [138]:
def get_products_with_min_price_by_mkt_brand(dataframe):
    """List, per measurement, the products for which each chain is cheapest.

    A chain's price for a product is the lowest `precio` it shows in the
    measurement. Each product is credited to exactly one chain: the row picked
    by `idxmin`, i.e. the first one found when several chains tie.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns `banderaDescripcion`, `medicion`, `producto`
        and `precio`.

    Returns
    -------
    pandas.DataFrame
        One row per distinct `banderaDescripcion` plus one column per distinct
        `medicion` value (ascending); every cell is the list of `producto`
        values credited to that chain (an empty list when there is none).
    """
    chains = dataframe['banderaDescripcion'].unique()
    rounds = sorted(dataframe['medicion'].unique())

    summary = pd.DataFrame({'banderaDescripcion': chains})

    for round_id in rounds:
        round_rows = dataframe.loc[dataframe['medicion'] == round_id]

        # Lowest price of every product inside every chain.
        chain_floor = (
            round_rows
            .groupby(['banderaDescripcion', 'producto'])['precio']
            .min()
            .reset_index(name='precio_min')
        )

        # For every product keep the row of the chain holding the overall minimum.
        cheapest_rows = chain_floor.loc[chain_floor.groupby('producto')['precio_min'].idxmin()]

        summary[round_id] = [
            cheapest_rows.loc[cheapest_rows['banderaDescripcion'] == chain, 'producto'].values.tolist()
            for chain in chains
        ]

    return summary

In [100]:
# Product codes for which each chain holds the lowest price, per measurement.
get_products_with_min_price_by_mkt_brand(precio_producto_sucursal)

Unnamed: 0,banderaDescripcion,1,2,3,4,5,6,7,8,9,10
0,COTO CICSA,"[0000077940131, 12-1-2800000937881, 4005808352...","[0000077940131, 12-1-2800000937881, 4005808352...","[12-1-2800000937881, 4005808352630, 7622300398...","[0000077940131, 0000080052760, 12-1-2800000937...","[12-1-2800000937881, 4005808352630, 7613035379...","[0000080052760, 12-1-2800000937881, 4005808352...","[0000080052760, 12-1-2800000937881, 4005808352...","[12-1-2800000937881, 4005808352630, 7613034191...","[12-1-2800000937881, 4005808352630, 7613034191...","[12-1-2800000937881, 4005808352630, 7501059273..."
1,Hipermercado Carrefour,"[0000077903518, 0000078924468, 0000080052760, ...","[0000077903518, 0000078924468, 0000078924529, ...","[0000075032715, 0000077903518, 3014260273903, ...","[0000075027513, 0000077903518, 0000077940704, ...","[0000075027513, 0000077903518, 0041789001918, ...","[0000075027513, 0000077903518, 0000077940704, ...","[0000075032715, 0000077903518, 0000078924468, ...","[0000075027513, 0000078924468, 0000078924529, ...","[0000075027513, 0000078924468, 0000078924529, ...","[0000075027513, 0000078924468, 0000078924529, ..."
2,Disco,"[0038000846731, 7613035161580, 7613035379213, ...","[0038000846731, 7506339337532, 7622300840259, ...","[0038000846731, 7500435004664, 7622300840273, ...","[7500435112734, 7622300840259, 7790070410146, ...","[0000077940131, 7500435112741, 7622300840259, ...","[0000077940131, 0038000846731, 7790040994904, ...","[0000077940131, 0038000846731, 7622300833930, ...","[0038000846731, 7790040001022, 7790040102989, ...","[0038000846731, 7790040001022, 7790040102989, ...","[0038000846731, 7500435019248, 7622210649287, ..."
3,Market,"[0000077940704, 4005808979813, 7500435012034, ...","[0000075027513, 0000075032715, 7500435012041, ...","[0000075027513, 0000078924468, 0000078924529, ...","[0038000846731, 4005808979813, 7500435012034, ...","[0000080052760, 0038000846731, 4005808979813, ...","[4005808979813, 7500435012034, 7500435012041, ...","[0000075027513, 4005808315093, 7500435012034, ...","[0000080052760, 7500435106405, 7790040439108, ...","[0000075032715, 0000080052760, 7500435106405, ...","[0000075032715, 0000080052760, 0041789002922, ..."
4,Express,"[0000040084107, 0000075032715, 0041789001918, ...","[0000040084107, 0041789001918, 0070501051054, ...","[0000040084107, 0041789001918, 0070501051054, ...","[0000040084107, 0000078924468, 0000078924529, ...","[0000040084107, 0070501051054, 7500435019231, ...","[0000040084107, 0000077953063, 0000077953070, ...","[0000040084107, 7500435019231, 7501059273252, ...","[7500435019231, 7506309878997, 7613034449993, ...","[7500435112741, 7506309878997, 7613034413154, ...","[0000040084107, 7500435112741, 7613034416902, ..."
5,Jumbo,"[7500435019231, 7622210812797, 7622210813534, ...","[7500435112741, 7790040711105, 7790150006153, ...","[0000077940131, 7790010616775, 7790040711105, ...","[7500435112741, 7790520014184, 7790520014191, ...","[7500435004657, 7500435004664, 7622210812797, ...","[7500435012461, 7622210812797, 7790040931206, ...","[7500435012461, 7622210812797, 7790010596602, ...","[7500435012461, 7622210812797, 7622210813534, ...","[0000077940131, 3014260273903, 3014260273910, ...","[7613034191182, 7790230033031, 7790480008261, ..."
6,Vea,"[4005808980239, 7501065904522, 7506295302292, ...","[4005808980239, 7501065904522, 7506295302292, ...","[4005808980239, 7501065904522, 7506295302292, ...","[4005808980239, 7501065904522, 7506295302292, ...","[7501065904522, 7506295302292, 7622210813534, ...","[3014260273903, 3014260273910, 7500435019248, ...","[7622300742584, 7790040929807, 7790230033031, ...","[0000075032715, 3014260273903, 3014260273910, ...","[7500435023306, 7500435112734, 7500435112758, ...","[7613035379213, 7622300868512, 7622300868543, ..."
7,JOSIMAR SUPERMERCADOS,"[0099176316565, 4005808315093, 7500435004633, ...","[0000077940704, 0000077953063, 0000077953070, ...","[0000077940704, 0000077953063, 0000077953070, ...",[],"[0000077940704, 0000077953063, 0000077953070, ...",[],"[0000077940704, 0000077953063, 0000077953070, ...","[0000077940131, 0000077940704, 0000077953063, ...","[0000077940704, 0000077953063, 0000077953070, ...","[0000077940131, 0000077940704, 0000077953063, ..."
8,Supermercados DIA,"[0000077953063, 0000077953070, 0041789002922, ...","[0041789002922, 7500435004664, 7500435019224, ...","[0041789002922, 7613034416926, 7613034435408, ...","[0000075032715, 0041789002922, 7613034416902, ...","[0000075032715, 0041789002922, 7613034416902, ...","[0000075032715, 0000078924468, 0000078924529, ...","[7613034416902, 7613034435408, 7613034497062, ...","[0000040084107, 0000077903518, 4005808315093, ...","[0000040084107, 0000077903518, 7613034416926, ...","[0000077903518, 7500435019224, 7613034435408, ..."
9,Walmart SuperCenter,[],"[7501059273245, 7501059273252, 7501059273276, ...","[7622300864958, 7790040872400, 7790040931305, ...",[],"[7500435019248, 7500435112734, 7500435112765, ...",[],"[7500435112734, 7790010616751, 7790040711105, ...","[7500435019248, 7500435112765, 7613034428271, ...","[7790010596602, 7790040711105, 7790040872400, ...","[7500435004657, 7613034428271, 7622300829643, ..."


In [183]:
def get_unique_prod_with_min_price_by_mkt_brand(dataframe, mkt_brand, number_periods, products_df=None):
    """Return the products a chain sold cheapest in at least `number_periods` measurements.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One row per chain with a `banderaDescripcion` column; every other
        column is treated as a measurement whose cells hold the list of
        product codes for which that chain was cheapest (the layout returned
        by `get_products_with_min_price_by_mkt_brand`).
    mkt_brand : str
        Value of `banderaDescripcion` selecting the chain.
    number_periods : int
        Minimum number of measurements in which a product must appear.
    products_df : pandas.DataFrame, optional
        Frame with the columns `producto`, `marca` and `nombre`, used to look
        up the brand and name of each product. Defaults to the notebook-level
        `precio_producto_sucursal`, which is what the function always read
        before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        Columns `products`, `freq`, `producto`, `marca`, `nombre`, restricted
        to the rows with `freq >= number_periods`.

    Raises
    ------
    IndexError
        If `mkt_brand` is not present in `dataframe['banderaDescripcion']`.
    """
    if products_df is None:
        products_df = precio_producto_sucursal

    brand_rows = dataframe[dataframe['banderaDescripcion'] == mkt_brand]
    if brand_rows.empty:
        # Same exception type as the former `.values[0]` on an empty
        # selection, but with a message that names the cause.
        raise IndexError("no row for chain %r in 'banderaDescripcion'" % (mkt_brand,))

    # Take the measurement columns from the frame that was passed in rather
    # than from a notebook-level global.
    time_frames = [column for column in dataframe.columns if column != 'banderaDescripcion']

    # Count in place instead of building one ever-growing concatenated list.
    counter = collections.Counter()
    for time in time_frames:
        counter.update(brand_rows[time].iloc[0])

    df = pd.DataFrame(data={'products': list(counter.keys()), 'freq': list(counter.values())})

    df_with_out_duplicates = products_df[['producto', 'marca', 'nombre']].drop_duplicates(
        subset=['producto', 'marca', 'nombre']
    )

    if df.empty:
        # An empty frame gets default dtypes; align the join key with the
        # catalogue so the merge below cannot fail on a dtype mismatch.
        df = df.astype({'products': df_with_out_duplicates['producto'].dtype, 'freq': 'int64'})

    df = pd.merge(df, df_with_out_duplicates, left_on='products', right_on='producto', how='left')

    return df[df['freq'] >= number_periods]

In [162]:
# Keep the per-chain lists of cheapest products so they can be queried per chain.
min_price_by_mkt = get_products_with_min_price_by_mkt_brand(precio_producto_sucursal)

# Show the first two chains.
min_price_by_mkt.head(2)

Unnamed: 0,banderaDescripcion,1,2,3,4,5,6,7,8,9,10
0,COTO CICSA,"[0000077940131, 12-1-2800000937881, 4005808352...","[0000077940131, 12-1-2800000937881, 4005808352...","[12-1-2800000937881, 4005808352630, 7622300398...","[0000077940131, 0000080052760, 12-1-2800000937...","[12-1-2800000937881, 4005808352630, 7613035379...","[0000080052760, 12-1-2800000937881, 4005808352...","[0000080052760, 12-1-2800000937881, 4005808352...","[12-1-2800000937881, 4005808352630, 7613034191...","[12-1-2800000937881, 4005808352630, 7613034191...","[12-1-2800000937881, 4005808352630, 7501059273..."
1,Hipermercado Carrefour,"[0000077903518, 0000078924468, 0000080052760, ...","[0000077903518, 0000078924468, 0000078924529, ...","[0000075032715, 0000077903518, 3014260273903, ...","[0000075027513, 0000077903518, 0000077940704, ...","[0000075027513, 0000077903518, 0041789001918, ...","[0000075027513, 0000077903518, 0000077940704, ...","[0000075032715, 0000077903518, 0000078924468, ...","[0000075027513, 0000078924468, 0000078924529, ...","[0000075027513, 0000078924468, 0000078924529, ...","[0000075027513, 0000078924468, 0000078924529, ..."


In [184]:
# Products for which 'Hipermercado Carrefour' was the cheapest chain in at
# least 8 measurements.
get_unique_prod_with_min_price_by_mkt_brand(min_price_by_mkt, 'Hipermercado Carrefour', 8)

Unnamed: 0,products,freq,producto,marca,nombre
3,7500435004657,8,7500435004657,ARIEL,Jabon Liquido Ropa Agua Fria Ariel 3 Lt
10,7500435023306,8,7500435023306,ALWAYS,Toalla Femenina Plus Seca con Alas Always 16 Un
19,7622210649249,10,7622210649249,TERRABUSI,Galletitas Surtidas Chocolate Terrabusi Varied...
20,7622210649287,8,7622210649287,TERRABUSI,Galletitas Clasicas Surtidas Terrabusi Varieda...
23,7622300847265,9,7622300847265,EXPRESS,Galletitas Clasicas Express Pack 5 Un 540 Gr
24,7622300847340,10,7622300847340,EXPRESS,Galletitas Light Express 110 Gr
25,7622300864934,10,7622300864934,OREO,Galletitas Chocolate con Relleno de Vainilla O...
32,7622300869915,10,7622300869915,OREO,Galletitas Chocolate con Relleno de Chocolate ...
34,7702018652372,10,7702018652372,ORAL B,Cepillo Dental Oral B Classic 1 Un
35,7790010570541,9,7790010570541,JOHNSON'S,Shampoo para Bebe Cabellos Claros Mollusk John...
