In [14]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

**Generación de insights para ofertas relámpago:**

En conjunto con el desafío te compartimos un archivo llamado ofertas_relampago.csv el cual posee información de los resultados de ofertas del tipo relámpago para un periodo de tiempo y un país determinado.


Es decir, son ofertas que tienen una duración definida de algunas horas y un porcentaje de unidades (stock) comprometidas.

El objetivo de este desafío es hacer un EDA sobre estos datos buscando insights sobre este tipo de ofertas.

Las columnas del dataset son autoexplicativas pero puedes preguntarnos cualquier duda.

# 1. Importar los datos

In [15]:
ofertas_relampagos_df = pd.read_csv('../data/ofertas_relampago.csv')

In [16]:
ofertas_relampagos_df.OFFER_START_DATE = pd.to_datetime(ofertas_relampagos_df.OFFER_START_DATE)
ofertas_relampagos_df.OFFER_START_DTTM = pd.to_datetime(ofertas_relampagos_df.OFFER_START_DTTM)
ofertas_relampagos_df.OFFER_FINISH_DTTM = pd.to_datetime(ofertas_relampagos_df.OFFER_FINISH_DTTM)

In [17]:
ofertas_relampagos_df['SOLD_QUANTITY'] = ofertas_relampagos_df['SOLD_QUANTITY'].fillna(ofertas_relampagos_df.INVOLVED_STOCK-ofertas_relampagos_df.REMAINING_STOCK_AFTER_END)

In [18]:
ofertas_relampagos_df.sample(10)

Unnamed: 0,OFFER_START_DATE,OFFER_START_DTTM,OFFER_FINISH_DTTM,OFFER_TYPE,INVOLVED_STOCK,REMAINING_STOCK_AFTER_END,SOLD_AMOUNT,SOLD_QUANTITY,ORIGIN,SHIPPING_PAYMENT_TYPE,DOM_DOMAIN_AGG1,VERTICAL,DOMAIN_ID
25107,2021-06-20,2021-06-20 07:00:00+00:00,2021-06-20 13:00:00+00:00,lightning_deal,5,5,,0.0,,none,APPAREL ACCESORIES,APP & SPORTS,MLM-HATS_AND_CAPS
11915,2021-07-27,2021-07-27 13:00:00+00:00,2021-07-27 19:00:04+00:00,lightning_deal,5,4,3.62,1.0,,none,HOME&DECOR,HOME & INDUSTRY,MLM-CHRISTMAS_LIGHTS
37895,2021-07-22,2021-07-22 13:00:00+00:00,2021-07-22 19:00:00+00:00,lightning_deal,15,15,,0.0,,free_shipping,MOBILE,CE,MLM-TABLET_CASES
14694,2021-07-09,2021-07-09 07:00:00+00:00,2021-07-09 13:00:05+00:00,lightning_deal,5,4,1.92,1.0,,none,PERSONAL CARE,BEAUTY & HEALTH,MLM-NAIL_DRYERS
48656,2021-06-19,2021-06-19 07:00:00+00:00,2021-06-19 13:00:03+00:00,lightning_deal,5,5,,0.0,,free_shipping,TOOLS AND CONSTRUCTION,HOME & INDUSTRY,MLM-DRILL_BITS
11334,2021-07-27,2021-07-27 19:00:00+00:00,2021-07-28 01:00:00+00:00,lightning_deal,15,12,11.36,3.0,,none,PERSONAL CARE,BEAUTY & HEALTH,MLM-BODY_SKIN_CARE_PRODUCTS
41204,2021-06-25,2021-06-25 07:00:00+00:00,2021-06-25 13:00:35+00:00,lightning_deal,5,5,,0.0,,none,APPAREL,APP & SPORTS,MLM-SHIRTS
13900,2021-07-19,2021-07-19 19:00:00+00:00,2021-07-20 01:00:05+00:00,lightning_deal,15,6,69.79,9.0,,free_shipping,TOOLS AND CONSTRUCTION,HOME & INDUSTRY,MLM-ELECTRIC_DRILLS
21945,2021-07-28,2021-07-28 13:00:00+00:00,2021-07-28 19:00:00+00:00,lightning_deal,5,5,,0.0,,free_shipping,ELECTRONICS,CE,MLM-REFRIGERATORS
16368,2021-06-23,2021-06-23 07:00:00+00:00,2021-06-23 13:00:01+00:00,lightning_deal,5,2,16.41,3.0,,free_shipping,ELECTRONICS,CE,MLM-COFFEE_MAKERS


# 2. Creación de nuevas variables relevantes

In [19]:
ofertas_relampagos_df['DURATION_HRS']=((ofertas_relampagos_df.OFFER_FINISH_DTTM-ofertas_relampagos_df.OFFER_START_DTTM).dt.seconds/3600).astype(int)
ofertas_relampagos_df['DAY_OF_WEEK']=ofertas_relampagos_df.OFFER_START_DATE.dt.day_of_week.replace({0:'Lunes',1:'Martes',2:'Miércoles',3:'Jueves',4:'Viernes',5:'Sábado',6:'Domingo'})

In [20]:
ofertas_relampagos_df['PRICE_PER_UNIT']=np.round(ofertas_relampagos_df.SOLD_AMOUNT/ofertas_relampagos_df.SOLD_QUANTITY,2)
mean_price_per_unit = ofertas_relampagos_df.groupby('DOM_DOMAIN_AGG1')['PRICE_PER_UNIT'].transform('mean')
ofertas_relampagos_df['PRICE_PER_UNIT'] = ofertas_relampagos_df['PRICE_PER_UNIT'].fillna(mean_price_per_unit)
ofertas_relampagos_df['PRICE_PER_UNIT'] = ofertas_relampagos_df['PRICE_PER_UNIT'].fillna(ofertas_relampagos_df['PRICE_PER_UNIT'].median())
ofertas_relampagos_df['PRICE_PER_UNIT'] = np.round(ofertas_relampagos_df['PRICE_PER_UNIT'],2)

In [21]:
ofertas_relampagos_df.head()

Unnamed: 0,OFFER_START_DATE,OFFER_START_DTTM,OFFER_FINISH_DTTM,OFFER_TYPE,INVOLVED_STOCK,REMAINING_STOCK_AFTER_END,SOLD_AMOUNT,SOLD_QUANTITY,ORIGIN,SHIPPING_PAYMENT_TYPE,DOM_DOMAIN_AGG1,VERTICAL,DOMAIN_ID,DURATION_HRS,DAY_OF_WEEK,PRICE_PER_UNIT
0,2021-06-22,2021-06-22 16:00:00+00:00,2021-06-22 23:02:43+00:00,lightning_deal,4,-2,4.72,6.0,A,none,PETS FOOD,CPG,MLM-BIRD_FOODS,7,Martes,0.79
1,2021-06-22,2021-06-22 13:00:00+00:00,2021-06-22 19:00:02+00:00,lightning_deal,5,5,,0.0,,free_shipping,PET PRODUCTS,OTHERS,MLM-ANIMAL_AND_PET_PRODUCTS,6,Martes,7.1
2,2021-06-22,2021-06-22 07:00:00+00:00,2021-06-22 13:00:01+00:00,lightning_deal,15,12,10.73,3.0,,none,COMPUTERS,CE,MLM-SPEAKERS,6,Martes,3.58
3,2021-06-22,2021-06-22 19:00:00+00:00,2021-06-23 01:36:12+00:00,lightning_deal,15,13,7.03,2.0,,none,COMPUTERS,CE,MLM-HEADPHONES,6,Martes,3.52
4,2021-06-22,2021-06-22 13:00:00+00:00,2021-06-22 15:48:12+00:00,lightning_deal,15,0,39.65,15.0,,none,COMPUTERS,CE,MLM-HEADPHONES,2,Martes,2.64


# 3. EDA

Preguntas
- Histograma de Sold Quantity!!! Duración en Horas
- Por day of the week
- Value counts de VERTICAL!!
- ¿Cuál es la relación entre la duración en horas y la sold quantity? Y por VERTICAL? Por DOM_DOMAIN_AGG1?

Scattergraph supongo??

- ¿Cuál es la relación entre sold_quantity/sold_amount y shipping_payment_type?

## Histogramas

In [23]:
sold_quantity_sin_max_quartil =ofertas_relampagos_df.loc[(ofertas_relampagos_df.SOLD_QUANTITY<=np.quantile(ofertas_relampagos_df.SOLD_QUANTITY,0.75))&(ofertas_relampagos_df.SOLD_QUANTITY>0),'SOLD_QUANTITY']
sold_quantity_sin_max_quartil = sold_quantity_sin_max_quartil.rename('Sold Quantity')
fig = go.Figure(data=go.Histogram(x=sold_quantity_sin_max_quartil,marker_color='#2e3273',nbinsx=7))
fig.update_layout(title_text='Histograma de cantidades vendidas (min. 75%)', title_x=0.5,width=800,height=600,xaxis_title='Sold Quantity',yaxis_title='Frecuencia')

In [28]:
price_per_unit_sin_max_quartil =ofertas_relampagos_df.loc[(ofertas_relampagos_df.PRICE_PER_UNIT<=np.quantile(ofertas_relampagos_df.PRICE_PER_UNIT,0.75))&(ofertas_relampagos_df.SOLD_QUANTITY>0),'PRICE_PER_UNIT']
price_per_unit_sin_max_quartil = price_per_unit_sin_max_quartil.rename('Precio unitario')
fig = go.Figure(data=go.Histogram(x=price_per_unit_sin_max_quartil,marker_color='#2e3273',nbinsx=7))
fig.update_layout(title_text='Histograma de precio unitario de ofertas vendidas (min. 75%)', title_x=0.5,width=800,height=600,xaxis_title='Precio unitario',yaxis_title='Frecuencia')

#ofertas_relampagos_df.loc[ofertas_relampagos_df.PRICE_PER_UNIT<=np.quantile(ofertas_relampagos_df.PRICE_PER_UNIT,0.75),'PRICE_PER_UNIT'].plot(kind='hist')

In [32]:
duracion_hrs =ofertas_relampagos_df.loc[:,'DURATION_HRS']
duracion_hrs = duracion_hrs.rename('Duración')
fig = go.Figure(data=go.Histogram(x=duracion_hrs,marker_color='#2e3273',nbinsx=10))
fig.update_layout(title_text='Histograma de duración de ofertas', title_x=0.5,width=800,height=600,xaxis_title='Duración (hrs)',yaxis_title='Frecuencia')

In [48]:
dia_semana

Unnamed: 0_level_0,OFFER_TYPE,SOLD_QUANTITY,SOLD_AMOUNT
DAY_OF_WEEK,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Domingo,5834,3.52194,38.439695
Jueves,7286,5.858084,51.383731
Lunes,6307,7.182654,59.662545
Martes,7322,6.640945,58.953762
Miércoles,6718,6.637094,56.848631
Sábado,7506,3.921529,44.58166
Viernes,7773,4.954586,46.463682


In [49]:
dia_semana = ofertas_relampagos_df.groupby('DAY_OF_WEEK').agg({'OFFER_TYPE':'count','SOLD_QUANTITY':'mean','SOLD_AMOUNT':'mean'})
fig = go.Figure()
fig.add_trace(go.Bar(x=dia_semana.index,y=dia_semana['OFFER_TYPE'],marker_color='#2e3273'))
fig.add_trace(go.Line(x=dia_semana.index,y=dia_semana['SOLD_AMOUNT'],marker_color='##fbe74d'))
fig.update_layout(title_text='Número de ofertas por día de la semana', title_x=0.5,width=800,height=600,xaxis_title='Día',yaxis_title='Cantidad de ofertas')


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


