# Análisis del Comportamiento de Clientes en un Espacio de Trabajo y Estudio

In [1]:
import pandas as pd
from datetime import datetime

In [2]:
df_consumos = pd.read_csv('data/consumos.csv')
df_tickets = pd.read_csv('data/tickets.csv')

In [3]:
df_consumos

Unnamed: 0,ticket_id,producto,precio
0,1,Té verde,2.8
1,1,Capuchino,3.2
2,1,Croissant,2.2
3,2,Sándwich,4.5
4,2,Café con leche,3.0
...,...,...,...
5310,2677,Tarta de queso,3.5
5311,2678,Café con leche,3.0
5312,2678,Sándwich,4.5
5313,2679,Ensalada,5.0


In [4]:
df_tickets

Unnamed: 0,ticket_id,fecha,hora_llegada,hora_salida,duracion_min,tipo_visita,proposito,dispositivos_conectados,clima,total_usd
0,1,2025-04-15,13:36,15:18,103,individual,estudio,1,nublado,8.2
1,2,2025-04-15,08:09,10:04,115,individual,trabajo,1,nublado,7.5
2,3,2025-04-15,17:42,18:54,72,individual,estudio,2,nublado,10.3
3,4,2025-04-15,16:11,17:33,81,individual,estudio,2,nublado,2.8
4,5,2025-04-15,17:59,19:18,78,pareja,trabajo,2,nublado,6.3
...,...,...,...,...,...,...,...,...,...,...
2674,2675,2025-05-14,11:27,13:02,95,pareja,estudio,5,nublado,11.2
2675,2676,2025-05-14,11:39,12:32,53,individual,estudio,1,nublado,2.8
2676,2677,2025-05-14,10:37,12:28,111,grupo,estudio,2,nublado,3.5
2677,2678,2025-05-14,10:36,11:28,52,individual,estudio,2,nublado,7.5


## 1. Preprocesamiento de los datos

In [5]:
df_tickets.describe()

Unnamed: 0,ticket_id,duracion_min,dispositivos_conectados,total_usd
count,2679.0,2679.0,2679.0,2679.0
mean,1340.0,89.661814,2.080254,6.743412
std,773.50501,30.048949,1.21752,2.947882
min,1.0,15.0,1.0,2.2
25%,670.5,69.0,1.0,4.0
50%,1340.0,90.0,2.0,6.7
75%,2009.5,110.5,2.0,9.2
max,2679.0,192.0,5.0,13.5


In [6]:
df_tickets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2679 entries, 0 to 2678
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   ticket_id                2679 non-null   int64  
 1   fecha                    2679 non-null   object 
 2   hora_llegada             2679 non-null   object 
 3   hora_salida              2679 non-null   object 
 4   duracion_min             2679 non-null   int64  
 5   tipo_visita              2679 non-null   object 
 6   proposito                2679 non-null   object 
 7   dispositivos_conectados  2679 non-null   int64  
 8   clima                    2679 non-null   object 
 9   total_usd                2679 non-null   float64
dtypes: float64(1), int64(3), object(6)
memory usage: 209.4+ KB


In [8]:
def datetim(data):
    return datetime.strptime(data, '%H:%M').time()

df_tickets['hora_salida'] = df_tickets['hora_salida'].apply(datetim)
df_tickets['hora_llegada'] = df_tickets['hora_llegada'].apply(datetim)

In [14]:
df_tickets['fecha'] = pd.to_datetime(df_tickets['fecha'], format='%Y-%m-%d')

In [19]:
df_tickets['month'] = df_tickets['fecha'].dt.month_name()

In [20]:
df_tickets

Unnamed: 0,ticket_id,fecha,hora_llegada,hora_salida,duracion_min,tipo_visita,proposito,dispositivos_conectados,clima,total_usd,month
0,1,2025-04-15,13:36:00,15:18:00,103,individual,estudio,1,nublado,8.2,April
1,2,2025-04-15,08:09:00,10:04:00,115,individual,trabajo,1,nublado,7.5,April
2,3,2025-04-15,17:42:00,18:54:00,72,individual,estudio,2,nublado,10.3,April
3,4,2025-04-15,16:11:00,17:33:00,81,individual,estudio,2,nublado,2.8,April
4,5,2025-04-15,17:59:00,19:18:00,78,pareja,trabajo,2,nublado,6.3,April
...,...,...,...,...,...,...,...,...,...,...,...
2674,2675,2025-05-14,11:27:00,13:02:00,95,pareja,estudio,5,nublado,11.2,May
2675,2676,2025-05-14,11:39:00,12:32:00,53,individual,estudio,1,nublado,2.8,May
2676,2677,2025-05-14,10:37:00,12:28:00,111,grupo,estudio,2,nublado,3.5,May
2677,2678,2025-05-14,10:36:00,11:28:00,52,individual,estudio,2,nublado,7.5,May


## Análisis Exploratorio de Datos (EDA)

### ¿En qué horarios hay mayor afluencia de personas trabajando o estudiando?

In [32]:
df_tickets['hour_arrive'] = df_tickets['hora_llegada'].apply(lambda x: x.hour)
df_tickets['hour_leave'] = df_tickets['hora_llegada'].apply(lambda x: x.hour)

In [34]:
df_tickets = df_tickets[['ticket_id', 'fecha', 'month','hora_llegada', 'hora_salida','hour_arrive', 'hour_leave', 'duracion_min', 'tipo_visita', 'proposito', 'dispositivos_conectados', 'clima', 'total_usd']]

In [35]:
df_tickets

Unnamed: 0,ticket_id,fecha,month,hora_llegada,hora_salida,hour_arrive,hour_leave,duracion_min,tipo_visita,proposito,dispositivos_conectados,clima,total_usd
0,1,2025-04-15,April,13:36:00,15:18:00,13,13,103,individual,estudio,1,nublado,8.2
1,2,2025-04-15,April,08:09:00,10:04:00,8,8,115,individual,trabajo,1,nublado,7.5
2,3,2025-04-15,April,17:42:00,18:54:00,17,17,72,individual,estudio,2,nublado,10.3
3,4,2025-04-15,April,16:11:00,17:33:00,16,16,81,individual,estudio,2,nublado,2.8
4,5,2025-04-15,April,17:59:00,19:18:00,17,17,78,pareja,trabajo,2,nublado,6.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2674,2675,2025-05-14,May,11:27:00,13:02:00,11,11,95,pareja,estudio,5,nublado,11.2
2675,2676,2025-05-14,May,11:39:00,12:32:00,11,11,53,individual,estudio,1,nublado,2.8
2676,2677,2025-05-14,May,10:37:00,12:28:00,10,10,111,grupo,estudio,2,nublado,3.5
2677,2678,2025-05-14,May,10:36:00,11:28:00,10,10,52,individual,estudio,2,nublado,7.5
