# Add Data

In [165]:
### General ###

import numpy as np
import pandas as pd
import calendar

### Visualization Tools ###

# The plotting library that seaborn is built on
import matplotlib.pyplot as plt

# Built on top of matplotlib, with more attractive defaults
import seaborn as sns

# For interactive visualization, not good for too many datapoints
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [166]:
# Load the V1.1 export into the working DataFrame.
raw_csv_path = './exported_data_2018-09-24_to_2023-07-14_V1.1.csv'
df = pd.read_csv(raw_csv_path)

In [167]:
# Remove the leftover 'Unnamed: 0' column (presumably the row index saved by
# an earlier to_csv call -- confirm against the producer of the V1.1 file).
# errors='ignore' keeps this cell re-runnable: without it, running the cell a
# second time raises KeyError because the column is already gone.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [168]:
# List every distinct market name present in the data.
pd.unique(df['Mercado'])

array(['Vega Modelo de Temuco', 'Femacal de La Calera',
       'Vega Central Mapocho de Santiago', 'Vega Monumental Concepción',
       'Central Lo Valledor de Santiago',
       'Feria Lagunitas de Puerto Montt',
       'Terminal La Palmera de La Serena', 'Macroferia Regional de Talca',
       'Terminal Hortofrutícola Agro Chillán',
       'Comercializadora del Agro de Limarí',
       'Agrícola del Norte S.A. de Arica',
       'Mapocho Venta Directa de Santiago',
       'Mercado Mayorista Lo Valledor de Santiago'], dtype=object)

In [169]:
# Hard-coded (latitude, longitude) pair for each market name found in
# df['Mercado']. Keeping them in one table replaces thirteen copy-pasted
# cells and makes adding or correcting a market a one-line change.
MARKET_COORDINATES = {
    'Vega Modelo de Temuco': (-38.693614782579246, -72.52488766792806),
    'Femacal de La Calera': (-32.785622816862606, -71.1884954316865),
    'Vega Central Mapocho de Santiago': (-33.427611804927054, -70.64949941817034),
    'Vega Monumental Concepción': (-36.807464037720756, -73.07157073479418),
    'Central Lo Valledor de Santiago': (-33.481182817387094, -70.68250970411823),
    'Feria Lagunitas de Puerto Montt': (-41.455866412678226, -73.00103064483459),
    'Terminal La Palmera de La Serena': (-29.89028994286884, -71.25312099130934),
    'Macroferia Regional de Talca': (-35.42081315536697, -71.63558080884442),
    'Terminal Hortofrutícola Agro Chillán': (-36.57060167406145, -72.09974646092063),
    'Comercializadora del Agro de Limarí': (-30.59589566101363, -71.19102668758502),
    'Agrícola del Norte S.A. de Arica': (-18.499523817239574, -70.28420073026261),
    'Mapocho Venta Directa de Santiago': (-33.4330979830953, -70.64946796447259),
    'Mercado Mayorista Lo Valledor de Santiago': (-33.482984251005654, -70.68313373166113),
}

# Write each market's coordinates onto its rows. Rows whose market is not in
# the table are left without a value (NaN), which the null-count check in the
# following cell will reveal.
for market_name, (lat, lon) in MARKET_COORDINATES.items():
    # Build the row mask once and reuse it for both columns.
    is_market = df['Mercado'] == market_name
    df.loc[is_market, 'Latitude'] = lat
    df.loc[is_market, 'Longitude'] = lon

In [182]:
# Sanity check: count the rows that still lack a coordinate in either column.
df[['Latitude', 'Longitude']].isna().sum()

Latitude     0
Longitude    0
dtype: int64

In [183]:
# Rows whose 'Fecha' falls in 2023-01-23 .. 2023-01-28, both ends included.
in_window = df['Fecha'].between('2023-01-23', '2023-01-28')
df[in_window]

Unnamed: 0,Variedad,Mercado,Unidad,Dia,Precio,Fecha,Volumen,Producto,Total,Latitude,Longitude
160402,Morada,Agrícola del Norte S.A. de Arica,Malla 18 kilos,Lunes,0.0,2023-01-23,0.0,Cebolla,0.0,-18.499524,-70.284201
160403,Morada,Mercado Mayorista Lo Valledor de Santiago,Malla 18 kilos,Lunes,9000.0,2023-01-23,2200.0,Cebolla,19800000.0,-33.482984,-70.683134
160404,Morada,Femacal de La Calera,Malla 18 kilos,Lunes,9250.0,2023-01-23,160.0,Cebolla,1480000.0,-32.785623,-71.188495
160405,Morada,Feria Lagunitas de Puerto Montt,Malla 18 kilos,Lunes,0.0,2023-01-23,0.0,Cebolla,0.0,-41.455866,-73.001031
160406,Morada,Terminal La Palmera de La Serena,Malla 18 kilos,Lunes,9500.0,2023-01-23,2000.0,Cebolla,19000000.0,-29.890290,-71.253121
...,...,...,...,...,...,...,...,...,...,...,...
161119,Sin especificar,Terminal Hortofrutícola Agro Chillán,Saco 20 kilos,Viernes,8250.0,2023-01-27,160.0,Zanahoria,1320000.0,-36.570602,-72.099746
161120,Sin especificar,Terminal La Palmera de La Serena,Saco 20 kilos,Viernes,5500.0,2023-01-27,600.0,Zanahoria,3300000.0,-29.890290,-71.253121
161121,Sin especificar,Vega Central Mapocho de Santiago,Saco 20 kilos,Viernes,18000.0,2023-01-27,770.0,Zanahoria,13860000.0,-33.427612,-70.649499
161122,Sin especificar,Vega Modelo de Temuco,Saco 20 kilos,Viernes,0.0,2023-01-27,0.0,Zanahoria,0.0,-38.693615,-72.524888


In [184]:
# Upper-case the unit labels so that later matching is not affected by
# differences in letter case.
unidad_upper = df['Unidad'].str.upper()
df['Unidad'] = unidad_upper

In [185]:
# Distinct unit labels, followed by how many there are.
unique_units = df['Unidad'].unique()
unique_units, unique_units.shape

(array(['MALLA 18 KILOS', 'MALLA 16 KILOS', 'CAJA 10 UNIDADES',
        'CAJA 15 UNIDADES', 'BIN (400 KILOS)', 'CAJA 16 KILOS EMPEDRADA',
        'CAJA 15 KILOS GRANEL', 'KILO (EN CAJA DE 15 KILOS)',
        'KILO (EN CAJA DE 17 KILOS)', 'SACO 25 KILOS', 'MALLA 25 KILOS',
        'BIN (450 KILOS)', 'CAJA 18 KILOS EMPEDRADA', 'BANDEJA 18 KILOS',
        'CAJA 12 KILOS', 'SACO 20 KILOS', 'UNIDAD', 'MALLA 15 KILOS',
        'PAQUETE 20 UNIDADES', 'PAQUETE 20 UNIDADES (VOLUMEN EN UNIDADES)',
        'BANDEJA 10 KILOS', '$/PAQUETE 20 UNIDADES (VOLUMEN EN UNIDADES)',
        'CAJA 16 KILOS', 'CAJA 20 KILOS', 'CAJA 15 KILOS',
        'CAJA 15 KILOS EMPEDRADA', 'MALLA 17 KILOS',
        'KILO (EN CAJA DE 18 KILOS)', 'BANDEJA 8 KILOS', 'CAJA 14 KILOS',
        'CAJA 18 KILOS', 'CAJA 10 KILOS', 'BANDEJA 9 KILOS',
        'BANDEJA 12 KILOS', 'MALLA 20 KILOS',
        'PAQUETE 10 UNIDADES (VOLUMEN EN UNIDADES)', 'MALLA 13 KILOS',
        'BANDEJA 15 KILOS GRANEL', 'BANDEJA 18 KILOS GRANEL',
      

In [186]:
# Distinct unit labels that mention the given substring, and their count.
unit = 'KILO'
# regex=False: `unit` is a plain substring. The labels themselves contain
# characters such as '$', '(' and ')', which the default regex mode would
# interpret as pattern syntax if they ever appeared in `unit`.
kilo_labels = df.loc[df['Unidad'].str.contains(unit, regex=False), 'Unidad'].unique()
kilo_labels, kilo_labels.shape

(array(['MALLA 18 KILOS', 'MALLA 16 KILOS', 'BIN (400 KILOS)',
        'CAJA 16 KILOS EMPEDRADA', 'CAJA 15 KILOS GRANEL',
        'KILO (EN CAJA DE 15 KILOS)', 'KILO (EN CAJA DE 17 KILOS)',
        'SACO 25 KILOS', 'MALLA 25 KILOS', 'BIN (450 KILOS)',
        'CAJA 18 KILOS EMPEDRADA', 'BANDEJA 18 KILOS', 'CAJA 12 KILOS',
        'SACO 20 KILOS', 'MALLA 15 KILOS', 'BANDEJA 10 KILOS',
        'CAJA 16 KILOS', 'CAJA 20 KILOS', 'CAJA 15 KILOS',
        'CAJA 15 KILOS EMPEDRADA', 'MALLA 17 KILOS',
        'KILO (EN CAJA DE 18 KILOS)', 'BANDEJA 8 KILOS', 'CAJA 14 KILOS',
        'CAJA 18 KILOS', 'CAJA 10 KILOS', 'BANDEJA 9 KILOS',
        'BANDEJA 12 KILOS', 'MALLA 20 KILOS', 'MALLA 13 KILOS',
        'BANDEJA 15 KILOS GRANEL', 'BANDEJA 18 KILOS GRANEL',
        'CAJA 13 KILOS', 'KILO (EN CAJA DE 8 KILOS )',
        'CAJA 18 KILOS GRANEL'], dtype=object),
 (35,))

In [187]:
# Distinct unit labels that mention the given substring, and their count.
unit = 'UNIDAD'
# regex=False: `unit` is a plain substring. The labels themselves contain
# characters such as '$', '(' and ')', which the default regex mode would
# interpret as pattern syntax if they ever appeared in `unit`.
unidad_labels = df.loc[df['Unidad'].str.contains(unit, regex=False), 'Unidad'].unique()
unidad_labels, unidad_labels.shape

(array(['CAJA 10 UNIDADES', 'CAJA 15 UNIDADES', 'UNIDAD',
        'PAQUETE 20 UNIDADES', 'PAQUETE 20 UNIDADES (VOLUMEN EN UNIDADES)',
        '$/PAQUETE 20 UNIDADES (VOLUMEN EN UNIDADES)',
        'PAQUETE 10 UNIDADES (VOLUMEN EN UNIDADES)'], dtype=object),
 (7,))

In [188]:
# A label that is exactly 'UNIDAD' carries no count; rewrite it with an
# explicit count of 1 so it has the same "<number> <name>" form that the
# extraction regex in the following cells looks for.
is_bare_unit = df['Unidad'].eq('UNIDAD')
df['Unidad'] = df['Unidad'].mask(is_bare_unit, '1 UNIDADES')

In [189]:
# Which unit names does the "<digits> <word>" pattern capture across all labels?
count_and_name = df['Unidad'].str.extract(r'(\d+) (\w+)')
count_and_name[1].unique()

array(['KILOS', 'UNIDADES'], dtype=object)

In [190]:
# Split each unit label into the first "<digits> <word>" pair it contains:
# the digits go to 'Numero Unidad', the word to 'Nombre Unidad'.
unit_parts = df['Unidad'].str.extract(r'(\d+) (\w+)')
# str.extract returns text, so convert the count to a numeric dtype; left as
# strings, arithmetic on the column would fail or concatenate instead.
df['Numero Unidad'] = pd.to_numeric(unit_parts[0])
df['Nombre Unidad'] = unit_parts[1]
# NOTE(review): for labels such as 'KILO (EN CAJA DE 15 KILOS)' the pattern
# captures 15 / KILOS (the first number in the label) -- confirm that this is
# the intended unit size for those rows.

In [191]:
# Display the frame (the notebook truncates it) to inspect the newly added
# 'Numero Unidad' and 'Nombre Unidad' columns.
df

Unnamed: 0,Variedad,Mercado,Unidad,Dia,Precio,Fecha,Volumen,Producto,Total,Latitude,Longitude,Numero Unidad,Nombre Unidad
0,Morada,Vega Modelo de Temuco,MALLA 18 KILOS,Lunes,0.0000,2018-09-24,0.0,Cebolla,0.000000e+00,-38.693615,-72.524888,18,KILOS
1,Sin especificar,Femacal de La Calera,MALLA 18 KILOS,Lunes,6777.7759,2018-09-24,180.0,Cebolla,1.220000e+06,-32.785623,-71.188495,18,KILOS
2,Sin especificar,Vega Central Mapocho de Santiago,MALLA 18 KILOS,Lunes,0.0000,2018-09-24,0.0,Cebolla,0.000000e+00,-33.427612,-70.649499,18,KILOS
3,Sin especificar,Vega Monumental Concepción,MALLA 18 KILOS,Lunes,0.0000,2018-09-24,0.0,Cebolla,0.000000e+00,-36.807464,-73.071571,18,KILOS
4,Sin especificar,Central Lo Valledor de Santiago,MALLA 16 KILOS,Lunes,8830.4307,2018-09-24,2300.0,Cebolla,2.030999e+07,-33.481183,-70.682510,16,KILOS
...,...,...,...,...,...,...,...,...,...,...,...,...,...
181404,Sin especificar,Macroferia Regional de Talca,SACO 20 KILOS,Viernes,4500.0000,2023-07-14,600.0,Zanahoria,2.700000e+06,-35.420813,-71.635581,20,KILOS
181405,Sin especificar,Terminal Hortofrutícola Agro Chillán,SACO 20 KILOS,Viernes,0.0000,2023-07-14,0.0,Zanahoria,0.000000e+00,-36.570602,-72.099746,20,KILOS
181406,Sin especificar,Terminal La Palmera de La Serena,SACO 20 KILOS,Viernes,6250.0000,2023-07-14,440.0,Zanahoria,2.750000e+06,-29.890290,-71.253121,20,KILOS
181407,Sin especificar,Vega Central Mapocho de Santiago,SACO 20 KILOS,Viernes,6000.0000,2023-07-14,590.0,Zanahoria,3.540000e+06,-33.427612,-70.649499,20,KILOS


In [192]:
# Displays the frame again; no cell has modified it since the previous display.
df

Unnamed: 0,Variedad,Mercado,Unidad,Dia,Precio,Fecha,Volumen,Producto,Total,Latitude,Longitude,Numero Unidad,Nombre Unidad
0,Morada,Vega Modelo de Temuco,MALLA 18 KILOS,Lunes,0.0000,2018-09-24,0.0,Cebolla,0.000000e+00,-38.693615,-72.524888,18,KILOS
1,Sin especificar,Femacal de La Calera,MALLA 18 KILOS,Lunes,6777.7759,2018-09-24,180.0,Cebolla,1.220000e+06,-32.785623,-71.188495,18,KILOS
2,Sin especificar,Vega Central Mapocho de Santiago,MALLA 18 KILOS,Lunes,0.0000,2018-09-24,0.0,Cebolla,0.000000e+00,-33.427612,-70.649499,18,KILOS
3,Sin especificar,Vega Monumental Concepción,MALLA 18 KILOS,Lunes,0.0000,2018-09-24,0.0,Cebolla,0.000000e+00,-36.807464,-73.071571,18,KILOS
4,Sin especificar,Central Lo Valledor de Santiago,MALLA 16 KILOS,Lunes,8830.4307,2018-09-24,2300.0,Cebolla,2.030999e+07,-33.481183,-70.682510,16,KILOS
...,...,...,...,...,...,...,...,...,...,...,...,...,...
181404,Sin especificar,Macroferia Regional de Talca,SACO 20 KILOS,Viernes,4500.0000,2023-07-14,600.0,Zanahoria,2.700000e+06,-35.420813,-71.635581,20,KILOS
181405,Sin especificar,Terminal Hortofrutícola Agro Chillán,SACO 20 KILOS,Viernes,0.0000,2023-07-14,0.0,Zanahoria,0.000000e+00,-36.570602,-72.099746,20,KILOS
181406,Sin especificar,Terminal La Palmera de La Serena,SACO 20 KILOS,Viernes,6250.0000,2023-07-14,440.0,Zanahoria,2.750000e+06,-29.890290,-71.253121,20,KILOS
181407,Sin especificar,Vega Central Mapocho de Santiago,SACO 20 KILOS,Viernes,6000.0000,2023-07-14,590.0,Zanahoria,3.540000e+06,-33.427612,-70.649499,20,KILOS


In [193]:
# Missing-value count for every column, as a final check before exporting.
df.isna().sum()

Variedad           0
Mercado            0
Unidad             0
Dia                0
Precio           571
Fecha              0
Volumen          348
Producto           0
Total            919
Latitude           0
Longitude          0
Numero Unidad      0
Nombre Unidad      0
dtype: int64

In [194]:
# Save the frame, now with coordinates and split unit columns, as the V1.2 file.
# The row index is written as well (to_csv default), so the file will have a
# leading unnamed column.
enriched_csv_path = './exported_data_2018-09-24_to_2023-07-14_V1.2.csv'
df.to_csv(enriched_csv_path)