# Ventas de las tiendas vs distancia

### Código todo en uno ejecutable

In [1]:
import pandas as pd
import math

# Load the per-store sales by postal code (CP). Per the original note the file
# holds the store name, the store's CP, the sale's CP and the amount sold.
sales = pd.read_csv('03_Input_Sales_per_point.csv', sep=",", decimal=".")

# Load the coordinates of every Spanish postal code; column dtypes are pinned
# at read time.
coord = pd.read_csv(
    '02_Output_Codigos_Postales.csv',
    sep=",",
    decimal=".",
    dtype={
        'CP': 'int',
        'Market Size': 'float',
        'Latitude': 'float',
        'Longitude': 'float',
    },
)

# Attach the coordinates of the store's postal code ('CP CMP') as lata/lona.
# The market size of the store's own CP is not kept at this stage.
sales2 = (
    sales
    .merge(coord, left_on='CP CMP', right_on='CP', how='left')
    .rename(columns={'Latitude': 'lata', 'Longitude': 'lona'})
    .drop(['CP', 'Market Size'], axis=1)
)

# Attach the coordinates of the sale's postal code ('CP Sales') as latb/lonb,
# this time keeping 'Market Size' (it belongs to the sale's CP).
df = (
    sales2
    .merge(coord, left_on='CP Sales', right_on='CP', how='left')
    .rename(columns={'Latitude': 'latb', 'Longitude': 'lonb'})
    .drop('CP', axis=1)
)

# Compute the distance between the store's postal code and the sale's postal
# code from their coordinates.
EARTH_RADIUS_KM = 6371  # sphere radius used to turn the central angle into km


def _great_circle_km(lata, lona, latb, lonb):
    """Return the great-circle distance in km between two points.

    All four arguments are in decimal degrees. The spherical law of cosines is
    applied to the colatitudes (90 - latitude). NaN inputs propagate to a NaN
    result.
    """
    colat_a = math.radians(90 - lata)
    colat_b = math.radians(90 - latb)
    cos_angle = (math.cos(colat_a) * math.cos(colat_b)
                 + math.sin(colat_a) * math.sin(colat_b) * math.cos(math.radians(lona - lonb)))
    # Floating-point rounding can push the cosine just outside [-1, 1] (for
    # example for two identical points), which makes math.acos raise. Clamp it
    # instead of hiding every possible error behind a bare except.
    if cos_angle > 1.0:
        cos_angle = 1.0
    elif cos_angle < -1.0:
        cos_angle = -1.0
    return math.acos(cos_angle) * EARTH_RADIUS_KM


# The left merges leave NaN coordinates for postal codes that are absent from
# the coordinates table. Rounding a NaN distance fails anyway, so fail early
# with a message that names the cause.
missing_coords = df[['lata', 'lona', 'latb', 'lonb']].isna().any(axis=1)
if missing_coords.any():
    raise ValueError(
        "%d row(s) of df have no coordinates for the store or sale postal code; "
        "cannot compute distances" % int(missing_coords.sum())
    )

dist_km = [
    _great_circle_km(lat_a, lon_a, lat_b, lon_b)
    for lat_a, lon_a, lat_b, lon_b in zip(df['lata'], df['lona'], df['latb'], df['lonb'])
]

# Build the result in one go instead of growing it row by row with .loc
# (quadratic). Columns 1-4 of df come from the sales file and are read by
# position, as before: store name, store CP, sale CP, sales amount
# (assumed from the original positional reads; confirm against the CSV header).
# The index starts at 1 to keep the original row labels.
df2 = pd.DataFrame(
    {
        'CMP': df.iloc[:, 1].to_numpy(),
        'CP1': df.iloc[:, 2].to_numpy(),
        'CP2': df.iloc[:, 3].to_numpy(),
        'Dist': dist_km,
        'Roundist': [round(d) for d in dist_km],
        'Sales': df.iloc[:, 4].to_numpy(),
        'Market Size': df['Market Size'].to_numpy(),
    },
    columns=['CMP', 'CP1', 'CP2', 'Dist', 'Roundist', 'Sales', 'Market Size'],
    index=range(1, len(df) + 1),
)
    
# Aggregate sales and market size per store (CMP) and rounded distance.
# groupby sorts by its keys, so rows within each store are in increasing distance.
Dist_sales = df2.groupby(['CMP', 'Roundist'], as_index=False).agg({'Sales': 'sum', 'Market Size': 'sum'})

# Add the share of total sales and the accumulated columns.
# The running totals restart for every store: a plain cumsum() over the whole
# frame would carry the previous stores' totals into the next store's rows.
# NOTE(review): Pct and Acc_Pct stay relative to the total over ALL stores, as
# before; confirm whether a per-store denominator is wanted instead.
total_sales = Dist_sales['Sales'].sum()
Dist_sales['Pct'] = Dist_sales['Sales'] / total_sales
Dist_sales['Acc_sales'] = Dist_sales.groupby('CMP')['Sales'].cumsum()
Dist_sales['Acc_Pct'] = Dist_sales['Acc_sales'] / total_sales
Dist_sales['Acc_MSize'] = Dist_sales.groupby('CMP')['Market Size'].cumsum()
Dist_sales['MShare'] = Dist_sales['Sales'] / Dist_sales['Market Size']
Dist_sales['Acc_MShare'] = Dist_sales['Acc_sales'] / Dist_sales['Acc_MSize']

# Drop postal codes that are too far away: keep rounded distances below the cutoff.
DIST_CUTOFF_KM = 21  # exclusive upper bound on Roundist
CMP_Dist_Sales = Dist_sales[Dist_sales.Roundist < DIST_CUTOFF_KM]

# Export the result to a CSV file.
CMP_Dist_Sales.to_csv('03_Output_CMP_Dist_sales.csv', sep=',', decimal=".", index=False)

### Ejercicio por partes

Juntamos las ventas por CP por cada Punto de venta y las coordenadas, para saber la venta por distancia y el tamaño del mercado

In [2]:
import pandas as pd
import math

# Load the per-store sales by postal code (CP). Per the original note the file
# holds the store name, the store's CP, the sale's CP and the amount sold.
sales = pd.read_csv('03_Input_Sales_per_point.csv', sep=",", decimal=".")

# Load the coordinates of every Spanish postal code; column dtypes are pinned
# at read time.
coord = pd.read_csv(
    '02_Output_Codigos_Postales.csv',
    sep=",",
    decimal=".",
    dtype={
        'CP': 'int',
        'Market Size': 'float',
        'Latitude': 'float',
        'Longitude': 'float',
    },
)

# Attach the coordinates of the store's postal code ('CP CMP') as lata/lona.
# The market size of the store's own CP is not kept at this stage.
sales2 = (
    sales
    .merge(coord, left_on='CP CMP', right_on='CP', how='left')
    .rename(columns={'Latitude': 'lata', 'Longitude': 'lona'})
    .drop(['CP', 'Market Size'], axis=1)
)

# Attach the coordinates of the sale's postal code ('CP Sales') as latb/lonb,
# this time keeping 'Market Size' (it belongs to the sale's CP).
df = (
    sales2
    .merge(coord, left_on='CP Sales', right_on='CP', how='left')
    .rename(columns={'Latitude': 'latb', 'Longitude': 'lonb'})
    .drop('CP', axis=1)
)

# Compute the distance between the store's postal code and the sale's postal
# code from their coordinates.
EARTH_RADIUS_KM = 6371  # sphere radius used to turn the central angle into km


def _great_circle_km(lata, lona, latb, lonb):
    """Return the great-circle distance in km between two points.

    All four arguments are in decimal degrees. The spherical law of cosines is
    applied to the colatitudes (90 - latitude). NaN inputs propagate to a NaN
    result.
    """
    colat_a = math.radians(90 - lata)
    colat_b = math.radians(90 - latb)
    cos_angle = (math.cos(colat_a) * math.cos(colat_b)
                 + math.sin(colat_a) * math.sin(colat_b) * math.cos(math.radians(lona - lonb)))
    # Floating-point rounding can push the cosine just outside [-1, 1] (for
    # example for two identical points), which makes math.acos raise. Clamp it
    # instead of hiding every possible error behind a bare except.
    if cos_angle > 1.0:
        cos_angle = 1.0
    elif cos_angle < -1.0:
        cos_angle = -1.0
    return math.acos(cos_angle) * EARTH_RADIUS_KM


# The left merges leave NaN coordinates for postal codes that are absent from
# the coordinates table. Rounding a NaN distance fails anyway, so fail early
# with a message that names the cause.
missing_coords = df[['lata', 'lona', 'latb', 'lonb']].isna().any(axis=1)
if missing_coords.any():
    raise ValueError(
        "%d row(s) of df have no coordinates for the store or sale postal code; "
        "cannot compute distances" % int(missing_coords.sum())
    )

dist_km = [
    _great_circle_km(lat_a, lon_a, lat_b, lon_b)
    for lat_a, lon_a, lat_b, lon_b in zip(df['lata'], df['lona'], df['latb'], df['lonb'])
]

# Build the result in one go instead of growing it row by row with .loc
# (quadratic). Columns 1-4 of df come from the sales file and are read by
# position, as before: store name, store CP, sale CP, sales amount
# (assumed from the original positional reads; confirm against the CSV header).
# The index starts at 1 to keep the original row labels.
df2 = pd.DataFrame(
    {
        'CMP': df.iloc[:, 1].to_numpy(),
        'CP1': df.iloc[:, 2].to_numpy(),
        'CP2': df.iloc[:, 3].to_numpy(),
        'Dist': dist_km,
        'Roundist': [round(d) for d in dist_km],
        'Sales': df.iloc[:, 4].to_numpy(),
        'Market Size': df['Market Size'].to_numpy(),
    },
    columns=['CMP', 'CP1', 'CP2', 'Dist', 'Roundist', 'Sales', 'Market Size'],
    index=range(1, len(df) + 1),
)
df2.head()

Unnamed: 0,CMP,CP1,CP2,Dist,Roundist,Sales,Market Size
1,Centro 1,3015,2001,137.564463,138,432.1,4700638.0
2,Centro 1,3015,2005,138.722344,139,134.4,11214890.0
3,Centro 1,3015,2006,149.034177,149,1035.6,8159006.0
4,Centro 1,3015,2008,134.155501,134,50.8,4868567.0
5,Centro 1,3015,2480,160.273548,160,3127.4,293611.4


Comprobaciones: distancia hasta la que acumulamos más del 90 % de las ventas y distancia a partir de la cual la cuota de mercado (MShare) cae por debajo del 5 %.

In [3]:
# Aggregate sales and market size per rounded distance, all stores together.
Dist_sales = df2.groupby(['Roundist'], as_index=False).agg({'Sales': 'sum', 'Market Size': 'sum'})

# Derive the share of total sales and the running (accumulated) figures.
total_sales = Dist_sales['Sales'].sum()
running_sales = Dist_sales['Sales'].cumsum()
running_msize = Dist_sales['Market Size'].cumsum()

# Dict order fixes the column order of the added columns.
Dist_sales = Dist_sales.assign(**{
    'Pct': Dist_sales['Sales'] / total_sales,
    'Acc_sales': running_sales,
    'Acc_Pct': running_sales / total_sales,
    'Acc_MSize': running_msize,
    'MShare': Dist_sales['Sales'] / Dist_sales['Market Size'],
    'Acc_MShare': running_sales / running_msize,
})

Dist_sales.head(40)

Unnamed: 0,Roundist,Sales,Market Size,Pct,Acc_sales,Acc_Pct,Acc_MSize,MShare,Acc_MShare
0,0,17135404.26,158086500.0,0.106755,17135400.0,0.106755,158086500.0,0.108393,0.108393
1,1,9494651.25,109507700.0,0.059152,26630060.0,0.165907,267594200.0,0.086703,0.099517
2,2,18199062.26,209388700.0,0.113381,44829120.0,0.279289,476982900.0,0.086915,0.093985
3,3,22834422.12,267624500.0,0.14226,67663540.0,0.421549,744607400.0,0.085323,0.090871
4,4,14767637.92,241401500.0,0.092003,82431180.0,0.513552,986008900.0,0.061175,0.083601
5,5,14030766.85,204349100.0,0.087413,96461940.0,0.600965,1190358000.0,0.068661,0.081036
6,6,8454402.1,163571100.0,0.052672,104916300.0,0.653637,1353929000.0,0.051686,0.07749
7,7,7807894.59,152530700.0,0.048644,112724200.0,0.70228,1506460000.0,0.051189,0.074827
8,8,4718302.74,87726800.0,0.029395,117442500.0,0.731676,1594187000.0,0.053784,0.073669
9,9,5278777.14,118236800.0,0.032887,122721300.0,0.764563,1712423000.0,0.044646,0.071665


Exportamos para el siguiente paso

In [4]:
# Aggregate sales and market size per store (CMP) and rounded distance.
# groupby sorts by its keys, so rows within each store are in increasing distance.
Dist_sales = df2.groupby(['CMP', 'Roundist'], as_index=False).agg({'Sales': 'sum', 'Market Size': 'sum'})

# Add the share of total sales and the accumulated columns.
# The running totals restart for every store: a plain cumsum() over the whole
# frame would carry the previous stores' totals into the next store's rows.
# NOTE(review): Pct and Acc_Pct stay relative to the total over ALL stores, as
# before; confirm whether a per-store denominator is wanted instead.
total_sales = Dist_sales['Sales'].sum()
Dist_sales['Pct'] = Dist_sales['Sales'] / total_sales
Dist_sales['Acc_sales'] = Dist_sales.groupby('CMP')['Sales'].cumsum()
Dist_sales['Acc_Pct'] = Dist_sales['Acc_sales'] / total_sales
Dist_sales['Acc_MSize'] = Dist_sales.groupby('CMP')['Market Size'].cumsum()
Dist_sales['MShare'] = Dist_sales['Sales'] / Dist_sales['Market Size']
Dist_sales['Acc_MShare'] = Dist_sales['Acc_sales'] / Dist_sales['Acc_MSize']

# Keep only rounded distances below the cutoff.
DIST_CUTOFF_KM = 21  # exclusive upper bound on Roundist
CMP_Dist_Sales = Dist_sales[Dist_sales.Roundist < DIST_CUTOFF_KM]
CMP_Dist_Sales.head(30)

Unnamed: 0,CMP,Roundist,Sales,Market Size,Pct,Acc_sales,Acc_Pct,Acc_MSize,MShare,Acc_MShare
0,Centro 1,0,846265.32,6516160.0,0.005272,846265.32,0.005272,6516160.0,0.129872,0.129872
1,Centro 1,2,2118257.94,26909640.0,0.013197,2964523.26,0.018469,33425800.0,0.078717,0.08869
2,Centro 1,3,1560823.92,17578710.0,0.009724,4525347.18,0.028193,51004510.0,0.088791,0.088724
3,Centro 1,4,1503756.57,15605250.0,0.009369,6029103.75,0.037562,66609760.0,0.096362,0.090514
4,Centro 1,5,2102056.15,28157080.0,0.013096,8131159.9,0.050658,94766840.0,0.074655,0.085802
5,Centro 1,6,464069.79,20668250.0,0.002891,8595229.69,0.053549,115435100.0,0.022453,0.074459
6,Centro 1,7,441519.6,6914736.0,0.002751,9036749.29,0.0563,122349800.0,0.063852,0.07386
7,Centro 1,9,339027.62,7276212.0,0.002112,9375776.91,0.058412,129626000.0,0.046594,0.072329
8,Centro 1,10,81068.15,1378147.0,0.000505,9456845.06,0.058917,131004200.0,0.058824,0.072187
9,Centro 1,14,586386.03,13840820.0,0.003653,10043231.09,0.06257,144845000.0,0.042366,0.069338


In [5]:
# Write the per-store distance table to a CSV file, without the row index.
CMP_Dist_Sales.to_csv(
    '03_Output_CMP_Dist_sales.csv',
    index=False,
    sep=',',
    decimal=".",
)