In [1]:
cd '/userdata/pmavrodi/Projekte/Stao/src'

/userdata/pmavrodi/Projekte/Stao/src


In [2]:


import logging
import pandas as pd
import numpy as np
import configparser
import os

from simple_logging.custom_logging import setup_custom_logger
from input_reader.input_reader import get_input

# Relative path of the settings file; it is read with configparser and passed to
# get_input in later cells.
settingsFile = "settings.cfg"

# Set up the logger
LOGGING_LEVEL = logging.INFO
# setup_custom_logger is a project helper; presumably `flog` is the log file path -- TODO confirm.
logger = setup_custom_logger('GM_LOGGER', LOGGING_LEVEL, flog="logs/gm.log")

In [3]:
    # Read the run settings. ConfigParser.read() silently skips a file it cannot find,
    # so a missing settings file only surfaces as an error in the lookups below.
    config = configparser.ConfigParser()
    config.read(settingsFile)
    use_cache = config.getboolean('global', 'cache_enabled')
    cache_dir = config['cache_config']['cache_dir']
    single_store = config['global']['single_store']

    # Load the cached drive-time table from the cache directory.
    # NOTE(review): read_pickle executes whatever the pickle contains; only load trusted cache files.
    drivetimes_pd = pd.read_pickle(os.path.join(cache_dir,
                                                        config['cache_config']['drivetimes_cached']))

In [None]:
(stores_pd, stores_migros_pd, drivetimes_pd, haushalt_pd, referenz_pd) = get_input(settingsFile, logger)

In [21]:
stores_pd.loc[stores_pd['FORMAT']=='M', 'type'] = 'M'

In [23]:
stores_pd.loc[stores_pd['FORMAT']=='M'].head()

Unnamed: 0_level_0,ID,RETAILER,FORMAT,STRASSE,PLZ,ORT,VERKAUFSFLAECHE_TOTAL,VERKAUFSFLAECHE,Y,X,RELEVANZ,PROFIT_KST,UMSATZ_FOOD,UMSATZ_FRISCHE,UMSATZ_NEAR_NONFOOD,UMSATZ_FACHMARKT,type
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
6,SM_MIG_61607_15939,Migros,M,Kanderstegstrasse 31,3714,Frutigen,878.621,519.42,616073.9153,159391.549,1.0,33370.0,2714265.89,5007897.01,1647566.21,149192.23,M
9,SM_MIG_55810_14347,Migros,M,Rue du Léman 9,1815,Clarens,1026.81,622.98,558105.4795,143471.3736,1.0,63991.0,4432271.32,10699936.16,2424134.69,233238.81,M
11,SM_MIG_49525_11909,Migros,M,Route de Peney 2-4,1214,Vernier,1257.324,649.41,495252.6938,119091.9983,1.0,44860.0,3537224.77,7729667.8,1861354.71,233174.78,M
19,SM_MIG_66578_20929,Migros,M,Zihlmattweg 46,6005,Luzern,590.024,426.21,665786.1141,209297.4367,1.0,73840.0,1239974.15,2798169.67,490289.53,36419.43,M
20,SM_MIG_60500_19797,Migros,M,Turbenweg 1,3073,Gümligen,1300.694,755.95,605000.3189,197971.7516,1.0,33091.0,3582808.48,8414154.1,2408304.45,245573.31,M


In [6]:
drivetimes_pd=drivetimes_pd.reset_index().set_index(keys='filiale_id')

In [7]:
 relevant_hektars = set(drivetimes_pd.loc[stores_migros_pd.ID]['hektar_id'])

In [8]:
drivetimes_rel_hektars_pd = drivetimes_pd[drivetimes_pd['hektar_id'].isin(relevant_hektars)]

In [9]:
# Attach the store attributes to every drive-time row: the index of the left frame is
# matched against the stores' 'ID' column. how='inner' drops drive-time rows whose
# store id does not occur in stores_pd.
drivetimes_rel_hektars_stores_pd = drivetimes_rel_hektars_pd.merge(
    stores_pd[['ID', 'FORMAT', 'VERKAUFSFLAECHE', 'VERKAUFSFLAECHE_TOTAL', 'RELEVANZ', 'type']],
    left_index=True, right_on='ID', how='inner')

In [10]:
# Left-join the H14PTOT column by matching 'hektar_id' against the index of haushalt_pd;
# rows whose hectare is missing from haushalt_pd are kept with H14PTOT = NaN.
enriched_pd = drivetimes_rel_hektars_stores_pd.merge(haushalt_pd[['H14PTOT']], left_on='hektar_id', right_index=True, how='left')

In [11]:
# Fallback variant of H14PTOT in which missing values (NaN) are replaced by 1.
enriched_pd['H14PTOT_corrected'] = enriched_pd['H14PTOT'].fillna(1)

In [12]:
# LAT: piecewise-linear function of the total sales area A = VERKAUFSFLAECHE_TOTAL,
# multiplied by RELEVANZ. The three pieces join continuously at A = 1000 and A = 2500:
#   A < 1000          ->  0.06 * A                      (reaches 60 at A = 1000)
#   1000 <= A < 2500  ->  60 + 20 * (A - 1000) / 1500   (60 .. 80)
#   otherwise         ->  80 + 20 * (A - 2500) / 3500
# Rows with a NaN area fail both conditions, land in the last branch and stay NaN.
# NOTE(review): the constants are hard-coded here; their origin is not visible in this notebook.
enriched_pd['LAT'] = np.where(enriched_pd.VERKAUFSFLAECHE_TOTAL < 1000,
                                      enriched_pd.RELEVANZ * enriched_pd.VERKAUFSFLAECHE_TOTAL * 0.06,
                                      np.where((enriched_pd.VERKAUFSFLAECHE_TOTAL >= 1000) & (
                                                enriched_pd.VERKAUFSFLAECHE_TOTAL < 2500),
                            enriched_pd.RELEVANZ*(20 * (enriched_pd.VERKAUFSFLAECHE_TOTAL - 1000) / 1500 + 60),
                            enriched_pd.RELEVANZ*(20 * (enriched_pd.VERKAUFSFLAECHE_TOTAL - 2500) / 3500 + 80)))

# RLAT: LAT damped by drive time, RLAT = LAT * 10 ** (-k * fahrzeit), where the decay
# rate k = 0.1 - 0.06 * min(LAT / 60, 1) goes from 0.1 (LAT = 0) down to 0.04 (LAT >= 60).
enriched_pd['RLAT'] = enriched_pd['LAT'] * np.power(10, -(np.fmin(enriched_pd['LAT']/60.0, 1.0)
                                                             *(0.04-0.1)+0.1)*enriched_pd['fahrzeit'])

In [13]:
enriched_pd = enriched_pd.reset_index().set_index(keys=['hektar_id', 'type', 'OBJECTID'])

In [14]:
# Drop rows whose index key repeats, keeping the first occurrence of each key
# (the index was set to hektar_id / type / OBJECTID in the preceding cell).
enriched_pd=enriched_pd[~enriched_pd.index.duplicated(keep='first')]

In [15]:
enriched_pd = enriched_pd.reset_index().set_index(keys=['hektar_id', 'type'])#.sort_index(level=[0,1])

In [104]:
len(enriched_pd.loc[enriched_pd['ID']=='SM_MIG_49995_11842'])

12939

In [105]:
# groups = enriched_pd.head(100).groupby(level=[0,1])
groups = enriched_pd.groupby(level=[0])

In [106]:
from multiprocessing import Pool, cpu_count
chunk_size = int(config["parallel"]["chunk_size"])

def applyParallel(dfGrouped, func, processes=1, chunksize=None):
    """Apply ``func`` to every group in worker processes and concatenate the results.

    Parameters
    ----------
    dfGrouped : iterable of (name, group) pairs
        Typically a pandas GroupBy object; only the group part of each pair is used.
    func : callable
        Called with one group at a time. It is sent to the worker processes, so it
        must be picklable, and it must return an object ``pd.concat`` accepts.
    processes : int or None, default 1
        Worker count handed to ``multiprocessing.Pool``. The default of 1 keeps the
        previous single-worker behaviour; ``None`` lets Pool pick the CPU count.
    chunksize : int or None, default None
        Number of groups sent to a worker per task. ``None`` falls back to the
        module-level ``chunk_size`` read from the settings file.

    Returns
    -------
    The per-group results concatenated with ``pd.concat`` in group order, or an
    empty DataFrame when there are no groups.
    """
    group_frames = [group for _, group in dfGrouped]
    if not group_frames:
        # pd.concat raises ValueError on an empty list; there is also no point
        # in starting worker processes for nothing.
        return pd.DataFrame()
    if chunksize is None:
        chunksize = chunk_size
    with Pool(processes) as pool:
        ret_list = pool.map(func, group_frames, chunksize=chunksize)
    return pd.concat(ret_list)

In [107]:
def prune(d):
    """Thin out the rows of one group by drive time and RLAT.

    Reads the columns 'fahrzeit', 'RLAT', 'hektar_id' and 'type' of ``d``.
    Presumably ``d`` holds the stores of one type reachable from one hectare
    (that is how ``prune2`` in this notebook calls it) -- TODO confirm.

    Steps:
      1. For every distinct 'fahrzeit' value keep only the row(s) whose RLAT
         equals the maximum for that drive time (ties are all kept).
      2. Re-index the result by ('hektar_id', 'type') and drop helper columns
         left over from the index reset.
      3. Repeatedly keep the first row plus every row whose RLAT is strictly
         greater than that of the row directly before it, until a pass removes
         nothing or a single row is left.

    Returns the remaining rows as a DataFrame indexed by ('hektar_id', 'type').

    NOTE(review): another cell of this notebook defines a different function
    that is also named ``prune``; whichever cell ran last is the one in effect.
    NOTE(review): the index layout produced by ``groupby(...).apply`` differs
    between pandas versions; the renaming and cleanup below were presumably
    written against the version in use at the time -- verify before upgrading.
    """

    def prune_fz(dd):
        # dd: all rows sharing one 'fahrzeit' value.
        if len(dd) == 1:
            return dd
        else:
            # Boolean mask of the row(s) with the highest RLAT in this drive-time bucket.
            idx_to_return = dd['RLAT'] == np.max(dd['RLAT'])
            return dd.loc[idx_to_return]

    d_sorted = d.sort_values(by='fahrzeit', ascending=True)
    d_pruned = d_sorted.groupby(['fahrzeit']).apply(prune_fz)

    # Rename a leading index level called 'fahrzeit' to 'a' -- presumably so that
    # reset_index() below does not clash with the existing 'fahrzeit' column.
    tmp = [x for x in d_pruned.index.names]
    if (tmp[0] == 'fahrzeit'):
        tmp[0] = 'a'
    d_pruned.index.names = tmp
    d_pruned = d_pruned.reset_index().set_index(keys=['hektar_id', 'type'])

    # Remove whichever helper columns the index reset produced.
    for column_to_delete in ['a', 'level_1', 'index']:
        if column_to_delete in d_pruned:
            del d_pruned[column_to_delete]


    # Iterate to a fixed point: removing a row changes its successor's predecessor,
    # so one pass is not enough.
    while True:
        diffs = np.diff(d_pruned['RLAT'])
        return_idx = (diffs > 0)
        return_idx = np.insert(return_idx, 0, True) # always take the first entry
        d_pruned = d_pruned.loc[return_idx]
        if len(d_pruned) == 1 or all(return_idx):
            break

    return d_pruned


In [108]:
def prune2(d):
    """Prune the rows of ``d`` separately for every store type.

    ``d`` is flattened with ``reset_index`` and split by its 'type' column; each
    part is handed to ``prune``. The index of the combined result is given the
    level names 'a', 'hektar_id' and 'type'; after flattening, the 'a' column is
    discarded, so the returned frame has a default index and carries 'hektar_id'
    and 'type' as ordinary columns.
    """
    pruned_by_type = d.reset_index().groupby('type').apply(prune)
    # Exactly three index levels are expected at this point.
    pruned_by_type.index.names = ['a', 'hektar_id', 'type']
    flat = pruned_by_type.reset_index()
    return flat.drop('a', axis=1)

In [109]:
y = applyParallel(groups, prune2)

Process ForkPoolWorker-1:
Traceback (most recent call last):
  File "/opt/r/anaconda/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/opt/r/anaconda/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/r/anaconda/lib/python3.5/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/opt/r/anaconda/lib/python3.5/multiprocessing/pool.py", line 44, in mapstar
    return list(map(*args))
  File "<ipython-input-108-e1defb7ceab0>", line 2, in prune2
    ret = d.reset_index().groupby('type').apply(prune)
  File "/opt/r/anaconda/lib/python3.5/site-packages/pandas/core/groupby.py", line 651, in apply
    return self._python_apply_general(f)
  File "/opt/r/anaconda/lib/python3.5/site-packages/pandas/core/groupby.py", line 655, in _python_apply_general
    self.axis)
  File "/opt/r/anaconda/lib/python3.5/site-packages/pandas/core/groupby.py", line 1511,

KeyboardInterrupt: 

In [154]:
groups.count()

Unnamed: 0_level_0,OBJECTID,fahrzeit,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,RLAT
hektar_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
66322435,631,631,631,619,631,631,631,631,631,631,631
66322436,635,635,635,623,635,635,635,635,635,635,635
66322437,636,636,636,624,636,636,636,636,636,636,636
66322438,640,640,640,628,640,640,640,640,640,640,640
66322439,647,647,647,634,647,647,647,647,647,647,647
66322440,653,653,653,640,653,653,653,653,653,653,653
66322441,654,654,654,641,654,654,654,654,654,654,654
66322442,657,657,657,644,657,657,657,657,657,657,657
66322443,659,659,659,646,659,659,659,659,659,659,659
66322444,660,660,660,647,660,660,660,660,660,660,660


In [143]:
stores_migros_pd

Unnamed: 0_level_0,ID,RETAILER,FORMAT,STRASSE,PLZ,ORT,VERKAUFSFLAECHE_TOTAL,VERKAUFSFLAECHE,Y,X,RELEVANZ,PROFIT_KST,UMSATZ_FOOD,UMSATZ_FRISCHE,UMSATZ_NEAR_NONFOOD,UMSATZ_FACHMARKT,type
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
834,SM_MIG_68080_24815,Migros,M,Badenerstrasse 376,8004,Zürich,1064.582,516.58,680806.9627,248154.6014,1.0,150327.0,3756359.05,7359424.2,2156160.77,226835.98,MIG


In [151]:
stores_migros_pd

Unnamed: 0_level_0,ID,RETAILER,FORMAT,STRASSE,PLZ,ORT,VERKAUFSFLAECHE_TOTAL,VERKAUFSFLAECHE,Y,X,RELEVANZ,PROFIT_KST,UMSATZ_FOOD,UMSATZ_FRISCHE,UMSATZ_NEAR_NONFOOD,UMSATZ_FACHMARKT,type
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
834,SM_MIG_68080_24815,Migros,M,Badenerstrasse 376,8004,Zürich,1064.582,516.58,680806.9627,248154.6014,1.0,150327.0,3756359.05,7359424.2,2156160.77,226835.98,MIG


In [91]:
enriched_pruned_pd = enriched_pd
def calc_MA(x):
    """Return a copy of ``x`` with a 'Marktanteil' column added.

    'Marktanteil' is each row's RLAT divided by the NaN-ignoring sum of RLAT
    over all rows of ``x``. Rows with a NaN RLAT get a NaN share.

    The input frame is not modified: the function is used with
    ``groupby(...).apply``, and writing into the group that pandas passes in
    is fragile, so a new frame is returned instead.
    """
    rlat_total = np.nansum(x['RLAT'])
    return x.assign(Marktanteil=x['RLAT'] / rlat_total)

enriched_pruned_pd = enriched_pruned_pd.reset_index().groupby(by='hektar_id').apply(calc_MA)

In [92]:
# Local potential per row = share * H14PTOT * 7800.
# NOTE(review): 7800 is a hard-coded factor; presumably a yearly amount per H14PTOT unit -- TODO confirm and move to config.
# LokalUP is NaN wherever H14PTOT is NaN; the *_corrected variant uses H14PTOT_corrected instead.
enriched_pruned_pd['LokalUP'] = enriched_pruned_pd['Marktanteil'] * enriched_pruned_pd['H14PTOT'] * 7800
enriched_pruned_pd['LokalUP_corrected'] = enriched_pruned_pd['Marktanteil'] * enriched_pruned_pd['H14PTOT_corrected'] * 7800
# Flag rows whose H14PTOT value is missing.
enriched_pruned_pd['Missing_HH_Hektare'] = np.isnan(enriched_pruned_pd['H14PTOT'])

In [93]:
migros_only_pd = enriched_pruned_pd[enriched_pruned_pd['OBJECTID'].isin(stores_migros_pd.index.values)]

In [None]:
migros_only_pd.head()

Unnamed: 0,hektar_id,type,OBJECTID,fahrzeit,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,RLAT,Marktanteil,LokalUP,LokalUP_corrected,Missing_HH_Hektare
53648,49971200,MIG,118,5,SM_MIG_49995_11842,M,270.37,412.292,1.0,,1.0,24.73752,10.40025,0.004966,,38.736407,True
53649,49971201,MIG,118,5,SM_MIG_49995_11842,M,270.37,412.292,1.0,,1.0,24.73752,10.40025,0.005069,,39.536731,True
53650,49971204,MIG,118,5,SM_MIG_49995_11842,M,270.37,412.292,1.0,,1.0,24.73752,10.40025,0.005286,,41.228748,True
53651,50921477,MIG,118,30,SM_MIG_49995_11842,M,270.37,412.292,1.0,1.0,1.0,24.73752,0.13661,0.000309,2.40918,2.40918,False
53652,49971206,MIG,118,5,SM_MIG_49995_11842,M,270.37,412.292,1.0,2.0,2.0,24.73752,10.40025,0.005425,84.636986,84.636986,False


In [95]:
# Aggregate per store (OBJECTID):
#   LokalUP / LokalUP_corrected -> NaN-ignoring sum over the store's rows
#   Missing_HH_Hektare          -> tuple (number of flagged rows, total number of rows)
umsatz_potential_pd = migros_only_pd.groupby('OBJECTID').agg({'LokalUP': lambda x: np.nansum(x),
                                                                 'LokalUP_corrected': lambda x: np.nansum(x),
                                                              'Missing_HH_Hektare': lambda x: (np.sum(x), len(x))
                                                                })

In [96]:
umsatz_potential_pd

Unnamed: 0_level_0,Missing_HH_Hektare,LokalUP,LokalUP_corrected
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
118,"(1677, 12939)",6205401.0,6235555.0


In [28]:
len(dflist[1:4])

3

In [19]:
# Prerequisite: start the cluster first with `ipcluster start -n 120`.
from ipyparallel import Client
# Connect to the running ipyparallel cluster.
rc = Client()
lview = rc.load_balanced_view()
# Make calls on the view (e.g. lview.map) wait for their results.
lview.block = True

In [27]:
parallel_list = lview.map(prune, dflist[1:4])

In [123]:
y

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,OBJECTID,fahrzeit,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,RLAT
fahrzeit,hektar_id,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
4,64542616,ALD,2887,4,SM_ALD_64465_26140,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,1.0
5,64542616,ALD,2828,5,SM_ALD_64677_24927,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,5.562131
6,64542616,ALD,2881,6,SM_ALD_66431_25458,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,6.0


In [121]:
x.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,OBJECTID,fahrzeit,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,RLAT
hektar_id,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
64542616,ALD,2887,4,SM_ALD_64465_26140,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,1.0
64542616,ALD,2828,5,SM_ALD_64677_24927,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,5.562131
64542616,ALD,2881,6,SM_ALD_66431_25458,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,6.0
64542616,ALD,2853,19,SM_ALD_64852_24872,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,5.5
64542616,ALD,2885,21,SM_ALD_62132_26115,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,5.7


In [64]:
x['ismax'] = x['RLAT'] == np.max(x['RLAT'])

In [66]:
x[x['ismax']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,RLAT,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,ismax
hektar_id,type,OBJECTID,fahrzeit,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
64542616,ALD,2828,18,5.562131,SM_ALD_64677_24927,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,True
64542616,ALD,2881,18,5.562131,SM_ALD_66431_25458,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,True


In [46]:
def pr(x):
    """Return a copy of ``x`` with a 'diff1' column added.

    'diff1' is each row's RLAT minus the RLAT of the first row of ``x``
    (so the first row always gets 0). ``x`` must have at least one row.

    The input frame is not modified: the function is used with
    ``groupby(...).apply``, and writing into the group that pandas passes in
    is fragile, so a new frame is returned instead.
    """
    first_rlat = x['RLAT'].iloc[0]
    return x.assign(diff1=x['RLAT'] - first_rlat)

enriched_pruned_pd = enriched_pd.groupby(by=['hektar_id', 'type']).apply(pr)

# sorted['diff1'] = sorted['RLAT'] - sorted['RLAT'].iloc[0]
# sorted['diff2'] = sorted['RLAT'].iloc[1:len(sorted)] - sorted['RLAT'].shift(1)
# sorted = sorted[(sorted['diff1'] >= 0) & ( (sorted['diff2'] >=0 ) | np.isnan(sorted['diff2']))] 

In [28]:
a=enriched_pd.head()
a
# a.loc[a['fahrzeit']==14,'RLAT']=32
# a.loc[a['fahrzeit']==19,'RLAT']=34

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fahrzeit,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,RLAT
hektar_id,type,OBJECTID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
66322435,MIG,8,10,SM_MIG_66835_23607,MM,1001.19,1787.864,1.0,10,10,70.504853,28.068488
66322435,MIG,18,19,SM_MIG_64852_24674,SPEZ,0.0,330.895,1.0,10,10,19.8537,0.595747
66322435,MIG,26,28,SM_MIG_67548_25665,VOI,220.0,220.0,1.0,10,10,13.2,0.048998
66322435,MIG,39,14,SM_MIG_66250_25169,M,698.26,1098.812,1.0,10,10,61.317493,16.88824
66322435,MIG,47,26,SM_MIG_67962_24716,VOI,220.0,220.0,1.0,10,10,13.2,0.073076


In [29]:
def prune(x):
    """Return the rows of ``x`` ordered by ascending 'fahrzeit'.

    Draft version: only the sorting step is active. The RLAT-difference
    filtering that was sketched here in commented-out form is not applied.
    """
    return x.sort_values('fahrzeit', axis='index')
    
grouped=a.groupby(level=[0,1]).apply(prune)


In [132]:
enriched_pruned_pd.head()

Unnamed: 0,hektar_id,type,OBJECTID,fahrzeit,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,RLAT,Marktanteil,LokalUP,LokalUP_corrected
0,64542616,ALD,2887,4,SM_ALD_64465_26140,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,29.733172,0.159142,3723.919404,3723.919404
1,64542616,COO,3546,4,SM_COO_64373_26219,Coop Supermarkt,1000.0,2600.0,1.0,3,3,80.571429,55.74181,0.298349,6981.361066,6981.361066
2,64542616,DEN,1109,3,SM_DEN_64420_26194,Denner Discount,374.0,374.0,1.0,3,3,22.44,13.132362,0.070289,1644.757549,1644.757549
3,64542616,LAN,2309,5,SM_LAN_64135_26492,Landi,921.0,921.0,0.05,3,3,2.763,0.901978,0.004828,112.967865,112.967865
4,64542616,LID,4226,5,SM_LID_64325_26299,Lidl Supermarkt,172.0,1000.0,0.8,3,3,48.0,26.377962,0.141184,3303.697488,3303.697488


In [130]:
    def calc_MA(x):
        """Return a copy of ``x`` with a 'Marktanteil' column added.

        'Marktanteil' is each row's RLAT divided by the NaN-ignoring sum of
        RLAT over all rows of ``x``. The input frame is not modified, because
        writing into the group that ``groupby(...).apply`` passes in is fragile.
        """
        rlat_total = np.nansum(x['RLAT'])
        return x.assign(Marktanteil=x['RLAT'] / rlat_total)


In [131]:
enriched_pruned_pd.groupby(by='hektar_id').apply(calc_MA)

Unnamed: 0,hektar_id,type,OBJECTID,fahrzeit,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,RLAT,Marktanteil,LokalUP,LokalUP_corrected
0,64542616,ALD,2887,4,SM_ALD_64465_26140,Aldi Supermarkt,115.00,1000.000,0.80,3,3,48.000000,29.733172,0.159142,3723.919404,3723.919404
1,64542616,COO,3546,4,SM_COO_64373_26219,Coop Supermarkt,1000.00,2600.000,1.00,3,3,80.571429,55.741810,0.298349,6981.361066,6981.361066
2,64542616,DEN,1109,3,SM_DEN_64420_26194,Denner Discount,374.00,374.000,1.00,3,3,22.440000,13.132362,0.070289,1644.757549,1644.757549
3,64542616,LAN,2309,5,SM_LAN_64135_26492,Landi,921.00,921.000,0.05,3,3,2.763000,0.901978,0.004828,112.967865,112.967865
4,64542616,LID,4226,5,SM_LID_64325_26299,Lidl Supermarkt,172.00,1000.000,0.80,3,3,48.000000,26.377962,0.141184,3303.697488,3303.697488
5,64542616,MAN,4268,19,SM_MAN_64587_24908,Manor,172.00,1000.000,0.05,3,3,3.000000,0.043065,0.000230,5.393619,5.393619
6,64542616,MIG,976,4,SM_MIG_64371_26208,M,1169.63,1999.995,1.00,3,3,73.333267,50.734225,0.271547,6354.188102,6354.188102
7,64542616,OTT,5140,3,SM_OTT_64420_26194,,650.00,650.000,0.20,3,3,7.800000,4.125671,0.022082,516.718057,516.718057
8,64542616,PAM,5280,28,SM_PAM_67983_25067,,172.00,172.000,1.00,3,3,10.320000,0.031816,0.000170,3.984728,3.984728
9,64542616,SPA,4372,16,SM_SPA_66420_25599,Spar Supermarkt,172.00,405.000,1.00,3,3,24.300000,1.494192,0.007997,187.139433,187.139433


In [26]:
enriched_pruned_pd

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,RLAT,diff1,diff2
hektar_id,type,hektar_id,type,OBJECTID,fahrzeit,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
64542616,ALD,64542616,ALD,2887,4,SM_ALD_64465_26140,Aldi Supermarkt,115.00,1000.000,0.80,3,3,48.000000,29.733172,0.000000,
64542616,COO,64542616,COO,3546,4,SM_COO_64373_26219,Coop Supermarkt,1000.00,2600.000,1.00,3,3,80.571429,55.741810,0.000000,
64542616,DEN,64542616,DEN,1109,3,SM_DEN_64420_26194,Denner Discount,374.00,374.000,1.00,3,3,22.440000,13.132362,0.000000,
64542616,LAN,64542616,LAN,2309,5,SM_LAN_64135_26492,Landi,921.00,921.000,0.05,3,3,2.763000,0.901978,0.000000,
64542616,LID,64542616,LID,4226,5,SM_LID_64325_26299,Lidl Supermarkt,172.00,1000.000,0.80,3,3,48.000000,26.377962,0.000000,
64542616,MAN,64542616,MAN,4268,19,SM_MAN_64587_24908,Manor,172.00,1000.000,0.05,3,3,3.000000,0.043065,0.000000,
64542616,MIG,64542616,MIG,976,4,SM_MIG_64371_26208,M,1169.63,1999.995,1.00,3,3,73.333267,50.734225,0.000000,
64542616,OTT,64542616,OTT,5140,3,SM_OTT_64420_26194,,650.00,650.000,0.20,3,3,7.800000,4.125671,0.000000,
64542616,PAM,64542616,PAM,5280,28,SM_PAM_67983_25067,,172.00,172.000,1.00,3,3,10.320000,0.031816,0.000000,
64542616,SPA,64542616,SPA,4372,16,SM_SPA_66420_25599,Spar Supermarkt,172.00,405.000,1.00,3,3,24.300000,1.494192,0.000000,


In [127]:
enriched_pruned_pd = pd.read_pickle('/userdata/pmavrodi/Projekte/Stao/output/enriched_pd.pkl')

In [14]:
data2 = [{'RLAT': 1, 'b': 2}, {'RLAT': 15, 'b': 10},  {'RLAT': 5, 'b': 10}]
df = pd.DataFrame(data2)
df

Unnamed: 0,RLAT,b
0,1,2
1,15,10
2,5,10


In [129]:
enriched_pruned_pd.head(50)


Unnamed: 0,hektar_id,type,OBJECTID,fahrzeit,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,RLAT,Marktanteil,LokalUP,LokalUP_corrected
0,64542616,ALD,2887,4,SM_ALD_64465_26140,Aldi Supermarkt,115.0,1000.0,0.8,3,3,48.0,29.733172,0.159142,3723.919404,3723.919404
1,64542616,COO,3546,4,SM_COO_64373_26219,Coop Supermarkt,1000.0,2600.0,1.0,3,3,80.571429,55.74181,0.298349,6981.361066,6981.361066
2,64542616,DEN,1109,3,SM_DEN_64420_26194,Denner Discount,374.0,374.0,1.0,3,3,22.44,13.132362,0.070289,1644.757549,1644.757549
3,64542616,LAN,2309,5,SM_LAN_64135_26492,Landi,921.0,921.0,0.05,3,3,2.763,0.901978,0.004828,112.967865,112.967865
4,64542616,LID,4226,5,SM_LID_64325_26299,Lidl Supermarkt,172.0,1000.0,0.8,3,3,48.0,26.377962,0.141184,3303.697488,3303.697488
5,64542616,MAN,4268,19,SM_MAN_64587_24908,Manor,172.0,1000.0,0.05,3,3,3.0,0.043065,0.00023,5.393619,5.393619
6,64542616,MIG,976,4,SM_MIG_64371_26208,M,1169.63,1999.995,1.0,3,3,73.333267,50.734225,0.271547,6354.188102,6354.188102
7,64542616,OTT,5140,3,SM_OTT_64420_26194,,650.0,650.0,0.2,3,3,7.8,4.125671,0.022082,516.718057,516.718057
8,64542616,PAM,5280,28,SM_PAM_67983_25067,,172.0,172.0,1.0,3,3,10.32,0.031816,0.00017,3.984728,3.984728
9,64542616,SPA,4372,16,SM_SPA_66420_25599,Spar Supermarkt,172.0,405.0,1.0,3,3,24.3,1.494192,0.007997,187.139433,187.139433


# Testing core algo

In [30]:
stores_migros_pd.head()

Unnamed: 0_level_0,ID,RETAILER,FORMAT,STRASSE,PLZ,ORT,VERKAUFSFLAECHE_TOTAL,VERKAUFSFLAECHE,Y,X,RELEVANZ,PROFIT_KST,UMSATZ_FOOD,UMSATZ_FRISCHE,UMSATZ_NEAR_NONFOOD,UMSATZ_FACHMARKT,own_hektar_id
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
6,SM_MIG_61607_15939,Migros,M,Kanderstegstrasse 31,3714,Frutigen,878.621,519.42,616073.9153,159391.549,1.0,33370.0,2714265.89,5007897.01,1647566.21,149192.23,61601593
8,SM_MIG_66835_23607,Migros,MM,Kirchfeldstrasse 8,5630,Muri AG,1787.864,1001.19,668350.1017,236074.543,1.0,33934.0,4973971.8,10746847.03,3218149.87,476688.61,66832360
9,SM_MIG_55810_14347,Migros,M,Rue du Léman 9,1815,Clarens,1026.81,622.98,558105.4795,143471.3736,1.0,63991.0,4432271.32,10699936.16,2424134.69,233238.81,55811434
11,SM_MIG_49525_11909,Migros,M,Route de Peney 2-4,1214,Vernier,1257.324,649.41,495252.6938,119091.9983,1.0,44860.0,3537224.77,7729667.8,1861354.71,233174.78,49521190
13,SM_MIG_63399_12693,Migros,MM,Brückenweg 10,3930,Visp,2230.17,1101.84,633999.7599,126933.4256,1.0,134480.0,6229558.9,11250268.13,5737413.46,781103.84,63391269


In [4]:
(stores_pd, stores_migros_pd, drivetimes_pd, haushalt_pd) = get_input(settingsFile, logger)
relevant_hektars = set(drivetimes_pd.loc[stores_migros_pd.ID]['hektar_id'])
drivetimes_rel_hektars_pd = drivetimes_pd[drivetimes_pd['hektar_id'].isin(relevant_hektars)]

In [5]:
drivetimes_pd.head()

Unnamed: 0_level_0,fahrzeit,hektar_id
filiale_id,Unnamed: 1_level_1,Unnamed: 2_level_1
SM_MIG_61607_15939,21,61341718
SM_MIG_61607_15939,21,61341719
SM_MIG_61607_15939,21,61341722
SM_MIG_61607_15939,22,61341730
SM_MIG_61607_15939,23,61341731


In [6]:
stores_pd.head()


Unnamed: 0_level_0,ID,RETAILER,FORMAT,STRASSE,PLZ,ORT,VERKAUFSFLAECHE_TOTAL,VERKAUFSFLAECHE,Y,X,RELEVANZ,PROFIT_KST,UMSATZ_FOOD,UMSATZ_FRISCHE,UMSATZ_NEAR_NONFOOD,UMSATZ_FACHMARKT,own_hektar_id
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
6,SM_MIG_61607_15939,Migros,M,Kanderstegstrasse 31,3714,Frutigen,878.621,519.42,616073.9153,159391.549,1.0,33370.0,2714265.89,5007897.01,1647566.21,149192.23,61601593
8,SM_MIG_66835_23607,Migros,MM,Kirchfeldstrasse 8,5630,Muri AG,1787.864,1001.19,668350.1017,236074.543,1.0,33934.0,4973971.8,10746847.03,3218149.87,476688.61,66832360
9,SM_MIG_55810_14347,Migros,M,Rue du Léman 9,1815,Clarens,1026.81,622.98,558105.4795,143471.3736,1.0,63991.0,4432271.32,10699936.16,2424134.69,233238.81,55811434
10,SM_MIG_49997_11718,Migros,SPEZ,Rue du Conseil-Général 20,1205,Genève,157.476,157.476,499972.6443,117183.8769,1.0,,,,,,49991171
11,SM_MIG_49525_11909,Migros,M,Route de Peney 2-4,1214,Vernier,1257.324,649.41,495252.6938,119091.9983,1.0,44860.0,3537224.77,7729667.8,1861354.71,233174.78,49521190


In [5]:
drivetimes_rel_hektars_pd.head()

Unnamed: 0_level_0,fahrzeit,hektar_id
filiale_id,Unnamed: 1_level_1,Unnamed: 2_level_1
SM_MIG_61607_15939,21,61341718
SM_MIG_61607_15939,21,61341719
SM_MIG_61607_15939,21,61341722
SM_MIG_61607_15939,22,61341730
SM_MIG_61607_15939,23,61341731


In [7]:
drivetimes_rel_hektars_stores_pd = drivetimes_rel_hektars_pd.merge(
        stores_pd[['ID','FORMAT', 'VERKAUFSFLAECHE', 'VERKAUFSFLAECHE_TOTAL', 'RELEVANZ']], left_index=True,
    right_on='ID', how='inner')

In [10]:
len(drivetimes_rel_hektars_pd)

110246169

In [11]:
len(drivetimes_rel_hektars_stores_pd)

109974613

In [12]:
drivetimes_rel_hektars_stores_pd.head()   

Unnamed: 0_level_0,fahrzeit,hektar_id,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
6,21,61341718,SM_MIG_61607_15939,M,519.42,878.621,1.0
6,21,61341719,SM_MIG_61607_15939,M,519.42,878.621,1.0
6,21,61341722,SM_MIG_61607_15939,M,519.42,878.621,1.0
6,22,61341730,SM_MIG_61607_15939,M,519.42,878.621,1.0
6,23,61341731,SM_MIG_61607_15939,M,519.42,878.621,1.0


In [13]:
# NOTE(review): no `how` is given, so this is an inner merge: rows whose hektar_id is
# missing from haushalt_pd are dropped here, whereas the corresponding cell earlier
# in the notebook passes how='left' and keeps them.
enriched_pd = drivetimes_rel_hektars_stores_pd.merge(haushalt_pd[['H14PTOT']], left_on='hektar_id', right_index=True)

In [14]:
enriched_pd['H14PTOT_corrected'] = enriched_pd['H14PTOT'].fillna(1)

In [16]:
enriched_pd.head()

Unnamed: 0_level_0,fahrzeit,hektar_id,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
6,21,61341718,SM_MIG_61607_15939,M,519.42,878.621,1.0,1,1
20,28,61341718,SM_MIG_60500_19797,M,755.95,1300.694,1.0,1,1
530,12,61341718,SM_MIG_61274_17700,FM,820.12,1330.134,1.0,1,1
836,25,61341718,SM_MIG_60926_19134,MM,850.5,1879.651,1.0,1,1
949,29,61341718,SM_MIG_60277_19842,M,334.03,494.532,1.0,1,1


In [18]:
migros_only_pd = enriched_pd[enriched_pd['FORMAT'].isin(list(set(stores_migros_pd['FORMAT'])))]

In [23]:
migros_only_pd.groupby('ID').agg( lambda x: len(x))

Unnamed: 0_level_0,fahrzeit,hektar_id,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SM_MIG_48928_12169,8858,8858,8858,8858.0,8858.0,8858.0,8858,8858
SM_MIG_49525_11909,11407,11407,11407,11407.0,11407.0,11407.0,11407,11407
SM_MIG_49539_12095,11333,11333,11333,11333.0,11333.0,11333.0,11333,11333
SM_MIG_49640_11777,11323,11323,11323,11323.0,11323.0,11323.0,11323,11323
SM_MIG_49743_11609,10982,10982,10982,10982.0,10982.0,10982.0,10982,10982
SM_MIG_49752_12087,12168,12168,12168,12168.0,12168.0,12168.0,12168,12168
SM_MIG_49768_11948,11684,11684,11684,11684.0,11684.0,11684.0,11684,11684
SM_MIG_49781_11376,10585,10585,10585,10585.0,10585.0,10585.0,10585,10585
SM_MIG_49823_12053,11698,11698,11698,11698.0,11698.0,11698.0,11698,11698
SM_MIG_49835_11457,10721,10721,10721,10721.0,10721.0,10721.0,10721,10721


In [None]:
# Read the raw drive-time CSV: no header row, the three columns are named here and
# the first one ('filiale_id') becomes the index.
# NOTE(review): `drivetimes` (the file path) is not defined in any visible cell, and
# nrows is a hard-coded row limit -- confirm both before re-running.
drivetimes_pd = pd.read_csv(drivetimes, sep=',', header=None, names=['filiale_id', 'fahrzeit', 'hektar_id'],
                                    index_col=0, nrows=110299436)

In [None]:
stores_pd[stores_pd['FORMAT'].isin(['M', 'MM', 'MMM', 'FM'])]

# Einige Daten issues

In [None]:
# get the input
(stores_pd, stores_migros_pd, drivetimes_pd, drivetimes_migros_pd, haushalt_pd) = get_input(settingsFile, logger)

In [None]:
drivetimes_pd.head(3)

In [None]:
# Report the number of rows in the drive-time table.
print('Number of entries in drivetimes: %d' % len(drivetimes_pd))

# Duplikate in drivetimes

In [None]:
# Move all three columns into the index so that identical
# (filiale_id, fahrzeit, hektar_id) rows show up as duplicated index keys.
drivetimes_pd2 = drivetimes_pd.reset_index()
drivetimes_pd2 = drivetimes_pd2.set_index(['filiale_id', 'fahrzeit', 'hektar_id'])

In [None]:
drivetimes_pd2.head(3)

In [None]:
# Index.duplicated() defaults to keep='first': the first occurrence of a key is not
# flagged, only its repetitions are selected here.
drivetimes_duplicates = drivetimes_pd2[ drivetimes_pd2.index.duplicated()]

In [None]:
print('Number of duplicates: %d' % len(drivetimes_duplicates))

In [None]:
drivetimes_duplicates.head(10)

In [None]:
a=drivetimes_pd.loc['SM_MIG_68294_26394']
a[ (a['fahrzeit']==11) & (a['hektar_id']==67672614) ]

In [None]:
drivetimes_duplicates.to_csv('/userdata/pmavrodi/Projekte/Stao/output/drivetimes_duplicates.csv')

# Duplikate in drivetimes_migros_pd

In [None]:
# Same duplicate check as for drivetimes_pd, applied to drivetimes_migros_pd:
# index the frame by (filiale_id, fahrzeit, hektar_id).
drivetimes_pd3 = drivetimes_migros_pd.reset_index()
drivetimes_pd3 = drivetimes_pd3.set_index(['filiale_id', 'fahrzeit', 'hektar_id'])

In [None]:
drivetimes_pd3.head(4)

In [None]:
# Select the repeated index keys (the first occurrence of each key is not flagged).
drivetimes_duplicates2 = drivetimes_pd3[ drivetimes_pd3.index.duplicated()]

In [None]:
print('Number of duplicates: %d' % len(drivetimes_duplicates2))

In [None]:
drivetimes_duplicates2.head(3)

# Filialen in drivetimes, aber nicht in stores_sm. Betroffen sind LAT, RLAT usw.

In [None]:
# get all relevant hektars, i.e. those from which a Migros store is reachable
relevant_hektars = set(drivetimes_migros_pd['hektar_id'])

In [None]:
drivetimes_rel_hektars_pd = drivetimes_pd[drivetimes_pd['hektar_id'].isin(relevant_hektars)]

In [None]:
# Left join on the index: every drive-time row is kept, and rows without a matching
# entry in stores_pd get NaN in the four store columns.
drivetimes_rel_hektars_stores_pd = drivetimes_rel_hektars_pd.join(
        stores_pd[['FORMAT', 'VERKAUFSFLAECHE', 'VERKAUFSFLAECHE_TOTAL', 'RELEVANZ']], how='left')
# NOTE(review): this displays the length of the left input, not of the joined frame -- intended?
len(drivetimes_rel_hektars_pd)

In [None]:
# Unique index values of the rows in which all three store attributes are null,
# i.e. the ids that received no store attributes from the left join.
b=drivetimes_rel_hektars_stores_pd[pd.isnull(drivetimes_rel_hektars_stores_pd['VERKAUFSFLAECHE_TOTAL']) &
                                            pd.isnull(drivetimes_rel_hektars_stores_pd['RELEVANZ'])&
                                            pd.isnull(drivetimes_rel_hektars_stores_pd['FORMAT'])].index.unique()

In [None]:
b

In [None]:
drivetimes_rel_hektars_stores_pd[~pd.isnull(drivetimes_rel_hektars_stores_pd['VERKAUFSFLAECHE_TOTAL'])].head(2)

In [None]:
np.savetxt('/userdata/pmavrodi/Projekte/Stao/output/filiale_nur_drivetimes.txt', b,  fmt='%s')

# Hektare aus drivetimes_sm ohne HH. Betroffen ist das (lokale) Umsatzpotenzial

In [None]:
# Keep only rows that have a VERKAUFSFLAECHE_TOTAL value, then add H14PTOT by looking
# up 'hektar_id' in the index of haushalt_pd (DataFrame.join defaults to a left join,
# so hectares without an entry get NaN).
enriched_pd = drivetimes_rel_hektars_stores_pd[~pd.isnull(drivetimes_rel_hektars_stores_pd['VERKAUFSFLAECHE_TOTAL'])].join(haushalt_pd['H14PTOT'], on='hektar_id')

In [None]:
enriched_pd.head(5)

In [None]:
hektars_ohne_HH_info = enriched_pd[pd.isnull(enriched_pd['H14PTOT'])]['hektar_id'].unique()

In [None]:
len(hektars_ohne_HH_info)

In [None]:
np.savetxt('/userdata/pmavrodi/Projekte/Stao/output/hektaren_ohne_HH_info.txt', hektars_ohne_HH_info, fmt='%s')

# DEBUGGING MIT BOJAN

In [6]:
import pickle

In [28]:
enriched = pd.read_pickle('/userdata/pmavrodi/Projekte/Stao/output/enriched_PRUNED_pd.pkl')

In [29]:
enriched

Unnamed: 0,hektar_id,type,OBJECTID,fahrzeit,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,LAT,RLAT,Marktanteil,LokalUP,LokalUP_corrected,Missing_HH_Hektare
0,48621110,ALD,2944,20,SM_ALD_49747_11594,Aldi Supermarkt,115.00,1000.000,0.80,,1.0,48.000000,4.377652,0.060025,,468.195373,True
1,48621110,COO,3382,16,SM_COO_49438_11455,Coop Supermarkt,1000.00,600.000,1.00,,1.0,36.000000,3.406454,0.046708,,364.324502,True
2,48621110,COO,3821,20,SM_COO_49697_11599,Coop Supermarkt,450.00,1640.000,1.00,,1.0,68.533333,10.861801,0.148934,,1161.683271,True
3,48621110,COO,4032,21,SM_COO_49638_11982,Coop Supermarkt,1000.00,5070.000,1.00,,1.0,94.685714,13.686250,0.187662,,1463.761565,True
4,48621110,DEN,1362,20,SM_DEN_49751_11594,Denner Discount,374.00,374.000,1.00,,1.0,22.440000,0.630700,0.008648,,67.454159,True
5,48621110,LAN,2255,16,SM_LAN_49404_11314,Landi,921.00,921.000,0.05,,1.0,2.763000,0.076840,0.001054,,8.218163,True
6,48621110,LID,4229,20,SM_LID_49749_11596,Lidl Supermarkt,172.00,1000.000,0.80,,1.0,48.000000,4.377652,0.060025,,468.195373,True
7,48621110,MAN,4290,26,SM_MAN_49996_11815,Manor,172.00,1000.000,0.05,,1.0,3.000000,0.009018,0.000124,,0.964511,True
8,48621110,MIG,1993,18,SM_MIG_49646_11542,migrolino,150.00,150.000,1.00,,1.0,9.000000,0.207130,0.002840,,22.152788,True
9,48621110,MIG,11,21,SM_MIG_49525_11909,M,649.41,1257.324,1.00,,1.0,63.430987,9.168567,0.125717,,980.589744,True


In [13]:
enriched.head(10)

Unnamed: 0,OBJECTID,fahrzeit,hektar_id,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,...,LAT2,RLAT,RLAT2,Marktanteil,Marktanteil2,LokalUP,LokalUP2,LokalUP_corrected,LokalUP2_corrected,LAT_NEU
0,6,21,61341718,SM_MIG_61607_15939,M,519.42,878.621,1.0,1,1,...,6233.04,20.090134,11.876813,0.000474,0.000428,3.699834,3.339322,3.699834,3.339322,52.71726
1,20,28,61341718,SM_MIG_60500_19797,M,755.95,1300.694,1.0,1,1,...,9071.4,1.920244,1.116026,4.5e-05,4e-05,0.353635,0.313785,0.353635,0.313785,64.009253
2,530,12,61341718,SM_MIG_61274_17700,FM,820.12,1330.134,1.0,1,1,...,9841.44,1030.567971,635.416736,0.024332,0.022905,189.791171,178.655774,189.791171,178.655774,64.401787
3,836,25,61341718,SM_MIG_60926_19134,MM,850.5,1879.651,1.0,1,1,...,10206.0,8.97963,4.063082,0.000212,0.000146,1.653704,1.142389,1.653704,1.142389,71.72868
4,949,29,61341718,SM_MIG_60277_19842,M,334.03,494.532,1.0,1,1,...,4008.36,0.493601,0.333401,1.2e-05,1.2e-05,0.090902,0.09374,0.090902,0.09374,29.67192
5,953,30,61341718,SM_MIG_60110_19901,M,250.9,392.332,1.0,1,1,...,3010.8,0.264749,0.16931,6e-06,6e-06,0.048757,0.047604,0.048757,0.047604,23.53992
6,956,17,61341718,SM_MIG_61580_17713,M,635.94,966.043,1.0,1,1,...,7631.28,105.725002,69.598101,0.002496,0.002509,19.470498,19.568422,19.470498,19.568422,57.96258
7,958,16,61341718,SM_MIG_61034_18031,M,660.2,1163.531,1.0,1,1,...,7922.4,188.347216,106.870235,0.004447,0.003852,34.686347,30.047973,34.686347,30.047973,62.180413
8,963,23,61341718,SM_MIG_63293_16841,M,208.23,335.281,1.0,1,1,...,2498.76,3.504211,2.176329,8.3e-05,7.8e-05,0.645341,0.611904,0.645341,0.611904,20.11686
9,966,11,61341718,SM_MIG_61433_17711,M,775.58,1327.339,1.0,1,1,...,9306.96,1521.118685,888.80778,0.035914,0.032038,280.131835,249.9,280.131835,249.9,64.36452


In [11]:
20/3500

0.005714285714285714

In [50]:
# For the rows selected by `idx`: LAT_NEU = RELEVANZ * (VERKAUFSFLAECHE_TOTAL / 1000) * 60,
# which is arithmetically RELEVANZ * VERKAUFSFLAECHE_TOTAL * 0.06.
# NOTE(review): `idx` is not defined in any visible cell -- it must already exist in the kernel.
enriched.loc[ idx, 'LAT_NEU' ] = enriched.loc[idx, 'RELEVANZ'] * (enriched.loc[idx, 'VERKAUFSFLAECHE_TOTAL']/1000.0) * 60

In [10]:
enriched.head(4)

Unnamed: 0,OBJECTID,fahrzeit,hektar_id,ID,FORMAT,VERKAUFSFLAECHE,VERKAUFSFLAECHE_TOTAL,RELEVANZ,H14PTOT,H14PTOT_corrected,...,LAT2,RLAT,RLAT2,Marktanteil,Marktanteil2,LokalUP,LokalUP2,LokalUP_corrected,LokalUP2_corrected,LAT_NEU
0,6,21,61341718,SM_MIG_61607_15939,M,519.42,878.621,1.0,1,1,...,6233.04,20.090134,11.876813,0.000474,0.000428,3.699834,3.339322,3.699834,3.339322,
1,20,28,61341718,SM_MIG_60500_19797,M,755.95,1300.694,1.0,1,1,...,9071.4,1.920244,1.116026,4.5e-05,4e-05,0.353635,0.313785,0.353635,0.313785,
2,530,12,61341718,SM_MIG_61274_17700,FM,820.12,1330.134,1.0,1,1,...,9841.44,1030.567971,635.416736,0.024332,0.022905,189.791171,178.655774,189.791171,178.655774,
3,836,25,61341718,SM_MIG_60926_19134,MM,850.5,1879.651,1.0,1,1,...,10206.0,8.97963,4.063082,0.000212,0.000146,1.653704,1.142389,1.653704,1.142389,
