In [1]:
import pandas as pd
import numpy as np
from gravity_utils import *

In [2]:
# constants
filtered_columns = ['Id' ,'bostad_komkod', 'bostad_SAMS', 'weight', 'rf1_komkod',
                    'Id_time', 'rf4_komkod', 'rf4_Samskod']


# Upsample based on idividvikt from RVU

In [3]:
resfil_raw = pd.read_csv('data/rvu/RVU_resfil.csv')
resfil_raw = resfil_raw.loc[:, filtered_columns + ['individvikt']]
#df_resfil = df_resfil[df_resfil['ärende_2']==1]
upsampled_resfil = draw_population(resfil_raw, resfil_raw.individvikt.astype(int))

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


In [4]:
resfil_prod_all_trips = upsampled_resfil.loc[(upsampled_resfil.rf1_komkod >= 1200) & (upsampled_resfil.rf1_komkod < 1300)]
resfil_prod_all_trips = resfil_prod_all_trips.groupby(['rf1_komkod'])['Id_time'].count().rename('work_trips')

# Production linear regression

In [5]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn import metrics
import statsmodels.api as sm
from scipy import stats
import stat

In [6]:
a2 = read_shapefile('data/GIS/A2_samsSW_2012_shp/A2_sw_region.shp')
a2['KOMMUN'] = a2.KOMMUN.astype(int)
a2['SAMSCODE'] = a2.SAMSCODE.astype(int)
night_pop = a2.groupby(['KOMMUN']).sum()
night_pop = night_pop[(night_pop.index > 1200) & (night_pop.index < 1300)]

b2 = read_shapefile('data/GIS/B2_sams_2013/B2_sams.shp')
b2['KOMMUN'] = b2.KOMMUN.astype(int)
b2['SAMSCODE'] = b2.SAMSCODE.astype(int)
sex_distr = b2.groupby(['KOMMUN']).sum()
sex_distr = sex_distr[(sex_distr.index > 1200) & (sex_distr.index < 1300)]

b1_buildings = pd.read_csv('data/GIS/B1_samsSW_20131231_shp/B1_sams_with_nbuildings.csv', sep=';', index_col=0)
prod_buildings = b1_buildings.groupby(['KOMMUN'])['nProduction', 'nAttraction', 'TotBef', 'small_building', 'appt_build', 'multi_appartment_building'].sum()

In [7]:
prod_all_x = pd.concat([night_pop, sex_distr, prod_buildings], axis=1)
prod_all_x.head()

Unnamed: 0_level_0,SAMSCODE,Offentliga,Naringsliv,Totalt,SAMSCODE,Man,Kv,TotKon,nProduction,nAttraction,TotBef,small_building,appt_build,multi_appartment_building
KOMMUN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1214,206380153,1797,4469,6266,206380153,6889,6427,13316,5111,693,13317,4835,136,139
1230,123000075,2964,7828,10792,123000075,11193,11447,22640,6880,510,22640,6509,281,90
1231,86170035,2018,5170,7188,86170035,8558,8505,17063,3262,488,17063,3017,186,59
1233,308250325,3194,12424,15618,308250325,16679,17107,33786,16269,1103,33786,14571,321,125
1256,150720078,1597,4498,6095,150720078,6939,6724,13663,5792,2880,13663,5520,202,68


### testing a model

In [8]:
x = prod_all_x.drop(axis=1, labels=['Man', 'Naringsliv', 'TotKon', 'nAttraction', 'Kv', 'Totalt', 'multi_appartment_building', 'nProduction', 'small_building', 'appt_build'])
#x = all_x.loc[:, ['TotBef', 'Naringsliv', ]]
y = pd.DataFrame(resfil_prod_all_trips)

est = sm.OLS(y, x)
est2 = est.fit()
print(est2.summary())

                            OLS Regression Results                            
Dep. Variable:             work_trips   R-squared:                       0.989
Model:                            OLS   Adj. R-squared:                  0.988
Method:                 Least Squares   F-statistic:                     892.5
Date:                Wed, 23 Oct 2019   Prob (F-statistic):           2.07e-29
Time:                        16:26:42   Log-Likelihood:                -358.37
No. Observations:                  33   AIC:                             722.7
Df Residuals:                      30   BIC:                             727.2
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
SAMSCODE     -3.8e-06   3.49e-06     -1.089      0.2

### actual model

In [9]:
x = prod_all_x.loc[:, ['Offentliga', 'TotBef']]
y = pd.DataFrame(resfil_prod_all_trips)

production_model = LinearRegression(fit_intercept=0)
scores = []

production_model.fit(x, y)
score = production_model.score(x, y)
print('Scores')
print(scores, scores)
print('Model')
print(production_model.intercept_, production_model.coef_)
pred_production = pd.DataFrame(production_model.predict(x))
pred_production['ind'] = x.index
pred_production.set_index('ind', inplace=True)

Scores
[] []
Model
0.0 [[4.95985625 1.13555962]]


# Attraction Linear regression

In [10]:
#att_work_trips = upsampled_resfil[upsampled_resfil['ärende_2'] == 1]

attr_all_trips = upsampled_resfil.loc[(upsampled_resfil.rf4_komkod >= 1200) & (upsampled_resfil.rf4_komkod < 1300)]
attr_all_trips = attr_all_trips.groupby(['rf4_komkod'])['Id_time'].count().rename('all_attr_trips')

In [11]:
a4 = read_shapefile('data/GIS/A4_samsSW_2012_shp/A4_sw_region.shp')
a4['KOMMUN'] = a4.KOMMUN.astype(int)
day_pop = a4.groupby(['KOMMUN']).sum()
day_pop = day_pop[(day_pop.index > 1200) & (day_pop.index < 1300)]

attr_buildings = b1_buildings.groupby(['KOMMUN'])['nProduction', 'nAttraction', 'TotBef', 'nIndustri', 'nEkonomi'].sum()

In [12]:
attr_x = pd.concat([day_pop, sex_distr, attr_buildings], axis=1)
attr_x.head()



Unnamed: 0_level_0,Offentliga,Naringsliv,Totalt,SAMSCODE,Man,Kv,TotKon,nProduction,nAttraction,TotBef,nIndustri,nEkonomi
KOMMUN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1214,1163.0,2152.0,3315.0,206380153,6889,6427,13316,5111,693,13317,142,130
1230,1121.0,4312.0,5433.0,123000075,11193,11447,22640,6880,510,22640,219,44
1231,1365.0,6401.0,7766.0,86170035,8558,8505,17063,3262,488,17063,262,47
1233,1128.0,6605.0,7733.0,308250325,16679,17107,33786,16269,1103,33786,262,271
1256,854.0,3074.0,3928.0,150720078,6939,6724,13663,5792,2880,13663,270,161


In [13]:
x = attr_x.drop(columns=['Man', 'Naringsliv', 'TotKon', 'Kv', 'nEkonomi', 'nIndustri', 'nProduction', 'nAttraction'])
y = pd.DataFrame(attr_all_trips)

est = sm.OLS(y, x)
est2 = est.fit()
print(est2.summary())

                            OLS Regression Results                            
Dep. Variable:         all_attr_trips   R-squared:                       0.998
Model:                            OLS   Adj. R-squared:                  0.998
Method:                 Least Squares   F-statistic:                     4282.
Date:                Wed, 23 Oct 2019   Prob (F-statistic):           9.90e-40
Time:                        16:26:46   Log-Likelihood:                -327.24
No. Observations:                  33   AIC:                             662.5
Df Residuals:                      29   BIC:                             668.5
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Offentliga     3.0763      0.585      5.262      0.0

In [14]:
x = attr_x.loc[:, ['Offentliga', 'Totalt', 'TotBef']]
y = (pd.DataFrame(attr_all_trips))

attraction_model = LinearRegression(fit_intercept=0)
scores = []

attraction_model.fit(x, y)
scores = attraction_model.score(x, y)
print('Scores')
print(scores, scores)
print('Model')
print(attraction_model.intercept_, attraction_model.coef_)
pred_attraction = pd.DataFrame(attraction_model.predict(x))
pred_attraction['ind'] = x.index
pred_attraction.set_index('ind', inplace=True)

Scores
0.997421267394188 0.997421267394188
Model
0.0 [[2.71192777 2.0514674  0.42853402]]


# Gravity model for SAMS

In [15]:
a2 = read_shapefile('data/GIS/A2_samsSW_2012_shp/A2_sw_region.shp')
a2['KOMMUN'] = a2.KOMMUN.astype(int)
a2['SAMSCODE'] = a2.SAMSCODE.astype(int)
night_pop = a2[(a2.KOMMUN > 1200) & (a2.KOMMUN < 1300)]
target_night_pop = night_pop.loc[(night_pop.KOMMUN == 1265) | (night_pop.KOMMUN == 1270)]
target_night_pop = target_night_pop.groupby(['SAMSCODE'])[['Totalt', 'Offentliga', 'Naringsliv']].sum()
other_night_pop = night_pop.loc[(night_pop.KOMMUN != 1265) & (night_pop.KOMMUN != 1270)]
other_night_pop = other_night_pop.groupby(['KOMMUN'])[['Totalt', 'Offentliga', 'Naringsliv']].sum()


sams_w_nbuildings = pd.read_csv('data/GIS/B1_samsSW_20131231_shp/B1_sams_with_nbuildings.csv', sep=';', index_col=0)
sams_w_nbuildings['SAMSCODE'] = sams_w_nbuildings.SAMSCODE.astype(int)
sams_w_nbuildings['KOMMUN'] = sams_w_nbuildings.KOMMUN.astype(int)

target_buildings = sams_w_nbuildings.loc[(sams_w_nbuildings.KOMMUN == 1265) | (sams_w_nbuildings.KOMMUN == 1270)]
target_buildings = target_buildings.groupby(['SAMSCODE'])['nProduction', 'nAttraction','TotBef', 'small_building', 'appt_build', 'multi_appartment_building'].sum()
other_buildings = sams_w_nbuildings.loc[(sams_w_nbuildings.KOMMUN != 1265) & (sams_w_nbuildings.KOMMUN != 1270)]
other_buildings = other_buildings.groupby(['KOMMUN'])['nProduction', 'nAttraction','TotBef', 'small_building', 'appt_build', 'multi_appartment_building'].sum()

#buildings = sams_w_nbuildings.groupby(['SAMSCODE'])['nProduction', 'nAttraction','TotBef', 'small_building', 'appt_build', 'multi_appartment_building'].sum()

In [16]:
target_x_sams = pd.concat([target_night_pop, target_buildings], axis=1)
other_x_sams = pd.concat([other_night_pop, other_buildings], axis=1)
all_x_sams = pd.concat([target_x_sams, other_x_sams])

all_x_sams.head()

Unnamed: 0,Totalt,Offentliga,Naringsliv,nProduction,nAttraction,TotBef,small_building,appt_build,multi_appartment_building
12650001,436,108,328,533,5,836,533,0,0
12650002,58,15,43,65,9,101,64,0,1
12650003,1626,457,1169,1417,153,3496,1341,62,14
12650004,929,252,677,963,142,2108,926,30,7
12650005,738,216,522,483,295,1705,424,55,4


In [17]:
x_sams = all_x_sams.loc[:, ['Offentliga', 'TotBef']]
pred_prod_sams = pd.DataFrame(production_model.predict(x_sams), index=x_sams.index, columns=['pred_prod'])

In [18]:
#att_sams_all_trips = upsampled_resfil
#att_sams_all_trips = att_sams_all_trips.loc[(att_sams_all_trips.rf4_komkod >= 1200) & (att_sams_all_trips.rf4_komkod < 1300)]
#att_sams_all_trips.loc[:, 'rf4_Samskod'] = att_sams_all_trips.rf4_Samskod.astype(int)
#att_sams_all_trips.loc[:, 'rf4_komkod'] = att_sams_all_trips.rf4_komkod.astype(int)

#att_target_all_trips = att_sams_all_trips.loc[(att_sams_all_trips.rf4_komkod == 1265) | (att_sams_all_trips.rf4_komkod == 1270)]
#att_target_all_trips = att_target_all_trips.groupby(['rf4_Samskod'])['Id_time'].count().rename('attr_work_trips')
#att_other_all_trips = att_sams_all_trips.loc[(att_sams_all_trips.rf4_komkod != 1265) | (att_sams_all_trips.rf4_komkod != 1270)]
#att_other_all_trips = att_other_all_trips.groupby(['rf4_komkod'])['Id_time'].count().rename('attr_work_trips')

attr_sams_all_trips = upsampled_resfil.loc[(upsampled_resfil.rf4_komkod >= 1200) & (upsampled_resfil.rf4_komkod < 1300)]
attr_sams_all_trips.loc[:, 'rf4_Samskod'] = attr_sams_all_trips.rf4_Samskod.astype(int)
attr_sams_all_trips.loc[:, 'rf4_komkod'] = attr_sams_all_trips.rf4_komkod.astype(int)
#original_sams_attr_all_trips = original_sams_attr_all_trips.groupby(['rf4_Samskod'])['Id_time'].count().rename('work_trips')

attr_target_all_trips = attr_sams_all_trips.loc[(attr_sams_all_trips.rf4_komkod == 1265) | (attr_sams_all_trips.rf4_komkod == 1270)]
attr_target_all_trips = pd.DataFrame(attr_target_all_trips.groupby(['rf4_Samskod'])['Id_time'].count().rename('attr_work_trips'))
attr_other_all_trips = attr_sams_all_trips.loc[(attr_sams_all_trips.rf4_komkod != 1265) | (attr_sams_all_trips.rf4_komkod != 1270)]
attr_other_all_trips = pd.DataFrame(attr_other_all_trips.groupby(['rf4_komkod'])['Id_time'].count().rename('attr_work_trips'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item_labels[indexer[info_axis]]] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [19]:
# ignore not fully filled records in RVU
attr_target_all_trips = attr_target_all_trips.drop(index=0)

In [20]:
attr_all_trips = pd.concat([attr_target_all_trips, attr_other_all_trips])

In [21]:
a4 = read_shapefile('data/GIS/A4_samsSW_2012_shp/A4_sw_region.shp')
a4['KOMMUN'] = a4.KOMMUN.astype(int)
a4['SAMSCODE'] = a4.SAMSCODE.astype(int)
a4 = a4[(a4.KOMMUN > 1200) & (a4.KOMMUN < 1300)]

target_day_pop = a4.loc[(a4.KOMMUN == 1265) | (a4.KOMMUN == 1270)]
target_day_pop = target_day_pop.groupby(['SAMSCODE'])[['Totalt', 'Offentliga', 'Naringsliv']].sum()
other_day_pop = a4.loc[(a4.KOMMUN != 1265) & (a4.KOMMUN != 1270)]
other_day_pop = other_day_pop.groupby(['KOMMUN'])[['Totalt', 'Offentliga', 'Naringsliv']].sum()

target_buildings = b1_buildings.loc[(b1_buildings.KOMMUN == 1265) | (b1_buildings.KOMMUN == 1270)]
target_buildings = target_buildings.groupby(['SAMSCODE'])['nProduction', 'nAttraction','TotBef', 'small_building', 'appt_build', 'multi_appartment_building'].sum()
other_buildings = b1_buildings.loc[(b1_buildings.KOMMUN != 1265) & (b1_buildings.KOMMUN != 1270)]
other_buildings = other_buildings.groupby(['KOMMUN'])['nProduction', 'nAttraction','TotBef', 'small_building', 'appt_build', 'multi_appartment_building'].sum()

In [22]:
attr_target_x = pd.concat([target_day_pop, target_buildings], axis=1)
attr_other_x = pd.concat([other_day_pop, other_buildings], axis=1)
all_attr_x = pd.concat([attr_target_x, attr_other_x])

all_attr_x.head()

Unnamed: 0,Totalt,Offentliga,Naringsliv,nProduction,nAttraction,TotBef,small_building,appt_build,multi_appartment_building
12650001,64.0,0.0,64.0,533,5,836,533,0,0
12650002,30.0,0.0,30.0,65,9,101,64,0,1
12650003,703.0,273.0,430.0,1417,153,3496,1341,62,14
12650004,1141.0,347.0,794.0,963,142,2108,926,30,7
12650005,1160.0,326.0,834.0,483,295,1705,424,55,4


In [23]:
attr_x_sams = all_attr_x.loc[:, ['Offentliga', 'Naringsliv', 'nProduction']]
pred_attr_sams = pd.DataFrame(attraction_model.predict(attr_x_sams), index=attr_x_sams.index, columns=['pred_attr'])

# Gravity

In [24]:
sams_distance_matrix = pd.read_csv('data/sams_distance_matrix.csv', sep=';', index_col=[0])
sams_distance_matrix.columns = sams_distance_matrix.index
#the thing above is differently sorted
sams_distance_matrix = sams_distance_matrix.sort_index().T.sort_index().T

In [25]:
sams_distance_matrix.replace(0, np.nan, inplace=True)
for (idx, value) in sams_distance_matrix.min().iteritems():
    sams_distance_matrix.at[int(idx), idx] = value * (2.0 / 3)
sams_distance_matrix.at[12650001, 12650002] = sams_distance_matrix.loc[12650001].min() * (1.0 / 2)
sams_distance_matrix.at[12650002, 12650001] = sams_distance_matrix.loc[12650002].min() * (1.0 / 2)
sams_distance_matrix.at[12700020, 12700001] = sams_distance_matrix.loc[12700020].min() * (1.0 / 2)
sams_distance_matrix.at[12700001, 12700020] = sams_distance_matrix.loc[12700001].min() * (1.0 / 2)

In [26]:
pred_prod_sams = pred_prod_sams.sort_index().T.sort_index().T
pred_attr_sams = pred_attr_sams.sort_index().T.sort_index().T

pred_attr_sams_b = pred_attr_sams * (pred_prod_sams.sum().values / pred_attr_sams.sum().values)

In [27]:
f = sams_distance_matrix.values
cost_matrix = np.exp(-9.522933444530746e-05*(f))
Trips1 = fratar_double_constrained(prodA = pred_prod_sams.values.flatten(),
                                   attrA = pred_attr_sams.values.flatten(), 
                                   cost_matrix = cost_matrix,
                                    num_iter=100)

Checking production, attraction balancing:
Production:  2246455.1728156973
Attraction:  1296582.2204051262
Productions and attractions do not balance, attractions will be scaled to productions!


In [28]:
trips = pd.DataFrame(Trips1, columns=sams_distance_matrix.index, index=sams_distance_matrix.index)
trips.head()

Unnamed: 0,1214,1230,1231,1233,1256,1257,1260,1261,1262,1263,...,12700011,12700012,12700013,12700014,12700015,12700016,12700017,12700018,12700019,12700020
1214,4442.640896,35.133001,62.680152,13.713858,1.806619,14.404036,1131.997554,1926.028954,250.643604,12.008906,...,0.082342,0.110015,0.101454,0.025929,0.246125,0.237648,0.030704,0.001401,0.000918,0.014334
1230,30.951871,2635.208998,1963.8921,631.410944,0.291265,0.89746,4.725716,433.676623,668.519754,552.911873,...,1.706392,2.279857,0.205997,0.537328,0.906342,1.735528,0.636286,0.064519,0.042288,0.297057
1231,30.370438,1067.289676,1445.363164,416.607134,0.120449,0.880601,4.636943,425.529972,655.961555,364.813174,...,0.416048,0.555869,0.050226,0.13101,0.220982,0.423152,0.097477,0.04257,0.027902,0.045336
1233,11.907625,705.484453,755.537229,12838.740734,0.051979,0.345266,1.81805,166.841566,257.18906,897.468035,...,0.281809,0.376516,0.033199,0.086598,0.14607,0.279706,0.239802,0.104725,0.06864,0.111529
1256,0.499478,0.362973,0.268539,0.058968,3230.620834,109.227106,5.806452,0.382406,0.348616,0.051637,...,0.404447,0.435342,0.615976,0.464758,1.151084,10.855506,0.1215,0.016319,0.010562,0.042193


In [29]:
(trips*f).sum().sum() / trips.sum().sum()

17412.563083290966

In [30]:
trips.loc[12650001:12659999, 12650001:12659999].sum().sum()

12667.119341066813

In [31]:
pred_prod_sams.loc[12650003]

pred_prod    6236.570731
Name: 12650003, dtype: float64

In [32]:
prodA = np.array([400, 460, 400, 702])
attrA = np.array([260, 400, 500, 802])
f = np.array([
    [3, 11, 18, 22],
    [12, 3 ,13, 18],
    [15, 13, 5 ,7],
    [24, 18, 8, 5]
])
mtl = 10
alph=1. / mtl
cost_matrix = np.exp(-alph*(f))
cost_matrix = cost_matrix
num_iter=10

In [33]:
prodA = pred_prod_sams.values.flatten()
attrA = pred_attr_sams.values.flatten()
f = sams_distance_matrix.values
c=-0.00001
c2 = 0.000000005
cost_matrix = np.power(f, c2) * np.exp(c*f)
num_iter=10
num_iter_cal = 50
mtl = 17000
num_zones = len(prodA)
alpha = 0.5
f = f

trips = np.zeros((num_zones, num_zones))
print('Checking production, attraction balancing:')
sumP = sum(prodA)
sumA = sum(attrA)
print('Production: ', sumP)
print('Attraction: ', sumA)
if sumP != sumA:
    print('Productions and attractions do not balance, attractions will be scaled to productions!')
    attrA = attrA * (sumP / sumA)
    attrT = attrA.copy()
    prodT = prodA.copy()
else:
    print('Production, attraction balancing OK.')
    attrT = attrA.copy()
    prodT = prodA.copy()

trips = cost_matrix
#Run 2D balancing --->
computed_attractions = trips.sum(0)
computed_attractions[computed_attractions==0]=1
trips = trips * (attrT / computed_attractions)

computed_productions = trips.sum(1)
computed_productions[computed_productions==0]=1
trips = trips * np.reshape((prodT / computed_productions),[len(prodT),1])

for Iter in range(0, num_iter_cal):

    model_trip_len = (trips * f).sum() / trips.sum()
    c = c*(model_trip_len / mtl)**alpha
    cost_matrix = np.power(f, c2) * np.exp(c * f)
    for _ in range(0, num_iter):
        trips = cost_matrix
        #Run 2D balancing --->
        computed_attractions = trips.sum(0)
        computed_attractions[computed_attractions==0]=1
        trips = trips * (attrT / computed_attractions)

        computed_productions = trips.sum(1)
        computed_productions[computed_productions==0]=1
        trips = trips * np.reshape((prodT / computed_productions),[len(prodT),1])
    print ('iteration: ', Iter, ' coefficient: ', c, ' average trip length (model): ', model_trip_len)

print ('target average trip length (observed): ', mtl) 
print ('final average trip length (model): ', model_trip_len)
print ('final logit scaling factor: ', c)

Checking production, attraction balancing:
Production:  2246455.1728156973
Attraction:  1296582.2204051262
Productions and attractions do not balance, attractions will be scaled to productions!
iteration:  0  coefficient:  -1.7733633484754533e-05  average trip length (model):  53461.89861717321
iteration:  1  coefficient:  -2.9263535360664302e-05  average trip length (model):  46292.117830267845
iteration:  2  coefficient:  -4.3160931509273965e-05  average trip length (model):  36980.855570882326
iteration:  3  coefficient:  -5.655851603757511e-05  average trip length (model):  29191.96402905879
iteration:  4  coefficient:  -6.801246812039847e-05  average trip length (model):  24582.72257036023
iteration:  5  coefficient:  -7.724138694288267e-05  average trip length (model):  21926.634503690468
iteration:  6  coefficient:  -8.43989988886025e-05  average trip length (model):  20296.604311234452
iteration:  7  coefficient:  -8.981200681208761e-05  average trip length (model):  19250.5495

In [34]:
trips = pd.DataFrame(trips, columns=sams_distance_matrix.index, index=sams_distance_matrix.index)
trips.head()

Unnamed: 0,1214,1230,1231,1233,1256,1257,1260,1261,1262,1263,...,12700011,12700012,12700013,12700014,12700015,12700016,12700017,12700018,12700019,12700020
1214,4844.250773,24.138865,55.808762,14.92665,1.820238,6.426017,535.713497,2076.172543,238.967026,9.504572,...,0.003354,0.003777,0.004251,0.000838,0.01148,0.00945,0.001275,4.4e-05,3.1e-05,0.000498
1230,15.525952,2086.492947,1848.382629,754.659703,0.184199,0.228755,0.978004,303.28746,528.043507,480.530958,...,0.070527,0.079413,0.006961,0.017612,0.036157,0.063162,0.026815,0.002209,0.00157,0.010466
1231,14.29574,726.536197,1240.740765,449.265186,0.065625,0.21063,0.900511,279.256225,486.203538,286.070435,...,0.01406,0.015832,0.001388,0.003511,0.007208,0.012592,0.003208,0.001315,0.000935,0.001247
1233,4.118514,371.585058,490.313851,15665.210593,0.021008,0.060681,0.259432,80.45198,140.072213,620.111909,...,0.007387,0.008317,0.00071,0.001796,0.003687,0.00644,0.006954,0.00285,0.002026,0.002703
1256,0.103078,0.073813,0.064867,0.017419,3176.320962,27.700743,0.758884,0.082532,0.080676,0.011092,...,0.008971,0.007966,0.014354,0.009292,0.029095,0.293073,0.00269,0.000302,0.000212,0.000758


In [35]:
trips.loc[12650000:12659999, 12650000:12659999].sum().sum()

4793.002665216537

In [36]:
trips.loc[12650000:12659999, :].sum().sum()

32196.791099185182

In [37]:
pd.concat([sams_distance_matrix.loc[1280], trips.loc[1280]], axis=1)

Unnamed: 0,1280,1280.1
1214,59281.024000,95.106209
1230,17367.971000,4889.442556
1231,10265.369000,11935.234664
1233,24280.615000,16350.992891
1256,114968.564000,0.487161
1257,106211.483000,1.401270
1260,79442.086000,5.990887
1261,30617.532000,1857.826256
1262,24513.053000,3234.598263
1263,23676.829000,4211.774003


In [38]:
trips.loc[12650008]

1214          1.419414
1230         52.243540
1231         25.818171
1233         10.541072
1256          0.136885
1257          0.003195
1260          0.013661
1261          4.236313
1262          7.375701
1263         55.543819
1264        172.070582
1266         36.620375
1267         14.525988
1272          0.212308
1273          0.044188
1275          0.272102
1276          0.008697
1277          0.013142
1278          0.005462
1280        298.453193
1281        521.522714
1282          1.937727
1283          0.790303
1284          0.055720
1285         30.530478
1286        280.475446
1287          4.437880
1290          4.593743
1291          6.729225
1292          0.045494
               ...    
12650008     21.576068
12650009      1.566150
12650010      0.424616
12650011      0.660348
12650012      2.063817
12650013      1.804237
12650014      2.641425
12650015      1.904361
12650016      0.834781
12650017      1.678964
12700001      0.470984
12700002      0.100486
12700003   

In [39]:
sams_distance_matrix = sams_distance_matrix.loc[12650000:99999999, 12650000:99999999]
sams_pred_attr_sams = pred_attr_sams.loc[12650000:99999999]
sams_pred_prod_sams = pred_prod_sams.loc[12650000:99999999]

In [40]:
f = sams_distance_matrix.values
cost_matrix = np.exp(-0.0000005*(f))
Trips1 = fratar_double_constrained(prodA = sams_pred_attr_sams.values.flatten(),
                                   attrA = sams_pred_prod_sams.values.flatten(), 
                                   cost_matrix = cost_matrix,
                                    num_iter=100)

Checking production, attraction balancing:
Production:  28587.286303693953
Attraction:  53388.513095639115
Productions and attractions do not balance, attractions will be scaled to productions!


In [41]:
trips = pd.DataFrame(Trips1, columns=sams_distance_matrix.index, index=sams_distance_matrix.index)
trips.head()

Unnamed: 0,12650001,12650002,12650003,12650004,12650005,12650006,12650007,12650008,12650009,12650010,...,12700011,12700012,12700013,12700014,12700015,12700016,12700017,12700018,12700019,12700020
12650001,10.076568,1.283829,42.195206,24.65649,20.351584,8.901664,25.510034,11.40866,8.596997,2.597503,...,8.565901,6.512706,1.941167,2.435173,3.180315,12.73292,1.394992,1.307562,1.510228,1.917413
12650002,2.505811,0.318885,10.486843,6.127917,5.058012,2.212345,6.34005,2.835413,2.136626,0.645562,...,2.128897,1.618613,0.482441,0.605218,0.790409,3.164533,0.3467,0.32497,0.37534,0.476538
12650003,62.287807,7.931284,261.758519,152.900711,126.213771,55.203436,157.702973,70.66448,53.446267,16.108345,...,53.121205,40.388373,12.038094,15.101663,19.722637,78.962869,8.651006,8.108809,9.365642,11.890785
12650004,83.332048,10.610908,350.079161,204.533303,168.803866,73.844885,210.957122,94.526914,71.459954,21.547914,...,71.059514,54.026977,16.103194,20.201289,26.382704,105.627555,11.572333,10.847043,12.528292,15.906142
12650005,78.288812,9.968738,328.907172,192.128606,158.660975,69.368088,198.167987,88.825991,67.145069,20.241586,...,66.751578,50.751628,15.126949,18.976599,24.78327,99.223955,10.870768,10.189448,11.768773,14.941843


In [42]:
trips.loc[:, 12650005]

12650001     20.351584
12650002      5.058012
12650003    126.213771
12650004    168.803866
12650005    158.660975
12650006     24.740846
12650007     91.619685
12650008     42.039705
12650009     32.179412
12650010      8.226379
12650011     23.344781
12650012     55.825194
12650013     39.669910
12650014     46.906122
12650015     18.521618
12650016     10.615254
12650017     25.611935
12700001     48.513444
12700002      6.140320
12700003     18.508436
12700004     49.360185
12700005    169.998004
12700006     94.684403
12700007     36.882371
12700008     12.326266
12700009     23.891453
12700010     28.553972
12700011     46.187674
12700012     42.225987
12700013      6.648089
12700014     11.483550
12700015     11.670287
12700016     69.921574
12700017     21.604722
12700018      4.546968
12700019      2.570250
12700020      6.259363
Name: 12650005, dtype: float64