In [1]:
import numpy as np;
import pandas as pd;


## Initialize Parameters

In [2]:
## parameters and global variables

## File-name prefix: inserted into the input paths ("../data/<district>_...csv")
## and the output paths ("../output/<district>_...csv") used further down.
district = "all"

## Job factors: J is initialised as N * zeta, with the factor chosen by the
## region's 'type' column ('P' -> zetaPanchayat, 'M' -> zetaMunicipality,
## 'C' -> zetaCorporation).
zetaPanchayat = 0.1
zetaMunicipality = 0.2
zetaCorporation = 0.3


## Travel factors: T is initialised as N * mu, with the factor chosen by the
## region's area in sq. km (<= 25 -> muForSmall, (25, 100] -> muForMedium,
## > 100 -> muForLarge).
muForSmall = 0.09
muForMedium = 0.04
muForLarge = 0.02



## Load and Initialize Data

In [3]:
## Read the three input tables for the selected district
initDataDF = pd.read_csv("../data/" + district + "_population_data.csv")
distanceData = pd.read_csv("../data/" + district + "_distance_matrix.csv")
areaData = pd.read_csv("../data/" + district + "_area_data.csv")

## Number of regions
r = len(initDataDF.index)

## Order every table by region name so that row k of the population table,
## row k of the area table and row/column k of the reshaped distance matrix
## are addressed with the same positional index.
initDataDF = initDataDF.sort_values(by=['name']).reset_index(drop=True)
distanceData = distanceData.sort_values(by=['fromName', 'toName']).reset_index(drop=True)
areaData = areaData.sort_values(by=['name']).reset_index(drop=True)

## Initialize J: N scaled by the factor belonging to the region type
for regionType, zeta in (('P', zetaPanchayat), ('M', zetaMunicipality), ('C', zetaCorporation)):
    ofType = initDataDF['type'] == regionType
    initDataDF.loc[ofType, 'J'] = initDataDF.loc[ofType, 'N'] * zeta

## Initialize T: N scaled by the factor belonging to the area band.
## The masks come from areaData and are applied to initDataDF, which relies on
## both frames sharing the same 0..r-1 index after the sort above.
areaData['areaSqKm'] = areaData['area'] / (1000 * 1000)
areaSqKm = areaData['areaSqKm']

areaBands = (
    (areaSqKm <= 25, muForSmall),
    ((areaSqKm > 25) & (areaSqKm <= 100), muForMedium),
    (areaSqKm > 100, muForLarge),
)
for inBand, mu in areaBands:
    initDataDF.loc[inBand, 'T'] = initDataDF.loc[inBand, 'N'] * mu

initDataDF.tail(15)


Unnamed: 0,name,type,N,J,T
1020,Vettikkavala__Kollam,P,36204,3620.4,1448.16
1021,Vettom__Malappuram,P,6577,657.7,591.93
1022,Vettoor__Thiruvananthapuram,P,18704,1870.4,1683.36
1023,Vijayapuram__Kottayam,P,30838,3083.8,2775.42
1024,Vilakudi__Kollam,P,32995,3299.5,2969.55
1025,Vilappil__Thiruvananthapuram,P,36212,3621.2,3259.08
1026,Vilavoorkal__Thiruvananthapuram,P,31761,3176.1,2858.49
1027,Vilayoor__Palakkad,P,23389,2338.9,2105.01
1028,Villiyappally__Kozhikode,P,34502,3450.2,3105.18
1029,Vithura__Thiruvananthapuram,P,26249,2624.9,524.98


## Matrix computation of TMatrix

### Prepare Distance matrix

In [4]:
## Copy the sorted 'value' column into an r x r array (row = fromName, column = toName order).
pairDistances = distanceData['value'].to_numpy(copy=True)
distanceMatrix = pairDistances.reshape((r, r))
## Put 1 on the diagonal so the later division by distance squared never divides by zero there.
np.fill_diagonal(distanceMatrix, 1)
distanceMatrix


array([[     1,  62713, 257069, ...,  38737, 153934, 239018],
       [ 62713,      1, 194549, ...,  74778, 216606, 176575],
       [257069, 194549,      1, ..., 263797, 410410,  18277],
       ...,
       [ 38737,  74778, 263797, ...,      1, 159119, 246386],
       [153934, 216606, 410410, ..., 159119,      1, 392249],
       [239018, 176575,  18277, ..., 246386, 392249,      1]])

In [5]:
## Spot check: display a single entry of the distance matrix (row/column indices are hard-coded).
distanceMatrix[1021,1031]

74357

### Job matrix

In [6]:
## Stack the job vector r times as rows, so jobMatrix[i, j] holds J of region j for every i.
jobsPerRegion = initDataDF['J'].to_numpy()
jobMatrix = np.repeat(jobsPerRegion[np.newaxis, :], r, axis=0)
jobMatrix


array([[ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6],
       [ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6],
       [ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6],
       ...,
       [ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6],
       [ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6],
       [ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6]])

### Job by Distance Square matrix

In [7]:
## Element-wise J_j / d_ij^2
tPropJByDSqMatrix = np.divide(jobMatrix, np.square(distanceMatrix))
## The diagonal (i == j) was computed with the placeholder distance 1; zero it out.
np.fill_diagonal(tPropJByDSqMatrix, 0)
tPropJByDSqMatrix


array([[0.00000000e+00, 8.12958235e-07, 5.09015198e-08, ...,
        6.82347455e-07, 1.23718773e-07, 3.18153788e-08],
       [1.86578911e-07, 0.00000000e+00, 8.88734748e-08, ...,
        1.83109068e-07, 6.24832698e-08, 5.82961514e-08],
       [1.11039703e-08, 8.44744518e-08, 0.00000000e+00, ...,
        1.47135625e-08, 1.74047923e-08, 5.44112233e-06],
       ...,
       [4.89019008e-07, 5.71788869e-07, 4.83381986e-08, ...,
        0.00000000e+00, 1.15787221e-07, 2.99409972e-08],
       [3.09676748e-08, 6.81463223e-08, 1.99707464e-08, ...,
        4.04402155e-08, 0.00000000e+00, 1.18133922e-08],
       [1.28444789e-08, 1.02547472e-07, 1.00697884e-05, ...,
        1.68665201e-08, 1.90537746e-08, 0.00000000e+00]])

### Sum of Job by Distance Square matrix

In [8]:
## Row totals of J/d^2, repeated across the columns so that every entry of row i holds row i's total.
rowTotals = tPropJByDSqMatrix.sum(axis=1)
tPropMatrixSum = np.repeat(rowTotals[:, np.newaxis], r, axis=1)
tPropMatrixSum

array([[0.00263169, 0.00263169, 0.00263169, ..., 0.00263169, 0.00263169,
        0.00263169],
       [0.00343079, 0.00343079, 0.00343079, ..., 0.00343079, 0.00343079,
        0.00343079],
       [0.00368247, 0.00368247, 0.00368247, ..., 0.00368247, 0.00368247,
        0.00368247],
       ...,
       [0.00088496, 0.00088496, 0.00088496, ..., 0.00088496, 0.00088496,
        0.00088496],
       [0.00068259, 0.00068259, 0.00068259, ..., 0.00068259, 0.00068259,
        0.00068259],
       [0.00333467, 0.00333467, 0.00333467, ..., 0.00333467, 0.00333467,
        0.00333467]])

### Ti proportion matrix

In [9]:
## Normalise each row of J/d^2 by its row total.
tPropMatrix = np.divide(tPropJByDSqMatrix, tPropMatrixSum)
tPropMatrix

array([[0.00000000e+00, 3.08911137e-04, 1.93417641e-05, ...,
        2.59281128e-04, 4.70111565e-05, 1.20893355e-05],
       [5.43836820e-05, 0.00000000e+00, 2.59046789e-05, ...,
        5.33722984e-05, 1.82125099e-05, 1.69920563e-05],
       [3.01536102e-06, 2.29396299e-05, 0.00000000e+00, ...,
        3.99557111e-06, 4.72639344e-06, 1.47757494e-03],
       ...,
       [5.52586981e-04, 6.46116163e-04, 5.46217199e-05, ...,
        0.00000000e+00, 1.30838495e-04, 3.38330514e-05],
       [4.53682082e-05, 9.98356045e-05, 2.92575076e-05, ...,
        5.92456530e-05, 0.00000000e+00, 1.73068350e-05],
       [3.85179585e-06, 3.07518840e-05, 3.01972304e-03, ...,
        5.05792352e-06, 5.71383628e-06, 0.00000000e+00]])

### Ti matrix 
`number of people traveling out of region-i`

In [10]:
## Repeat the T vector across the columns, so every entry of row i holds T of region i.
travellersPerRegion = initDataDF['T'].to_numpy()
TiMatrix = np.repeat(travellersPerRegion[:, np.newaxis], r, axis=1)
TiMatrix

array([[ 660.42,  660.42,  660.42, ...,  660.42,  660.42,  660.42],
       [2877.57, 2877.57, 2877.57, ..., 2877.57, 2877.57, 2877.57],
       [3027.42, 3027.42, 3027.42, ..., 3027.42, 3027.42, 3027.42],
       ...,
       [ 409.56,  409.56,  409.56, ...,  409.56,  409.56,  409.56],
       [1172.64, 1172.64, 1172.64, ..., 1172.64, 1172.64, 1172.64],
       [1635.84, 1635.84, 1635.84, ..., 1635.84, 1635.84, 1635.84]])

### Tij Matrix

In [11]:
## Element-wise product: T of region i times the proportion assigned to (i, j).
TijMatrix = TiMatrix * tPropMatrix
TijMatrix

array([[0.        , 0.20401109, 0.01277369, ..., 0.17123444, 0.03104711,
        0.00798404],
       [0.15649285, 0.        , 0.07454253, ..., 0.15358252, 0.05240777,
        0.04889583],
       [0.00912876, 0.06944789, 0.        , ..., 0.01209627, 0.01430878,
        4.47323992],
       ...,
       [0.22631752, 0.26462334, 0.02237087, ..., 0.        , 0.05358621,
        0.01385666],
       [0.05320058, 0.11707122, 0.03430852, ..., 0.06947382, 0.        ,
        0.02029469],
       [0.00630092, 0.05030516, 4.93978373, ..., 0.00827395, 0.00934692,
        0.        ]])

In [12]:
## Spot check for index 7: N minus the sum of column 7 of TijMatrix.
columnSumAt7 = TijMatrix.sum(axis=0)[7]
initDataDF['N'][7] - columnSumAt7

47484.0732845316

### Add Ni - Tij to diagonal elements

In [13]:
## Write N minus the off-diagonal column sum onto the diagonal of TijMatrix.
## The current diagonal is subtracted from the column sums first, so that
## re-running this cell does not count a previously written diagonal as flow
## (on the first run the diagonal is still 0 and the result is unchanged).
## NOTE(review): axis=0 sums each column, i.e. over the first index of TijMatrix,
## while TiMatrix is constant along each row - confirm that the column sum
## (rather than the row sum, axis=1) is the intended quantity here.
offDiagonalColumnSum = TijMatrix.sum(axis=0) - np.diag(TijMatrix)
row,col = np.diag_indices_from(TijMatrix)
TijMatrix[row,col] = initDataDF['N'].to_numpy() - offDiagonalColumnSum
TijMatrix

array([[6.89933950e+03, 2.04011093e-01, 1.27736878e-02, ...,
        1.71234443e-01, 3.10471080e-02, 7.98403894e-03],
       [1.56492852e-01, 3.01057964e+04, 7.45425270e-02, ...,
        1.53582525e-01, 5.24077722e-02, 4.88958314e-02],
       [9.12876427e-03, 6.94478945e-02, 3.11619041e+04, ...,
        1.20962719e-02, 1.43087780e-02, 4.47323992e+00],
       ...,
       [2.26317524e-01, 2.64623336e-01, 2.23708716e-02, ...,
        1.00083826e+04, 5.35862141e-02, 1.38566645e-02],
       [5.32005756e-02, 1.17071223e-01, 3.43085238e-02, ...,
        6.94738226e-02, 2.84534246e+04, 2.02946870e-02],
       [6.30092172e-03, 5.03051619e-02, 4.93978373e+00, ...,
        8.27395362e-03, 9.34692194e-03, 1.68187806e+04]])

In [17]:
## Spot check: after the diagonal update, display the sum of column 1024 of TijMatrix (index is hard-coded).
TijMatrix.sum(axis=0)[1024]

32994.99999999999

## Save output

In [15]:

## Flatten TijMatrix into a long (iName, jName, Tij) table.
## TijMatrix.ravel() walks the matrix row by row, so the origin name is repeated
## r times per row (np.repeat) while the destination names cycle (np.tile).
## This yields the same rows, in the same order, as looping i over rows and
## j over columns, without r*r Python-level DataFrame lookups.
regionNames = initDataDF['name'].to_numpy()
regionCount = len(regionNames)

resultsDF = pd.DataFrame({
    'iName': np.repeat(regionNames, regionCount),
    'jName': np.tile(regionNames, regionCount),
    'Tij': np.asarray(TijMatrix).ravel(),
})

resultsDF.to_csv("../output/" + district + "_tmatrix_results.csv")


## Save top 5 

In [16]:
## Rank the destinations of every origin by descending Tij.
## method="first" gives every row of a group a distinct rank (ties are broken by
## row order), so the pivot below can never hit duplicate (iName, rank) pairs;
## without ties the ranks are the same as with dense ranking.
resultsDF["rank"] = resultsDF.groupby(['iName'])['Tij'].rank(method="first", ascending=False)

## Keep six ranks per origin.
## NOTE(review): presumably six because the diagonal (iName == jName) entry usually
## takes rank 1, leaving five other regions - confirm.
## .copy() makes this an independent frame, so adding a column below does not
## trigger pandas' SettingWithCopyWarning.
top5resultsDF = resultsDF[resultsDF["rank"] <= 6].copy()
top5resultsDF["jName_Tij"] = top5resultsDF["jName"] + " (" + top5resultsDF["Tij"].astype(str)  + ")"

## One row per origin, one column per rank, each cell formatted as "jName (Tij)".
top5resultsDFPivot = top5resultsDF.pivot(index="iName", columns="rank", values="jName_Tij")
top5resultsDFPivot.to_csv("../output/" + district + "_tmatrix_top5_regions.csv")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


## Old Version
### Function definition

def getTij(i,j,distanceMatrix, initDataDF, tPropMatrix):
    """Return the share of region i's T value assigned to region j.

    Parameters
    ----------
    i, j : int
        Positional indices of the two regions.
    distanceMatrix :
        Not used by this function; kept for the existing call signature.
    initDataDF : pandas.DataFrame
        Must have a 'T' column; row i is read positionally.
    tPropMatrix : 2-D array
        Weights; row i is normalised by its own sum here.

    Returns
    -------
    0 when i == j, otherwise T_i * tPropMatrix[i][j] / sum(tPropMatrix[i]).
    """
    outflow = initDataDF.iloc[i]['T']

    if i == j:
        return 0

    ## TODO move this tPropMatrix computation.
    weightsFromI = tPropMatrix[i]
    return outflow * weightsFromI[j] / weightsFromI.sum()
    
    

### Compute TMatrix


## Old, loop-based construction of the long (iName, jName, Tij) table via getTij.
data = []

for i in range(r):
    originName = initDataDF.iloc[i]['name']
    rowValues = [getTij(i, j, distanceMatrix, initDataDF, tPropMatrix) for j in range(r)]
    ## Running total of the row; computed but not used afterwards.
    TijSum = sum(rowValues)
    data.extend(
        {'iName': originName, 'jName': initDataDF.iloc[j]['name'], 'Tij': Tij}
        for j, Tij in enumerate(rowValues)
    )

resultsDF = pd.DataFrame(data)

