In [1]:
import numpy as np;
import pandas as pd;


## Initialize Parameters

In [2]:
## parameters and global variables

## Prefix of the input/output CSV file names ("<district>_init_data.csv", etc.).
district = "all"

## Job ratios: jobs J in a region are computed as population N times the zeta
## for the region's type code ('P', 'M' or 'C' in the `type` column).
zetaPanchayat = 0.1
zetaMunicipality = 0.2
zetaCorporation = 0.3


## Travel ratios: travellers T out of a region are computed as population N
## times the mu for the region's area class
## (<= 25 sq km: small, 25-100 sq km: medium, > 100 sq km: large).
muForSmall = 0.09
muForMedium = 0.04
muForLarge = 0.02



## Load and Initialize Data

In [3]:
## Load population data
initDataDF = pd.read_csv("../data/" + district + "_init_data.csv")

## Load distance data
distanceData = pd.read_csv("../data/" + district + "_distance_matrix.csv")

## Load area data
areaData = pd.read_csv("../data/" + district + "_area_data.csv")

## Number of regions
r = len(initDataDF.index)

## Sort population data and distance data by name for easy matrix transformation
## (reassigned instead of inplace=True so the lineage of each frame stays explicit)
initDataDF = initDataDF.sort_values(by=['name'])
distanceData = distanceData.sort_values(by=['fromName', 'toName'])
areaData = areaData.sort_values(by=['name'])


## Initialize Job: J = N * zeta, with zeta chosen by the region type code.
## Rows whose type is not P/M/C are left untouched, as before.
for regionType, zeta in (('P', zetaPanchayat), ('M', zetaMunicipality), ('C', zetaCorporation)):
    isType = initDataDF['type'] == regionType
    initDataDF.loc[isType, 'J'] = initDataDF.loc[isType, 'N'] * zeta

## Initialize T: T = N * mu, with mu chosen by the region's area class.
## NOTE(review): 'area' is presumably in square metres (hence the 1e6 divisor) - confirm.
areaData['areaSqKm'] = areaData['area']/(1000*1000)

## Look the area up by region *name*. A boolean mask built directly from
## areaData is aligned by pandas on row labels, not on names, so it would pick
## the wrong rows whenever the two CSV files list regions in a different order.
## Series.map requires the names in areaData to be unique and fails loudly otherwise.
areaByName = areaData.set_index('name')['areaSqKm']
regionArea = initDataDF['name'].map(areaByName)

isSmall = regionArea <= 25
isMedium = (regionArea > 25) & (regionArea <= 100)
isLarge = regionArea > 100

## Regions with no matching area keep T unset (NaN), since every mask is False for them.
initDataDF.loc[isSmall, 'T'] = initDataDF.loc[isSmall, 'N'] * muForSmall
initDataDF.loc[isMedium, 'T'] = initDataDF.loc[isMedium, 'N'] * muForMedium
initDataDF.loc[isLarge, 'T'] = initDataDF.loc[isLarge, 'N'] * muForLarge

initDataDF.tail(15)

Unnamed: 0,name,type,S,E,I,H,R,N,J,T
1019,Vettikkavala__Kollam,P,36204,0,0,0,0,36204,3620.4,1448.16
1020,Vettom__Malappuram,P,6577,0,0,0,0,6577,657.7,591.93
1021,Vettoor__Thiruvananthapuram,P,18704,0,0,0,0,18704,1870.4,1683.36
1022,Vijayapuram__Kottayam,P,30838,0,0,0,0,30838,3083.8,2775.42
1023,Vilakudi__Kollam,P,32995,0,0,0,0,32995,3299.5,2969.55
1024,Vilappil__Thiruvananthapuram,P,36212,0,0,0,0,36212,3621.2,3259.08
1025,Vilavoorkal__Thiruvananthapuram,P,31761,0,0,0,0,31761,3176.1,2858.49
1026,Vilayoor__Palakkad,P,23389,0,0,0,0,23389,2338.9,2105.01
1027,Villiyappally__Kozhikode,P,34502,0,0,0,0,34502,3450.2,3105.18
1028,Vithura__Thiruvananthapuram,P,26249,0,0,0,0,26249,2624.9,524.98


## Matrix computation of TMatrix

### Prepare Distance matrix

In [5]:
## Fold the flat 'value' column into an r x r array. This relies on distanceData
## holding exactly r*r rows ordered by (fromName, toName).
## The explicit copy keeps the diagonal edit below from touching distanceData.
distanceMatrix = distanceData['value'].to_numpy().copy().reshape(r, r)
## Put 1 on the diagonal so the later division by distance^2 never divides by
## a region's distance to itself.
distanceMatrix[np.diag_indices(r)] = 1
distanceMatrix


array([[     1,  62713, 257069, ...,  38737, 153934, 239018],
       [ 62713,      1, 194549, ...,  74778, 216606, 176575],
       [257069, 194549,      1, ..., 263797, 410410,  18277],
       ...,
       [ 38737,  74778, 263797, ...,      1, 159119, 246386],
       [153934, 216606, 410410, ..., 159119,      1, 392249],
       [239018, 176575,  18277, ..., 246386, 392249,      1]])

### Job matrix

In [6]:
## Stack the job vector r times as rows: jobMatrix[i][j] == J of region j.
jobMatrix = np.repeat(initDataDF['J'].to_numpy()[np.newaxis, :], r, axis=0)
jobMatrix


array([[ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6],
       [ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6],
       [ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6],
       ...,
       [ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6],
       [ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6],
       [ 733.8, 3197.3, 3363.8, ..., 1023.9, 2931.6, 1817.6]])

### Job by Distance Square matrix

In [7]:
## Attraction of destination j as seen from origin i: J_j / d_ij^2.
tPropJByDSqMatrix = jobMatrix / np.square(distanceMatrix)
## A region does not attract its own travellers: clear the diagonal.
tPropJByDSqMatrix[np.diag_indices_from(tPropJByDSqMatrix)] = 0
tPropJByDSqMatrix


array([[0.00000000e+00, 8.12958235e-07, 5.09015198e-08, ...,
        6.82347455e-07, 1.23718773e-07, 3.18153788e-08],
       [1.86578911e-07, 0.00000000e+00, 8.88734748e-08, ...,
        1.83109068e-07, 6.24832698e-08, 5.82961514e-08],
       [1.11039703e-08, 8.44744518e-08, 0.00000000e+00, ...,
        1.47135625e-08, 1.74047923e-08, 5.44112233e-06],
       ...,
       [4.89019008e-07, 5.71788869e-07, 4.83381986e-08, ...,
        0.00000000e+00, 1.15787221e-07, 2.99409972e-08],
       [3.09676748e-08, 6.81463223e-08, 1.99707464e-08, ...,
        4.04402155e-08, 0.00000000e+00, 1.18133922e-08],
       [1.28444789e-08, 1.02547472e-07, 1.00697884e-05, ...,
        1.68665201e-08, 1.90537746e-08, 0.00000000e+00]])

### Sum of Job by Distance Square matrix

In [8]:
## Row totals of the attraction matrix, repeated across every column so that
## tPropMatrixSum[i][j] == sum over k of tPropJByDSqMatrix[i][k].
tPropMatrixSum = np.repeat(tPropJByDSqMatrix.sum(axis=1, keepdims=True), r, axis=1)
tPropMatrixSum

array([[0.00263165, 0.00263165, 0.00263165, ..., 0.00263165, 0.00263165,
        0.00263165],
       [0.00343072, 0.00343072, 0.00343072, ..., 0.00343072, 0.00343072,
        0.00343072],
       [0.00368204, 0.00368204, 0.00368204, ..., 0.00368204, 0.00368204,
        0.00368204],
       ...,
       [0.00088493, 0.00088493, 0.00088493, ..., 0.00088493, 0.00088493,
        0.00088493],
       [0.00068257, 0.00068257, 0.00068257, ..., 0.00068257, 0.00068257,
        0.00068257],
       [0.00333401, 0.00333401, 0.00333401, ..., 0.00333401, 0.00333401,
        0.00333401]])

### Ti proportion matrix

In [9]:
## Normalise each row by its total: tPropMatrix[i][j] is the share of origin
## i's attraction total that belongs to destination j.
tPropMatrix = np.divide(tPropJByDSqMatrix, tPropMatrixSum)
tPropMatrix

array([[0.00000000e+00, 3.08915258e-04, 1.93420221e-05, ...,
        2.59284587e-04, 4.70117837e-05, 1.20894968e-05],
       [5.43848084e-05, 0.00000000e+00, 2.59052155e-05, ...,
        5.33734038e-05, 1.82128871e-05, 1.69924082e-05],
       [3.01571068e-06, 2.29422900e-05, 0.00000000e+00, ...,
        3.99603444e-06, 4.72694151e-06, 1.47774628e-03],
       ...,
       [5.52608643e-04, 6.46141491e-04, 5.46238610e-05, ...,
        0.00000000e+00, 1.30843624e-04, 3.38343777e-05],
       [4.53689850e-05, 9.98373139e-05, 2.92580086e-05, ...,
        5.92466674e-05, 0.00000000e+00, 1.73071313e-05],
       [3.85256101e-06, 3.07579928e-05, 3.02032291e-03, ...,
        5.05892828e-06, 5.71497134e-06, 0.00000000e+00]])

### Ti matrix 
`number of people traveling out of region-i`

In [10]:
## Spread the traveller vector across columns: TiMatrix[i][j] == T of region i.
TiMatrix = np.repeat(initDataDF['T'].to_numpy()[:, np.newaxis], r, axis=1)
TiMatrix

array([[ 660.42,  660.42,  660.42, ...,  660.42,  660.42,  660.42],
       [2877.57, 2877.57, 2877.57, ..., 2877.57, 2877.57, 2877.57],
       [3027.42, 3027.42, 3027.42, ..., 3027.42, 3027.42, 3027.42],
       ...,
       [ 409.56,  409.56,  409.56, ...,  409.56,  409.56,  409.56],
       [1172.64, 1172.64, 1172.64, ..., 1172.64, 1172.64, 1172.64],
       [1635.84, 1635.84, 1635.84, ..., 1635.84, 1635.84, 1635.84]])

### Tij Matrix

In [11]:
## Element-wise product: travellers leaving i, split across destinations j
## in proportion to tPropMatrix[i][j].
TijMatrix = TiMatrix * tPropMatrix
TijMatrix

array([[0.        , 0.20401381, 0.01277386, ..., 0.17123673, 0.03104752,
        0.00798415],
       [0.15649609, 0.        , 0.07454407, ..., 0.15358571, 0.05240886,
        0.04889684],
       [0.00912982, 0.06945595, 0.        , ..., 0.01209767, 0.01431044,
        4.47375863],
       ...,
       [0.2263264 , 0.26463371, 0.02237175, ..., 0.        , 0.05358831,
        0.01385721],
       [0.05320149, 0.11707323, 0.03430911, ..., 0.06947501, 0.        ,
        0.02029503],
       [0.00630217, 0.05031516, 4.94076502, ..., 0.0082756 , 0.00934878,
        0.        ]])

### Set diagonal elements to Ni - Σj Tij

In [12]:
## Set the diagonal to N_i minus the sum of the off-diagonal flows out of region i.
row,col = np.diag_indices_from(TijMatrix)
## Clear the diagonal before summing. Without this, re-running the cell would
## include the previously written diagonal in the row sum and overwrite the
## diagonal with (approximately) zero.
TijMatrix[row,col] = 0
TijMatrix[row,col] = initDataDF['N'].to_numpy() - TijMatrix.sum(axis=1)
TijMatrix

array([[6.67758000e+03, 2.04013815e-01, 1.27738582e-02, ...,
        1.71236727e-01, 3.10475222e-02, 7.98414545e-03],
       [1.56496093e-01, 2.90954300e+04, 7.45440709e-02, ...,
        1.53585706e-01, 5.24088576e-02, 4.88968441e-02],
       [9.12982284e-03, 6.94559477e-02, 3.06105800e+04, ...,
        1.20976746e-02, 1.43104373e-02, 4.47375863e+00],
       ...,
       [2.26326396e-01, 2.64633709e-01, 2.23717485e-02, ...,
        9.82944000e+03, 5.35883146e-02, 1.38572077e-02],
       [5.32014866e-02, 1.17073228e-01, 3.43091112e-02, ...,
        6.94750121e-02, 2.81433600e+04, 2.02950345e-02],
       [6.30217340e-03, 5.03151550e-02, 4.94076502e+00, ...,
        8.27559724e-03, 9.34877871e-03, 1.65401600e+04]])

## Save output

In [13]:

## Flatten the r x r matrix into one (iName, jName, Tij) row per ordered pair,
## in row-major order (all j for the first i, then all j for the second i, ...).
## Built column-wise because a Python loop with two DataFrame.iloc lookups per
## cell is very slow for r*r cells.
regionNames = initDataDF['name'].to_numpy()

resultsDF = pd.DataFrame({
    'iName': np.repeat(regionNames, r),  # each origin name repeated r times
    'jName': np.tile(regionNames, r),    # full destination list repeated per origin
    'Tij': TijMatrix.ravel(),            # row-major, matches the two name columns
})

resultsDF.to_csv("../output/" + district + "_tmatrix_results.csv")


## Save top 5 

In [14]:
## Rank destinations per origin by descending flow. method="first" gives every
## row of a group a distinct rank even when flows tie; with "dense", tied rows
## share a rank and the pivot below fails on duplicate (iName, rank) pairs.
resultsDF["rank"] = resultsDF.groupby(['iName'])['Tij'].rank(method="first", ascending=False)
## Keep ranks 1..6.
## NOTE(review): presumably 6 = the region itself (diagonal entry) plus its top 5 destinations - confirm.
## .copy() makes this an independent frame, so adding a column below does not
## raise SettingWithCopyWarning.
top5resultsDF = resultsDF[resultsDF["rank"] <= 6].copy()
top5resultsDF["jName_Tij"] = top5resultsDF["jName"] + " (" + top5resultsDF["Tij"].astype(str)  + ")"
## One row per origin, one column per rank.
top5resultsDFPivot = top5resultsDF.pivot(index="iName", columns="rank", values="jName_Tij")
top5resultsDFPivot.to_csv("../output/" + district + "_tmatrix_top5_regions.csv")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


## Old Version
### Function definition

def getTij(i,j,distanceMatrix, initDataDF, tPropMatrix):
    """Return the flow from region i to region j (old, per-cell version).

    The flow is T of region i multiplied by tPropMatrix[i][j] and divided by
    the sum of row i of tPropMatrix. Returns 0 when i == j.

    distanceMatrix is accepted but not used by this function.
    """
    # T is read first, even for i == j, exactly as the original did.
    outflow = initDataDF.iloc[i]['T']

    if i != j:
        weights = tPropMatrix[i]
        ## TODO move this tPropMatrix computation.
        return outflow * weights[j] / weights.sum()

    return 0
    
    

### Compute TMatrix


## Old per-cell computation: one getTij call for every ordered pair (i, j).
## Note that running this rebinds `data` and `resultsDF`.
data = []

for i in range(r):
    # Running total of the flows out of region i (accumulated, not read afterwards here).
    TijSum = 0
    for j in range(r):
        Tij = getTij(i, j, distanceMatrix, initDataDF, tPropMatrix)
        TijSum = TijSum + Tij
        data.append(dict(
            iName=initDataDF.iloc[i]['name'],
            jName=initDataDF.iloc[j]['name'],
            Tij=Tij,
        ))

resultsDF = pd.DataFrame(data)

