In [105]:
import pandas as pd
import  requests
import json

In [106]:
incidents_json = requests.get("https://gitlab.com/drvicsana/cop-proyecto-2023/-/raw/main/project_data/incidentes2019.json")
incidents_db = pd.DataFrame(json.loads(incidents_json.text))

ntas_json = requests.get("https://gitlab.com/drvicsana/cop-proyecto-2023/-/raw/main/project_data/barrios.json")
ntas_db = pd.DataFrame(json.loads(ntas_json.text))

In [107]:
# Create a column with the number of units of each type needed per incident
types_units = set().union(*incidents_db["units"])
for unit in types_units:
    incidents_db[unit]=incidents_db["units"].apply(lambda x: x.count(unit))

#Change shift column type to integer: 0 = False, 1 = True
incidents_db["is_first_shift"]= incidents_db["is_first_shift"].astype(int)

#Compute the number per incidents per neighbourhood and shift
incidents_per_nta = incidents_db.groupby(["is_first_shift", "nta"]).size().reset_index(name="incidents_count")


In [108]:
""" 
    computeIncidents:
    row (tuple): shift, neighbourhood
    unit (string): vehicle type
    df (pandas.DataFrame): dataframe of incidents grouped by shift and neigbourhood where summation operator is computed
    returns (int) -> the vehicle needs of a type (unit) in a neighbourhood and shift 
"""
def computeIncidents(row,unit,df=incidents_db.groupby(["is_first_shift","nta"]).sum().reset_index()):
    shift,nta = row
    return df[unit].loc[(df["nta"]==nta) & (df["is_first_shift"]==shift)].values[0]

# Compute number of units needed in total per shift and neighbourhood
for unit in types_units:
    incidents_per_nta[unit]=incidents_per_nta[["is_first_shift","nta"]].apply(lambda row: 
                                                                        computeIncidents(row,unit),axis=1)

# Dictionary object with the number of incidents per shift
incidents_per_shift = dict(incidents_per_nta.groupby("is_first_shift").sum()["incidents_count"])
total_incidents = incidents_per_nta["incidents_count"].sum()

  def computeIncidents(row,unit,df=incidents_db.groupby(["is_first_shift","nta"]).sum().reset_index()):
  incidents_per_shift = dict(incidents_per_nta.groupby("is_first_shift").sum()["incidents_count"])


### First Normalization approach (observed joint probabilities)

$$
N_{ijk}*P_{ijk} = N_{ijk}*\frac{N_{ijk}}{\sum_{i=1}^{I} \sum_{j=1}^{J} \sum_{k=1}^{K} N_{ijk}}
$$

$P_{ijk}$    :_Probability of needing a vehicle of type k in a shift j in the neighbourhood i_


In [110]:
incidents_per_nta_normalized = incidents_per_nta.copy()
for unit in types_units:
    incidents_per_nta_normalized[unit+"_normalized"]=incidents_per_nta_normalized[unit]*incidents_per_nta_normalized[unit]/total_incidents
    
incidents_per_nta_normalized
    

Unnamed: 0,is_first_shift,nta,incidents_count,ladder,rescue,engine,hazardous,squad,ladder_normalized,rescue_normalized,engine_normalized,hazardous_normalized,squad_normalized
0,0,BK17,681,558,13,607,25,8,3.11364,0.00169,3.68449,0.00625,0.00064
1,0,BK21,537,512,9,487,29,8,2.62144,0.00081,2.37169,0.00841,0.00064
2,0,BK26,459,363,14,390,29,4,1.31769,0.00196,1.52100,0.00841,0.00016
3,0,BK27,505,464,9,436,20,4,2.15296,0.00081,1.90096,0.00400,0.00016
4,0,BK34,1186,1065,28,1008,76,17,11.34225,0.00784,10.16064,0.05776,0.00289
...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,1,QN52,677,663,52,563,35,28,4.39569,0.02704,3.16969,0.01225,0.00784
85,1,QN55,2025,1904,164,1646,82,91,36.25216,0.26896,27.09316,0.06724,0.08281
86,1,QN62,556,513,43,409,19,26,2.63169,0.01849,1.67281,0.00361,0.00676
87,1,SI07,1689,1557,138,1336,68,77,24.24249,0.19044,17.84896,0.04624,0.05929


In [125]:
incidents_per_nta_normalized.to_json("incidents_nta_units_needed_normalized.json")

### Second Normalization (Bayes Rule)

$$
N_{ijk}*P_{(K=k|I=i,J=j)} = N_{ijk}*\frac{P_{(I=i,J=j|K=k)} \cdot P_{(K=k)}}{P_{(I=i,J=j)}}
$$ 


In [111]:
incidents_per_nta_bayes = incidents_per_nta.copy()

#### _Prior Probabilities (a priori probabilities to use a vehicle type k in an incident)_ 

$P_{(K=k)}$

In [112]:
prior_probabilities = incidents_per_nta_bayes[['ladder', 'rescue', 'engine', 'hazardous', 'squad']].sum() / incidents_per_nta_bayes['incidents_count'].sum()
prior_probabilities

ladder       0.91712
rescue       0.05654
engine       0.82819
hazardous    0.04643
squad        0.02876
dtype: float64

#### _Observed frequencies (times a vehicle k is needed on shift j per neighbourhood i)_

$F_{(K=k,I=i,J=j)}$

In [116]:
#Joint frequencies per shift and neighbourhood
frequencies = incidents_per_nta_bayes.groupby(['is_first_shift', 'nta']).sum()

#### _Apply Bayes Rule_


$P_{(K=k|I=i,J=j)}$

In [122]:
conditional_probabilities = pd.DataFrame(index=frequencies.index)

for category in types_units:
    conditional_probabilities[f'{category}'] = (
        frequencies[category] / frequencies['incidents_count']
    ) * prior_probabilities[category]

conditional_probabilities.reset_index(inplace=True)

conditional_probabilities

Unnamed: 0,is_first_shift,nta,ladder,rescue,engine,hazardous,squad
0,0,BK17,0.751473,0.001079,0.738196,0.001704,0.000338
1,0,BK21,0.874424,0.000948,0.751077,0.002507,0.000428
2,0,BK26,0.725304,0.001725,0.703691,0.002933,0.000251
3,0,BK27,0.842661,0.001008,0.715031,0.001839,0.000228
4,0,BK34,0.823552,0.001335,0.703892,0.002975,0.000412
...,...,...,...,...,...,...,...
84,1,QN52,0.898154,0.004343,0.688731,0.002400,0.001189
85,1,QN55,0.862319,0.004579,0.673186,0.001880,0.001292
86,1,QN62,0.846192,0.004373,0.609226,0.001587,0.001345
87,1,SI07,0.845445,0.004620,0.655099,0.001869,0.001311


In [121]:
conditional_probabilities.to_json("conditional_probabilities.json")

#### _Apply normalization_

$$
N_{ijk}*P_{(K=k|I=i,J=j)}
$$

In [124]:
for vehicle in types_units:
    incidents_per_nta_bayes[vehicle] *= conditional_probabilities[vehicle]
incidents_per_nta_bayes


Unnamed: 0,is_first_shift,nta,incidents_count,ladder,rescue,engine,hazardous,squad
0,0,BK17,681,419.321809,0.014031,448.084842,0.042612,0.002703
1,0,BK21,537,447.704852,0.008528,365.774663,0.072714,0.003428
2,0,BK26,459,263.285371,0.024143,274.439431,0.085071,0.001003
3,0,BK27,505,390.994589,0.009069,311.753676,0.036776,0.000911
4,0,BK34,1186,877.082995,0.037376,709.522803,0.226121,0.007008
...,...,...,...,...,...,...,...,...
84,1,QN52,677,595.476398,0.225826,387.755622,0.084013,0.033306
85,1,QN55,2025,1641.855851,0.750963,1108.063416,0.154171,0.117611
86,1,QN62,556,434.096319,0.188026,249.173474,0.030146,0.034967
87,1,SI07,1689,1316.357160,0.637506,875.211971,0.127112,0.100958


In [126]:
incidents_per_nta_bayes.to_json("incidents_nta_units_needed_normalized_bayes.json")

### Third approach: Normalize Bayes probabilities per vehicle type

condi