In [None]:
import pandas as pd 
import numpy as np 

In [None]:
# Toy patient table with four feature columns; `stage` is the categorical
# column that gets one-hot encoded further down.
df = pd.DataFrame(
    dict(
        ascites=[0, 1, 0, 1],
        edema=[0.5, 0.0, 1.0, 0.5],
        stage=[3, 4, 3, 4],
        cholesterol=[200.5, 180.2, 190.5, 210.3],
    )
)
df

Unnamed: 0,ascites,edema,stage,cholesterol
0,0,0.5,3,200.5
1,1,0.0,4,180.2
2,0,1.0,3,190.5
3,1,0.5,4,210.3


In [None]:
# One-hot encode `stage`: the column is replaced by one float indicator
# column per distinct value (here `stage_3` and `stage_4`).
df_stage = pd.get_dummies(data=df, columns=["stage"], dtype=np.float64)

# State the expectation explicitly rather than evaluating a column selection
# whose result is discarded (only a cell's last expression is displayed).
assert {"stage_3", "stage_4"} <= set(df_stage.columns)
df_stage

Unnamed: 0,ascites,edema,cholesterol,stage_3,stage_4
0,0,0.5,200.5,1.0,0.0
1,1,0.0,180.2,0.0,1.0
2,0,1.0,190.5,1.0,0.0
3,1,0.5,210.3,0.0,1.0


In [None]:
# Keep a single indicator for the two-valued `stage` feature: `stage_4`
# alone already determines `stage_3`, so the latter is removed.
df_stage_drop_first = df_stage.drop(["stage_3"], axis="columns")
df_stage_drop_first

Unnamed: 0,ascites,edema,cholesterol,stage_4
0,0,0.5,200.5,0.0
1,1,0.0,180.2,1.0
2,0,1.0,190.5,0.0
3,1,0.5,210.3,1.0


In [None]:
Let's say we fit the hazard function
$$\lambda(t, x) = \lambda_0(t)\, e^{\theta^T X_i}$$

so that we have the coefficients $\theta$ for the features in $X_i$.

If you have a new patient, let's predict their hazard $\lambda(t, x)$.

In [None]:
# Baseline hazard (held constant here) and the fitted coefficient vector,
# one coefficient per feature column.
lambda_0 = 1
coef = np.array((0.5, 2.0), dtype=np.float64)
coef

array([0.5, 2. ])

In [None]:
# Feature matrix for three new patients, one row per patient.
X = pd.DataFrame(
    [
        [20, 180],
        [30, 220],
        [40, 170],
    ],
    columns=["age", "cholesterol"],
)
X

Unnamed: 0,age,cholesterol
0,20,180
1,30,220
2,40,170


In [None]:
# Linear risk score theta^T x, one value per row (patient) of X.
X.to_numpy() @ coef

array([370., 455., 360.])

In [None]:
# Hazard per patient: the baseline hazard scaled by exp(theta^T x).
risk_scores = X.to_numpy() @ coef
lambdas = lambda_0 * np.exp(risk_scores)

# `assign` returns a new frame, so X itself is left unmodified.
patients_df = X.assign(hazards=lambdas)
patients_df

Unnamed: 0,age,cholesterol,hazards
0,20,180,4.886054e+160
1,30,220,4.0178090000000006e+197
2,40,170,2.218265e+156


In [None]:
# Toy survival data, one row per patient: (time, event, risk_score).
# event == 1 means the event was observed; event == 0 means censored.
df = pd.DataFrame(
    [
        (2, 1, 20),
        (4, 1, 40),
        (2, 1, 40),
        (4, 1, 20),
        (2, 0, 20),
        (4, 1, 40),
        (2, 1, 40),
        (4, 0, 20),
    ],
    columns=["time", "event", "risk_score"],
)
df

Unnamed: 0,time,event,risk_score
0,2,1,20
1,4,1,40
2,2,1,40
3,4,1,20
4,2,0,20
5,4,1,40
6,2,1,40
7,4,0,20


In [None]:
A pair may be permissible if at least one patient is not censored.
If both patients in a pair are censored, then it is definitely not a permissible pair.

In [None]:
# Show patients 0 and 1 side by side as the pair under discussion.
df.iloc[[0, 1]]

Unnamed: 0,time,event,risk_score
0,2,1,20
1,4,1,40


In [None]:
# A pair can only be ruled out up front when neither patient had an event;
# otherwise it may still turn out to be permissible.
neither_had_event = df.loc[0, "event"] != 1 and df.loc[1, "event"] != 1
if neither_had_event:
    print("Definitely not permissible pair: 0 and 1")
else:
    print("May be a permissible pair: 0 and 1")

May be a permissible pair: 0 and 1


In [None]:
If both patients had an event (neither one was censored), this is definitely a permissible pair.

In [None]:
# Classify the pair (0, 1) from the event indicators alone:
#   both had an event -> definitely permissible
#   exactly one event -> may be permissible (depends on the times)
#   both censored     -> definitely not permissible
event_0 = df.loc[0, "event"]
event_1 = df.loc[1, "event"]

if event_0 == 1 and event_1 == 1:
    print("Definitely a permissible pair: 0 and 1")
elif event_0 == 1 or event_1 == 1:
    print("May be a permissible pair: 0 and 1")
else:
    # Previously this case fell into the "may be permissible" branch.
    print("Definitely not permissible pair: 0 and 1")

Definitely a permissible pair: 0 and 1


In [None]:
If we know that one patient was censored and one had an event,
then we can check whether the censored patient's time is at least as great as the uncensored patient's time.
If so, it's a permissible pair as well.

In [None]:
# Patient ids for the mixed pair under discussion.
censored_id, uncensored_id = 7, 6

# The time comparison only applies when one patient is censored and the other
# had an event, so verify the event flags instead of assuming them.
is_mixed_pair = (
    df.loc[censored_id, "event"] == 0 and df.loc[uncensored_id, "event"] == 1
)
censored_lasted_as_long = (
    df.loc[censored_id, "time"] >= df.loc[uncensored_id, "time"]
)

if is_mixed_pair and censored_lasted_as_long:
    print(
        f"Permissible pair: Censored patient {censored_id} lasted at least "
        f"as long as uncensored patient {uncensored_id}"
    )
else:
    print("Not a permissible pair")

Permissible pair: Censored patient 7 lasted at least as long as uncensored patient 6
