In [None]:
import pandas as pd

The dataset contains cases from a study that was conducted between 1958 and 1970 at the University of Chicago's Billings Hospital on the survival of patients who had undergone surgery for breast cancer.

In [None]:
# The raw CSV has no header row. Without header=None pandas would consume the
# first patient record (30, 64, 1, 1) as column labels and drop it from the data,
# so the columns are named explicitly at load time instead.
# Columns: age at operation, year of operation (year - 1900),
# number of positive axillary nodes, survival status (1 = survived 5+ years, 2 = died within 5 years).
address = '../input/habermans-survival-data-set/haberman.csv'
df = pd.read_csv(
    address,
    header=None,
    names=['Age', 'Op_Year', 'axil_nodes', 'Status'],
)

In [None]:
# Preview only the first rows rather than rendering the whole frame.
df.head()

In [None]:
# Print a summary of the frame (column names, non-null counts, dtypes).
df.info()

The data description is available on the [Kaggle dataset page](https://www.kaggle.com/gilsousa/habermans-survival-data-set). Based on it, let's give the columns descriptive names:

- 0: Age of patient at time of operation (numerical)
- 1: Patient's year of operation (year - 1900, numerical)
- 2: Number of positive axillary nodes detected (numerical)
- 3: Survival status (class attribute)
  - 1 = the patient survived 5 years or longer
  - 2 = the patient died within 5 years


In [None]:
# Map the labels pandas inferred from the file to descriptive column names.
column_names = {
    '30': 'Age',
    '64': 'Op_Year',
    '1': 'axil_nodes',
    '1.1': 'Status',
}
df = df.rename(columns=column_names)

In [None]:
# Bar chart of how many operations fall in each operation year.
df['Op_Year'].value_counts().plot.bar()

In [None]:
# Bar chart of how often each positive-axillary-node count occurs.
df['axil_nodes'].value_counts().plot.bar()

In [None]:
# Bar chart of the survival-status class balance (1 = survived 5+ years, 2 = died within 5 years).
df['Status'].value_counts().plot.bar()

### Survival Analysis

In [None]:
!pip install lifelines

### Kaplan Meier Estimator

In [None]:
from lifelines import KaplanMeierFitter

In [None]:
# Kaplan-Meier estimator instance; it is re-fitted in the cells below.
kapmei = KaplanMeierFitter() 

In [None]:
# Status is coded 1 = survived 5+ years, 2 = died within 5 years, whereas the
# event indicator must be True/1 for an observed event and False/0 for censoring.
# Passing the raw 1/2 codes would mark every patient as an event, so convert
# the column to an explicit "died" flag first.
# NOTE(review): the duration used here is the age at operation, not a follow-up
# time, so the curve is expressed over age - confirm this is the intended reading.
kapmei.fit(
    df['Age'],
    event_observed=(df['Status'] == 2),
    label='Kaplan Meier Estimation',
).plot(ci_show=False)

#### With grouping

In [None]:
# Boolean mask: patients with at least one positive axillary node.
group1 = df['axil_nodes'].ge(1)

In [None]:
# Boolean mask: patients with no positive axillary nodes.
group2 = df['axil_nodes'].lt(1)

In [None]:
# Short aliases for the duration and status columns used by the grouped fit.
Age, Status = df['Age'], df['Status']

In [None]:
# Status is coded 1 = survived 5+ years, 2 = died within 5 years. The event
# indicator must be True only for deaths; the raw 1/2 codes are both truthy and
# would mark every patient as an event, so compare against 2 explicitly.
# Curve for patients with positive axillary nodes.
kapmei.fit(Age[group1], event_observed=(Status[group1] == 2), label='Positive axillary detected')
kapmei1 = kapmei.plot()
# Curve for patients without positive nodes, drawn on the same axes for comparison.
kapmei.fit(Age[group2], event_observed=(Status[group2] == 2), label='No positive axillary nodes detected ')
kapmei.plot(ax = kapmei1)

### Cox model

In [None]:
from lifelines import CoxPHFitter

In [None]:
# Cox proportional-hazards model instance; fitted in the next cell.
cox = CoxPHFitter()

In [None]:
# The event column must be True/1 for an observed event and False/0 for censoring.
# Status is coded 1 = survived 5+ years, 2 = died within 5 years, so the raw codes
# would flag every row as an event. Fit on a copy whose Status is an explicit
# "died" indicator; the original frame is left untouched.
# NOTE(review): duration_col is the age at operation, not a follow-up time - confirm intended.
cox_input = df.assign(Status=df['Status'] == 2)
cox.fit(cox_input, duration_col='Age', event_col='Status').plot()