In [None]:
import pandas as pd
import numpy as np

import plotly.express as px

### Reading the file where the levels of BCR::ABL1 are measured

In [None]:
# Read the Excel file with the PCR measurements (relative path).
pcr = pd.read_excel(io="../mergedPCR_0713.xls")

# Show the column labels of the loaded sheet.
pcr.columns

Changing column names 

In [None]:
# Keep only the three columns used below and give them readable names.
# The rename is chained onto the selection (instead of `inplace=True` on the
# selected frame) so pandas does not raise SettingWithCopyWarning.
data = pcr[['SID1A', 'VISNAM1A', 'BCRIS1N']].rename(
    columns={'SID1A': 'patient_id', 'VISNAM1A': 'month', 'BCRIS1N': 'BCRABL'}
)

# Number of rows per visit label.
data['month'].value_counts()

Recoding the visit labels to month numbers and dropping the visits that are not used

In [None]:
# Display the frame holding the selected, renamed columns.
data

In [None]:
# Visit label -> month number. The values stay strings at this point.
replacements = {
    'V04- END OF M.03': '3',
    'V05- END OF M.06': '6',
    'V06- END OF M.09': '9',
    'V07- END OF M.12': '12',
    'V08- END OF M.15': '15',
    'V09- END OF M.17': '17',
    'V10- END OF M.18': '18',
    'V11- END OF M.21': '21',
}

# Visits that are dropped; '21' is the recoded 'V11- END OF M.21' label.
excluded_visits = ['V01- SCREENING', 'UNSCHEDULED', 'V12- EOS', '21']

# `assign` builds a new frame instead of writing a column into `data`, which
# avoids SettingWithCopyWarning when `data` is a selection from another frame.
data = data.assign(month=data['month'].replace(replacements))

# One mask instead of four successive filters; rows with a missing month are
# kept, exactly as with the `!=` comparisons.
data = data[~data['month'].isin(excluded_visits)]

### Find the patients we have CyTOF data for

In [None]:
# Display the frame after the visit labels were recoded and filtered.
data

In [None]:
# The CSV is read without a header row, so the column names are given here.
cytof_patients = pd.read_csv(
    "../patients.csv",
    header=None,
    names=['patient_number', 'patient_id', 'batch'],
)

Removing patients without CyTOF data

In [None]:
# Display the patient table (columns: patient_number, patient_id, batch).
cytof_patients

In [None]:
# Distinct patient ids listed in `cytof_patients`.
patient_ids = list(pd.unique(cytof_patients['patient_id']))

# Keep only the measurement rows whose patient id is in that list.
has_cytof = data['patient_id'].isin(patient_ids)
data = data[has_cytof]


## Mapping patient ids to patient number

In [None]:
# Join on 'patient_id' (pandas' default inner join) so each measurement row
# also carries the other `cytof_patients` columns.
data = cytof_patients.merge(data, on='patient_id')

data


Removing patients that do not have BCR::ABL measurements at 18 months:

### Include those who are in the data up to and including month 3 and month 6, because then you can do SL on them as well

In [None]:
# One row per distinct (patient_id, patient_number) pair.
unique_patients = data.loc[:, ['patient_id', 'patient_number']].drop_duplicates()
unique_patients

In [None]:
# Rows recorded at month 18; the month is compared as the string '18' here.
is_month_18 = data['month'].eq('18')
data_with_18_months = data.loc[is_month_18]

# Patients that have at least one month-18 row ...
patients_18 = data_with_18_months['patient_id'].unique()

# ... and every row (all months) that belongs to one of those patients.
data = data.loc[data['patient_id'].isin(patients_18)]

Setting month and response to numerical values 

In [None]:
# Convert both columns to numbers; values that cannot be parsed become NaN
# (errors='coerce').
# `assign` returns a new frame, so no column is written into the filtered
# selection that `data` refers to (avoids SettingWithCopyWarning).
data = data.assign(
    BCRABL=pd.to_numeric(data['BCRABL'], errors='coerce'),
    month=pd.to_numeric(data['month'], errors='coerce'),
)

In [None]:
# Write the cleaned measurements to disk without the row index.
data.to_csv(path_or_buf="responses_all.csv", index=False)

### Plotting the response and the MR-4

In [None]:
# A value of exactly 0 cannot be drawn on a logarithmic axis, so zeros are
# plotted at this small positive value instead.
ZERO_PLOT_VALUE = 0.001
# Months that get an x-axis tick.
MONTH_TICKS = [3, 6, 9, 12, 15, 18]

unique_batches = data['batch'].unique()
figures = []

# Create one line plot per batch, with one line per patient.
# (`px` is imported in the first cell; it is not re-imported here.)
for batch in unique_batches:
    # `assign` builds a new frame instead of writing into a slice of `data`
    # (avoids SettingWithCopyWarning). Rows are ordered by month because
    # plotly connects the points of a line in the order they are given.
    df_subset = (
        data[data['batch'] == batch]
        .assign(BCRABL=lambda frame: frame['BCRABL'].replace(0, ZERO_PLOT_VALUE))
        .sort_values('month', kind='stable')
    )

    # `log_y=True` already makes the y-axis logarithmic.
    fig = px.line(df_subset, x='month', y='BCRABL', color='patient_number', log_y=True,
                  title=f'BCR-ABL% over Time by Patient for Batch {batch}', markers=True)

    # Custom axis titles and explicit x-axis ticks.
    fig.update_layout(
        yaxis_title='BCR-ABL%',
        xaxis_title='Test Time (Months)',
        xaxis=dict(
            tickmode='array',
            tickvals=MONTH_TICKS,
            ticktext=[str(month) for month in MONTH_TICKS],
        ),
    )

    # Draw the line across missing values instead of breaking it.
    fig.update_traces(connectgaps=True)

    # Solid horizontal line at y = 0.01.
    fig.add_hline(y=0.01)

    # Dashed horizontal line at y = 1.
    fig.add_hline(y=1, line_dash="dash")

    figures.append(fig)


In [None]:
# Show every per-batch figure in the order the figures were created.
# Looping over the list (instead of indexing figures[0] ... figures[5] in six
# separate cells) works for any number of batches: no IndexError when there
# are fewer than six, and no figure is left out when there are more.
for fig in figures:
    fig.show()

## Saving the 6-month and 18-month responses as datasets

In [None]:
# Rows measured at month 18.
df_response_18 = data.loc[data['month'].eq(18)]

# Count the month-18 rows that have no BCRABL value.
df_response_18['BCRABL'].isnull().sum()

In [None]:
# Month-6 rows, reduced to the patient number and the measured value.
# `.copy()` makes an independent frame, so adding a column below does not
# raise SettingWithCopyWarning.
df_response_6 = data.loc[data['month'] == 6, ['patient_number', 'BCRABL']].copy()

# CHR is 1 when BCRABL <= 1 and 0 when BCRABL > 1.
# A missing BCRABL stays missing: the earlier `0 if x > 1 else 1` labelled NaN
# as 1, because `NaN > 1` is False. When values are missing the column
# therefore holds 0.0 / 1.0 / NaN.
bcrabl_6 = df_response_6['BCRABL']
df_response_6['CHR'] = (bcrabl_6 <= 1).astype(int).where(bcrabl_6.notna())

# NOTE(review): this cell used to call `df_response_6['patient_number'].astype(str)`
# without assigning the result, so the column was never converted. The dtype is
# left unchanged here to keep the pickle compatible with existing readers;
# confirm whether string patient numbers are actually wanted.

df_response_6.to_pickle('CHR_6months.pkl')

In [None]:
# Month-18 rows, reduced to the patient number and the measured value.
# `.copy()` makes an independent frame, so adding a column below does not
# raise SettingWithCopyWarning.
df_response_18 = data.loc[data['month'] == 18, ['patient_number', 'BCRABL']].copy()

# MR4 is 1 when BCRABL <= 0.01 and 0 when BCRABL > 0.01.
# A missing BCRABL stays missing: the earlier `0 if x > 0.01 else 1` labelled
# NaN as 1, because `NaN > 0.01` is False. When values are missing the column
# therefore holds 0.0 / 1.0 / NaN.
bcrabl_18 = df_response_18['BCRABL']
df_response_18['MR4'] = (bcrabl_18 <= 0.01).astype(int).where(bcrabl_18.notna())

# NOTE(review): this cell used to call `df_response_18['patient_number'].astype(str)`
# without assigning the result, so the column was never converted. The dtype is
# left unchanged here to keep the pickle compatible with existing readers;
# confirm whether string patient numbers are actually wanted.

df_response_18.to_pickle('MR4_18months.pkl')

In [None]:
# Display the month-6 response table.
df_response_6