In [None]:
import numpy as np 
import pandas as pd 
import pydicom # this one is to read the dicom files 
import os 
import scipy.ndimage
import matplotlib.pyplot as plt 
import sklearn
from sklearn.preprocessing import normalize
from tqdm.auto import tqdm 


import torch
import torch.nn as nn
import torch.nn.functional as F

from skimage import measure, morphology 
#from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from sklearn.preprocessing import normalize

#from mpl_toolkits.mplot3d.art3d import Poly3DCollection 
from torch.utils.data import DataLoader 
from torch.utils.data import TensorDataset 


In [None]:
def csv_preprocess(data):
    """Expand per-visit records into (baseline visit, target visit) rows.

    For every patient the earliest visit (smallest ``Weeks`` after sorting) is
    used as the baseline. Each visit of that patient, the baseline visit
    included, produces one output row that repeats the baseline features and
    stores that visit's week and FVC in ``Week`` / ``actual_FVC``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``Patient``, ``Weeks``, ``FVC``, ``Percent``,
        ``Age``, ``Sex`` and ``SmokingStatus``. The frame is left untouched;
        all helper columns are added to an internal copy.

    Returns
    -------
    pandas.DataFrame
        One row per input row, ordered by ``Patient`` then ``Weeks`` with a
        fresh 0..n-1 index. Columns, in order: ``Patient``, ``base_Weeks``,
        ``base_FVC``, ``Age``, ``Male``, ``Female``, ``Ex-smoker``,
        ``Never smoked``, ``Currently smokes``, ``Week``, ``Healthy-FVC``,
        ``actual_FVC``, followed by indicator columns for any other
        ``Sex`` / ``SmokingStatus`` value found in ``data``. Missing values,
        including indicators for categories that never occur, are set to 0.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    visits = data.copy()

    # Healthy FVC: the FVC value that would correspond to Percent == 100.
    visits['Healthy-FVC'] = ((visits['FVC'] * 100) / visits['Percent']).round()
    engineered = ['Healthy-FVC']

    # One 0/1 indicator column per observed value of Sex and SmokingStatus.
    for col in ('Sex', 'SmokingStatus'):
        for mod in visits[col].unique():
            engineered.append(mod)
            visits[mod] = (visits[col] == mod).astype(int)

    visits = (
        visits[['Patient', 'Weeks', 'FVC', 'Age'] + engineered]
        .sort_values(['Patient', 'Weeks'], ascending=True)
        .reset_index(drop=True)
        .rename(columns={'Weeks': 'base_Weeks', 'FVC': 'base_FVC'})
    )

    # After sorting, the first row of each patient is that patient's baseline.
    baseline = visits.drop_duplicates(subset='Patient', keep='first')

    # Every visit becomes a prediction target paired with its patient's baseline.
    # A left merge keeps the (Patient, Weeks) ordering of the visits and replaces
    # the former row-by-row concat, which was quadratic in the number of rows.
    targets = visits[['Patient', 'base_Weeks', 'base_FVC']].rename(
        columns={'base_Weeks': 'Week', 'base_FVC': 'actual_FVC'}
    )
    paired = targets.merge(baseline, on='Patient', how='left')

    leading = [
        'Patient', 'base_Weeks', 'base_FVC', 'Age',
        'Male', 'Female', 'Ex-smoker', 'Never smoked', 'Currently smokes',
        'Week', 'Healthy-FVC', 'actual_FVC',
    ]
    # Indicator columns for categories outside the fixed list go at the end.
    extras = [name for name in paired.columns if name not in leading]

    # reindex creates the fixed indicator columns that are absent from `data`
    # (as NaN); fillna then turns them into zeros.
    return paired.reindex(columns=leading + extras).fillna(0)

In [None]:
# #################################################### Network Model########################################
# class DATA(nn.Module):
# 	def __init__(self):
# 		super(DATA, self).__init__()
# 		self.data_net1=nn.Sequential(
# 						nn.Linear(10,42),
# 						nn.ReLU(),            
# 						nn.Linear(42,64),
# 						nn.ReLU(),
# 						#nn.Dropout(0.5),
# 						nn.Linear(64,118),
# 						nn.ReLU()
# 						#nn.Dropout(0.5)
# 						)
# 		self.data_net2=nn.Sequential(
# 						nn.Linear(128,256),
# 						nn.ReLU(),
# 						#nn.Dropout(0.5),
# 						nn.Linear(256,502),
# 						nn.ReLU()
# 						#nn.Dropout(0.5)
# 						)
# 		self.data_net3=nn.Sequential(
# 						nn.Linear(512,256),
# 						nn.ReLU(),
# 						#nn.Dropout(0.5),
# 						nn.Linear(256,118),
# 						nn.ReLU()
# 						#nn.Dropout(0.5)
# 						)
# 		self.data_net4=nn.Sequential(
# 						nn.Linear(748,256),
# 						nn.ReLU(),
# 						nn.Linear(256,64),
# 						nn.ReLU(),
# 						#nn.Dropout(0.5),
# 						nn.Linear(64,2),
# 						nn.ReLU()
# 						)

# 	def forward(self, data_i):
# 		out1 = self.data_net1(data_i)
# 		out2 = torch.cat((data_i,out1), dim=-1)
# 		out2 = self.data_net2(out2)
# 		out3 = torch.cat((data_i,out2), dim=-1)
# 		out3 = self.data_net3(out3)
# 		out4 = torch.cat((data_i,out1,out2,out3), dim=-1)
# 		out = self.data_net4(out4)
# 		return out

#################################################### Network Model########################################
class DATA(nn.Module):
    """Fully connected regressor over the 10 tabular features.

    The layers live in a single ``nn.Sequential`` called ``data_net1``:
    Linear layers of widths 10 -> 32 -> 64 -> 128 -> 64 -> 2, each followed by
    a ReLU (the output layer included, so both outputs are non-negative).
    """

    # Input width followed by the output width of every Linear layer.
    _WIDTHS = (10, 32, 64, 128, 64, 2)

    def __init__(self):
        super().__init__()
        stack = []
        for fan_in, fan_out in zip(self._WIDTHS[:-1], self._WIDTHS[1:]):
            # Linear modules land on even Sequential indices and ReLUs on odd
            # ones, which keeps the parameter names data_net1.0, .2, .4, ...
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        self.data_net1 = nn.Sequential(*stack)

    def forward(self, data_i):
        """Map a ``(..., 10)`` feature tensor to a ``(..., 2)`` output tensor."""
        return self.data_net1(data_i)

In [None]:
# Scalar float32 constants (70 and 1000). In this cell they are referenced only
# by the commented-out sigma / delta clipping lines inside score().
C1 = torch.tensor(70, dtype=torch.float32)
C2 = torch.tensor(1000, dtype=torch.float32)

def score(y_true, y_pred):
    """Mean of ``sqrt(2) * |y - fvc| / sigma + log(sqrt(2) * sigma)``.

    Parameters
    ----------
    y_true : torch.Tensor
        2-D tensor; column 0 holds the target FVC values.
    y_pred : torch.Tensor
        2-D tensor; column 0 is read as sigma and column 1 as the predicted FVC.

    Returns
    -------
    torch.Tensor
        Scalar tensor, the average of the per-row metric.

    Notes
    -----
    No clipping is applied: sigma is not floored and the absolute error is
    not capped.
    """
    root_two = torch.tensor(2.).sqrt()

    sigma, fvc_pred = y_pred[:, 0], y_pred[:, 1]
    abs_error = torch.abs(y_true[:, 0] - fvc_pred)

    per_row = (abs_error / sigma) * root_two + torch.log(sigma * root_two)
    return per_row.mean()



def quartile_loss(y_true, y_pred):
    """Return the loss for a batch; at present this is exactly ``score(y_true, y_pred)``.

    NOTE(review): the original inline remarks "0.65" / "0.35" look like weights
    of a former combined loss; nothing in this function uses them.
    """
    return score(y_true, y_pred)

In [None]:
#     return npEval
######### ----------------- Making the Evaluation Data -------------------- #######
def make_eval_data(npEval, model, device = 'cuda'):
    """Run ``model`` on every row of ``npEval`` and attach its predictions.

    Parameters
    ----------
    npEval : pandas.DataFrame
        Must contain the ten feature columns ``base_Weeks``, ``base_FVC``,
        ``Age``, ``Male``, ``Female``, ``Ex-smoker``, ``Never smoked``,
        ``Currently smokes``, ``Week`` and ``Healthy-FVC``. The frame is
        modified in place: columns ``FVC`` and ``Confidence`` are
        added or overwritten.
    model : torch.nn.Module
        Maps a ``(rows, 10)`` float tensor to a ``(rows, 2)`` tensor whose
        column 0 is the confidence (sigma) and column 1 the FVC prediction.
        It is switched to eval mode and, when CUDA is used, moved to the GPU.
    device : str
        ``'cuda'`` (default) runs on the GPU when one is available; with any
        other value neither the model nor the inputs are moved.

    Returns
    -------
    pandas.DataFrame
        The same ``npEval`` object, now carrying ``FVC`` and ``Confidence``.
    """
    feature_columns = [
        'base_Weeks', 'base_FVC', 'Age',
        'Male', 'Female', 'Ex-smoker', 'Never smoked', 'Currently smokes',
        'Week', 'Healthy-FVC',
    ]
    x_features = torch.tensor(npEval[feature_columns].values).float()

    use_cuda = torch.cuda.is_available() and device == 'cuda'
    if use_cuda:
        model.to('cuda')
        x_features = x_features.cuda()
    model.eval()

    # One batched forward pass instead of a batch-size-1 loop over rows;
    # no_grad avoids building an autograd graph during inference. An empty
    # frame yields a (0, 2) result instead of an IndexError.
    with torch.no_grad():
        predictions = model(x_features).to('cpu').numpy()  # shape (rows, 2)

    npEval['FVC'] = predictions[:, 1]
    npEval['Confidence'] = predictions[:, 0]

    return npEval

In [None]:
# Read the three competition CSVs: training records, test baselines and the
# sample submission that lists every Patient_Week key to predict.
data_train, data_test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/osic-pulmonary-fibrosis-progression/train.csv",
        "../input/osic-pulmonary-fibrosis-progression/test.csv",
        "../input/osic-pulmonary-fibrosis-progression/sample_submission.csv",
    )
)


In [None]:
# Break each "<patient>_<week>" key into its parts once, then derive the
# Patient id (text before the first underscore) and the integer week.
patient_week_parts = submission['Patient_Week'].apply(lambda key: key.split('_'))
submission['Patient'] = patient_week_parts.apply(lambda parts: parts[0])
submission['Weeks'] = patient_week_parts.apply(lambda parts: parts[1]).astype(int)

# Order the rows by patient and week and renumber the index.
submission = (
    submission
    .sort_values(by=['Patient', 'Weeks'], ascending=True)
    .reset_index(drop=True)
)


In [None]:
# Attach every requested prediction week to the patient's test record.
# After the merge the "_x" columns come from data_test (the measured visit) and
# the "_y" columns from the submission template (the week to predict).
merge = (
    pd.merge(data_test, submission, on=['Patient'], how='left')
    .sort_values(['Patient', 'Weeks_y'])
    .reset_index(drop=True)
    .drop(['FVC_y'], axis=1)
    .rename(columns={'FVC_x': 'base_FVC', 'Weeks_y': 'Week', 'Weeks_x': 'base_Weeks'})
)

# Rebind both names to row-aligned views of the merged table: the model inputs
# and the submission skeleton (its FVC column starts out as the measured FVC).
data_test = merge.loc[
    :,
    ['Patient', 'base_Weeks', 'base_FVC', 'Percent', 'Age', 'Sex', 'SmokingStatus', 'Week'],
]
submission = (
    merge.loc[:, ['Patient_Week', 'base_FVC', 'Confidence']]
    .rename(columns={'base_FVC': 'FVC'})
)

In [None]:
# Build the model input table for the test rows.
test_features = data_test.copy()

# Healthy FVC: the FVC value that would correspond to Percent == 100.
test_features['Healthy-FVC'] = (
    (test_features['base_FVC'] * 100) / test_features['Percent']
).round()

# One 0/1 indicator column per Sex / SmokingStatus value present in the test data.
COLS = ['Sex', 'SmokingStatus']
for col in COLS:
    for mod in test_features[col].unique():
        test_features[mod] = (test_features[col] == mod).astype(int)

# Indicator columns the model expects, whether or not the value occurs here.
FE1 = ['Male', 'Female', 'Ex-smoker', 'Never smoked', 'Currently smokes']
model_columns = ['Patient', 'base_Weeks', 'base_FVC', 'Age', 'Healthy-FVC'] + FE1 + ['Week']

# reindex selects the model columns and creates the indicators missing from the
# test data (as NaN); fillna turns them into zeros. This replaces the former
# "empty frame + DataFrame.append" construction: DataFrame.append no longer
# exists in pandas >= 2.0.
data_test = test_features.reindex(columns=model_columns).fillna(0)
del test_features

In [None]:
# Restore the trained weights. map_location='cpu' lets the checkpoint load on a
# kernel without a GPU even if it was saved from CUDA tensors; make_eval_data
# moves the model to the GPU itself when one is available.
model = DATA()
model.load_state_dict(
    torch.load(
        '../input/meta-42/Epoch42_Score6.829206974134532_Acc0.9300930269189462.pth',
        map_location='cpu',
    )
)

# Predict FVC and confidence for the final test set. A copy is passed so that
# data_test itself does not receive the prediction columns.
test = make_eval_data(data_test.copy(), model)


In [None]:
# FVC and confidence correction at base week: where the predicted week is the
# week of the measured visit, use the measured FVC and a confidence of 70.
# A boolean mask with .loc avoids feeding index labels to .iloc as positions
# and is a no-op (not an IndexError) for a patient without a base-week row.
at_base_week = test['Week'] == test['base_Weeks']
test.loc[at_base_week, 'FVC'] = test.loc[at_base_week, 'base_FVC']
test.loc[at_base_week, 'Confidence'] = 70

In [None]:
# Floor the predicted confidence: every value below 70 is raised to 70.
# (No week-bias shift is applied to base_Weeks / Week.)
below_floor = test['Confidence'].lt(70)
test.loc[below_floor, 'Confidence'] = 70

In [None]:
# Copy the final predictions into the submission skeleton (values are matched
# on the row index) and write the file without the index column.
for out_col in ('FVC', 'Confidence'):
    submission.loc[:, out_col] = test[out_col]

submission.to_csv('submission.csv', index=False)