In [None]:
###=================================== Importing the required Modules ======================================================###
import numpy as np 
import pandas as pd 
import pydicom # this one is to read the dicom files 
import os 
import scipy.ndimage
import matplotlib.pyplot as plt 
import sklearn
from sklearn.preprocessing import normalize
from tqdm.auto import tqdm 


import torch
import torch.nn as nn
import torch.nn.functional as F

from skimage import measure, morphology 
#from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from sklearn.preprocessing import normalize

#from mpl_toolkits.mplot3d.art3d import Poly3DCollection 
from torch.utils.data import DataLoader 
from torch.utils.data import TensorDataset 

In [None]:
### Root folder that holds one sub-folder of DICOM slices per patient ID.
### make_dict_with_slices() and save_array() build per-patient scan paths from it.
TRAIN_FOLDER = '../data/train'




# Load the scans in given folder path 

def load_scan(path): # Here path == (../input/osic-pulmonary-fibrosis-progression/train/patientId)
	"""Read every DICOM file found in ``path`` and return the slices in scan order.

	Parameters
	----------
	path : str
		Folder containing the DICOM files of one patient.

	Returns
	-------
	list
		One ``pydicom.dcmread`` dataset per file. The list is ordered by the
		z component of ``ImagePositionPatient``; when that ordering cannot be
		computed the (sorted) filename order is kept instead.

	Raises
	------
	Whatever ``os.listdir`` / ``pydicom.dcmread`` raise for an unreadable
	folder or file; read errors are not swallowed here.
	"""
	# Read each file exactly once, in sorted filename order, so that the
	# fallback ordering (and the order of z-position ties) is deterministic.
	files = sorted(os.listdir(path))
	slices = [pydicom.dcmread(os.path.join(path, s)) for s in files]

	try:
		slices.sort(key = lambda x:float(x.ImagePositionPatient[2]))
	except Exception:
		# Position tag missing or unusable on at least one slice: list.sort
		# leaves the list untouched when the key function raises, so the
		# filename order from above is what gets returned.
		pass

	return slices


### Load the slices of all patient and make a dict
'''
patientIDs = ['ID00007637202177411956430', 'ID00009637202177434476278']
'''
def make_dict_with_slices(patientIDs): # this will be very huge memory consuming . don't use it 
	"""Load the scans of several patients from ``TRAIN_FOLDER`` into a dict.

	Every scan is kept in memory at the same time, so this is only suitable
	for a handful of patients.

	Parameters
	----------
	patientIDs : iterable of str
		Names of the patient sub-folders inside ``TRAIN_FOLDER``.

	Returns
	-------
	(dict, list)
		``patient_slices_dict`` maps patient ID -> result of ``load_scan``;
		``failed_slices_patiendIDs`` lists the IDs whose scan could not be loaded.
	"""
	patient_slices_dict = {}
	failed_slices_patiendIDs = []

	for patientID in patientIDs:
		path = os.path.join(TRAIN_FOLDER, patientID)

		try:
			patient_slices_dict[patientID] = load_scan(path)
		except Exception:
			# Best effort: remember the failure and carry on. Catching
			# Exception (not a bare except) keeps Ctrl-C / SystemExit working.
			failed_slices_patiendIDs.append(patientID)

	return patient_slices_dict, failed_slices_patiendIDs 





####================= Taking the data into Hounsfield Unit (HU) =============
'''
`slices` is a list of datasets returned by pydicom.dcmread(), one per DICOM slice
'''
def get_pixels_hu(slices):
	"""Stack the pixel data of ``slices`` and convert it to Hounsfield units.

	Parameters
	----------
	slices : sequence
		Objects exposing ``pixel_array``, ``RescaleIntercept`` and
		``RescaleSlope`` (e.g. the datasets returned by ``load_scan``).

	Returns
	-------
	numpy.ndarray
		int16 array of shape ``(len(slices), rows, cols)``. If the conversion
		fails part-way a message is printed and the array is returned in
		whatever state it had reached (best effort, as before).
	"""
	image = np.stack([s.pixel_array for s in slices]).astype(np.int16)
	try:
		# Set outside-of-scan pixels (stored as <= -2000) to 0
		image[image <= -2000] = 0
		# Apply each slice's own linear rescale: HU = slope * raw + intercept
		for slice_number, dicom_slice in enumerate(slices):
			intercept = dicom_slice.RescaleIntercept
			slope = dicom_slice.RescaleSlope

			if slope != 1:
				# Scale in float64; assigning back into the int16 volume
				# casts the result to int16 again.
				image[slice_number] = slope * image[slice_number].astype(np.float64)
			image[slice_number] += np.int16(intercept)
	except Exception:
		# Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate
		print('HU conversion Failed!!')

	# here return a 3d numpy array 
	return np.array(image, dtype = np.int16)


#### ============================ visualization of data =========================
'''
Input is either a list of pydicom.dcmread() datasets or a 3-D numpy array
'''
def plot_show_slice(slices):
	"""Show the first 16 slices of a scan on a 4x4 grid.

	Parameters
	----------
	slices : numpy.ndarray or sequence
		Either a volume that is already a numpy array, or anything accepted by
		``get_pixels_hu`` (which is then used to convert it to an HU volume).

	Prints the number of slices and the in-plane image shape, then displays
	the figure with ``plt.show()``. Returns nothing.
	"""
	if isinstance(slices, np.ndarray):
		first_patient_pixels = slices
	else:
		first_patient_pixels = get_pixels_hu(slices) ## conversion to np array and HU unit 

	print('Number of Total Slices in this Scan:', len(first_patient_pixels))
	# Read the shape from the converted volume: the raw DICOM list has no
	# .shape, which previously made this always print BLANK for DICOM input.
	if first_patient_pixels.ndim >= 3:
		print('Shape of the the Image is:', first_patient_pixels.shape[1], first_patient_pixels.shape[2])
	else:
		print('Shape of the the Image is:BLANK')

	fig = plt.figure(figsize=(10,10))
	for i, slice_pixels in enumerate(first_patient_pixels[:16]):
		ax = fig.add_subplot(4,4, i+1)
		ax.imshow(slice_pixels, cmap='gray')
	plt.show()

#### =================== Make the array of desired shape ======================
'''
Input `slices` is a numpy array, not a list of pydicom.dcmread() datasets.
Returns a 3-D numpy array.
'''
def resize_along_zaxis(slices, target_dimension=30):
	"""Resample a volume along its first axis to ``target_dimension`` slices.

	The two remaining axes are left at zoom factor 1. When the volume already
	has the requested depth the input object itself is returned.
	"""
	depth = len(slices)
	if depth != target_dimension:
		scale = float(target_dimension) / float(depth)
		return scipy.ndimage.zoom(slices, [scale, 1., 1.])
	# Nothing to do: hand back the very same array
	return slices

####### ================= Making array of desired shape along all axes ============= 
'''
input: 3d numpy array
output: 3d numpy array 

'''

def resize_along_allaxis(slices, target_dimensionZ=30, target_dimensionY= 100, target_dimensionX= 100):
	"""Resample a 3-D volume to ``(target_dimensionZ, target_dimensionY, target_dimensionX)``.

	Each axis is zoomed by target/current with ``scipy.ndimage.zoom`` using
	``mode='nearest'``. If the volume already has the requested shape the
	input object itself is returned.
	"""
	wanted = (target_dimensionZ, target_dimensionY, target_dimensionX)
	current = (slices.shape[0], slices.shape[1], slices.shape[2])
	if wanted == current:
		return slices

	# One zoom factor per axis, in (Z, Y, X) order
	factors = [float(new) / float(old) for new, old in zip(wanted, current)]
	return scipy.ndimage.zoom(slices, factors, mode='nearest')


# Intensity window used by image_normalize(): values at or below MIN_BOUND map
# to 0, values at or above MAX_BOUND map to 1.
# NOTE(review): presumably Hounsfield units, since callers pass get_pixels_hu() output.
MIN_BOUND = -1000.0
MAX_BOUND = 400.0

def image_normalize(image):
    """Linearly rescale ``image`` from [MIN_BOUND, MAX_BOUND] to [0, 1] and clamp.

    Returns a new floating-point array; the argument is not modified.
    """
    scaled = (image - MIN_BOUND) / (MAX_BOUND - MIN_BOUND)
    # Clamp in place on the freshly computed array
    np.clip(scaled, 0., 1., out=scaled)
    return scaled
##### ======================== Saving the data in npy format =========================== ### 
'''
patient_folder = './osic-pulmonary-fibrosis-progression/train'
			### where the patientId folder and their slices in it 
output_folder = 'Relative path with respect to PWD where data will be saved'
'''
def save_array(patientID_folder, output_folder,Z=100,Y=200,X=200): #patientID_folder, output_folder
	"""Convert every patient scan in ``patientID_folder`` to a uint8 ``.npy`` volume.

	For each patient sub-folder the scan is loaded, converted with
	``get_pixels_hu``, resized to ``(Z, Y, X)``, normalised with
	``image_normalize``, scaled to 0..255 and saved as
	``<output_folder>/<patientID>.npy``.

	Parameters
	----------
	patientID_folder : str
		Folder containing one sub-folder of DICOM files per patient.
	output_folder : str
		Existing folder that receives the ``.npy`` files.
	Z, Y, X : int
		Target volume shape.

	Raises
	------
	FileNotFoundError
		If ``output_folder`` does not exist (a subclass of ``Exception``, so
		callers catching the previous bare ``Exception`` still work).

	A patient that cannot be loaded or converted is reported with a printed
	message and skipped.
	"""
	patientIDs = sorted(os.listdir(patientID_folder))
	Save_dir = output_folder # './trainset'
	if not os.path.exists(Save_dir):
		print('The output directory doesnt exists')
		raise FileNotFoundError('The output directory doesnt exists: %s' % Save_dir)

	for patientID in patientIDs:
		# Load from the folder that was listed above; the previous code read
		# from the global TRAIN_FOLDER regardless of the argument.
		path = os.path.join(patientID_folder, patientID)
		try:
			slices = load_scan(path)
			image_array = get_pixels_hu(slices) # HU unit conversion + nparray
			ctimage_resizedAll = resize_along_allaxis(image_array, target_dimensionX=X,
													target_dimensionY=Y,target_dimensionZ=Z)
			image=(image_normalize(ctimage_resizedAll)*255.0).astype('uint8')

			np.save(os.path.join(Save_dir, patientID+'.npy'), image)
		except Exception:
			# Per-patient best effort; Exception (not bare except) so that
			# Ctrl-C still interrupts a long conversion run.
			print('PatientId:%s couldnt be save and converted'%(patientID))

### ========================== Loading the data from npy format ====================== #### 
def load_array(path): ### path='./traindataset/ID00052637202186188008618.npy'
	"""Load a saved volume with ``np.load``, appending ``.npy`` when missing.

	Parameters
	----------
	path : str
		File path, with or without the ``.npy`` extension.

	Returns
	-------
	numpy.ndarray or list
		The loaded array, or an empty list when loading fails (a message
		naming the file stem is printed in that case).
	"""
	npy_path = path if path.endswith('.npy') else path + '.npy'
	try:
		return np.load(npy_path)
	except Exception:
		# Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
		# The message shows only the file stem (folders and extension stripped).
		print('The file in the Path:%s doesnetexists!!'%(npy_path.split('\\')[-1].split('/')[-1].split('.')[0]))
		return []

### ========================== Reading one patient's DICOM scan into a normalised uint8 volume ====================== ####
def read_image(dir_name,patientid,Z=100,Y=200,X=200): #patientID_folder, output_folder
	"""Load one patient's scan and return it as a ``(Z, Y, X)`` uint8 volume.

	The scan is resized to ``Z + 10`` slices and the first and last 5 slices
	are then dropped, so the returned volume has ``Z`` slices.

	Parameters
	----------
	dir_name : str
		Folder containing the patient sub-folders.
	patientid : str
		Name of the patient sub-folder.
	Z, Y, X : int
		Shape of the returned volume.

	Returns
	-------
	numpy.ndarray or None
		uint8 volume scaled to 0..255, or ``None`` (after printing a message)
		when conversion fails. Errors raised by ``load_scan`` itself propagate.
	"""
	margin = 5  # slices trimmed from each end of the resized stack
	path = os.path.join(dir_name, patientid)
	slices = load_scan(path)
	try:
		image_array = get_pixels_hu(slices) # HU unit conversion + nparray
		ctimage_resizedAll = resize_along_allaxis(image_array, target_dimensionX=X,
												target_dimensionY=Y,target_dimensionZ=Z+2*margin)
		# Crop relative to Z; the previous hard-coded [5:105] was only
		# correct for the default Z=100.
		ctimage = ctimage_resizedAll[margin:margin+Z]
		image=(image_normalize(ctimage)*255.0).astype('uint8')
		return image
	except Exception:
		print('PatientId:%s couldnt be converted'%(patientid))
		return None

In [None]:
def csv_preprocess (data):
    """Expand the training table into (baseline features, target week) rows.

    Expects columns Patient, Weeks, FVC, Percent, Age, Sex and SmokingStatus.
    For every patient, each of that patient's measurements produces one
    output row that carries the patient's first row after sorting by Weeks
    (base_Weeks, base_FVC, Age, Healthy-FVC and the indicator columns)
    together with that measurement's week in `Week` and its FVC in
    `actual_FVC`. Missing values are replaced by 0.

    NOTE: the 'Healthy-FVC' and indicator columns are added to the frame
    passed in by the caller before `data` is rebound to a new frame.
    NOTE: the output is grown by one `pd.concat` per row, so run time grows
    quadratically with the number of rows.
    """
    
    #Healthy FVC
    data['Healthy-FVC']=round((data['FVC']*100)/data['Percent'])
    FE=[]
    FE.append('Healthy-FVC')
    
    #Create Male, Female, Ex-smoker, Current-smoker and Never smoked
    # One 0/1 indicator column per distinct value found in Sex / SmokingStatus
    COLS = ['Sex','SmokingStatus']
    for col in COLS:
        for mod in data[col].unique():
            FE.append(mod)
            data[mod] = (data[col] == mod).astype(int)
    
    data =  data[['Patient','Weeks','FVC','Age']+FE]
    data = data.sort_values(['Patient','Weeks'], ascending=True).reset_index(drop=True)
    
    # Full list of indicator columns the output must contain, even when a
    # value never occurs in `data` (such a column ends up filled with 0 below)
    FE1=['Male','Female','Ex-smoker','Never smoked','Currently smokes']
    #Rename base_Weeks and base_FVC
    rename_col={'Weeks':'base_Weeks','FVC':'base_FVC'}
    data=data.rename(columns=rename_col)
    
    #Weeks biasing Week=-12 to Week =0 and Week = 133 to Week = 145
    #data.base_Weeks+=t_min_week
    #Add new fields Week and actual_FCV
    npData=pd.DataFrame(columns=['Patient','base_Weeks','base_FVC','Age']+FE1+['Week','Healthy-FVC','actual_FVC'])
    
    for pid in data['Patient'].unique():
        weeks=data.loc[data['Patient']==pid].base_Weeks
        fvc = data.loc[data['Patient']==pid].base_FVC
        index = data.loc[data['Patient']==pid].index
        # Re-index from 0 so weeks[k] / fvc[k] below address the k-th measurement
        weeks.reset_index(inplace = True, drop = True)
        fvc.reset_index(inplace = True, drop = True)
        for k in range(len(weeks)):
            # Append a copy of the patient's first row (index[0]), then
            # overwrite Week / actual_FVC on that freshly appended last row
            npData=pd.concat([npData,data.loc[data.index==index[0]]], sort=False)
            npData.iloc[-1, npData.columns.get_loc('Week')]=weeks[k]
            npData.iloc[-1, npData.columns.get_loc('actual_FVC')]=fvc[k]
    npData.reset_index(inplace = True, drop = True)
    npData=npData.fillna(0)
    
#     npData['Week']=npData['Week']-npData['base_Weeks']
#     npData['base_Weeks']=0.0
    #Random Shuffle
    #npData=sklearn.utils.shuffle(npData)
    #npData.reset_index(inplace = True, drop = True)
    
    return npData

In [None]:
#################################################### Network Model########################################
class Flatten(nn.Module):
	"""Collapse every dimension after the first into a single one."""
	def forward(self, input):
		# Keep dim 0, merge the rest: (N, ...) -> (N, prod(...))
		leading = input.size(0)
		return input.view(leading, -1)

class ds_3d_conv(nn.Module):
	"""Depthwise-separable 3-D convolution.

	A grouped ``nn.Conv3d`` (``groups=nin``) producing
	``nin * kernels_per_layer`` channels is followed by a 1x1x1 ``nn.Conv3d``
	that maps those channels to ``nout``.
	"""
	def __init__(self, nin, nout, kernel_size, padding, kernels_per_layer):
		super().__init__()
		hidden = nin * kernels_per_layer
		self.depthwise = nn.Conv3d(nin, hidden, kernel_size=kernel_size, padding=padding, groups=nin)
		self.pointwise = nn.Conv3d(hidden, nout, kernel_size=1)

	def forward(self, x):
		# per-channel spatial filtering first, channel mixing second
		return self.pointwise(self.depthwise(x))

# class SIGMA(nn.Module):
# 	def __init__(self):
# 		super(SIGMA, self).__init__()
# 		self.data_net1=nn.Sequential(
# 						nn.Linear(42,64),
# 						nn.ReLU(),
# 						#nn.Dropout(0.5),
# 						nn.Linear(64,118),
# 						nn.ReLU()
# 						#nn.Dropout(0.5)
# 						)
# 		self.data_net2=nn.Sequential(
# 						nn.Linear(128,256),
# 						nn.ReLU(),
# 						#nn.Dropout(0.5),
# 						nn.Linear(256,502),
# 						nn.ReLU()
# 						#nn.Dropout(0.5)
# 						)
# 		self.data_net3=nn.Sequential(
# 						nn.Linear(512,256),
# 						nn.ReLU(),
# 						#nn.Dropout(0.5),
# 						nn.Linear(256,118),
# 						nn.ReLU()
# 						#nn.Dropout(0.5)
# 						)
# 		self.data_net4=nn.Sequential(
# 						nn.Linear(780,256),
# 						nn.ReLU(),
# 						nn.Linear(256,64),
# 						nn.ReLU(),
# 						#nn.Dropout(0.5),
# 						nn.Linear(64,2),
# 						nn.ReLU()
# 						)

# 	def forward(self, data_i, image_o):
# 		x = torch.cat((data_i, image_o), dim=-1)
# 		out1 = self.data_net1(x)
# 		out2 = torch.cat((data_i,out1), dim=-1)
# 		out2 = self.data_net2(out2)
# 		out3 = torch.cat((data_i,out2), dim=-1)
# 		out3 = self.data_net3(out3)
# 		out4 = torch.cat((x,out1,out2,out3), dim=-1)
# 		out = self.data_net4(out4)
# 		return out

# class DATA(nn.Module):
# 	def __init__(self):
# 		super(DATA, self).__init__()
# 		self.data_net1=nn.Sequential(
# 						nn.Linear(41,100),
# 						nn.ReLU(),
# 						#nn.Dropout(0.5),
# 						nn.Linear(100,100),
# 						nn.ReLU()
# 						#nn.Dropout(0.5)
# 						)
# 		self.data_net2=nn.Sequential(
# 						nn.Linear(100,3),
# 						nn.ReLU()
# 						)
        
# 		self.data_net3=nn.Sequential(
# 						nn.Linear(100,3)
# 						)

# 	def forward(self, data_i):
# 		out1 = self.data_net1(data_i)
# 		out2 = self.data_net2(out1)
# 		out3 = self.data_net3(out1)
# 		out = out2 + out3
# 		return out


class IMAGE(nn.Module):
	"""3-D CNN over a single-channel volume, fused with extra per-sample features.

	Layout (sub-module names are part of the ``state_dict`` keys):

	* ``feature_extractor``: one ``conv_<i>`` stage per entry of
	  ``channel_number``. Every stage but the last uses a 3x3x3
	  depthwise-separable conv + BatchNorm + MaxPool(2) + ReLU; the last uses
	  a 1x1x1 conv + BatchNorm + ReLU without pooling.
	* ``classifier``: optional ``Dropout(0.5)`` followed by a 1x1x1 ``Conv3d``
	  to ``output_dim`` channels.
	* ``flat``: flatten, then ``Linear(1728, 511)`` + ReLU. The 1728 is fixed,
	  so the flattened classifier output must have exactly 1728 values per
	  sample.
	* ``f``: ``Linear(512,128) -> Linear(128,32) -> Linear(32,2)``, each
	  followed by ReLU. Its 512 inputs are the 511 image features concatenated
	  with ``data_i``, so ``data_i`` must contribute one value per sample.
	"""
	def __init__(self, channel_number=[32, 64, 128, 256, 256, 64], output_dim=16, dropout=True):
		super().__init__()
		depth = len(channel_number)

		# Convolutional trunk: stage i consumes the output channels of stage
		# i-1; the very first stage consumes the single input channel.
		self.feature_extractor = nn.Sequential()
		stage_inputs = [1] + list(channel_number[:-1])
		for idx, (c_in, c_out) in enumerate(zip(stage_inputs, channel_number)):
			is_last = (idx == depth - 1)
			self.feature_extractor.add_module(
				'conv_%d' % idx,
				self.conv_layer(c_in,
								c_out,
								maxpool=not is_last,
								kernel_size=1 if is_last else 3,
								padding=0 if is_last else 1,
								kernels_per_layer=1))

		# Head: optional dropout, then a pointwise conv down to output_dim
		self.classifier = nn.Sequential()
		#avg_shape = [3, 3, 3]
		#self.classifier.add_module('average_pool', nn.AvgPool3d(avg_shape))
		if dropout is True:
			self.classifier.add_module('dropout', nn.Dropout(0.5))
		self.classifier.add_module('conv_%d' % depth,
								   nn.Conv3d(channel_number[-1], output_dim, padding=0, kernel_size=1))

		# Image features -> 511 values (one slot is left for data_i, see forward)
		self.flat = nn.Sequential(
			Flatten(),
			nn.Linear(1728,511),
			nn.ReLU()
		)
		# Fusion MLP producing the two outputs
		self.f = nn.Sequential(
			nn.Linear(512,128),
			nn.ReLU(),
			nn.Linear(128,32),
			nn.ReLU(),
			nn.Linear(32,2),
			nn.ReLU()
		)

	@staticmethod
	def conv_layer(in_channel, out_channel, maxpool=True, kernel_size=3, padding=1, kernels_per_layer=1, maxpool_stride=2):
		"""Build one stage: separable conv, BatchNorm, optional MaxPool(2), ReLU."""
		parts = [
			ds_3d_conv(in_channel, out_channel, kernel_size, padding, kernels_per_layer),
			nn.BatchNorm3d(out_channel),
		]
		if maxpool is True:
			parts.append(nn.MaxPool3d(2, stride=maxpool_stride))
		parts.append(nn.ReLU())
		return nn.Sequential(*parts)

	def forward(self, image_i, data_i):
		"""Return the fusion MLP output for a batch of volumes and features."""
		image_features = self.flat(self.classifier(self.feature_extractor(image_i)))
		# Append the extra features to the image features along the last dim
		fused = torch.cat((image_features, data_i), dim=-1)
		return self.f(fused)
	
# class Combined_NET(nn.Module):
# 	def __init__(self):
# 		super(Combined_NET, self).__init__()
# 		self.image = IMAGE()
# 		self.data = SIGMA()
		
# 	def forward(self, image_i, data_i):
# 		image_o = self.image(image_i)
# 		data_o = self.data(data_i,image_o)
# 		#print(x.shape)
# 		return data_o

In [None]:
# C1: lower bound applied to the predicted sigma; C2: upper bound applied to the absolute FVC error
C1, C2 = torch.tensor(70, dtype=torch.float32), torch.tensor(1000, dtype= torch.float32)

def score(y_true, y_pred):
	"""Mean of ``sqrt(2)*delta/sigma_clip + log(sqrt(2)*sigma_clip)`` over the batch.

	``y_pred[:, 0]`` is the predicted sigma (raised to at least ``C1``) and
	``y_pred[:, 1]`` the predicted FVC; ``delta`` is the absolute error
	against ``y_true[:, 0]``, capped at ``C2``. Returns a 0-dim tensor.
	"""
	dev = y_pred.device
	sigma, fvc_pred = y_pred[:, 0], y_pred[:, 1]

	# element-wise max(sigma, C1)
	sigma_floor = torch.ones(sigma.size(), device=dev) * C1
	sigma_clip = torch.max(sigma, sigma_floor)

	# element-wise min(|error|, C2)
	abs_err = (y_true[:, 0] - fvc_pred).abs()
	err_cap = torch.ones(abs_err.size(), device=dev) * C2
	abs_err = torch.min(abs_err, err_cap)

	root2 = torch.tensor(2.).sqrt()
	per_sample = (abs_err / sigma_clip) * root2 + (sigma_clip * root2).log()
	return per_sample.mean()



def quartile_loss(y_true, y_pred): # 0.65 
	"""Training loss: currently just ``score(y_true, y_pred)``."""
	return score(y_true, y_pred) # 0.35

In [None]:
# def train_combined_net(epochs, batch_size, npTrain, npValid, processed_img_dir, model, train_device ='cpu'):

# 	x_train_values_df = npTrain[['base_FVC','Age','Male','Female','Ex-smoker','Never smoked','Currently smokes','Week','Healthy-FVC']]

# 	x_train_values = x_train_values_df.values
# 	y_train_values = npTrain['actual_FVC'].values

# 	x_valid_values_df = npValid[['base_FVC','Age','Male','Female','Ex-smoker','Never smoked','Currently smokes','Week','Healthy-FVC']]

# 	x_valid_values = x_valid_values_df.values
# 	y_valid_values = npValid['actual_FVC'].values
	
# 	x_train_patientId_df = npTrain['Patient'] # df of only patientId 
# 	x_valid_patientId_df = npValid['Patient'] # df of only patientId 


# # 	abnormaltraindata=pd.DataFrame(columns=npTrain.columns)
# # 	abnormalvaliddata=pd.DataFrame(columns=npValid.columns)

# 	train_patientIDs_name = x_train_patientId_df.values 
# 	index_of_patientIDS_train = np.arange(len(train_patientIDs_name)) 



# 	valid_patientIDs_name = x_valid_patientId_df.values 
# 	index_of_patientIDS_valid = np.arange(len(valid_patientIDs_name))



# 	x_train_values = torch.tensor(x_train_values, dtype = torch.float)
# 	y_train_values = torch.tensor(y_train_values, dtype = torch.float).unsqueeze(1)
# 	index_of_patientIDS_train = torch.tensor(index_of_patientIDS_train, dtype= torch.int16)


# 	x_valid_values = torch.tensor(x_valid_values, dtype = torch.float)
# 	y_valid_values = torch.tensor(y_valid_values, dtype = torch.float).unsqueeze(1)
# 	index_of_patientIDS_valid = torch.tensor(index_of_patientIDS_valid, dtype= torch.int16)
    
# 	#print('train input',x_train_values.size(0))
# 	#print('train target: ',y_train_values.size(0))
# 	#print('train patient id: ',index_of_patientIDS_train.size(0))
# 	#print('valid input',x_valid_values.size(0))
# 	#print('valid target: ',y_valid_values.size(0))
# 	#print('valid patient id: ',index_of_patientIDS_valid.size(0))


# 	train_ds = TensorDataset(x_train_values, index_of_patientIDS_train , y_train_values)
# 	valid_ds = TensorDataset(x_valid_values, index_of_patientIDS_valid , y_valid_values)

# 	train_dl = DataLoader(train_ds, batch_size = batch_size, shuffle = True)
# 	valid_dl = DataLoader(valid_ds , batch_size = batch_size)




# 	if train_device =='cuda':
# 		device = torch.device("cuda")
# 		model.to(device)


# 	torch.backends.cudnn.benchmark = True

# 	for epoch in range(epochs):

# 		train_acc_total = 0. 
# 		score_train_total = 0. 
# 		pbar = tqdm(train_dl)
# 		model.train()
# 		for i, (xb_meta, xb_patients_idxs, Y_target) in enumerate(pbar):     

# 			xb_patients_names = train_patientIDs_name[xb_patients_idxs.to('cpu').numpy()]
# 			xb_ct = []

# 			for patientId in xb_patients_names:
# 				path = processed_img_dir + os.sep + patientId
# 				image_3darray = load_array(path)
# 				xb_ct.append(image_3darray)

# 			xb_image =torch.tensor(xb_ct).float() 

# 			if train_device == 'cuda':
# 				xb_image = xb_image.cuda()
# 				xb_meta = xb_meta.cuda()
# 				Y_target = Y_target.cuda()
# 			xb_image = xb_image.unsqueeze(1)
# 			prediction = model(xb_image, xb_meta)


# 			loss = quartile_loss(Y_target, prediction)

# 			loss.backward()
# 			optimizer.step()


# 			with torch.no_grad():
# 				accuracy =(1- ((prediction[:,1]- Y_target)/Y_target).abs()).mean()
# 				score_train = score(Y_target, prediction).item()

# # 			if (accuracy.data.item() < 0.75 or score_train > 8.5 ):
# # 				abnormaltraindata=abnormaltraindata.append(npTrain.iloc[xb_patients_idxs.to('cpu').numpy()])
                
# 			s = ('Epochs: %5d/%d , Steps: %8d/%d , train_loss: %5.3f  ,trian_accuracy: %5.3f, score: %4.4f'%\
# 				(epoch, epochs, i, len(train_dl), loss.data.item(), accuracy.data.item(), score_train))
# 			pbar.set_description(s)  
# 			optimizer.zero_grad()
# 			train_acc_total += accuracy.data.item()*len(xb_meta)
# 			score_train_total += score_train * len(xb_meta)
# 			del prediction
			
# 		avg_train_acc = (train_acc_total)/len(train_ds) 
# 		avg_score_train = score_train_total/ len(train_ds)
# 		print('Average trianing accuracy:', avg_train_acc)
# 		print("Average training Score:", avg_score_train)

# 		#abnormaltraindata.to_csv('abnormaltraindata.csv',index=False)

	 
			 
			
			 


# 		## Validation
# 		score_valid_total = 0.
# 		val_acc_total = 0.
# 		pbar2 = tqdm(valid_dl)
# 		model.eval()
# 		for i, (xb_meta, xb_patients_idxs, Y_target) in enumerate(pbar2):  

# 			xb_patients_names = valid_patientIDs_name[xb_patients_idxs.to('cpu').numpy()]
# 			xb_ct = []
# 			#print(xb_patients_names)           
# 			for patientId in xb_patients_names:
# 				#print(patientId)
# 				path = processed_img_dir + os.sep + patientId
# 				image_3darray = load_array(path)
# 				xb_ct.append(image_3darray)

# 			xb_image =torch.tensor(xb_ct).float() 

# 			if train_device == 'cuda':
# 				xb_image = xb_image.cuda()
# 				xb_meta = xb_meta.cuda()
# 				Y_target = Y_target.cuda()
# 			xb_image = xb_image.unsqueeze(1)
# 			prediction = model(xb_image, xb_meta)


# 			loss = quartile_loss(Y_target, prediction)
			
# 			with torch.no_grad():
# 				accuracy =(1- ((prediction[:,1]- Y_target)/Y_target).abs()).mean()
# 				score_valid = score(Y_target, prediction).item()
                
# # 			if (accuracy.data.item() < 0.75 or score_valid > 8.5 ):
# # 				abnormalvaliddata=abnormalvaliddata.append(npValid.iloc[xb_patients_idxs.to('cpu').numpy()])
               
                
# 			s = ('Epochs: %5d/%d , Steps: %8d/%d ,Valid_loss: %5.3f  ,valid_accuracy: %5.3f, Score: %4.4f'%\
# 				  (epoch, epochs, i, len(valid_dl), loss.data.item(), accuracy.data.item(), score_valid))
# 			pbar2.set_description(s)  
			
# 			val_acc_total += accuracy.data.item()*len(xb_meta)
# 			score_valid_total += score_valid * len(xb_meta)
# 			del prediction
			
# 		avg_valid_acc = (val_acc_total)/len(valid_ds) 
# 		avg_score_valid = score_valid_total/ len(valid_ds)
# 		print('Average validation accuracy:', avg_valid_acc)
# 		print('Average validation score :', avg_score_valid)

# 		#abnormalvaliddata.to_csv('abnormalvaliddata.csv',index=False)


# 		PATH = 'Epoch'+'%s_'%epoch+'Score'+'%s_'%avg_score_valid+'Acc'+'%s'%avg_valid_acc + '.pth'
# 		torch.save(model.state_dict(), './model_weight'+os.sep+PATH)

In [None]:
#     return npEval
######### ----------------- Making the Evaluation Data -------------------- #######
def make_eval_data(npEval, model, device = 'cuda',
                   image_dir = '../input/osic-pulmonary-fibrosis-progression/train'):
    """Run ``model`` on every row of ``npEval`` and store the predictions.

    Parameters
    ----------
    npEval : pandas.DataFrame
        Needs the columns 'Patient' and 'Week'. The columns 'FVC' and
        'Confidence' are added to this frame in place.
    model : torch.nn.Module
        Called as ``model(image, feature)`` with a ``(1, 1, Z, Y, X)`` image
        and a ``(1, 1)`` week feature; expected to return a ``(1, 2)`` tensor.
    device : str
        'cuda' moves the model and inputs to the GPU when one is available;
        otherwise everything stays where it is.
    image_dir : str
        Folder with one DICOM sub-folder per patient (passed to ``read_image``).

    Returns
    -------
    pandas.DataFrame
        The same ``npEval`` object, with prediction column 1 in 'FVC' and
        prediction column 0 in 'Confidence'.

    Raises
    ------
    ValueError
        If the scan of a patient could not be converted by ``read_image``.
    """
    x_features = torch.tensor(npEval[['Week']].values).float()
    x_patientids_name = npEval[['Patient']].values

    # Convert every scan once per patient rather than once per row
    loaded_images = {}
    for unique_patient in npEval.Patient.unique():
        loaded_images[unique_patient] = read_image(image_dir, unique_patient, Z=100,Y=200,X=200)

    use_cuda = torch.cuda.is_available() and device == 'cuda'
    if use_cuda:
        model.to('cuda')
    model.eval()

    predictions = []
    # Pure inference: no_grad avoids building an autograd graph for every
    # 3-D volume (results are unchanged, memory use is much lower).
    with torch.no_grad():
        for i, patientid in enumerate(x_patientids_name):
            volume = loaded_images[patientid[0]]
            if volume is None:
                raise ValueError('No image could be loaded for patient %s' % patientid[0])
            # one extra dim for the channel, another for the batch
            x_image = torch.tensor(volume, dtype = torch.float32).unsqueeze(0).unsqueeze(0)
            # feature of this particular (patient, week) row, as a batch of 1
            x_feature = x_features[i].unsqueeze(0)

            if use_cuda:
                x_image = x_image.cuda()
                x_feature = x_feature.cuda()

            prediction = model(x_image, x_feature)
            # batch size is 1, so keep only element [0] of the batched output
            predictions.append(prediction.to('cpu').detach().numpy()[0])

    predictions = np.array(predictions) # one row per npEval row; column 0 -> Confidence, column 1 -> FVC
    npEval['FVC'] = predictions[:,1]
    npEval['Confidence'] = predictions[:,0]

    return npEval

In [None]:
# Competition tables: training measurements, test baselines and the
# submission template (one row per requested Patient_Week).
data_train, data_test, submission = (
    pd.read_csv("../input/osic-pulmonary-fibrosis-progression/train.csv"),
    pd.read_csv("../input/osic-pulmonary-fibrosis-progression/test.csv"),
    pd.read_csv("../input/osic-pulmonary-fibrosis-progression/sample_submission.csv"),
)


In [None]:
# Split "<patient>_<week>" into its two parts
submission['Patient'] = submission['Patient_Week'].str.split('_').str[0]
submission['Weeks'] = submission['Patient_Week'].str.split('_').str[1].astype(int)
submission = submission.sort_values(by=['Patient','Weeks'], ascending=True).reset_index(drop=True)

# Attach each patient's single test row to every requested week.
# After the merge "_x" columns come from data_test and "_y" columns from submission.
merge = (
    pd.merge(data_test, submission, on=['Patient'], how='left')
    .sort_values(['Patient','Weeks_y'])
    .reset_index(drop=True)
    .drop(['FVC_y'], axis=1)
    .rename(columns={'FVC_x':'base_FVC','Weeks_y':'Week','Weeks_x':'base_Weeks'})
)

del data_test, submission

# Model inputs (one row per patient/week) and the submission skeleton share merge's row order
data_test = merge.loc[:, ['Patient','base_Weeks','base_FVC','Percent','Age','Sex','SmokingStatus','Week']]
submission = merge.loc[:, ['Patient_Week','base_FVC','Confidence']].rename(columns={'base_FVC':'FVC'})

In [None]:
data = data_test.copy()
# FVC the patient would have at 100 Percent
data['Healthy-FVC']=round((data['base_FVC']*100)/data['Percent'])
FE=[]
FE.append('Healthy-FVC')

#Create Male, Female, Ex-smoker, Current-smoker and Never smoked
COLS = ['Sex','SmokingStatus']
for col in COLS:
    for mod in data[col].unique():
        FE.append(mod)
        data[mod] = (data[col] == mod).astype(int)
FE1=['Male','Female','Ex-smoker','Never smoked','Currently smokes']
# Concatenating onto an empty frame that lists every expected column makes
# sure indicator columns whose value never occurs in the test set still
# exist; they are filled with 0 by the fillna below.
npData=pd.DataFrame(columns=['Patient','base_Weeks','base_FVC','Age','Healthy-FVC']+FE1+['Week'])
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0
npData=pd.concat([npData, data], sort=True)
npData=npData.fillna(0)

# npData['Week']=npData['Week']-npData['base_Weeks']
# npData['base_Weeks']=0.0

del data_test, data
data_test = npData[['Patient','base_Weeks','base_FVC','Age','Healthy-FVC','Male','Female','Ex-smoker','Never smoked','Currently smokes','Week']]
del npData   

In [None]:
# #train, valid, test = csv_split(data_train, v=15, t=0)

# Discard every (Patient, Weeks) pair that occurs more than once (no copy of
# such a pair is kept), then expand the table with csv_preprocess().
data_train = data_train.loc[~data_train.duplicated(subset=['Patient','Weeks'], keep=False)].copy()
data_train = csv_preprocess(data_train)

# #npTrain=csv_preprocess(train)
# #npValid=csv_preprocess(valid)

In [None]:
# data=data_train[:1]
# print(data)

In [None]:
# min_week = min(data_train['Week'].min(), data_test['Week'].min())
# max_week = max(data_train['Week'].max(), data_test['Week'].max())

# min_age = min(data_train['Age'].min(), data_test['Age'].min()) 
# max_age = max(data_train['Age'].max(), data_test['Age'].max())

# min_Healthy_FVC = min(data_train['Healthy-FVC'].min(), data_test['Healthy-FVC'].min()) 
# max_Healthy_FVC = max(data_train['Healthy-FVC'].max(), data_test['Healthy-FVC'].max())

# min_FVC = min(data_train['base_FVC'].min(), data_test['base_FVC'].min()) 
# max_FVC = max(data_train['base_FVC'].max(), data_test['base_FVC'].max())

# def Normalize (data,min_age,max_age,min_week,max_week,min_FVC,max_FVC,min_Healthy_FVC,max_Healthy_FVC):
    
#     data['Age'] = (data['Age'] - min_age ) / ( max_age - min_age )
#     data['Week'] = (data['Week'] - min_week ) / ( max_week - min_week )
#     data['base_FVC'] = (data['base_FVC'] - min_FVC ) / ( max_FVC - min_FVC )
#     data['Healthy-FVC'] = (data['Healthy-FVC'] - min_Healthy_FVC ) / ( max_Healthy_FVC - min_Healthy_FVC )
    
#     return data

In [None]:
model = IMAGE()
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; make_eval_data() moves the model to CUDA itself when a
# GPU is available.
model.load_state_dict(torch.load('../input/image-w/Epoch61_Score6.620608233458159_Acc0.9449220416166925.pth',
                                 map_location='cpu'))
# Predict FVC / Confidence for every (patient, week) row of the test frame
test = make_eval_data(data_test.copy(), model)

In [None]:
# start = torch.cuda.Event(enable_timing=True)
# end = torch.cuda.Event(enable_timing=True)

# start.record()
# train = make_eval_data(data.copy(), model)
# end.record()
# torch.cuda.synchronize()
# print(start.elapsed_time(end))

In [None]:
# !pip install torch-summary

In [None]:
# from torchvision import models
# from torchsummary import summary


In [None]:
# summary(model, ((1, 100, 200, 200), (10,)))

In [None]:
# train=train.rename(columns={'Confidence':'pred_sigma','FVC':'pred_FVC'})

In [None]:
# train=train[['Patient','Week','actual_FVC','pred_FVC','pred_sigma']]

In [None]:
# train.head(8)

In [None]:
# train

In [None]:
# FVC and confidence correction at base week:
# at the week the baseline was measured the FVC is known, so use the measured
# value and the minimum confidence (70) instead of the model output.
for nid in test.Patient.unique():
    base_rows = test.index[(test.Patient==nid) & (test.Week==test.base_Weeks)]
    if len(base_rows) == 0:
        # this patient has no prediction row at the base week: nothing to fix
        continue
    # .at addresses the row by index label (the previous .iloc treated the
    # label as a position, which is only right for a default 0..n-1 index)
    test.at[base_rows[0], 'FVC'] = test.at[base_rows[0], 'base_FVC']
    test.at[base_rows[0], 'Confidence'] = 70

In [None]:
# # Week bias correction and confidence clipping at 70
# test.base_Weeks += -12
# test.Week += -12
# Raise every Confidence below 70 up to 70
test['Confidence'] = test['Confidence'].clip(lower=70)

In [None]:
# Copy the corrected predictions into the submission frame. The assignment
# relies on `submission` and `test` sharing the same row index (both were
# derived from `merge`).
submission.loc[:, 'FVC']=test.FVC
submission.loc[:, 'Confidence']= test.Confidence
# Write the submission file (columns Patient_Week, FVC, Confidence) without the index column.
submission.to_csv('submission.csv',index=False)

In [None]:
# Preview only the first rows instead of rendering the whole frame
submission.head()