<a href="https://colab.research.google.com/github/Kasaligan/Personal_methods/blob/main/target_correlation_filter().ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#The following method eliminates attributes from a dataset if the absolute value of their Pearson correlation (R) with the target variable is below a certain threshold.
#Example: if the threshold is set to 0.3, all attributes whose correlation R with the target variable lies in the range (-0.3, 0.3) will be erased.
#Requires the pandas library.

def target_correlation_filter(df, target, threshold=0, axis=0, inplace=False, show_correlations=False, show_erased_atributes=False):
    """Drop the attributes of ``df`` that are weakly correlated with ``target``.

    The Pearson correlation of every attribute with ``target`` is computed
    with ``df.corrwith``; attributes whose absolute correlation is strictly
    below ``threshold`` are removed.

    Parameters:
      - df: base dataframe.
      - target: object to correlate with; it is passed unchanged to
        ``df.corrwith`` as its ``other`` argument.
      - threshold: correlation threshold below which attributes are filtered.
        Intended range is 0 <= threshold <= 1. Values with
        ``abs(threshold) > 1`` are rejected; a negative threshold removes
        nothing, because an absolute correlation is never below it.
      - axis: axis passed to ``df.corrwith``. With 0 / 'index' each column is
        correlated with target and columns are dropped; with 1 / 'columns'
        each row is correlated with target and rows are dropped.
      - inplace: boolean. False returns the processed dataframe, True
        overwrites the base dataframe and returns None.
      - show_correlations: if true, prints the correlation with target for
        each attribute.
      - show_erased_atributes: if true, prints the labels of the attributes
        erased because of the correlation threshold.

    Returns:
      The filtered dataframe when ``inplace`` is False; None when ``inplace``
      is True or when a parameter check fails (a message is printed then).

    Note:
      Attributes whose correlation is NaN are kept, because the comparison
      ``NaN < threshold`` is False.
    """
    # Correlation method for corrwith is defined here; this function could be
    # modified to allow different correlation methods.
    correlation_method = 'pearson'

    if not isinstance(inplace, bool):    # inplace must be a real boolean
        print('Inplace parameter must be boolean.')
        return None
    if abs(threshold) > 1:               # threshold must be in a valid correlation range
        print('Threshold value must be between -1 and 1.')
        return None

    # Correlation vector with the target variable, one entry per attribute.
    correlations = df.corrwith(target, axis=axis, method=correlation_method)
    if show_correlations:
        print('Correlations with target: \n', correlations)

    # Labels of the attributes whose absolute correlation is below threshold.
    absolute_correlations = correlations.abs()
    erased_labels = absolute_correlations[absolute_correlations < threshold].index
    if show_erased_atributes:
        print('The following parameters have been erased: ', erased_labels)

    # corrwith along axis 0 yields one value per column, along axis 1 one value
    # per row, so the labels must be dropped from the opposite axis.
    drop_axis = 1 if axis in (0, 'index') else 0

    if inplace:
        df.drop(erased_labels, axis=drop_axis, inplace=True)
        return None
    return df.drop(erased_labels, axis=drop_axis)
