In [1]:
import pandas as pd
import numpy as np

**There are two methods to calculate Spearman's rank correlation coefficient, depending on whether:**

1. the data does not have tied ranks, or
2. the data has tied ranks.

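# Data does not have tied ranks

When no ranks are tied, the coefficient is $\rho = 1 - \dfrac{6\sum d_i^2}{n(n^2 - 1)}$, where $d_i$ is the difference between the two ranks of observation $i$ and $n$ is the number of observations.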

In [2]:
# Marks in two subjects, ten values per list. Neither list repeats a value,
# so ranking either list produces no tied ranks.
english = [56,75,45,71,62,64,58,80,76,61]
maths = [66,70,40,60,65,56,59,77,67,63]

In [3]:
# Build the two-column frame in a single constructor call; the dict order
# fixes the column order (english first, then maths).
marks = pd.DataFrame({"english": english, "maths": maths})
marks.head()

Unnamed: 0,english,maths
0,56,66
1,75,70
2,45,40
3,71,60
4,62,65


In [61]:
def rank(feature):
    """
    Rank the values of a Series in descending order (largest value gets rank 1).

    Equal values share one rank and the next distinct value gets the next
    consecutive rank (dense ranking), so this helper suits Spearman's
    simplified formula only when the data has no ties.

    params: feature is a named pandas Series of sortable values.
    returns a one-column DataFrame named "<feature.name>_rank" that keeps
            the index of feature, so it lines up row-for-row with the
            frame the Series was taken from.
    """
    # Distinct values, largest first; the position in this ordering is the rank.
    distinct_desc = sorted(set(feature), reverse=True)
    rank_by_value = {value: position for position, value in enumerate(distinct_desc, start=1)}

    # Build the frame from the mapped Series rather than from a plain list:
    # a list would get a fresh 0..n-1 index and misalign when concatenated
    # column-wise with a source frame that is not indexed 0..n-1.
    column = "{}_rank".format(feature.name)
    return pd.DataFrame({column: feature.map(rank_by_value)})

def difference_squared(df):
    """
    Add the absolute rank difference and its square to a rank dataframe.

    params: df is a dataframe whose first two columns hold the ranks to compare.
            It is modified in place: columns "d" and "d^2" are added.
    returns the same dataframe, the sum of the "d^2" column,
            and the number of rows.
    """
    first_rank, second_rank = df.iloc[:, 0], df.iloc[:, 1]
    df["d"] = abs(first_rank - second_rank)
    df["d^2"] = df["d"] * df["d"]

    squared = df["d^2"]
    return df, squared.sum(), len(squared)

def spearman_corr_coeff(d_square_sum, n):
    """
    Spearman's rank correlation coefficient for data without tied ranks:

        rho = 1 - 6 * sum(d^2) / (n * (n^2 - 1))

    params: d_square_sum is the sum of the squared rank differences,
            n is the number of paired observations.
    returns the coefficient, or NaN when n < 2, where the coefficient
            is undefined.
    """
    # With fewer than two observations n*(n^2 - 1) is zero. Return NaN
    # explicitly instead of raising ZeroDivisionError (plain ints) or
    # emitting a divide warning (numpy ints).
    if n < 2:
        return float("nan")

    return 1 - (6 * d_square_sum) / (n * (n ** 2 - 1))

def get_rank_df(columns_name, df):
    """
    Rank the requested columns and compute Spearman's correlation
    coefficient with the formula for data without tied ranks.

    params: df is the dataframe,
            columns_name is list of column names for which rank needs to be computed.
            Every listed column gets a rank column, but only the first two
            rank columns enter the difference and the coefficient.
    returns (final_rank_df, sp_coeff): df with the rank, "d" and "d^2"
            columns appended, and the Spearman correlation coefficient.
    """
    # computing rank: one single-column frame per requested column
    rank_frames = [rank(df[name]) for name in columns_name]
    rank_df = pd.concat(rank_frames, axis=1)

    # Label the ranks with df's own row index. The column-wise concat below
    # aligns on index labels, so ranks carrying a fresh 0..n-1 index would be
    # matched to the wrong rows (or padded with NaN) whenever df is not
    # indexed 0..n-1.
    rank_df.index = df.index

    # computing difference and difference squared
    difference_squared_df, d_sq_sum, n = difference_squared(rank_df)

    # compute spearman correlation coefficient
    sp_coeff = spearman_corr_coeff(d_sq_sum, n)

    final_rank_df = pd.concat([df, difference_squared_df], axis=1)

    return final_rank_df, sp_coeff

In [62]:
# Rank both subjects and compute the coefficient with the no-ties formula.
columns_name = ["english", "maths"]
result_df, spearman_coeff = get_rank_df(columns_name, marks)
# Show the marks next to their ranks, rank differences and squared differences.
print(result_df)
print("\nSpearman Correlation Coefficient",spearman_coeff)

   english  maths  english_rank  maths_rank  d  d^2
0       56     66             9           4  5   25
1       75     70             3           2  1    1
2       45     40            10          10  0    0
3       71     60             4           7  3    9
4       62     65             6           5  1    1
5       64     56             5           9  4   16
6       58     59             8           8  0    0
7       80     77             1           1  0    0
8       76     67             2           3  1    1
9       61     63             7           6  1    1

Spearman Correlation Coefficient 0.6727272727272727
