# Dissimilarity marix for Binary Attributes

In [9]:
import pandas as pd
from sklearn.metrics import pairwise_distances

# Load the dataset
data = pd.read_csv("student-mat.csv")

# Select binary attributes (you need to identify which columns are binary)
binary_attributes = ["schoolsup", "famsup", "paid", "activities", "nursery", "higher", "internet", "romantic"]

# Convert binary attributes to numerical (0 or 1)
data[binary_attributes] = data[binary_attributes].apply(lambda x: pd.factorize(x)[0])

# Calculate Jaccard distance matrix
jaccard_matrix = 1 - pairwise_distances(data[binary_attributes], metric="hamming")
jaccard_df = pd.DataFrame(jaccard_matrix, columns=data.index, index=data.index)

print("Jaccard Distance Matrix Matrix for Binary Attributes:")
print(jaccard_df)


Jaccard Distance Matrix for Binary Attributes:
       0      1      2      3      4      5      6      7      8      9    \
0    1.000  0.500  0.750  0.250  0.625  0.375  0.750  0.875  0.500  0.375   
1    0.500  1.000  0.500  0.500  0.625  0.625  0.750  0.625  0.750  0.625   
2    0.750  0.500  1.000  0.500  0.625  0.625  0.750  0.625  0.750  0.625   
3    0.250  0.500  0.500  1.000  0.625  0.875  0.500  0.375  0.750  0.875   
4    0.625  0.625  0.625  0.625  1.000  0.750  0.625  0.750  0.875  0.750   
..     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
390  0.625  0.625  0.625  0.625  1.000  0.750  0.625  0.750  0.875  0.750   
391  0.625  0.875  0.625  0.375  0.500  0.500  0.875  0.500  0.625  0.500   
392  0.750  0.750  0.500  0.250  0.625  0.375  0.750  0.625  0.500  0.375   
393  0.625  0.875  0.625  0.375  0.500  0.500  0.875  0.500  0.625  0.500   
394  0.750  0.750  0.750  0.500  0.625  0.625  1.000  0.625  0.750  0.625   

     ...    385    386    38

# Dissimilarity marix for Numerical Attributes

In [10]:
from sklearn.metrics.pairwise import euclidean_distances

# Select numerical attributes
numerical_attributes = ["age", "absences", "G1", "G2", "G3"]

# Compute Euclidean distance matrix
numerical_matrix = pd.DataFrame(euclidean_distances(data[numerical_attributes], data[numerical_attributes]), 
                                columns=data.index, index=data.index)
print("Euclidean Distance Matrix for Numerical Attributes:")
print(numerical_matrix)


Euclidean Distance Matrix for Numerical Attributes:
           0          1          2          3          4          5    \
0     0.000000   2.449490   7.000000  16.431677   6.403124  16.792856   
1     2.449490   0.000000   8.306624  16.431677   6.557439  17.832555   
2     7.000000   8.306624   0.000000  13.747727   6.480741  11.789826   
3    16.431677  16.431677  13.747727   0.000000  11.269428   8.124038   
4     6.403124   6.557439   6.480741  11.269428   0.000000  12.922848   
..         ...        ...        ...        ...        ...        ...   
390   7.937254   9.949874   5.656854  14.247807   8.717798  11.180340   
391  17.058722  17.406895  14.212670   3.316625  11.747340   7.280110   
392   6.928203   7.211103  10.148892  12.727922   7.416198  14.560220   
393  11.135529  10.862780  11.874342   7.615773   7.000000  12.409674   
394   5.385165   6.244998   6.633250  11.618950   4.000000  12.449900   

           6          7          8          9    ...        385        

# Dissimilarity marix for Nominal Attributes

In [11]:
from scipy.spatial.distance import hamming

# Select nominal attributes (you need to identify which columns are nominal)
nominal_attributes = ["Mjob", "Fjob", "reason", "guardian"]

# Convert nominal attributes to one-hot encoded representation
one_hot_encoded = pd.get_dummies(data[nominal_attributes], prefix=nominal_attributes)

# Compute Hamming distance matrix
hamming_matrix = pd.DataFrame([[hamming(one_hot_encoded.iloc[i], one_hot_encoded.iloc[j]) 
                                for j in range(len(data))] for i in range(len(data))],
                              columns=data.index, index=data.index)
print("Hamming Distance Matrix for Nominal Attributes:")
print(hamming_matrix)


Hamming Distance Matrix for Nominal Attributes:
          0         1         2         3         4         5         6    \
0    0.000000  0.235294  0.235294  0.352941  0.470588  0.352941  0.352941   
1    0.235294  0.000000  0.235294  0.470588  0.235294  0.352941  0.352941   
2    0.235294  0.235294  0.000000  0.352941  0.352941  0.235294  0.235294   
3    0.352941  0.470588  0.352941  0.000000  0.352941  0.352941  0.235294   
4    0.470588  0.235294  0.352941  0.352941  0.000000  0.352941  0.117647   
..        ...       ...       ...       ...       ...       ...       ...   
390  0.352941  0.352941  0.470588  0.352941  0.470588  0.352941  0.470588   
391  0.235294  0.352941  0.352941  0.235294  0.470588  0.235294  0.352941   
392  0.352941  0.235294  0.352941  0.470588  0.235294  0.352941  0.235294   
393  0.235294  0.235294  0.235294  0.352941  0.352941  0.117647  0.235294   
394  0.352941  0.235294  0.470588  0.470588  0.235294  0.470588  0.352941   

          7         8      