# Unit 5: Jaccard Coefficient Calculations
This notebook demonstrates how to calculate the Jaccard similarity coefficient between patients based on binary conversion of medical test results.

In [None]:
import pandas as pd

# Original data: one row per patient, with coded symptom and test results
data = {
    'Name': ['Jack', 'Mary', 'Jim'],
    'Gender': ['M', 'F', 'M'],
    'Fever': ['Y', 'Y', 'Y'],
    'Cough': ['N', 'N', 'P'],
    'Test-1': ['P', 'P', 'N'],
    'Test-2': ['N', 'A', 'N'],
    'Test-3': ['N', 'P', 'N'],
    'Test-4': ['A', 'N', 'A']
}

df = pd.DataFrame(data)

# Columns that take part in the similarity calculation (Gender is ignored)
BINARY_COLUMNS = ['Fever', 'Cough', 'Test-1', 'Test-2', 'Test-3', 'Test-4']
# Binary coding as instructed: Y & P = 1, N & A = 0
BINARY_CODES = {'Y': 1, 'P': 1, 'N': 0, 'A': 0}

# Build the binary table as a new frame (no in-place mutation of `df`).
# Series.map is used instead of Series.replace: it does not rely on the
# implicit object->int downcasting that replace warns about in recent pandas,
# and it turns any code missing from BINARY_CODES into NaN rather than
# leaving the raw string in place.
binary_df = (
    df.set_index('Name')[BINARY_COLUMNS]
    .apply(lambda col: col.map(BINARY_CODES))
)

# An unmapped code would otherwise be silently skipped by the == 1 / == 0
# comparisons in the similarity calculation, so fail loudly here instead.
assert binary_df.notna().all().all(), "unexpected code in symptom/test columns"

binary_df = binary_df.astype('int64')
binary_df

In [None]:
# Jaccard similarity function
def jaccard_similarity(row1, row2, zero_division=float('nan')):
    """Return the Jaccard coefficient of two binary (0/1) vectors.

    The coefficient is ``f11 / (f01 + f10 + f11)``, where ``f11`` counts the
    positions at which both vectors are 1, ``f10`` the positions at which only
    ``row1`` is 1, and ``f01`` the positions at which only ``row2`` is 1.
    Positions at which both vectors are 0 do not enter the formula.

    Parameters
    ----------
    row1, row2 : pandas.Series (or any object supporting elementwise ``==``
        and ``&`` with a ``.sum()`` on the result)
        The two vectors to compare; entries are tested against 1 and 0.
    zero_division : float, optional
        Value returned when the denominator is 0, i.e. no position has a 1 in
        either vector and the coefficient is undefined. Defaults to NaN,
        which is what the bare 0/0 division evaluated to, but without the
        NumPy RuntimeWarning.

    Returns
    -------
    float
        The Jaccard coefficient, or ``zero_division`` when it is undefined.
    """
    f11 = ((row1 == 1) & (row2 == 1)).sum()
    f10 = ((row1 == 1) & (row2 == 0)).sum()
    f01 = ((row1 == 0) & (row2 == 1)).sum()

    denominator = f01 + f10 + f11
    if denominator == 0:
        # Neither vector has a 1 anywhere: 0/0 is undefined, so return the
        # caller-chosen sentinel instead of dividing.
        return zero_division
    return f11 / denominator

# Pull each patient's binary vector out of the table by name
jack, mary, jim = (binary_df.loc[patient] for patient in ('Jack', 'Mary', 'Jim'))

# Score every pair of patients
jm, jj, mj = (
    jaccard_similarity(jack, mary),
    jaccard_similarity(jack, jim),
    jaccard_similarity(jim, mary),
)

# Report each coefficient to two decimal places, one pair per line
print(
    f"Jaccard(Jack, Mary) = {jm:.2f}",
    f"Jaccard(Jack, Jim) = {jj:.2f}",
    f"Jaccard(Jim, Mary) = {mj:.2f}",
    sep="\n",
)

### ✅ Results
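- **Jaccard(Jack, Mary)** = 2/3 ≈ 0.67
- **Jaccard(Jack, Jim)** = 1/3 ≈ 0.33
- **Jaccard(Jim, Mary)** = 1/4 = 0.25

The corresponding Jaccard *dissimilarities* (1 − similarity) are 0.33, 0.67, and 0.75, respectively.

Jack and Mary are therefore the most similar pair (they share Fever and Test-1 and differ only on Test-3), while Jim and Mary are the least similar (they share only Fever). Attributes on which both patients score 0 do not affect the coefficient.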