<a href="https://colab.research.google.com/github/rahulamatapu/Community-Perspectives/blob/master/ML_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## XG Boost

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import ndcg_score
import csv
from io import StringIO
import ast

# Read the file content
with open('community_focused_unbiased_data.csv', 'r') as file:
    content = file.read()

# Use the csv module to handle proper parsing of nested commas in ordered_pair column
data = list(csv.reader(StringIO(content)))

# Extract header and data
header = data[0]
data = data[1:]

# Create DataFrame
df = pd.DataFrame(data, columns=header)

# Strip whitespace from column names
df.columns = df.columns.str.strip()

# Convert numeric columns
df['population'] = pd.to_numeric(df['population'])
df['social_vulnerability_score'] = pd.to_numeric(df['social_vulnerability_score'])
df['access_to_resources'] = pd.to_numeric(df['access_to_resources'])

# Parse the ordered_pair string into a list
df['ordered_pair'] = df['ordered_pair'].apply(ast.literal_eval)

# Create label based on ordered_pair
df['label'] = (df['ordered_pair'].apply(lambda x: x[0]) == df['option1']).astype(int)

# Function to encode repair options
def encode_repair_option(option):
    action = 1 if 'Water' in option else 0
    facility = 2 if 'School' in option else (1 if 'Residential' in option else 0)
    community = int(option.split()[-1]) - 1
    return action, facility, community

# Encode options
df[['action_1', 'facility_1', 'community_1']] = df['option1'].apply(encode_repair_option).tolist()
df[['action_2', 'facility_2', 'community_2']] = df['option2'].apply(encode_repair_option).tolist()

# Prepare features
categorical_features = ['community', 'action_1', 'facility_1', 'community_1', 'action_2', 'facility_2', 'community_2']
numerical_features = ['population', 'social_vulnerability_score', 'access_to_resources']

# Encode categorical features
le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

# Normalize numerical features
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Prepare features and target
features = categorical_features + numerical_features
X = df[features]
y = df['label']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create DMatrix for XGBoost
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Set parameters
params = {
    'objective': 'binary:logistic',
    'eta': 0.1,
    'max_depth': 6,
    'min_child_weight': 1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'tree_method': 'hist'
}

# Train the model
model = xgb.train(
    params,
    dtrain,
    num_boost_round=100,
    evals=[(dtest, 'eval')],
    early_stopping_rounds=10,
    verbose_eval=True
)

# Make predictions
y_pred = model.predict(dtest)

# Evaluate the model
from sklearn.metrics import accuracy_score, roc_auc_score
accuracy = accuracy_score(y_test, y_pred > 0.5)
auc = roc_auc_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(f"AUC: {auc}")

# Feature importance
print("\nFeature importance:")
importance = model.get_score(importance_type='gain')
for feature, score in sorted(importance.items(), key=lambda x: x[1], reverse=True):
    print(f"{feature}: {score}")

# Function to get ranking for all repair options
def get_overall_ranking(model, df, features):
    all_options = set(df['option1'].unique()) | set(df['option2'].unique())
    option_scores = {option: 0 for option in all_options}

    for _, row in df.iterrows():
        context = row[['community', 'population', 'social_vulnerability_score', 'access_to_resources']].values
        for option in all_options:
            action, facility, community = encode_repair_option(option)
            feature_values = np.concatenate([context, [action, facility, community, 0, 0, 0]])
            feature_dict = dict(zip(features, feature_values))
            score = model.predict(xgb.DMatrix(pd.DataFrame([feature_dict])))[0]
            option_scores[option] += score

    return sorted(option_scores.items(), key=lambda x: x[1], reverse=True)

# Get overall ranking
overall_ranking = get_overall_ranking(model, df, features)

print("\nOverall Rankings:")
for rank, (option, score) in enumerate(overall_ranking, 1):
    print(f"{rank}. {option} (Score: {score:.2f})")

[0]	eval-logloss:0.62777
[1]	eval-logloss:0.60607
[2]	eval-logloss:0.59064
[3]	eval-logloss:0.57637
[4]	eval-logloss:0.56190
[5]	eval-logloss:0.55219
[6]	eval-logloss:0.54112
[7]	eval-logloss:0.53236
[8]	eval-logloss:0.52409
[9]	eval-logloss:0.52107
[10]	eval-logloss:0.51364
[11]	eval-logloss:0.51038
[12]	eval-logloss:0.50555
[13]	eval-logloss:0.50064
[14]	eval-logloss:0.49641
[15]	eval-logloss:0.49419
[16]	eval-logloss:0.49069
[17]	eval-logloss:0.48804
[18]	eval-logloss:0.48642
[19]	eval-logloss:0.48520
[20]	eval-logloss:0.48347
[21]	eval-logloss:0.48130
[22]	eval-logloss:0.48113
[23]	eval-logloss:0.48017
[24]	eval-logloss:0.47952
[25]	eval-logloss:0.47869
[26]	eval-logloss:0.47807
[27]	eval-logloss:0.47824
[28]	eval-logloss:0.47804
[29]	eval-logloss:0.47756
[30]	eval-logloss:0.47682
[31]	eval-logloss:0.47662
[32]	eval-logloss:0.47651
[33]	eval-logloss:0.47630
[34]	eval-logloss:0.47541
[35]	eval-logloss:0.47533
[36]	eval-logloss:0.47562
[37]	eval-logloss:0.47567
[38]	eval-logloss:0.47

## XG Boost with Tuning

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
import xgboost as xgb
from sklearn.metrics import accuracy_score, roc_auc_score
import csv
from io import StringIO
import ast

# Read and preprocess data
with open('community_focused_unbiased_data.csv', 'r') as file:
    content = file.read()

data = list(csv.reader(StringIO(content)))
header, data = data[0], data[1:]
df = pd.DataFrame(data, columns=header)
df.columns = df.columns.str.strip()

df['population'] = pd.to_numeric(df['population'])
df['social_vulnerability_score'] = pd.to_numeric(df['social_vulnerability_score'])
df['access_to_resources'] = pd.to_numeric(df['access_to_resources'])

df['ordered_pair'] = df['ordered_pair'].apply(ast.literal_eval)
df['label'] = (df['ordered_pair'].apply(lambda x: x[0]) == df['option1']).astype(int)

def encode_repair_option(option):
    action = 1 if 'Water' in option else 0
    facility = 2 if 'School' in option else (1 if 'Residential' in option else 0)
    community = int(option.split()[-1]) - 1
    return action, facility, community

df[['action_1', 'facility_1', 'community_1']] = df['option1'].apply(encode_repair_option).tolist()
df[['action_2', 'facility_2', 'community_2']] = df['option2'].apply(encode_repair_option).tolist()

categorical_features = ['community', 'action_1', 'facility_1', 'community_1', 'action_2', 'facility_2', 'community_2']
numerical_features = ['population', 'social_vulnerability_score', 'access_to_resources']

le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

features = categorical_features + numerical_features
X = df[features]
y = df['label']

# Feature engineering
X['pop_vulnerability'] = X['population'] * X['social_vulnerability_score']
X['action1_vulnerability'] = X['action_1'] * X['social_vulnerability_score']
X['action2_vulnerability'] = X['action_2'] * X['social_vulnerability_score']
features = list(X.columns)
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter tuning
param_grid = {
    'max_depth': [3, 4, 5, 6],
    'min_child_weight': [1, 2, 3],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9]
}

xgb_model = xgb.XGBClassifier(objective='binary:logistic', n_estimators=100, learning_rate=0.1)
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=3, scoring='roc_auc', n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best AUC:", grid_search.best_score_)

# Evaluate on test set
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_auc = roc_auc_score(y_test, best_model.predict_proba(X_test)[:, 1])

print(f"Test Accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")

# Analyze misclassifications
misclassified = X_test[y_test != y_pred]
print("\nMisclassified samples:")
print(misclassified.describe())

# Get feature importance
importance = best_model.feature_importances_
feature_importance = sorted(zip(features, importance), key=lambda x: x[1], reverse=True)

print("\nFeature Importance:")
for feature, importance in feature_importance:
    print(f"{feature}: {importance:.4f}")

# Function to get ranking for all repair options
def get_overall_ranking(model, df, features):
    all_options = set(df['option1'].unique()) | set(df['option2'].unique())
    option_scores = {option: 0 for option in all_options}

    for _, row in df.iterrows():
        context = row[['community', 'population', 'social_vulnerability_score', 'access_to_resources']].values
        for option in all_options:
            action, facility, community = encode_repair_option(option)
            base_features = np.concatenate([context, [action, facility, community, 0, 0, 0]])

            # Calculate additional features
            pop_vulnerability = base_features[1] * base_features[2]  # population * social_vulnerability_score
            action1_vulnerability = action * base_features[2]  # action * social_vulnerability_score
            action2_vulnerability = 0 * base_features[2]  # We use 0 for action_2 as it's not the current option

            feature_values = np.concatenate([base_features, [pop_vulnerability, action1_vulnerability, action2_vulnerability]])
            feature_dict = dict(zip(features, feature_values))
            score = model.predict_proba(pd.DataFrame([feature_dict]))[0, 1]
            option_scores[option] += score

    return sorted(option_scores.items(), key=lambda x: x[1], reverse=True)

# Get overall ranking with the best model
overall_ranking = get_overall_ranking(best_model, df, X.columns)

print("\nOverall Rankings (Best Model):")
for rank, (option, score) in enumerate(overall_ranking, 1):
    print(f"{rank}. {option} (Score: {score:.2f})")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['pop_vulnerability'] = X['population'] * X['social_vulnerability_score']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['action1_vulnerability'] = X['action_1'] * X['social_vulnerability_score']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['action2_vulnerability'] = X['action_2'] * X['socia

Best parameters: {'colsample_bytree': 0.9, 'max_depth': 6, 'min_child_weight': 3, 'subsample': 0.9}
Best AUC: 0.8243760002517487
Test Accuracy: 0.7525
Test AUC: 0.8284752747252747

Misclassified samples:
       community   action_1  facility_1  community_1   action_2  facility_2  \
count  99.000000  99.000000   99.000000    99.000000  99.000000   99.000000   
mean    0.808081   0.525253    1.131313     0.434343   0.808081    1.151515   
std     0.804030   0.501903    0.527730     0.608806   0.395814    0.541420   
min     0.000000   0.000000    0.000000     0.000000   0.000000    0.000000   
25%     0.000000   0.000000    1.000000     0.000000   1.000000    1.000000   
50%     1.000000   1.000000    1.000000     0.000000   1.000000    1.000000   
75%     1.000000   1.000000    1.000000     1.000000   1.000000    1.000000   
max     2.000000   1.000000    2.000000     2.000000   1.000000    2.000000   

       community_2  population  social_vulnerability_score  \
count    99.000000   9

In [3]:
!pip install trueskill

Collecting trueskill
  Downloading trueskill-0.4.5.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: trueskill
  Building wheel for trueskill (setup.py) ... [?25l[?25hdone
  Created wheel for trueskill: filename=trueskill-0.4.5-py3-none-any.whl size=18048 sha256=3a34ecc320e2072fd9e914d8e2237b97e8f9a64d13d7951a3649d1494bb03a63
  Stored in directory: /root/.cache/pip/wheels/b9/4f/29/c79f0a2956775524c7a23638ac2b6fbb516c680f8e5eed9b53
Successfully built trueskill
Installing collected packages: trueskill
Successfully installed trueskill-0.4.5


## PageRank and TrueSkill


In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
import xgboost as xgb
from sklearn.metrics import accuracy_score, roc_auc_score
import csv
from io import StringIO
import ast
import networkx as nx
from trueskill import Rating, quality_1vs1, rate_1vs1
from scipy.stats import kendalltau

# Read and preprocess data
with open('community_focused_unbiased_data.csv', 'r') as file:
    content = file.read()

data = list(csv.reader(StringIO(content)))
header, data = data[0], data[1:]
df = pd.DataFrame(data, columns=header)
df.columns = df.columns.str.strip()

df['population'] = pd.to_numeric(df['population'])
df['social_vulnerability_score'] = pd.to_numeric(df['social_vulnerability_score'])
df['access_to_resources'] = pd.to_numeric(df['access_to_resources'])

df['ordered_pair'] = df['ordered_pair'].apply(ast.literal_eval)
df['label'] = (df['ordered_pair'].apply(lambda x: x[0]) == df['option1']).astype(int)

def encode_repair_option(option):
    action = 1 if 'Water' in option else 0
    facility = 2 if 'School' in option else (1 if 'Residential' in option else 0)
    community = int(option.split()[-1]) - 1
    return action, facility, community

df[['action_1', 'facility_1', 'community_1']] = df['option1'].apply(encode_repair_option).tolist()
df[['action_2', 'facility_2', 'community_2']] = df['option2'].apply(encode_repair_option).tolist()

categorical_features = ['community', 'action_1', 'facility_1', 'community_1', 'action_2', 'facility_2', 'community_2']
numerical_features = ['population', 'social_vulnerability_score', 'access_to_resources']

le = LabelEncoder()
for feature in categorical_features:
    df[feature] = le.fit_transform(df[feature])

scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

features = categorical_features + numerical_features
X = df[features]
y = df['label']

# Feature engineering
X['pop_vulnerability'] = X['population'] * X['social_vulnerability_score']
X['action1_vulnerability'] = X['action_1'] * X['social_vulnerability_score']
X['action2_vulnerability'] = X['action_2'] * X['social_vulnerability_score']
features = list(X.columns)

# Train XGBoost model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_model = xgb.XGBClassifier(objective='binary:logistic', n_estimators=100, learning_rate=0.1)
xgb_model.fit(X_train, y_train)

# PageRank ranking
def pagerank_ranking(df):
    G = nx.DiGraph()

    for _, row in df.iterrows():
        option1 = row['option1']
        option2 = row['option2']
        preference = row['label']

        if preference == 1:
            G.add_edge(option1, option2)
        else:
            G.add_edge(option2, option1)

    pagerank = nx.pagerank(G)
    return sorted(pagerank.items(), key=lambda x: x[1], reverse=True)

# TrueSkill ranking
def trueskill_ranking(df):
    ratings = {}

    for _, row in df.iterrows():
        option1 = row['option1']
        option2 = row['option2']
        preference = row['label']

        if option1 not in ratings:
            ratings[option1] = Rating()
        if option2 not in ratings:
            ratings[option2] = Rating()

        if preference == 1:
            ratings[option1], ratings[option2] = rate_1vs1(ratings[option1], ratings[option2])
        else:
            ratings[option2], ratings[option1] = rate_1vs1(ratings[option2], ratings[option1])

    return sorted(ratings.items(), key=lambda x: x[1].mu, reverse=True)

# XGBoost ranking
def get_xgboost_ranking(model, df, features):
    all_options = set(df['option1'].unique()) | set(df['option2'].unique())
    option_scores = {option: 0 for option in all_options}

    for _, row in df.iterrows():
        context = row[['community', 'population', 'social_vulnerability_score', 'access_to_resources']].values
        for option in all_options:
            action, facility, community = encode_repair_option(option)
            base_features = np.concatenate([context, [action, facility, community, 0, 0, 0]])

            pop_vulnerability = base_features[1] * base_features[2]
            action1_vulnerability = action * base_features[2]
            action2_vulnerability = 0 * base_features[2]

            feature_values = np.concatenate([base_features, [pop_vulnerability, action1_vulnerability, action2_vulnerability]])
            feature_dict = dict(zip(features, feature_values))
            score = model.predict_proba(pd.DataFrame([feature_dict]))[0, 1]
            option_scores[option] += score

    return sorted(option_scores.items(), key=lambda x: x[1], reverse=True)

# Calculate rankings
pagerank_results = pagerank_ranking(df)
trueskill_results = trueskill_ranking(df)
xgboost_results = get_xgboost_ranking(xgb_model, df, features)

# Print rankings
print("PageRank Rankings:")
for rank, (option, score) in enumerate(pagerank_results, 1):
    print(f"{rank}. {option} (Score: {score:.4f})")

print("\nTrueSkill Rankings:")
for rank, (option, rating) in enumerate(trueskill_results, 1):
    print(f"{rank}. {option} (Score: {rating.mu:.2f} ± {rating.sigma:.2f})")

print("\nXGBoost Rankings:")
for rank, (option, score) in enumerate(xgboost_results, 1):
    print(f"{rank}. {option} (Score: {score:.4f})")

# Calculate Kendall's Tau correlations
pagerank_order = [option for option, _ in pagerank_results]
trueskill_order = [option for option, _ in trueskill_results]
xgboost_order = [option for option, _ in xgboost_results]

tau_pr_ts, p_value_pr_ts = kendalltau(pagerank_order, trueskill_order)
tau_pr_xgb, p_value_pr_xgb = kendalltau(pagerank_order, xgboost_order)
tau_ts_xgb, p_value_ts_xgb = kendalltau(trueskill_order, xgboost_order)

print(f"\nKendall's Tau correlation between PageRank and TrueSkill rankings: {tau_pr_ts:.4f} (p-value: {p_value_pr_ts:.4f})")
print(f"Kendall's Tau correlation between PageRank and XGBoost rankings: {tau_pr_xgb:.4f} (p-value: {p_value_pr_xgb:.4f})")
print(f"Kendall's Tau correlation between TrueSkill and XGBoost rankings: {tau_ts_xgb:.4f} (p-value: {p_value_ts_xgb:.4f})")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['pop_vulnerability'] = X['population'] * X['social_vulnerability_score']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['action1_vulnerability'] = X['action_1'] * X['social_vulnerability_score']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['action2_vulnerability'] = X['action_2'] * X['socia

PageRank Rankings:
1. Repair Water School in Community 3 (Score: 0.1180)
2. Repair Water Residential in Community 3 (Score: 0.1180)
3. Repair Water Residential in Community 2 (Score: 0.1180)
4. Repair Power Residential in Community 3 (Score: 0.1180)
5. Repair Power Residential in Community 1 (Score: 0.1164)
6. Repair Power Residential in Community 2 (Score: 0.1164)
7. Repair Water Commercial in Community 2 (Score: 0.1143)
8. Repair Water Residential in Community 1 (Score: 0.1033)
9. Repair Water School in Community 1 (Score: 0.0778)

TrueSkill Rankings:
1. Repair Water School in Community 1 (Score: 29.35 ± 0.83)
2. Repair Water Residential in Community 2 (Score: 28.99 ± 0.82)
3. Repair Water Residential in Community 1 (Score: 28.70 ± 0.82)
4. Repair Water School in Community 3 (Score: 28.19 ± 0.81)
5. Repair Power Residential in Community 2 (Score: 25.72 ± 0.80)
6. Repair Water Residential in Community 3 (Score: 24.29 ± 0.79)
7. Repair Power Residential in Community 1 (Score: 22.55 ± 0

## RankNet and ListNet

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy.stats import kendalltau

feature_names = ['community', 'population', 'social_vulnerability_score', 'access_to_resources',
                 'action_1', 'facility_1', 'community_1', 'action_2', 'facility_2', 'community_2']

# Prepare data
def prepare_data(df):
    X = df[feature_names]
    y = df['label']
    return X.values, y.values

X, y = prepare_data(df)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train_tensor = torch.FloatTensor(X_train_scaled)
y_train_tensor = torch.FloatTensor(y_train)
X_test_tensor = torch.FloatTensor(X_test_scaled)
y_test_tensor = torch.FloatTensor(y_test)

# Dataset and DataLoader
class PairwiseDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = PairwiseDataset(X_train_tensor, y_train_tensor)
test_dataset = PairwiseDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# RankNet Model
class RankNet(nn.Module):
    def __init__(self, input_size):
        super(RankNet, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.net(x)

# ListNet Model
class ListNet(nn.Module):
    def __init__(self, input_size):
        super(ListNet, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.net(x)

# Training function
def train_model(model, train_loader, criterion, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for X, y in train_loader:
            optimizer.zero_grad()
            outputs = model(X).squeeze()
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

# Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X, y in test_loader:
            outputs = model(X).squeeze()
            predicted = (outputs > 0.5).float()
            total += y.size(0)
            correct += (predicted == y).sum().item()
    return correct / total

# Train and evaluate RankNet
ranknet = RankNet(X_train_scaled.shape[1])
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(ranknet.parameters(), lr=0.001)

print("Training RankNet...")
train_model(ranknet, train_loader, criterion, optimizer, epochs=50)

ranknet_accuracy = evaluate_model(ranknet, test_loader)
print(f"RankNet Accuracy: {ranknet_accuracy:.4f}")

# Train and evaluate ListNet
listnet = ListNet(X_train_scaled.shape[1])
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(listnet.parameters(), lr=0.001)

print("\nTraining ListNet...")
train_model(listnet, train_loader, criterion, optimizer, epochs=50)

listnet_accuracy = evaluate_model(listnet, test_loader)
print(f"ListNet Accuracy: {listnet_accuracy:.4f}")

# Function to get ranking for all repair options
def get_nn_ranking(model, scaler, df, feature_names):
    all_options = set(df['option1'].unique()) | set(df['option2'].unique())
    option_scores = {option: 0 for option in all_options}

    for _, row in df.iterrows():
        context = row[['community', 'population', 'social_vulnerability_score', 'access_to_resources']].values
        for option in all_options:
            action, facility, community = encode_repair_option(option)
            feature_values = np.concatenate([context, [action, facility, community, 0, 0, 0]])
            feature_values_scaled = scaler.transform([feature_values])
            feature_values_tensor = torch.FloatTensor(feature_values_scaled)
            score = model(feature_values_tensor).item()
            option_scores[option] += score

    return sorted(option_scores.items(), key=lambda x: x[1], reverse=True)

# Get and print rankings
ranknet_rankings = get_nn_ranking(ranknet, scaler, df, feature_names)
listnet_rankings = get_nn_ranking(listnet, scaler, df, feature_names)

print("\nRankNet Rankings:")
for rank, (option, score) in enumerate(ranknet_rankings, 1):
    print(f"{rank}. {option} (Score: {score:.4f})")

print("\nListNet Rankings:")
for rank, (option, score) in enumerate(listnet_rankings, 1):
    print(f"{rank}. {option} (Score: {score:.4f})")

# Compare with previous rankings
ranknet_order = [option for option, _ in ranknet_rankings]
listnet_order = [option for option, _ in listnet_rankings]

for name, order in [("RankNet", ranknet_order), ("ListNet", listnet_order)]:
    tau_pr, p_value_pr = kendalltau(order, pagerank_order)
    tau_ts, p_value_ts = kendalltau(order, trueskill_order)
    tau_xgb, p_value_xgb = kendalltau(order, xgboost_order)

    print(f"\n{name} correlations:")
    print(f"Kendall's Tau correlation with PageRank: {tau_pr:.4f} (p-value: {p_value_pr:.4f})")
    print(f"Kendall's Tau correlation with TrueSkill: {tau_ts:.4f} (p-value: {p_value_ts:.4f})")
    print(f"Kendall's Tau correlation with XGBoost: {tau_xgb:.4f} (p-value: {p_value_xgb:.4f})")

Training RankNet...
Epoch 1/50, Loss: 0.6495
Epoch 2/50, Loss: 0.5885
Epoch 3/50, Loss: 0.5523
Epoch 4/50, Loss: 0.5362
Epoch 5/50, Loss: 0.5244
Epoch 6/50, Loss: 0.5164
Epoch 7/50, Loss: 0.5109
Epoch 8/50, Loss: 0.5036
Epoch 9/50, Loss: 0.4988
Epoch 10/50, Loss: 0.4951
Epoch 11/50, Loss: 0.4904
Epoch 12/50, Loss: 0.4884
Epoch 13/50, Loss: 0.4853
Epoch 14/50, Loss: 0.4827
Epoch 15/50, Loss: 0.4813
Epoch 16/50, Loss: 0.4768
Epoch 17/50, Loss: 0.4772
Epoch 18/50, Loss: 0.4735
Epoch 19/50, Loss: 0.4733
Epoch 20/50, Loss: 0.4708
Epoch 21/50, Loss: 0.4693
Epoch 22/50, Loss: 0.4693
Epoch 23/50, Loss: 0.4676
Epoch 24/50, Loss: 0.4679
Epoch 25/50, Loss: 0.4646
Epoch 26/50, Loss: 0.4659
Epoch 27/50, Loss: 0.4660
Epoch 28/50, Loss: 0.4645
Epoch 29/50, Loss: 0.4636
Epoch 30/50, Loss: 0.4628
Epoch 31/50, Loss: 0.4625
Epoch 32/50, Loss: 0.4623
Epoch 33/50, Loss: 0.4609
Epoch 34/50, Loss: 0.4622
Epoch 35/50, Loss: 0.4595
Epoch 36/50, Loss: 0.4592
Epoch 37/50, Loss: 0.4611
Epoch 38/50, Loss: 0.4598
E