# Model Evaluation

In [1]:
# Load libraries
import numpy as np
import matplotlib.pylab as plt
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import binarize

import sys
import os

In [2]:
# Set working directory to the project root (safe to re-run).
# The cells below import the `resources` package and read from `data/` relative
# to the working directory. An unguarded os.chdir('../') climbs one directory
# further on every execution, so only move up when `resources` is not already
# visible from the current directory.
# NOTE(review): assumes the notebook's own folder has no `resources` directory.
if not os.path.isdir('resources'):
    os.chdir('../')

In [3]:
import resources.data_preprocessing as dp
import resources.split_normalization as sn
import resources.dummies_colinearity as dc

In [4]:
# Locations of the two input CSV files, both under <working dir>/data
file_dir = os.path.abspath('')
face_cov_filepath, demographics_filepath = (
    os.path.join(file_dir, "data", csv_name)
    for csv_name in ("face_covering.csv", "demographics_2021.csv")
)

In [5]:
# Load data
# Read the demographics CSV and the face-covering referendum CSV through the
# project's preprocessing helpers (paths were built in the previous cell).
demographics = dp.load_demographics(demographics_filepath)
face_covering = dp.load_referendum(face_cov_filepath)

# Merge data
# Combine both tables into a single table. The join key and join type are
# decided inside dp.merge_demographics_referendum — TODO confirm in that helper.
merged_data = dp.merge_demographics_referendum(demographics, face_covering)

In [6]:
# Columns to discard before modelling
rm_attr = ["id", "municipality_dem", "canton_id", "municipality_ref", "yes_count", "no_count", "yes_perc"]
# Categorical attributes that get dummy columns
dummy_cols = ["canton"]

# Drop the unwanted columns, then hand the result to the dummy-column helper
merged_data = (
    merged_data
    .drop(columns=rm_attr)
    .pipe(dc.add_dummies, dummy_cols)
)

In [7]:
# Separate X and y
# Name of the target column; every other column is treated as a feature.
y_attr = "yes"
# Feature names, in the same column order as the X matrix built below.
# (list.remove() mutates in place and returns None, so assigning its result
# would leave X_attr as None; build the list explicitly instead.)
X_attr = [col for col in merged_data.columns if col != y_attr]
# Feature matrix and target vector as plain arrays.
X = merged_data.drop(columns=[y_attr]).values
y = merged_data[y_attr].values

In [8]:
# Split data into train, development and test
# The result is unpacked in the order train, test, dev (features first, then
# targets) — presumably matching sn.split's return order; verify in the helper.
# Note that the shape summary below prints train, dev, test instead.
# NOTE(review): no seed is passed here — confirm sn.split is deterministic so
# the partition is reproducible across kernel restarts.
X_train, X_test, X_dev, y_train, y_test, y_dev = sn.split(X, y)
print("X_train: ", X_train.shape, "X_dev: ", X_dev.shape, "X_test: ", X_test.shape)

X_train:  (1737, 73) X_dev:  (218, 73) X_test:  (217, 73)


In [9]:
# Scale attributes
# Pass the three feature matrices through the project's min-max helper; the
# target vectors are not touched here.
# NOTE(review): presumably the scaling range is fitted on X_train only and then
# applied to X_test / X_dev — confirm in sn.min_max_scaling.
# The inputs are overwritten with the helper's outputs, so re-running this cell
# without re-running the split feeds already-scaled data back in.
X_train, X_test, X_dev = sn.min_max_scaling(X_train, X_test, X_dev)

In [23]:
# MODELS