In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt

# Load data from Excel file
data = pd.read_excel("./data/Census_Data_2008.xlsx")

# Features and target variable
X = data[['Income to Poverty Level Ratio', 'Median House Value']]
y = data['Vote Skew']

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating the KNN regressor model
knn_regressor = KNeighborsRegressor(n_neighbors=25)

# Fitting the model
knn_regressor.fit(X_train, y_train)

# Making predictions
predictions = knn_regressor.predict(X_test)

# Evaluating the model
rmse = sqrt(mean_squared_error(y_test, predictions))
print(f'Root Mean Squared Error: {rmse}')

# Tallying up the predicted vote skew values
total_predicted_skew = predictions.sum()
print(f'Total Predicted Vote Skew: {total_predicted_skew}')

# Displaying some predictions
results = pd.DataFrame({'Actual': y_test, 'Predicted': predictions})
print(results.head())

Root Mean Squared Error: 6091.4508124237345
Total Predicted Vote Skew: 35831.67999999999
     Actual  Predicted
127   -2078    2516.00
66    -1919    2941.84
104     787   -1198.16
19     -747   -1160.36
42    17680    2685.20


In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score

# Load data from Excel file
data = pd.read_excel("./data/Census_Data_2008.xlsx")

# Features and target variable
X = data[['Income to Poverty Level Ratio', 'Median House Value']]
y = data['Vote Skew']

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating the KNN regressor model
knn_regressor = KNeighborsRegressor(n_neighbors=25)

# Fitting the model
knn_regressor.fit(X_train, y_train)

# Making predictions
predictions = knn_regressor.predict(X_test)

# Calculating R-squared
r_squared = r2_score(y_test, predictions)
print(f'R-squared: {r_squared}')

# Displaying some predictions
results = pd.DataFrame({'Actual': y_test, 'Predicted': predictions})
print(results.head())

R-squared: 0.03998429015408411
     Actual  Predicted
127   -2078    2516.00
66    -1919    2941.84
104     787   -1198.16
19     -747   -1160.36
42    17680    2685.20
