In [None]:
# Import the libraries and dependencies used in this notebook
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from path import Path
from config import db_password
from sqlalchemy import create_engine
from sqlalchemy import inspect
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced
from collections import Counter

In [None]:
# Build the PostgreSQL connection URL (the password comes from the local
# config module), create the engine, and load the whole `machinelearning` table.
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/Arizona_Elections"
engine = create_engine(db_string)
df_voters = pd.read_sql(sql='SELECT * from machinelearning', con=engine)

In [None]:
# Preview the first 10 rows of the loaded table.
df_voters.head(10)

In [None]:
# Drop the columns that are unnecessary as features or contain null/NaN values.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after the columns are already gone no longer
# raises a KeyError.
unused_columns = ['Voter Score', 'voter_id', 'Turnout Score', 'Kids in HH', 'Liberal Ideology']
df_voters = df_voters.drop(columns=unused_columns, errors='ignore')
df_voters


In [None]:
# Convert the target column ("Swing Voter") to Low_Chance / High_Chance labels.
# Only the target column is touched, so a 'True'/'False' string in any other
# column is left alone. Looking up str(value) handles both text values ('True')
# and real booleans (True); every other value passes through unchanged, which
# also makes this cell safe to re-run.
target_labels = {'False': 'Low_Chance', 'True': 'High_Chance'}

df_voters['Swing Voter'] = df_voters['Swing Voter'].map(
    lambda value: target_labels.get(str(value), value)
)

# Renumber the rows 0..n-1 and discard the old index.
df_voters = df_voters.reset_index(drop=True)

df_voters

In [None]:
# Replace the numeric 'Age' column with an age-group code.
# NOTE: this cell overwrites 'Age', so it must run exactly once per freshly
# loaded df_voters; running it again would compare the string codes to numbers.

# Create a list of our conditions (both bounds inclusive, 65+ is open-ended)
conditions = [
    df_voters['Age'].between(18, 24),
    df_voters['Age'].between(25, 34),
    df_voters['Age'].between(35, 44),
    df_voters['Age'].between(45, 54),
    df_voters['Age'].between(55, 64),
    df_voters['Age'] >= 65,
]

# Create a list of the values we want assigned to the conditions
values = ['1', '2', '3', '4', '5', '6']

# Create the new column with np.select, using our lists as arguments.
# Rows matching no condition (age under 18 or missing) get the default. The
# default is given explicitly as the string '0' because np.select's implicit
# integer default 0 has no common dtype with the string choices and raises a
# TypeError on recent NumPy releases; '0' is what older releases produced.
df_voters['Age'] = np.select(conditions, values, default='0')

# Display updated DataFrame
df_voters

In [None]:
# Sanity check of the age encoding: count the rows in each age-group code.
age_group_counts = df_voters['Age'].value_counts()
print(age_group_counts)

In [None]:
# Build the feature matrix: remove the target column, then one-hot encode the
# text label columns, since the model only works with numerical data.
categorical_columns = ["Party", "Sex", "Ethnicity", "Zip"]
X = pd.get_dummies(df_voters.drop(columns="Swing Voter"), columns=categorical_columns)

# Create our target from the "Swing Voter" column.
y = df_voters["Swing Voter"]

X.head()

In [None]:
# Verify that the target was selected correctly by counting each class label.
y.value_counts()

In [None]:
# Create the training and testing samples; the split is stratified on y.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

# Check the class balance of each sample (training first, then testing).
for split_labels in (y_train, y_test):
    print(Counter(split_labels))

## Undersampling using logistic regression

In [None]:
# Randomly undersample the training data and check the resulting class counts.
# Note: despite its name, `ros` holds a RandomUnderSampler.
from imblearn.under_sampling import RandomUnderSampler

ros = RandomUnderSampler(random_state=1)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

Counter(y_resampled)

In [None]:
# Fit a logistic regression classifier on the undersampled training data.
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(
    solver='lbfgs',
    random_state=1,
)
model.fit(X_resampled, y_resampled)

In [None]:
# Predict on the test sample and show the raw confusion matrix
# (actual labels first, predicted labels second).
from sklearn.metrics import confusion_matrix

y_pred = model.predict(X_test)
confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:

# View the balanced accuracy score of the test-sample predictions.
from sklearn.metrics import balanced_accuracy_score

balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Create and print the imbalanced classification report for the predictions.
from imblearn.metrics import classification_report_imbalanced

report = classification_report_imbalanced(y_test, y_pred)
print(report)

In [None]:
# Display the confusion matrix as a labelled DataFrame.
# The class order is taken from the fitted model and passed to confusion_matrix
# explicitly, so the row/column captions always line up with the counts instead
# of relying on a hand-typed label order.
class_labels = list(model.classes_)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Create a DataFrame from the confusion matrix:
# rows are the actual classes, columns are the predicted classes.
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)
cm_df

In [None]:
# Another way to view the results: predicted next to actual label per test row.
predictions = model.predict(X_test)
pd.DataFrame(data={"Prediction": predictions, "Actual": y_test})
