In [2]:
%matplotlib widget
import numpy as np
import pandas as pd
import scipy 
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

np.random.seed(0)

In [3]:
demographics_df = (
    pd.read_csv("demographics.csv")
    .assign(gender_ratio=lambda x: x.male / x.female)
    .drop(columns=["male", "female"])
)
features = [
    "hispanic_latino",
    "white",
    "black",
    "native_american",
    "asian",
    "nhpi",
    "other_race",
    "two_or_more",
    "income",
    "foreign_born",
    "gender_ratio",
    "bachelors",
    "median_age"
]
X = demographics_df[features]
y = demographics_df["margin"]
features


['hispanic_latino',
 'white',
 'black',
 'native_american',
 'asian',
 'nhpi',
 'other_race',
 'two_or_more',
 'income',
 'foreign_born',
 'gender_ratio',
 'bachelors',
 'median_age']

In [7]:
X_standardized = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_standardized, y, random_state=0)
model= Ridge().fit(X_train, y_train)

y_test_pred = model.predict(X_test)
y_pred = model.predict(X_standardized)

r_sqr_train = model.score(X_train, y_train)
r_sqr_test = model.score(X_test, y_test)
r_sqr_all = model.score(X_standardized, y)

print("R-squared (Train Set): {}".format(r_sqr_train))
print("R-squared (Test Set): {}".format(r_sqr_test))
print("R-squared (All): {}".format(r_sqr_all))

R-squared (Train Set): 0.7147525849351544
R-squared (Test Set): 0.5408435716533544
R-squared (All): 0.6712266790732592


In [9]:
linear_regression_df = demographics_df.assign(pred_margin=y_pred)
linear_regression_df[linear_regression_df.state == "Massachusetts"]

Unnamed: 0,state,district,pop,hispanic_latino,white,black,native_american,asian,nhpi,other_race,...,income,native_born,foreign_born,biden_pct,trump_pct,margin,bachelors,median_age,gender_ratio,pred_margin
190,Massachusetts,1,727193,0.187,0.713,0.056,0.001,0.022,0.0,0.002,...,61444,0.925,0.075,0.611,0.369,0.242,0.306,41.5,0.930502,-0.07056
191,Massachusetts,2,749700,0.103,0.76,0.047,0.001,0.057,0.001,0.003,...,74432,0.875,0.125,0.618,0.36,0.258,0.401,39.4,0.953125,0.02703
192,Massachusetts,3,768471,0.198,0.658,0.034,0.001,0.081,0.0,0.005,...,83233,0.817,0.183,0.634,0.346,0.288,0.39,39.1,0.976285,0.05922
193,Massachusetts,4,760391,0.05,0.814,0.03,0.0,0.067,0.0,0.007,...,104781,0.864,0.136,0.645,0.337,0.308,0.519,41.2,0.934236,0.147541
194,Massachusetts,5,771266,0.098,0.687,0.048,0.001,0.128,0.0,0.009,...,104727,0.756,0.244,0.745,0.239,0.506,0.592,38.9,0.949318,0.363062
195,Massachusetts,6,773421,0.1,0.799,0.032,0.001,0.042,0.0,0.003,...,94965,0.861,0.139,0.626,0.356,0.27,0.463,42.9,0.934236,0.030115
196,Massachusetts,7,808106,0.222,0.403,0.227,0.002,0.106,0.001,0.008,...,73201,0.679,0.321,0.853,0.133,0.72,0.466,32.0,0.945525,0.554172
197,Massachusetts,8,770058,0.061,0.711,0.099,0.002,0.082,0.0,0.017,...,97583,0.818,0.182,0.662,0.322,0.34,0.492,39.4,0.934236,0.264573
198,Massachusetts,9,744397,0.06,0.847,0.027,0.002,0.015,0.0,0.017,...,76031,0.904,0.096,0.579,0.403,0.176,0.365,46.0,0.937984,-0.014205
