In [4]:
%matploblib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from Matcher import train_parameters, model_parameters, Matcher
import ot

UsageError: Line magic function `%matploblib` not found.


In [None]:
# Columns of 'Fl_Data.csv' that the rest of the notebook works with.
fl_columns = [
    'District', 'County', 'Voters_Age', 'Voters_Gender', 'PID', 'vote08',
    'SR.WHI', 'SR.BLA', 'SR.HIS', 'SR.ASI', 'SR.NAT', 'SR.OTH',
]

# Read only those columns, then discard every row with a missing value
# in any of them.
FlData = pd.read_csv('Fl_Data.csv', usecols=fl_columns).dropna()

### Data Preprocessing

In [None]:
# Gender becomes a binary indicator: 'M' -> 1, 'F' -> 0.
gender_codes = {'M': 1, 'F': 0}
FlData['Voters_Gender'] = FlData['Voters_Gender'].map(gender_codes)

# Min-max scale age so the youngest voter maps to 0 and the oldest to 1.
age = FlData['Voters_Age']
age_min, age_max = age.min(), age.max()
FlData['Voters_Age'] = (age - age_min) / (age_max - age_min)

# One-hot encode PID, naming the indicator columns for codes 0/1/2 as
# Other/Democrat/Republican.
party_indicators = pd.get_dummies(FlData['PID']).rename(
    columns={0: 'Other', 1: 'Democrat', 2: 'Republican'})

# Swap the raw PID column for its one-hot indicator columns.
FlData = FlData.drop(columns='PID').join(party_indicators)


In [None]:
# Fold 'SR.NAT' into 'SR.OTH': the merged flag is 1.0 when the two original
# values sum to more than zero, and 0.0 otherwise.
other_or_native = (FlData['SR.OTH'] + FlData['SR.NAT']) > 0
FlData['SR.OTH'] = other_or_native.astype(float)

# 'SR.NAT' is now redundant, so remove it from the frame.
FlData.drop(columns='SR.NAT', inplace=True)

# Preview the first rows as the cell's output.
FlData.head()

In [None]:
# Display per-column summary statistics of FlData as the cell's output.
FlData.describe()

### Compute Each Ethnicity's Profile

In [None]:
# Suffixes of the 'SR.<code>' indicator columns, one per ethnicity.
ethnicities = ['WHI', 'BLA', 'HIS', 'ASI', 'OTH']

# profile[i] is a 1-D array holding the column-wise mean of every remaining
# feature over the rows whose 'SR.<ethnicities[i]>' flag equals 1.0.
profile = []
for ethnicity in ethnicities:
    members = FlData[FlData['SR.' + ethnicity] == 1.0]
    # 'District' and 'County' are excluded from the profile; drop them before
    # averaging so they never take part in the reduction.
    features = members.drop(columns=['District', 'County'])
    # Use DataFrame.mean(axis=0) explicitly: np.mean(DataFrame) forwards
    # axis=None, which newer pandas reduces to a single scalar.
    # to_numpy() replaces Series.as_matrix(), which pandas no longer provides.
    profile.append(features.mean(axis=0).to_numpy())

In [None]:
# Print the list of per-ethnicity profile arrays.
print(profile)

### Compute Sample Matching Matrix

In [None]:
#           Other Democrat Republican
# White     0.38    0.26     0.35 
# Black     0.29    0.64     0.05
# Hispanic  0.50    0.32     0.13
# Asian     0.46    0.36     0.17
# Other     0.49    0.32     0.18


# Demographic of Florida
# White:     60.1%
# Black:     17.0%
# Hispanic:  20.0%
# Asian:     2.1%
# Other:     0.8%

# Population share of each ethnicity, in the same row order as pi_sample.
percentages = [0.601, 0.17, 0.2, 0.021, 0.008]

# Party-affiliation rates per ethnicity (one row each); the three columns are
# Other, Democrat, Republican.
pi_sample = np.array([
    [0.38, 0.26, 0.35],
    [0.29, 0.64, 0.05],
    [0.50, 0.32, 0.13],
    [0.46, 0.36, 0.17],
    [0.49, 0.32, 0.18],
])

# Rescale every row to sum to 1, then weight it by that ethnicity's population
# share, so each row of the result sums to the corresponding percentage.
pi_sample /= pi_sample.sum(axis=1, keepdims=True)
pi_sample *= np.array(percentages)[:, np.newaxis]

In [None]:
# Feature matrices handed to the matcher: each column of U is one entry of
# `profile`; V is the 3x3 identity.
U = np.array(profile).T
V = np.eye(3)

# Row/column counts of both feature matrices, and the factor rank r.
p, m = U.shape
q, n = V.shape
r = 5

# Seeded random factors G0 (r x p) and D0 (r x q); their product G0^T D0 is
# the p x q initial value passed to the model as A0.
seed = 3
rng = np.random.RandomState(seed)
G0 = rng.rand(r, p)
D0 = rng.rand(r, q)
A0 = G0.T.dot(D0)

model = Matcher(pi_sample=pi_sample, U0=U, V0=V, r=r)

# NOTE(review): the meaning of each hyperparameter is defined in the Matcher
# module, which is not visible here -- confirm there before tuning.
train_param = train_parameters(
    max_outer_iteration=200,
    max_inner_iteration=20,
    learning_rate=1,
)
model_param = model_parameters(
    A0=A0,
    gamma=0.2,
    const=1,
    degree=2,
    lam=1.0,
    lambda_mu=1.0,
    lambda_nu=1.0,
    delta=0.005,
)

# Run the fit; A is plotted as the interaction matrix in the figure code below.
C, A, pi = model.riot(model_param=model_param, train_param=train_param)

# Tick positions and labels for the three axes used when plotting:
# x -> party affiliation, y -> ethnicity, z -> profile features.
xtick = range(0, 3)
xlabel = ['Other', 'Democrat', 'Republican']
ytick = range(0, 5)
ylabel = ['White', 'Black', 'Hispanic', 'Asian', 'Other']
ztick = range(0, 11)
zlabel = ['Age', 'Gender', 'Vote08', 'White', 'Black', 'Hispanic', 'Asian', 'Other Race', 'Other', 'Democrat', 'Republican']

# Heat map of A transposed: the figure's rows are labelled with the party
# names and its columns with the profile features.
# NOTE(review): zlabel assumes the feature order of `profile` matches this
# list -- confirm against the column order of FlData.
f, ax = plt.subplots(1, 1, figsize=(16,3))
cax = ax.imshow(A.T, cmap=plt.cm.Greens)
ax.title.set_text('learned interaction matrix')  # fixed typo: was 'leanrned'
ax.set_xticks(ztick)
ax.set_yticks(xtick)
ax.set_xticklabels(zlabel)
ax.set_yticklabels(xlabel)
# Trailing semicolon keeps the Colorbar repr out of the cell output.
f.colorbar(cax, ax=ax);