### Kaleigh Strohl | Problem Set 5: Matching

In [141]:
import numpy as np
import pandas as pd
import geopy
from geopy import Point
import scipy.optimize as opt

In [142]:
df = pd.read_csv("radio_merger_data.csv")
df.head(n=10)

Unnamed: 0,year,buyer_id,target_id,buyer_lat,buyer_long,target_lat,target_long,price,hhi_target,num_stations_buyer,population_target,corp_owner_buyer
0,2007,1,1,46.592512,-92.549564,44.375073,-92.039543,157763.91,80,3,21676,0
1,2007,2,2,32.578185,-85.349003,33.025375,-86.059702,1472463.2,376,1,11539,0
2,2007,3,3,30.639867,-88.25445,31.122499,-87.766408,3786333.9,129,1,182265,0
3,2007,4,4,38.956806,-94.683236,36.196946,-94.006823,473291.74,188,20,203065,0
4,2007,5,5,41.054082,-73.536216,40.909898,-73.457023,1840579.0,284,0,1493350,0
5,2007,6,6,39.223416,-106.88277,39.269849,-107.02335,1104347.4,51,1,17148,0
6,2007,7,7,39.863544,-96.617219,39.442281,-98.05066,184057.9,58,4,6373,0
7,2007,8,8,38.819795,-121.27744,41.972979,-87.94712,17879910.0,82,208,916924,0
8,2007,9,9,33.786594,-118.29866,28.709332,-80.856849,25242226.0,34,37,543376,0
9,2007,10,10,42.443224,-114.6295,44.533039,-115.85844,946583.47,50,4,9862,0


In [143]:
# Re-scale the variables as suggested
df['new_pop'] = df['population_target']/1000000
df['new_prices'] = df['price']/1000000

# create distance variable
df['buyer_loc'] = df.apply(lambda row: Point(latitude=row['buyer_lat'], longitude=row['buyer_long']), axis=1)
df['target_loc'] = df.apply(lambda row: Point(latitude=row['target_lat'], longitude=row['target_long']), axis=1)
df['distance'] = df.apply(lambda row: geopy.distance.geodesic(row['buyer_loc'], row['target_loc']).miles, axis=1)

In [144]:
# create counterfactuals and create distance variable for counterfacts 
buyer = ['year', 'buyer_id', 'buyer_lat', 'buyer_long', 'buyer_loc', 'num_stations_buyer', 'corp_owner_buyer']
target = ['target_id', 'target_lat', 'target_long', 'target_loc', 'new_prices', 'hhi_target', 'new_pop']

years = [(df.loc[df['year'] == 2007]), (df.loc[df['year'] == 2008])]

counter = [x[buyer].iloc[i].values.tolist() + x[target].iloc[j].values.tolist()
             for x in years for i in range(len(x) - 1)
             for j in range(i + 1, len(x))]
counterfacts = pd.DataFrame(counter, columns = buyer + target)

counterfacts['distance'] = counterfacts.apply(lambda row: geopy.distance.geodesic(row['buyer_loc'], row['target_loc']).miles, axis=1)

In [149]:
# payoff function: f_m(b,t) = x1_bm*y1_tm + alpha*x2_bm*y1_tm + beta*distance_btm + epsilon_btm
def payoff_fn(dataframe, params):
    '''
    Args:
        dataframe: dataframe of interest for payoff calculation
        params: initial guesses for model parameters
    Returns:
        payoff: the payoff to the merger between radio station b and target t in market m
    '''
    alpha, beta = params

    payoff = dataframe['num_stations_buyer'] * dataframe['new_pop'] + alpha * dataframe['corp_owner_buyer'] * dataframe['new_pop'] + beta * dataframe['distance']
    return(payoff)

params = (0.5, 0.5)

# Calculate observed payoffs
df['payoff'] = payoff_fn(dataframe=df, params=params)

# Calculate counterfactual payoffs
counterfacts['payoff'] = payoff_fn(dataframe=counterfacts, params=params)

In [176]:
# Create MSE objective function
# Payoff function: f_m(b,t) = x1_bm*y1_tm + alpha*x2_bm*y1_tm + beta*distance_btm + epsilon_btm

def objective_1(params): 
    '''
    Args:
        params: initial guesses for model parameters (alpha and beta)
    Returns:
        payoff: the payoff to the merger between radio station b and target t in market m
        score: maximum score estimation
    '''
    alpha, beta = params
    
    count = 0
    score = 0
    
    
    def payoff_fn(dataframe, index):
        '''
        Args:
            dataframe: dataframe of interest for payoff calculation
            index: identification numbers for radio station b and target t
        Returns:
            payoff: the payoff to the merger between radio station b and target t in market m
        '''
        b, t = index

        payoff = dataframe.num_stations_buyer[b] * dataframe.new_pop[t] + alpha * dataframe.corp_owner_buyer[b] * dataframe.new_pop[t] + beta * dataframe.distance[(dataframe['buyer_id']==1) & (dataframe['target_id']==1)]
        return(payoff)
    
    
    for x in years:
        for i in range(len(df.buyer_id)):
            for j in range(i+1, len(df.buyer_id)):
                count += 1
                if payoff_fn(dataframe=df, index=(i, i)) + payoff_fn(dataframe=df, index=(j, j)) > payoff_fn(dataframe=counterfacts, index=(i, j)) + payoff_fn(dataframe=counterfacts, index=(j, i)): 
                    score += 1
    return(x, score)

guess = (0.5, 0.5)
result = opt.minimize(objective_1, guess, method='Nelder-Mead', tol=1e-15)
print(result)

ValueError: Can only compare identically-labeled Series objects