In [1]:
import pandas as pd
import numpy as np
from haversine import haversine
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline



In [2]:
north_pole = [90.,0.]
weight_limit = 1000.
sleigh_weight = 10.

In [None]:
chunks=[data[x:x+100] for x in xrange(0, len(data), 100)]


In [3]:
def weighted_trip_length(stops, weights): 
    tuples = [tuple(x) for x in stops.values]
    # adding the last trip back to north pole, with just the sleigh weight
    tuples.append(north_pole)
    weights.append(sleigh_weight)
    
    dist = 0.0
    prev_stop = north_pole
    prev_weight = sum(weights)
    for location, weight in zip(tuples, weights):
        dist = dist + haversine(location, prev_stop) * prev_weight
        prev_stop = location
        prev_weight = prev_weight - weight
    if (np.sum(weights)> weight_limit):
        return np.inf
    else:
        return dist

def weighted_reindeer_weariness(all_trips):
    uniq_trips = all_trips.TripId.unique()
    
    if any(all_trips.groupby('TripId').Weight.sum() > weight_limit):
        raise Exception("One of the sleighs over weight limit!")
 
    dist = 0.0
    for t in uniq_trips:
        this_trip = all_trips[all_trips.TripId==t]
        dist = dist + weighted_trip_length(this_trip[['Latitude','Longitude']], this_trip.Weight.tolist())
    
    return dist    

In [4]:
import pandas as pd

In [5]:
gifts = pd.read_csv('gifts.csv')
sample_sub = pd.read_csv('sample_submission.csv')

all_trips = sample_sub.merge(gifts, on='GiftId')

In [6]:
#naive_score=weighted_reindeer_weariness(all_trips)

In [7]:
best_score=12395765387.87850
print(naive_score,best_score)

NameError: name 'naive_score' is not defined

# Initializiaton: Setup stops, Check for Validity

In [8]:
# Add lots of 'north pole' stops, concatenate them to the original gifts data frame
def initialize(frac):
    notValid=True
    while (notValid):
        seed1=np.insert(north_pole,0,-1)
        seed1=np.insert(seed1,3,0.)
        s=pd.DataFrame(seed1,index=gifts.columns.values).T
        s.head()
        s=pd.DataFrame(seed1,index=gifts.columns.values).T
        for i in np.arange(np.log(len(gifts)*frac)/np.log(2.)):
            s=pd.concat([s,s])
        print(len(s))
    
        dfc=pd.concat([gifts,s])
        dfc.head()
        
        # Now randomly distribute the stops
        dfc=dfc.iloc[np.random.permutation(len(dfc))]
        dfc.head()
    
        stops=np.where(dfc['GiftId']==-1)[0]
        
        dfc['tripW']=np.zeros(len(dfc))
        dfc['TripId']=np.zeros(len(dfc))

        ###### CHECK IF A VALID SET OF STOPS
        cumWeights=[]
    
        np.insert(stops,0,0)
        np.insert(stops,len(stops),len(dfc)+1)
    
        tripWeight=np.sum(dfc['Weight'].values[:stops[0]])
        cumWeights.append(tripWeight)
        dfc['tripW'].values[:stops[0]]=tripWeight
        dfc['TripId'].values[:stops[0]]=0
        
        for i in np.arange(len(stops)-1):
    #    print(i)
            tripWeight=np.sum(dfc['Weight'].values[stops[i]:stops[i+1]])
            cumWeights.append(tripWeight)
            dfc['tripW'].values[stops[i]:stops[i+1]]=tripWeight
            dfc['TripId'].values[stops[i]:stops[i+1]]=i
        
        tripWeight=np.sum(dfc['Weight'].values[stops[-1]:])
        cumWeights.append(tripWeight)
        dfc['tripW'].values[stops[-1]:]=tripWeight
        dfc['TripId'].values[stops[-1]:]=i+1

        cumWeights=np.array(cumWeights)
        
        if np.any(dfc['tripW'].values > 1000.-10.):
            print('Too much weight in the sleigh!')
            frac*=1.1
        else:
            print('legal set of stops')
            notValid=False
    if np.any(np.isnan(dfc)):
        print('WARNING THERE ARE NAN TRIP IDS')
    #print('calculating initial score fraction...relative to Naive')
    return dfc
   


# Two Opt Code: Swap two stops, see if valid and check for improvement

In [9]:
def swap2(i,j,dfc):
    for attr in ['GiftId','Latitude','Longitude','Weight']:
        tmpi=dfc.iloc[i][attr]
        tmpj=dfc.iloc[j][attr]
        dfc.iloc[i][attr]=tmpj
        dfc.iloc[j][attr]=tmpi

In [10]:
def propose_swap(dfc,Temp,lbound,hbound):
    """
    Propose a random Swap of two cities in the traveling salesmen problem
    """
    i1,i2=np.random.randint(lbound,high=hbound,size=2)
    trip0ID=dfc.iloc[i1]['TripId']
    trip1ID=dfc.iloc[i2]['TripId']
    trip0=dfc[dfc['TripId']==trip0ID]
    trip1=dfc[dfc['TripId']==trip1ID]

    dist1=weighted_trip_length(trip0[['Latitude','Longitude']], trip0.Weight.tolist())+weighted_trip_length(trip1[['Latitude','Longitude']], trip1.Weight.tolist())
    
    swap2(i1,i2,dfc)
    trip0ID=dfc.iloc[i1]['TripId']
    trip1ID=dfc.iloc[i2]['TripId']
    trip0=dfc[dfc['TripId']==trip0ID]
    trip1=dfc[dfc['TripId']==trip1ID]

    dist2=weighted_trip_length(trip0[['Latitude','Longitude']], trip0.Weight.tolist())+weighted_trip_length(trip1[['Latitude','Longitude']], trip1.Weight.tolist())

    if (dist2 < dist1):
#        print('accepted')
#        print(dist2-dist1)
        return (dist2 - dist1)
    else:
        prob=np.exp((dist1-dist2)/Temp)
        sample=np.random.rand()
        # Accept Swap with probability exp(-deltaD/T)
        if (sample < prob):
#            print('accepted with probability :',prob)
#            print(dist2-dist1)
            return (dist2 - dist1)
        else:
#            print('rejected with probability :',1.-prob)
            swap2(i1,i2,dfc)
            return 0.
    # should never get here
    return (dist2 - dist1)

In [11]:
def running_mean(x,N):
    return np.convolve(x, np.ones((N,))/N, mode='valid')

# The Burn In Process

In [None]:
df0=initialize(0.1)
weighted_reindeer_weariness(df0[all_trips.columns])/best_score

In [12]:
def burn_in(T,m,df,lbound,hbound):
    c=[]
    for i in np.arange(m):
        delta=propose_swap(df,Temp,lbound,hbound)
        c.append(delta)
    return np.array(c)

In [None]:
m0=1000
count=0
Temp=10**5.6
var=100.

In [None]:
swap2(1,2,all_trips)

In [248]:
df0=pd.read_csv('santas_route_2.csv')
df0=df0.T.drop('Unnamed: 0').T
df=df0[df0.GiftId!=-1]
df=df.T.drop('tripW').T

In [249]:
def swp(i,j,df):
    i,j=1,2
    cols=df.T.columns.tolist()
    tmp=cols[i]
    cols[i]=cols[j]
    cols[j]=tmp
    
    # Swap Trip Id's
    tmp=df.T[i].TripId
    df.T[i].TripId=df.T[j].TripId
    df.T[j].TripId=tmp

    return (df.T.reindex(columns=cols)).T

In [251]:
df=df.set_index(np.arange(0,len(df)))
df.iloc[:10]

Unnamed: 0,GiftId,Latitude,Longitude,Weight,TripId
0,18900,67.02674,95.997109,14.559178,0
1,70517,62.126464,50.578612,25.789425,0
2,26034,69.638752,87.285451,9.682529,0
3,90157,-4.787417,-38.984069,13.911547,0
4,24443,-9.312866,-39.641796,1.0,0
5,35588,-13.154346,-41.403243,1.0,0
6,94264,-79.495667,-23.68207,1.329576,0
7,13737,12.888474,21.175827,1.0,0
8,93124,19.128447,47.132578,1.0,0
9,58934,32.29348,91.891433,1.0,0


In [258]:
i,j=30,31
df.iloc[i-2:j+2]

Unnamed: 0,GiftId,Latitude,Longitude,Weight,TripId
28,50438,-12.858438,28.763446,1.0,2
29,92392,-27.9875,-68.482628,5.11464,2
30,60274,28.030108,110.886673,21.124946,2
31,81282,52.000656,117.268228,1.0,3
32,53507,35.434307,114.388548,12.099344,3


In [261]:
import improve_path as path

In [262]:
df.iloc[:10]

Unnamed: 0,GiftId,Latitude,Longitude,Weight,TripId
0,18900,67.02674,95.997109,14.559178,0
1,70517,62.126464,50.578612,25.789425,0
2,26034,69.638752,87.285451,9.682529,0
3,90157,-4.787417,-38.984069,13.911547,0
4,24443,-9.312866,-39.641796,1.0,0
5,35588,-13.154346,-41.403243,1.0,0
6,94264,-79.495667,-23.68207,1.329576,0
7,13737,12.888474,21.175827,1.0,0
8,93124,19.128447,47.132578,1.0,0
9,58934,32.29348,91.891433,1.0,0


In [266]:
path.swap(1,2,df)
df.iloc[:10]

Unnamed: 0,GiftId,Latitude,Longitude,Weight,TripId
0,18900,67.02674,95.997109,14.559178,0
1,70517,62.126464,50.578612,25.789425,0
2,26034,69.638752,87.285451,9.682529,0
3,90157,-4.787417,-38.984069,13.911547,0
4,24443,-9.312866,-39.641796,1.0,0
5,35588,-13.154346,-41.403243,1.0,0
6,94264,-79.495667,-23.68207,1.329576,0
7,13737,12.888474,21.175827,1.0,0
8,93124,19.128447,47.132578,1.0,0
9,58934,32.29348,91.891433,1.0,0


In [267]:
df[:10]

Unnamed: 0,GiftId,Latitude,Longitude,Weight,TripId
0,18900,67.02674,95.997109,14.559178,0
1,70517,62.126464,50.578612,25.789425,0
2,26034,69.638752,87.285451,9.682529,0
3,90157,-4.787417,-38.984069,13.911547,0
4,24443,-9.312866,-39.641796,1.0,0
5,35588,-13.154346,-41.403243,1.0,0
6,94264,-79.495667,-23.68207,1.329576,0
7,13737,12.888474,21.175827,1.0,0
8,93124,19.128447,47.132578,1.0,0
9,58934,32.29348,91.891433,1.0,0


In [None]:
t = np.copy(a[:,0,0])
a[:,0,0] = b[:,0,0]
b[:,0,0] = t

In [None]:
i1,i2=np.random.randint(0,high=len(df),size=2)
i1,i2

In [None]:
tmp1=df.iloc[[i1]]
tmp2=df.iloc[[i2]]
tmp1

In [None]:
import improve_path

In [None]:
total_weights=[df0[df0.TripId==t].Weight.sum() for t in df0['TripId'].unique()]
plt.hist(total_weights)
plt.show()

In [None]:
def update_trip_weights(df):
    df

In [None]:
Temp=10**4.2
m0=20000
var=5.01
count=0
print('log(T): '+str(np.log(Temp)/np.log(10.))+' var: '+str(var))
for n in np.arange(100):
    m0=np.amax([var,1.])**2.5*400
    mu=burn_in(Temp,m0,df0,0,len(df0))
    var=np.std(mu/Temp)
    print('        equilibriation('+str(count)+')  var: '+str(var))
    score2=weighted_reindeer_weariness(df0[all_trips.columns])
    print(score2)
    if (var < 5.):
        Temp*=.9
        print('log(T): '+str(np.log(Temp)/np.log(10.))+' var: '+str(var))
        count=0
    else:
        count+=m0

In [None]:
df0.to_csv('santas_route_2.csv')

In [None]:
np.savetxt('submission3.csv',df0[df0['GiftId']!=-1][['GiftId','TripId']].values.astype(np.int),fmt='%2.0d',delimiter=',',header='GiftId,TripId')
%more submission1.csv

# Parellelized Burn in

In [None]:
from IPython import parallel

rc = parallel.Client()
all_engines = rc[:]
lbv = rc.load_balanced_view()
lbv.block = True

In [None]:
@lbv.parallel()
def f(x):
    return 10.0*x**4

In [None]:
f.map(np.arange(10**2))

In [None]:
def power(base, exponent):
    return base ** exponent

from functools import partial

square = partial(power, exponent=2)
cube = partial(power, exponent=3)

def test_partials():
    assert square(2) == 4
    assert cube(2) == 8