I will set KNN=60 and try to improve the IoU metric for the training process. I will keep all positive samples and randomly subsample the negative ones.

In [None]:
import pandas as pd
import numpy as np
import gc
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
from sklearn.neighbors import NearestNeighbors
from tqdm.notebook import tqdm
from sklearn.model_selection import GroupKFold
tqdm.pandas()

In [None]:
def get_id2poi(input_df: pd.DataFrame) -> dict:
    """Build a lookup from each row's ``id`` to its ``point_of_interest``.

    Rows are consumed in order, so if an id appears more than once the
    point of interest of its last occurrence is the one that is kept.
    """
    ids = input_df['id']
    pois = input_df['point_of_interest']
    return {row_id: poi for row_id, poi in zip(ids, pois)}

def get_poi2ids(input_df: pd.DataFrame) -> dict:
    """Build a lookup from each ``point_of_interest`` to the set of its ids.

    Rows are grouped on the ``point_of_interest`` column and the ``id``
    values of every group are collected into a ``set``.
    """
    ids_by_poi = input_df.groupby('point_of_interest')['id']
    return {poi: set(group_ids) for poi, group_ids in ids_by_poi}

def get_score(input_df: pd.DataFrame):
    """Return the mean per-row Jaccard (IoU) score of predicted matches.

    For every row, the whitespace-separated ids in ``matches`` are compared
    with the ground-truth id set of that row's point of interest.

    Relies on the module-level lookups ``id2poi`` (id -> point of interest)
    and ``poi2ids`` (point of interest -> set of ids), which must be defined
    before this function is called.
    """
    def jaccard(truth, predicted):
        # Intersection over union of two id sets.
        return len(truth & predicted) / len(truth | predicted)

    row_ids = input_df['id'].to_numpy()
    match_strings = input_df['matches'].to_numpy()
    row_scores = [
        jaccard(poi2ids[id2poi[row_id]], set(match_string.split()))
        for row_id, match_string in zip(row_ids, match_strings)
    ]
    return np.array(row_scores).mean()
def reduce_memory(df):
    """Downcast numeric columns of ``df`` to the smallest dtype that fits.

    The frame is modified in place and also returned.

    * Signed / unsigned integer columns are cast to the narrowest signed /
      unsigned integer type whose range contains the column's min and max.
    * Float columns are cast to float16 or float32 when the value *range*
      fits.
    * Every other column (bool, object/string, datetime, categorical,
      pandas extension dtypes, ...) is left untouched. In particular a bool
      column is no longer widened to float16, which used more memory, not
      less.

    NOTE: the float rule looks only at the value range, not at precision.
    float16 carries roughly three significant decimal digits, so small
    differences between values can be lost after the cast.

    Args:
        df: DataFrame whose columns should be downcast.

    Returns:
        The same DataFrame object, with downcast columns.
    """
    # Candidate targets, narrowest first; columns that fit none are left as-is.
    targets_by_kind = {
        'i': ((np.int8, np.int16, np.int32), np.iinfo),
        'u': ((np.uint8, np.uint16, np.uint32), np.iinfo),
        'f': ((np.float16, np.float32), np.finfo),
    }
    for col in df.columns:
        col_type = df[col].dtype
        # Only plain NumPy int/uint/float dtypes are safe to range-check.
        if not isinstance(col_type, np.dtype) or col_type.kind not in targets_by_kind:
            continue
        candidates, limits_of = targets_by_kind[col_type.kind]
        cmin = df[col].min()
        cmax = df[col].max()
        for candidate in candidates:
            limits = limits_of(candidate)
            # NaN bounds (empty / all-NaN column) and +-inf fail every
            # comparison, so such columns keep their current dtype.
            if limits.min <= cmin and cmax <= limits.max:
                df[col] = df[col].astype(candidate)
                break
    return df

In [None]:
# Load the training table from a relative path (presumably the Kaggle input
# layout for this competition -- adjust when running elsewhere).
df = pd.read_csv(f"../input/foursquare-location-matching/train.csv")
print(df.shape)
# Last expression of the cell: renders the first rows when run in a notebook.
df.head()

### Calculating NearestNeighbors
1. N=60: we want to capture as many matching pairs as we can
2. algorithm=kd_tree: the fastest algorithm for this search

In [None]:
# KD-tree k-nearest-neighbour search over the raw coordinate columns.
neighbors = 60
print('KNN:', neighbors)
coo_cols = ["latitude", "longitude"]
matcher = NearestNeighbors(
    n_neighbors=neighbors,
    algorithm='kd_tree',
    leaf_size=30,
    metric='minkowski',
    p=2,         # Minkowski with p=2 is the Euclidean distance
    radius=1.0,  # only consulted by radius_neighbors queries, not kneighbors
    n_jobs=-1,   # use all available cores
)
matcher.fit(df[coo_cols])
# The fitted rows are also the query rows, so each row presumably comes back
# as its own nearest neighbour at distance 0 -- TODO confirm.
# NOTE(review): distances are Euclidean in coordinate units, not metres.
distances, indices = matcher.kneighbors(df[coo_cols])

### Creating the Target

In [None]:
# Build the long-format candidate table: one row per (id, k-th neighbour).
# Each neighbour rank contributes one frame with the same index as `df`.
# The frames are collected and concatenated once at the end, because
# concatenating inside the loop would re-copy the growing table on every
# iteration.
id_values = df["id"].values
poi_values = df["point_of_interest"].values
candidate_frames = []
for i in tqdm(range(indices.shape[1])):
    neighbor_rows = indices[:, i]  # positional row of the i-th neighbour
    tmp_df = df[["id"]].copy()
    tmp_df["dist"] = distances[:, i]
    tmp_df["match_id"] = id_values[neighbor_rows]
    tmp_df["match_rank"] = i
    # Target: does the neighbour belong to the same point of interest?
    tmp_df["match"] = df["point_of_interest"] == poi_values[neighbor_rows]
    candidate_frames.append(tmp_df)
    del tmp_df
# pd.concat rejects an empty list, so fall back to an empty frame.
candidate_df = pd.concat(candidate_frames) if candidate_frames else pd.DataFrame()
del candidate_frames, id_values, poi_values
gc.collect()

In [None]:
# Drop the names of the raw KNN output arrays and trigger a garbage
# collection to release their memory.
del distances,indices
gc.collect()

In [None]:
# Downcast the numeric columns of the candidate table to smaller dtypes.
candidate_df=reduce_memory(candidate_df)

In [None]:
# (rows, columns) of the full candidate table.
candidate_df.shape

In [None]:
# Relative frequency of each value of the `match` target.
candidate_df.match.value_counts(normalize=True)

### Sampling
Sampling can be improved; let me know if you have a suggestion.

In [None]:
# Keep every positive pair and a fixed-size random sample of negative pairs.
N_NEGATIVE_SAMPLES = 12345678
is_negative = candidate_df.match == 0
# DataFrame.sample without replacement raises ValueError when n exceeds the
# number of rows, so cap the request at the number of negatives available.
n_negative = min(N_NEGATIVE_SAMPLES, int(is_negative.sum()))
candidate_df = pd.concat([
    candidate_df[candidate_df.match == 1],
    candidate_df[is_negative].sample(n=n_negative, random_state=1337),
])
del is_negative

In [None]:
# Relative frequency of each value of the `match` target after sampling.
candidate_df.match.value_counts(normalize=True)

In [None]:
## Eval
# Score the true matches present in the candidate table; each id also
# counts as a match of itself.
# The ground-truth lookups are built first because `df` is deleted below.
id2poi = get_id2poi(df)
poi2ids = get_poi2ids(df)

# Seed the evaluation frame with a self-match row for every unique id.
eval_df = pd.DataFrame({'id': df['id'].unique().tolist()})
eval_df['match_id'] = eval_df['id']
print('Unique id: %s' % len(eval_df))
del df
gc.collect()

# Add every candidate pair labelled as a true match.
eval_df_ = candidate_df.loc[candidate_df['match'] == 1, ['id', 'match_id']]
eval_df = (
    pd.concat([eval_df, eval_df_])
    .groupby('id')['match_id']
    .apply(list)
    .reset_index()
)
# De-duplicate each id's matches and join them into the space-separated
# string that get_score splits again.
eval_df['matches'] = eval_df['match_id'].map(lambda ids: ' '.join(set(ids)))
print('Unique id: %s' % len(eval_df))

iou_score = get_score(eval_df)
print('IoU score: %s' % iou_score)
#del eval_df
gc.collect()

If you use NN=10, the IoU score will be around 0.88; with this method it is 0.932561608024588 with almost the same sample size!