# Average top public notebooks

Weighted mean of the submitted positions, computed in Cartesian (ECEF) coordinates with every point placed at the same (zero) altitude.

**All credit to the authors of those notebooks; please look at their work:**
* [GSDC phones mean prediction](https://www.kaggle.com/t88take/gsdc-phones-mean-prediction)
* [device EDA & Interpolate by removing device[en,ja]](https://www.kaggle.com/columbia2131/device-eda-interpolate-by-removing-device-en-ja)
* [GSDC: Position shift](https://www.kaggle.com/wrrosa/gsdc-position-shift)

In [None]:
import os
import numpy
import pandas
import wgs_ecef

In [None]:
def blend(folder_to_weight):
    """ Return a weighted mean of folders' submissions.

        Each key of `folder_to_weight` names a folder under "../input/" that
        holds a "submission.csv"; each value is that submission's weight.
        Weights are normalised by their sum. Positions are averaged as ECEF
        vectors (see `get_ecef`) and converted back with
        `wgs_ecef.ecef_to_wgs`.

        The frame read from the first folder is updated in place and
        returned, so every column other than latDeg / lngDeg comes from that
        submission.

        Assumes equal ordering to submission files' rows.

        Raises ValueError if `folder_to_weight` is empty or its weights sum
        to zero.
    """
    # Fail early with a clear message instead of leaking StopIteration from
    # next() below, or ZeroDivisionError from inside the generator.
    if not folder_to_weight:
        raise ValueError("folder_to_weight must contain at least one folder")

    norm = sum(folder_to_weight.values())
    if norm == 0:
        raise ValueError("weights in folder_to_weight must not sum to zero")

    def submissions():
        # Lazily yield (frame, normalised weight) so only one extra frame is
        # held in memory while accumulating.
        for folder, weight in folder_to_weight.items():
            frame = pandas.read_csv(
                os.path.join("../input/", folder, "submission.csv"),
                dtype={
                    "phone": str,
                    "millisSinceGpsEpoch": numpy.uint64,
                    "latDeg": numpy.float64,
                    "lngDeg": numpy.float64,
                })
            yield frame, weight / norm

    subs = submissions()

    # add in ECEF coordinates at 0 altitude; the first frame doubles as the
    # output frame
    example, weight = next(subs)
    xyz = get_ecef(example) * weight

    for frame, weight in subs:
        xyz += get_ecef(frame) * weight

    # convert back to WGS, update example in place; the third returned value
    # (presumably altitude -- confirm against wgs_ecef) is discarded
    example.latDeg, example.lngDeg, _ = wgs_ecef.ecef_to_wgs(*xyz)

    return example


def get_ecef(frame):
    """ Return stacked ECEF components for the frame's latDeg / lngDeg.

        Altitude is fixed to zero for every row, so all positions are
        converted at the same height. The tuple returned by
        `wgs_ecef.wgs_to_ecef` is stacked into a single array along a new
        first axis.
    """
    latitudes = frame.latDeg
    sea_level = numpy.zeros_like(latitudes)
    components = wgs_ecef.wgs_to_ecef(latitudes, frame.lngDeg, sea_level)
    return numpy.stack(components)

In [None]:
# Relative weight given to each public notebook's submission; the keys are
# folder names under "../input/".
folder_weights = {
    "gsdc-phones-mean-prediction": 5,
    "device-eda-interpolate-by-removing-device-en-ja": 1,
    "gsdc-position-shift": 1,
}
submission = blend(folder_weights)

submission.head()

In [None]:
# Write the blended frame to submission.csv, leaving out the index column.
submission.to_csv("submission.csv", index=False)