In [1]:
from scipy import misc
import urllib2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

from osgeo import gdal
# General libraries.
from __future__ import division
from collections import Counter
import csv
import dateutil
import os
import pandas as pd
import re
import numpy as np

# SK-learn libraries for preprocessing.
from sklearn.preprocessing import OneHotEncoder # for integer values
from sklearn.feature_extraction import DictVectorizer as DV

# SK-learn libraries for learning.
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.grid_search import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.base import TransformerMixin


# SK-learn libraries for evaluation.
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.metrics import classification_report

In [2]:
data_path = "data"
submissions_path = "submissions"
if not data_path or not submissions_path:
    raise Exception("Set the data and submission paths in competition_utilities.py!")

def parse_date_maybe_null(date):
    if date:
        return dateutil.parser.parse(date)
    return None

df_converters = {"Dates": dateutil.parser.parse}

def get_reader(file_name="train.csv"):
    reader = csv.reader(open(os.path.join(data_path, file_name)))
    header = reader.next()
    return reader

def get_header(file_name="train.csv"):
    reader = csv.reader(open(os.path.join(data_path, file_name)))
    header = reader.next()
    return header

def get_dataframe(file_name="train.csv"):
    return pd.io.parsers.read_csv(os.path.join(data_path, file_name), converters = df_converters)

    
def write_submission(file_name, predictions):
    writer = csv.writer(open(os.path.join(submissions_path, file_name), "w"), lineterminator="\n")
    writer.writerows(predictions)  
    
dataframe = get_dataframe()
df = dataframe.sample(n=1000)

df = df.reindex(np.random.permutation(df.index))

# take 80% from the top:
upper = np.floor(len(df)*.8).astype(int)
train_data = df.head(n=upper)
train_labels = train_data['Category']
del train_data['Category']
del train_data['Descript']
del train_data['Resolution']




# take 20% from the bottom
lower = np.ceil(len(df)*.2).astype(int)
dev_data = df.tail(n=lower)
dev_labels = dev_data['Category']
del dev_data['Category']

testdf = get_dataframe("test.csv")
test_data = testdf.reindex(np.random.permutation(testdf.index))
print test_data.shape

(884262, 7)


In [3]:
class pictureandlatlontransforms:
    def __init__(self, image, Xconst,Xcoeff,Yconst,Ycoeff):
        self.image = f = mpimg.imread(urllib2.urlopen(image), format=image[-3:])
        self.Xconst =Xconst
        self.Xcoeff = Xcoeff
        self.Yconst =Yconst
        self.Ycoeff = Ycoeff
        
    def XYToLatLong(self,X,Y):
        lat = round(self.Xconst + self.Xcoeff*X)
        lon =round(self.Yconst + self.Ycoeff*Y)
        return(lat,lon)

    def LatLongToXY(self,lat,lon):
        print(lat,lon)
        X = round((lat - self.Xconst)/ self.Xcoeff)
        Y = round((lon - self.Yconst) / self.Ycoeff)
        return (X,Y)
    
    def getPixelXY(self,X,Y):
        return self.image[X,Y]
    
    def getPixelLatLong(self,lat,lon):
        return getPixelXY(*getXY(X,Y))
    


In [14]:
from osgeo import gdal
from osgeo import osr
# The following method translates given latitude/longitude pairs into pixel locations on a given GEOTIF
# INPUTS: geotifAddr - The file location of the GEOTIF
#      latLonPairs - The decimal lat/lon pairings to be translated in the form [[lat1,lon1],[lat2,lon2]]
# OUTPUT: The pixel translation of the lat/lon pairings in the form [[x1,y1],[x2,y2]]
# NOTE:   This method does not take into account pixel size and assumes a high enough 
#  image resolution for pixel size to be insignificant
def latLonToPixel(geotifAddr, latLonPairs):
    # Load the image dataset
    ds = gdal.Open(geotifAddr)
    # Get a geo-transform of the dataset
    gt = ds.GetGeoTransform()
    print(gt)
    # Create a spatial reference object for the dataset
    srs = osr.SpatialReference()
    srs.ImportFromWkt(ds.GetProjection())
    # Set up the coordinate transformation object
    srsLatLong = srs.CloneGeogCS()
    ct = osr.CoordinateTransformation(srsLatLong,srs)
    # Go through all the point pairs and translate them to latitude/longitude pairings
    pixelPairs = []
    for point in latLonPairs:
        # Change the point locations into the GeoTransform space
        (point[1],point[0],holder) = ct.TransformPoint(point[1],point[0])
        print(point)
        # Translate the x and y coordinates into pixel values
        x = (point[1]-gt[0])/gt[1]
        y = (point[0]-gt[3])/gt[5]
        # Add the point to our return array
        pixelPairs.append([int(x),int(y)])
    return pixelPairs
# The following method translates given pixel locations into latitude/longitude locations on a given GEOTIF
# INPUTS: geotifAddr - The file location of the GEOTIF
#      pixelPairs - The pixel pairings to be translated in the form [[x1,y1],[x2,y2]]
# OUTPUT: The lat/lon translation of the pixel pairings in the form [[lat1,lon1],[lat2,lon2]]
# NOTE:   This method does not take into account pixel size and assumes a high enough 
#	  image resolution for pixel size to be insignificant
def pixelToLatLon(geotifAddr,pixelPairs):
    # Load the image dataset
    ds = gdal.Open(geotifAddr)
    # Get a geo-transform of the dataset
    gt = ds.GetGeoTransform()
    # Create a spatial reference object for the dataset
    srs = osr.SpatialReference()
    srs.ImportFromWkt(ds.GetProjection())
    # Set up the coordinate transformation object
    srsLatLong = srs.CloneGeogCS()
    ct = osr.CoordinateTransformation(srs,srsLatLong)
    # Go through all the point pairs and translate them to pixel pairings
    latLonPairs = []
    for point in pixelPairs:
        # Translate the pixel pairs into untranslated points
        ulon = point[0]*gt[1]+gt[0]
        ulat = point[1]*gt[5]+gt[3]
        # Transform the points to the space
        (lon,lat,holder) = ct.TransformPoint(ulon,ulat)
        # Add the point to our return array
        latLonPairs.append([lat,lon])

    return latLonPairs

In [None]:



# create a file-like object from the url
f = pictureandlatlontransforms('http://mapwarper.net/maps/export/13355?format=tif',\
                                    -122.51414755019515,0.00015361020540501148,37.812400176881148,-0.00015361020540501148)
print(latLonToPixel("http://mapwarper.net/maps/export/13355?format=tif", [[-122.4850273085,37.7838524288]]))
# im = plt.imread(io.BytesIO(urllib2.urlopen(url).read()), format='jpeg')
plt.figure(figsize=(20, 20))

plt.imshow(f.image, cmap='Greys_r')
small_data = [f.LatLongToXY(d['X'],d['Y']) for i,d in train_data[:1].iterrows()]
plt.scatter(*zip(*small_data),c = "c", s = 50)
plt.show()



(-122.51414755019515, 0.00015361020540501148, 0.0, 37.81240017688115, 0.0, -0.00015361020540501148)
[-122.48502730850001, 37.7838524288]
[[1043537, 1043533]]
(-122.40367144217699, 37.77703244359461)