In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=RuntimeWarning)
warnings.filterwarnings('ignore', category=UserWarning)

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
scoring = 'r2'

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Read baseline_locations_train.csv into a DataFrame and preview the first rows.
train_csv_path = "../input/google-smartphone-decimeter-challenge/baseline_locations_train.csv"
train = pd.read_csv(train_csv_path)
train.head()

In [None]:
# Read baseline_locations_test.csv into a DataFrame and preview the first rows.
test_csv_path = "../input/google-smartphone-decimeter-challenge/baseline_locations_test.csv"
test = pd.read_csv(test_csv_path)
test.head()

In [None]:
# For train, then test: print how many distinct collection names there are,
# followed by the distinct values themselves.
for split_df in (train, test):
  collection_names = split_df['collectionName'].unique()
  print(len(collection_names))
  print(collection_names)

In [None]:
# For train, then test: print how many distinct phone names there are,
# followed by the distinct values themselves.
for split_df in (train, test):
  phone_names = split_df['phoneName'].unique()
  print(len(phone_names))
  print(phone_names)

In [None]:
# Build the model inputs/targets for latitude (suffix 1) and longitude (suffix 2).
#
# Features are every remaining column except the two targets, with `phoneName`
# replaced by one-hot indicator columns. The test matrices are re-indexed to the
# training columns so both always have the same features in the same order.

UNUSED_COLUMNS = ['collectionName', 'heightAboveWgs84EllipsoidM', 'phone']
TARGET_COLUMNS = ['latDeg', 'lngDeg']

#train
# Rebind instead of dropping in place; errors='ignore' keeps the cell re-runnable
# once the columns are already gone.
train = train.drop(columns=UNUSED_COLUMNS, errors='ignore')

# dtype is pinned because pandas >= 2.0 defaults to bool dummies, which turn
# `.values` into an object array when mixed with integer columns.
train_phone = pd.get_dummies(train['phoneName'], dtype='uint8')

# Exclude the targets by name rather than by position so the selection does not
# depend on the CSV column order.
x1_train = pd.concat(
  [train.drop(columns=TARGET_COLUMNS + ['phoneName']), train_phone],
  axis=1,
)
y1_train = train['latDeg']
x_train1, x_val1, y_train1, y_val1 = (
  part.values
  for part in train_test_split(x1_train, y1_train, test_size=0.33, random_state=42)
)

# Longitude uses exactly the same features; only the target differs.
x2_train = x1_train.copy()
y2_train = train['lngDeg']
x_train2, x_val2, y_train2, y_val2 = (
  part.values
  for part in train_test_split(x2_train, y2_train, test_size=0.33, random_state=42)
)

#test
test = test.drop(columns=UNUSED_COLUMNS, errors='ignore')

test_phone = pd.get_dummies(test['phoneName'], dtype='uint8')

# Align to the training layout: phones present only in train become all-zero
# columns, phones present only in test are dropped (the models have no
# coefficient for them).
x1_test = pd.concat(
  [test.drop(columns=TARGET_COLUMNS + ['phoneName']), test_phone],
  axis=1,
).reindex(columns=x1_train.columns, fill_value=0)
y1_test = test['latDeg']
x_test1, y_test1 = x1_test.values, y1_test.values

x2_test = x1_test.copy()
y2_test = test['lngDeg']
x_test2, y_test2 = x2_test.values, y2_test.values

In [None]:
def regression_report(y_true, y_pred):
  """Print MAE, MSE, RMSE and R-squared for predictions against true values.

  Args:
    y_true: ground-truth target values.
    y_pred: predicted target values.

  Returns:
    None. The four metrics are written to stdout, one per line.
  """
  # MSE feeds both the MSE and RMSE lines, so compute it once.
  mse = mean_squared_error(y_true, y_pred)
  rows = (
    ('Mean Absolute Error    : ', mean_absolute_error(y_true, y_pred)),
    ('Mean Squared Error     : ', mse),
    ('Root Mean Squared Error: ', math.sqrt(mse)),
    ('R-Squared              : ', r2_score(y_true, y_pred)),
  )
  for label, value in rows:
    print(label, value)

def regression(x_train, y_train, x_val, y_val, random_state=None):
  """Fit six regressors on the training split and print validation metrics.

  The models are evaluated in this order: linear regression, random forest,
  XGBoost, AdaBoost, k-nearest neighbours, decision tree. For each one a
  banner is printed, the model is fitted on (x_train, y_train), and
  `regression_report` is called on its predictions for x_val.

  Args:
    x_train: training features, passed to each estimator's `fit`.
    y_train: training targets, passed to each estimator's `fit`.
    x_val: validation features, passed to each estimator's `predict`.
    y_val: validation targets the predictions are scored against.
    random_state: optional seed forwarded to the stochastic estimators
      (random forest, XGBoost, AdaBoost, decision tree). The default of None
      leaves them unseeded, as before.

  Returns:
    None. All results are written to stdout.
  """
  # (banner, estimator) pairs; banner strings are part of the printed output.
  candidates = [
    ("                       Linear Regression",
     LinearRegression()),
    ("                       Random Forest Regression: ",
     RandomForestRegressor(n_estimators=100, random_state=random_state)),
    ("                       XGBoost Regression: ",
     XGBRegressor(verbosity = 0, random_state=random_state)),
    ("                       AdaBoost Regression: ",
     AdaBoostRegressor(n_estimators=100, random_state=random_state)),
    ("                       KNeighbors Regression: ",
     KNeighborsRegressor()),
    ("                       Decision Tree Regression: ",
     DecisionTreeRegressor(random_state=random_state)),
  ]

  for banner, model in candidates:
    print(banner)
    model.fit(x_train, y_train)
    val_pred = model.predict(x_val)
    print('Regression Report - Validation: ')
    # regression_report prints by itself and returns None; wrapping it in
    # print() would emit a stray "None" line.
    regression_report(y_val, val_pred)
    print('\n')

# Compare all regressors on the validation split, first for the latitude
# target and then for the longitude target.
print('                               latDeg')
regression(x_train1, y_train1, x_val1, y_val1)

print('                               lngDeg')
regression(x_train2, y_train2, x_val2, y_val2)

In [None]:
# Fit one linear model per target on the full training frame and predict the
# test rows.
#
# The test features are re-indexed to the training columns before predicting:
# the phone indicator columns are derived separately for train and test, so
# without this step the two matrices can differ in width or column order and
# the coefficients would be applied to the wrong features (or predict would
# fail). Columns missing from test are filled with 0; columns unknown to the
# model are dropped.
reg = LinearRegression()
reg.fit(x1_train, y1_train)
latDeg = reg.predict(x1_test.reindex(columns=x1_train.columns, fill_value=0))

reg = LinearRegression()
reg.fit(x2_train, y2_train)
lngDeg = reg.predict(x2_test.reindex(columns=x2_train.columns, fill_value=0))

In [None]:
# Take the sample submission as a template, overwrite its coordinate columns
# with the predictions, save it, and preview the first rows.
sample_submission_path = "../input/google-smartphone-decimeter-challenge/sample_submission.csv"
submission = pd.read_csv(sample_submission_path).assign(latDeg=latDeg, lngDeg=lngDeg)
submission.to_csv("Submission.csv", index=False)
submission.head()