In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
import matplotlib.pyplot as plt
from scipy.stats import percentileofscore
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import *

In [None]:
# Load the training set. The competition's test set is read separately
# further down, right before the submission is built.
df = pd.read_csv("../input/how-much-did-it-rain-ii/train.zip")

In [None]:
def process_data(df):
    """Aggregate the raw rows to a single row per ``Id``.

    Every non-key column is averaged within each ``Id`` group. Any mean that
    comes out as NaN (a group with no valid value in that column) is replaced
    by 0, and ``Id`` is returned as an ordinary column rather than the index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``Id`` column plus the columns to average.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``Id`` (sorted by ``Id``, as ``groupby`` does by
        default) with a fresh 0..n-1 index.
    """
    per_id_means = df.groupby('Id').mean()
    return per_id_means.fillna(0).reset_index()

In [None]:
# Collapse the raw training rows to one row per Id (column means, NaN -> 0).
df_mean = process_data(df)

In [None]:
# Summary statistics of the per-Id target.
df_mean['Expected'].describe()

In [None]:
# Box plot of the per-Id target to see its spread.
fig, ax = plt.subplots(figsize=(15, 10))
df_mean['Expected'].plot(kind='box', ax=ax)
plt.show()

In [None]:
# Draw 100,000 aggregated rows for a quick look at the target.
# The fixed seed keeps the sample (and anything displayed from it) identical
# on every Restart & Run All.
df_sample = df_mean.sample(n=100000, random_state=0)

In [None]:
# Display the target column of the sampled rows.
df_sample['Expected']

In [None]:
# Plot every distinct target value against its position in the array
# returned by unique().
distinct_expected = df_mean['Expected'].unique()
plt.figure(figsize=(15, 10))
plt.scatter(np.arange(len(distinct_expected)), distinct_expected)
plt.show()

In [None]:
# Histogram over the distinct target values: each value is counted once,
# no matter how many Ids share it.
fig, ax = plt.subplots(figsize=(15, 10))
ax.hist(df_mean['Expected'].unique())
plt.show()

In [None]:
# Percentile rank of the target's mean within the target's own distribution.
percentileofscore(df_mean['Expected'],df_mean['Expected'].mean())

In [None]:
# Remove, in place, every row whose target exceeds the current mean target.
# NOTE: this cell is not idempotent. Re-running it recomputes the mean on the
# already-trimmed frame and trims it further.
expected_mean = df_mean['Expected'].mean()
above_mean = df_mean['Expected'] > expected_mean
df_mean.drop(df_mean.loc[above_mean].index, inplace=True)

In [None]:
# Modelling subsample: 10,000 rows of the trimmed frame.
# Seeded so the rows, and therefore every score computed from them, are the
# same on each run.
df_sample = df_mean.sample(10000, random_state=0)

In [None]:
# Confirm the size of the modelling subsample (rows, columns).
df_sample.shape

In [None]:
# Model inputs: every column of the sample except the target and the key.
features = list(df_sample.columns)
for excluded_col in ('Expected', 'Id'):
    features.remove(excluded_col)


In [None]:
# Feature matrix and regression target, both taken from the same sampled rows.
X, y = df_sample[features], df_sample['Expected']

In [None]:
# Split first, then fit the scaler on the training rows only, so that no
# mean/variance information from the held-out rows leaks into the training
# features. With the same random_state the split selects the same rows as
# splitting the pre-scaled array would.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
# The held-out rows are only transformed with the training statistics.
x_test = scaler.transform(x_test)

In [None]:
# Baseline: linear regression fitted on the scaled training split.
LR = LinearRegression()
LR.fit(x_train,y_train)

In [None]:
# R^2 of the linear model on the held-out split.
LR.score(x_test,y_test)

In [None]:
# Mean squared error of the linear model on the held-out split.
print(mean_squared_error(y_test,LR.predict(x_test)))

In [None]:
# Mean absolute error of the linear model on the held-out split.
print(mean_absolute_error(y_test,LR.predict(x_test)))

In [None]:
# Random forest with default hyper-parameters. The seed pins the bootstrap
# and feature sampling so the scores below are reproducible across runs.
RFR = RandomForestRegressor(random_state=0)
RFR.fit(x_train,y_train)

In [None]:
# R^2 of the random forest on the held-out split.
RFR.score(x_test,y_test)

In [None]:
# Mean squared error of the random forest on the held-out split.
print(mean_squared_error(y_test,RFR.predict(x_test)))

In [None]:
# Mean absolute error of the random forest on the held-out split.
print(mean_absolute_error(y_test,RFR.predict(x_test)))

In [None]:
# Support vector regressor with default settings; this is the model used
# later to produce the submission predictions.
SVM = SVR()
SVM.fit(x_train,y_train)

In [None]:
# R^2 of the support vector regressor on the held-out split.
SVM.score(x_test,y_test)

In [None]:
# Mean squared error of the support vector regressor on the held-out split.
print(mean_squared_error(y_test,SVM.predict(x_test)))

In [None]:
# Mean absolute error of the support vector regressor on the held-out split.
print(mean_absolute_error(y_test,SVM.predict(x_test)))

In [None]:
# Multi-layer perceptron, allowed up to 1000 training iterations.
# The seed fixes weight initialisation and batch shuffling so the scores
# below are reproducible across runs.
MLP = MLPRegressor(max_iter=1000, random_state=0)
MLP.fit(x_train,y_train)

In [None]:
# R^2 of the multi-layer perceptron on the held-out split.
MLP.score(x_test,y_test)

In [None]:
# Mean squared error of the multi-layer perceptron on the held-out split.
print(mean_squared_error(y_test,MLP.predict(x_test)))

In [None]:
# Mean absolute error of the multi-layer perceptron on the held-out split.
print(mean_absolute_error(y_test,MLP.predict(x_test)))

In [None]:
# AdaBoost with default hyper-parameters. The seed pins the resampling done
# at each boosting round so the scores below are reproducible across runs.
ABR = AdaBoostRegressor(random_state=0)
ABR.fit(x_train,y_train)

In [None]:
# R^2 of the AdaBoost regressor on the held-out split.
ABR.score(x_test,y_test)

In [None]:
# Mean squared error of the AdaBoost regressor on the held-out split.
print(mean_squared_error(y_test,ABR.predict(x_test)))

In [None]:
# Mean absolute error of the AdaBoost regressor on the held-out split.
print(mean_absolute_error(y_test,ABR.predict(x_test)))

In [None]:
# Gradient-boosted trees (XGBoost) with default hyper-parameters.
XGB = xgb.XGBRegressor()
XGB.fit(x_train,y_train)

In [None]:
# R^2 of the XGBoost regressor on the held-out split.
XGB.score(x_test,y_test)

In [None]:
# Mean squared error of the XGBoost regressor on the held-out split.
print(mean_squared_error(y_test,XGB.predict(x_test)))

In [None]:
# Mean absolute error of the XGBoost regressor on the held-out split.
print(mean_absolute_error(y_test,XGB.predict(x_test)))

In [None]:
# Load the competition test set that the submission is built from.
df_sub = pd.read_csv("../input/how-much-did-it-rain-ii/test.zip")

In [None]:
# Apply the same per-Id aggregation that was used for the training data.
df_sub_mean = process_data(df_sub)

In [None]:
# Predict with the support vector regressor. The already-fitted scaler is
# reused via transform() so the submission features sit on the same scale the
# model was trained on. Refitting the scaler here would standardise them with
# different statistics and silently shift every input.
y_preds = SVM.predict(scaler.transform(df_sub_mean[features]))

In [None]:
# Empty submission frame with the two output columns; filled in the next cell.
sub = pd.DataFrame(columns=['Id','Expected'])

In [None]:
# Fill the submission frame: keys from the aggregated test set, predictions
# from the model, in the same row order.
sub = sub.assign(Id=df_sub_mean['Id'], Expected=y_preds)

In [None]:
# Write the submission file without the DataFrame index column.
sub.to_csv('submission.csv',index=False)