In [5]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
from importlib import reload
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
pd.options.display.max_columns = 999
pd.options.display.max_rows = 99
import ReadFromDB as RDB

In [170]:
def station_function(i):
    """
    Creates a sparse matrix from station information on database
    """
    
    station = RDB.add_times(RDB.create_station_dictionary(i)[i])
    weather = RDB.weather_times(RDB.read_weather(i))
#     df = pd.merge(station, weather, left_on=['date', 'hour', 'first_half_hour', 'second_half_hour'], right_on=['date', 'hour', 'first_half_hour', 'second_half_hour'], how='inner')
#     df.drop_duplicates(subset='last_updated', inplace=True)
#     df.drop_duplicates(subset='time', inplace=True)
    df = pd.merge_asof(station, weather, left_on=['last_updated'], right_on=['time'], tolerance=600, direction='nearest')
    df = df.dropna()
    df = df.reset_index()
    df.drop('index', axis=1, inplace=True)
    
    days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    weather_icons = ['partly-cloudy-day',
                     'partly-cloudy-night',
                     'clear-night',
                     'clear-day',
                     'fog',
                     'wind',
                     'cloudy',
                     'rain'
                    ]
    weather_cols = ['apparentTemperature',
                    'cloudCover',
                    'dewPoint',
                    'precipIntensity',
                    'precipProbability',
                    'pressure',
                    'windBearing',
                    'windGust',
                    'uvIndex',
                    'visibility'
                    ]
    
    for day in days:
        df[day] = 0
    
    for icon in weather_icons:
        df[icon] = 0
    
    for j in df.index:
        val = df.loc[j, 'day_x']
        for day in days:
            if val == day:
                df.loc[j, day] = 1
        
        icon_val = df.loc[j, 'icon']
        for icon in weather_icons:
            if icon_val == icon:
                df.loc[j, icon] = 1
     
    data = pd.DataFrame((df.Mon, df.Tue, df.Wed, df.Thu, df.Fri, df.Sat, df.Sun, df.hour_x)).T #df.first_half_hour, df.second_half_hour)).T
    
    for icon in weather_icons:
        data[icon] = df[icon]
    for col in weather_cols:
        data[col] = df[col]
   
    label = pd.DataFrame((df.available_bikes))
    
    return [df, data, label]

In [191]:
def error(predictions, labels):
    errors = list()
    new_preds = list()

    for i in predictions:
        if i<0:
            new_preds.append(0)
        elif i>20:
            new_preds.append(20)
        else:
            new_preds.append(i)
    
    for i, pred in enumerate(new_preds):
        errors.append(pred - labels[i])
    errors_sq = [i**2 for i in errors]
    print("RMSE:", np.sqrt(np.mean(errors_sq)))

In [171]:
%%time
[station, data, label] = station_function(3)

Wall time: 19.4 s


In [172]:
%%time
X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=0.2, shuffle=False)

Wall time: 5.98 ms


In [7]:
scaler = StandardScaler()

In [16]:
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
scaler.fit(y_train)
Y_train = scaler.transform(y_train)
Y_test = scaler.transform(y_test)

  return self.partial_fit(X, y)
  """
  


In [174]:
from sklearn.neural_network import MLPRegressor

clf = MLPRegressor(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(15,), random_state=1)
clf.fit(X_train, np.array(y_train).ravel())

MLPRegressor(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(15,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='lbfgs', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [175]:
predictions = clf.predict(X_test)
labels = np.array(y_test)

In [176]:
print("\nAbs. Prediction\t-\tLabel")
for i, pred in enumerate(predictions):
    print()
    print(abs(pred), "\t-\t", "\t-\t", labels[i][0])


Abs. Prediction	-	Label

4.817941455818422 	-	 	-	 14

5.042532972340235 	-	 	-	 14

5.898805981585516 	-	 	-	 14

5.862784154810578 	-	 	-	 14

5.735238263599429 	-	 	-	 14

5.6822583435569936 	-	 	-	 14

5.60814874940621 	-	 	-	 14

5.560996018259138 	-	 	-	 15

4.807130799267137 	-	 	-	 15

4.784926242756297 	-	 	-	 15

4.745487124562219 	-	 	-	 14

4.716257143244031 	-	 	-	 13

4.7097559310581385 	-	 	-	 13

4.683184974276648 	-	 	-	 12

4.332362936778248 	-	 	-	 12

4.348076263678751 	-	 	-	 12

4.310980334014532 	-	 	-	 12

4.196683791113452 	-	 	-	 12

4.1130307542804605 	-	 	-	 12

4.109761886975148 	-	 	-	 11

4.1099473636269455 	-	 	-	 10

3.9778683155353627 	-	 	-	 10

3.978399839262553 	-	 	-	 10

3.9839064855796793 	-	 	-	 10

3.9989174206200624 	-	 	-	 9

4.003441587928114 	-	 	-	 10

3.9212218219786394 	-	 	-	 9

3.9250233887645103 	-	 	-	 9

3.9408770056725353 	-	 	-	 7

4.3206973588186255 	-	 	-	 6

4.326449575982556 	-	 	-	 6

4.342581422457593 	-	 	-	 5

4.352109884

4.668504619406518 	-	 	-	 5

4.869291938904572 	-	 	-	 5

4.869291938904572 	-	 	-	 5

4.816981367076886 	-	 	-	 3

4.836117273389119 	-	 	-	 2

4.836117273389119 	-	 	-	 3

4.812001114009815 	-	 	-	 3

4.834441726386716 	-	 	-	 3

4.859579673685468 	-	 	-	 2

4.871953779227907 	-	 	-	 0

4.773639150410061 	-	 	-	 1

4.782877470508778 	-	 	-	 0

4.794971826856772 	-	 	-	 0

4.824030168303095 	-	 	-	 0

4.837381616578446 	-	 	-	 0

4.852072105967755 	-	 	-	 0

4.86640603325216 	-	 	-	 0

4.879757481527507 	-	 	-	 1

4.902587209341577 	-	 	-	 1

4.7759545256942975 	-	 	-	 2

4.700539342668106 	-	 	-	 1

4.709898467425939 	-	 	-	 3

4.721549854989549 	-	 	-	 6

4.729515657293897 	-	 	-	 5

4.742395494852312 	-	 	-	 9

4.752597295759051 	-	 	-	 9

4.762187159143155 	-	 	-	 9

4.773853473278367 	-	 	-	 7

4.779985396621191 	-	 	-	 8

4.801930249890058 	-	 	-	 8

4.9479304412839 	-	 	-	 7

4.955916810117637 	-	 	-	 5

4.964692734232846 	-	 	-	 4

4.971012328201743 	-	 	-	 5

4.98128287823903

5.265934497869294 	-	 	-	 2

5.272122645636317 	-	 	-	 2

5.261895748957274 	-	 	-	 3

5.332464425719602 	-	 	-	 3

5.326120365311882 	-	 	-	 4

5.314217961899578 	-	 	-	 4

5.305244069063489 	-	 	-	 3

5.301299154705584 	-	 	-	 3

5.292754059132156 	-	 	-	 5

5.27911265141597 	-	 	-	 5

5.25835178234937 	-	 	-	 5

5.230251870046347 	-	 	-	 4

5.216572720213401 	-	 	-	 5

5.196805797910807 	-	 	-	 6

5.183189406923228 	-	 	-	 7

5.149743334787713 	-	 	-	 8

5.149743334787713 	-	 	-	 10

5.453353150408011 	-	 	-	 11

5.453353150408011 	-	 	-	 12

5.449498006494521 	-	 	-	 11

5.4197578225659715 	-	 	-	 11

5.4197578225659715 	-	 	-	 8

5.418672043902431 	-	 	-	 8

5.413595715885156 	-	 	-	 8

5.438625181120324 	-	 	-	 7

5.427152045014447 	-	 	-	 7

4.967414052339615 	-	 	-	 7

4.9438801543767905 	-	 	-	 8

4.958449336664485 	-	 	-	 8

4.952511661716416 	-	 	-	 8

4.940328652354646 	-	 	-	 7

4.865210175440568 	-	 	-	 7

5.034837472513873 	-	 	-	 8

5.020735660179492 	-	 	-	 7

5.226299

4.383198350699251 	-	 	-	 2

4.385428467241157 	-	 	-	 2

4.386310066356733 	-	 	-	 0

4.402793853181736 	-	 	-	 5

4.4029384409538945 	-	 	-	 7

4.397303056981928 	-	 	-	 7

4.3945902462305835 	-	 	-	 7

4.391646750998932 	-	 	-	 7

4.394054617031811 	-	 	-	 6

4.396202604577796 	-	 	-	 7

4.397921794861125 	-	 	-	 8

4.394829026175676 	-	 	-	 7

4.381854931320335 	-	 	-	 8

4.381854931320335 	-	 	-	 8

4.373926185108894 	-	 	-	 8

4.372302576468838 	-	 	-	 9

4.3714152266633395 	-	 	-	 9

4.368164117489227 	-	 	-	 8

4.372787931345487 	-	 	-	 9

4.410410771427509 	-	 	-	 7

4.4136955441698 	-	 	-	 6

4.409803605433856 	-	 	-	 5

4.413839812771078 	-	 	-	 4

4.416247678803961 	-	 	-	 0

4.453718156615482 	-	 	-	 0

4.446334503537946 	-	 	-	 2

4.444745119739659 	-	 	-	 3

4.44269245073424 	-	 	-	 5

4.447770319421336 	-	 	-	 5

4.456127673571357 	-	 	-	 5

4.615517927980806 	-	 	-	 6

4.619944275143775 	-	 	-	 6

4.627621986026776 	-	 	-	 4

4.5303559522285095 	-	 	-	 4

4.33195558266

6.803084574661581 	-	 	-	 13

7.086278292633962 	-	 	-	 13

7.080001025028745 	-	 	-	 13

7.08681637599887 	-	 	-	 13

7.088165485791022 	-	 	-	 13

7.086074518314165 	-	 	-	 13

7.086074518314165 	-	 	-	 12

7.153203398964331 	-	 	-	 11

7.06637358463764 	-	 	-	 10

7.0592251507073325 	-	 	-	 9

7.072692639909262 	-	 	-	 9

7.058115791203621 	-	 	-	 8

7.038464479503821 	-	 	-	 8

7.038464479503821 	-	 	-	 8

7.05920004439689 	-	 	-	 7

7.0784769419593925 	-	 	-	 6

7.0471925376147215 	-	 	-	 6

7.028133099455948 	-	 	-	 6

6.926631356174935 	-	 	-	 6

6.914220132515174 	-	 	-	 6

6.9065329179146975 	-	 	-	 4

6.8771117152770405 	-	 	-	 3

6.81488626348033 	-	 	-	 4

6.8086359234601 	-	 	-	 4

6.7970806818276674 	-	 	-	 4

6.794296637868362 	-	 	-	 2

6.783458769489075 	-	 	-	 0

6.770959211334263 	-	 	-	 0

6.749973024806236 	-	 	-	 0

6.929619696904593 	-	 	-	 0

6.9137780130987405 	-	 	-	 0

6.868571380078587 	-	 	-	 1

6.868571380078587 	-	 	-	 0

6.8087104224819 	-	 	-	 0

6.8164

In [177]:
errors = list()
new_preds = list()

for i in predictions:
    if i<0:
        new_preds.append(0)
    elif i>20:
        new_preds.append(20)
    else:
        new_preds.append(i)

In [178]:
for i, pred in enumerate(new_preds):
    errors.append(pred - labels[i][0])

In [179]:
errors_sq = [i**2 for i in errors]

In [180]:
np.sqrt(np.mean(errors_sq))

4.827544772243516

In [182]:
clf.score(X_test, y_test)

0.11115375765543734

In [168]:
def MLP(X_train, y_train):
    clf = MLPRegressor(activation='relu',
                       solver='adam',
                       max_iter=1000,
                       alpha=1e-5,
                       random_state=1)
    clf.fit(X_train, np.array(y_train).ravel())
    predictions = clf.predict(X_test)
    errors = list()
    new_preds = list()

    for i in predictions:
        if i<0:
            new_preds.append(0)
        elif i>20:
            new_preds.append(20)
        else:
            new_preds.append(i)
            
    for i, pred in enumerate(new_preds):
        errors.append(pred - labels[i][0])
    errors_sq = [i**2 for i in errors]
    print("RMSE:",np.sqrt(np.mean(errors_sq)))
    print("Score:", clf.score(X_test, y_test))
    return clf

In [183]:
%%time
# activation logistic
clf = MLP(X_train, y_train)

RMSE: 5.690360263493611
Score: -0.27954524936338054
Wall time: 3.12 s


In [167]:
%%time
# activation tanh
clf = MLP(X_train, y_train)

RMSE: 4.593399673042915
Score: 0.39482147445155114
Wall time: 2.64 s


In [169]:
%%time
# activation relu
clf = MLP(X_train, y_train)
clf.get_params()

RMSE: 4.530951040348133
Score: 0.3864992327202252
Wall time: 1.98 s


In [189]:
import sklearn.linear_model
ridge_clf = sklearn.linear_model.Ridge(alpha=0.5)
ridge_clf.fit(X_train, y_train)
ridge_clf.predict(X_test)

array([[10.22732493],
       [10.08101274],
       [ 9.87888425],
       ...,
       [ 8.78715547],
       [ 9.50881516],
       [ 9.0391706 ]])

In [193]:
error(ridge_clf.predict(X_test), y_test)

KeyError: 0