In [1]:
# Import from the public IPython.display API: importing `display` from
# IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML
# Inject a CSS rule that sets the `.container` element to 80% width.
display(HTML("<style>.container { width:80% !important; }</style>"))

import os

# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare (major, minor) numerically. Comparing the raw version strings is
# lexicographic, so e.g. "0.3" >= "0.20" would wrongly pass the check.
# The regex only reads the leading "<digits>.<digits>", which also copes with
# suffixes such as "rc1" or ".dev0".
_sklearn_version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
assert _sklearn_version is not None, sklearn.__version__
assert tuple(map(int, _sklearn_version.groups())) >= (0, 20), sklearn.__version__

from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_error

import pandas as pd
import numpy as np
# Do not truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import scipy.stats as stats

import seaborn as sns

from datetime import datetime

import re

from tqdm.notebook import tqdm

# NOTE(review): wildcard import of the project-local `tools` module. The helpers
# called later in this notebook (print_duplicates, get_features_nans,
# get_features_zero, get_columns_unique) are not defined here, so they
# presumably come from this import — consider importing them by name.
from tools import * 

# Run parameters for this notebook. `year`, `dataset`, `month` and `monthname`
# are interpolated into the data paths (folder created below, CSV file name).
# The remaining entries are not used in this part of the notebook.
config = pd.Series({
    'devide_by':1,
    'year':2019,
    'datafrom': 'BICING_ESTACIONS',
    'dataset': 'BicingNou_MOD',
    'ttl': 30,
    'month': 1,
    'monthname': 'Gener'
})

# Make sure the data folder for this year/dataset exists.
# os.makedirs replaces the former `mkdir -p` shell call: it does not depend on
# a POSIX shell, performs no shell interpolation of the config values, and
# raises on failure instead of returning an exit status that nobody checks.
os.makedirs(f"../dades/{config.year}/{config.dataset}", exist_ok=True)


0

# Data: Gener (January) 2019

# Visualize data

In [2]:
# Load the monthly CSV back from disk. The path is assembled from `config`:
#   ../dades/<year>/<dataset>/<year>_<month, 2 digits>_<monthname>_<dataset>.csv
# low_memory=False makes pandas parse the file in one pass rather than in chunks.
dades_2019_Gener_info_old = pd.read_csv(
    f'../dades/{config.year}/{config.dataset}/'
    f'{config.year}_{config.month:02d}_{config.monthname}_{config.dataset}.csv',
    low_memory=False,
)

In [3]:
# Display the loaded frame (a bare last expression is rendered by the notebook).
dades_2019_Gener_info_old

Unnamed: 0,station_id,physical_configuration,lat,lon,street_name,num_docks_available,num_bikes_available,status,last_updated,is_installed,is_renting,is_returning,is_charging_station,num_bikes_available_types.mechanical,num_bikes_available_types.ebike,post_code,street_number,capacity,altitude
0,1,0,41.397952,2.180042,Gran Via Corts Catalanes,10,16,1,1546301100,1,1,1,1,16,0,0,760,30,21
1,1,0,41.397952,2.180042,Gran Via Corts Catalanes,10,16,1,1546301340,1,1,1,1,16,0,0,760,30,21
2,1,0,41.397952,2.180042,Gran Via Corts Catalanes,10,16,1,1546301640,1,1,1,1,16,0,0,760,30,21
3,1,0,41.397952,2.180042,Gran Via Corts Catalanes,9,17,1,1546302000,1,1,1,1,17,0,0,760,30,21
4,1,0,41.397952,2.180042,Gran Via Corts Catalanes,8,18,1,1546302240,1,1,1,1,18,0,0,760,30,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3977986,194,0,41.381013,2.132319,Joan Güell,25,0,0,1548977640,1,1,1,1,0,0,0,98,25,40
3977987,194,0,41.381013,2.132319,Joan Güell,25,0,0,1548977940,1,1,1,1,0,0,0,98,25,40
3977988,194,0,41.381013,2.132319,Joan Güell,25,0,0,1548978240,1,1,1,1,0,0,0,98,25,40
3977989,194,0,41.381013,2.132319,Joan Güell,25,0,0,1548978540,1,1,1,1,0,0,0,98,25,40


In [4]:
# Inspect the frame using (station_id, last_updated) as the key columns.
# NOTE(review): print_duplicates is not defined in this notebook (presumably it
# comes from `tools`); it presumably reports duplicated keys, but the meaning of
# the per-column numbers it outputs should be confirmed there.
print_duplicates(dades_2019_Gener_info_old, ['station_id', 'last_updated'])

physical_configuration                  1
lat                                     1
lon                                     1
street_name                             1
num_docks_available                     1
num_bikes_available                     1
status                                  1
is_installed                            1
is_renting                              1
is_returning                            1
is_charging_station                     1
num_bikes_available_types.mechanical    1
num_bikes_available_types.ebike         1
post_code                               1
street_number                           1
capacity                                1
altitude                                1
dtype: int64

In [5]:
# NOTE(review): get_features_nans is not defined in this notebook (presumably
# from `tools`) and appears to report missing values per column — TODO confirm.
# The recorded result for this frame is an empty dict.
get_features_nans(dades_2019_Gener_info_old)

{}

In [6]:
# NOTE(review): get_features_zero is not defined in this notebook (presumably
# from `tools`). The recorded result maps column names to values between 0 and
# 100, presumably the percentage of zero entries per column — TODO confirm.
get_features_zero(dades_2019_Gener_info_old)

{'physical_configuration': 90.32194894357478,
 'num_docks_available': 3.3191880021850224,
 'num_bikes_available': 23.097161356071442,
 'status': 14.146386957637661,
 'num_bikes_available_types.mechanical': 23.097161356071442,
 'num_bikes_available_types.ebike': 100.0,
 'post_code': 100.0,
 'altitude': 3.694100866492659}

In [7]:
# NOTE(review): get_columns_unique is not defined in this notebook (presumably
# from `tools`). The recorded result maps column names to arrays of values,
# presumably the distinct values found in each column — TODO confirm.
get_columns_unique(dades_2019_Gener_info_old)

{'station_id': array([  1, 320, 319, 318, 317, 316, 315, 314, 313, 312, 311, 310, 321,
        309, 307, 306, 305, 304, 303, 302, 301, 300, 299, 298, 297, 308,
        322, 323, 324, 349, 348, 347, 346, 345, 344, 343, 342, 341, 340,
        339, 338, 337, 336, 335, 334, 333, 332, 331, 330, 329, 328, 327,
        326, 325, 296, 350, 295, 292, 261, 260, 259, 258, 256, 255, 254,
        253, 252, 251, 250, 262, 249, 247, 246, 244, 243, 242, 241, 240,
        239, 238, 237, 236, 248, 263, 264, 265, 291, 289, 288, 287, 286,
        285, 284, 283, 282, 281, 280, 279, 278, 277, 276, 275, 274, 273,
        272, 271, 270, 269, 268, 267, 266, 294, 352, 353, 354, 466, 465,
        464, 463, 462, 461, 460, 459, 458, 457, 456, 467, 455, 453, 452,
        451, 428, 427, 426, 425, 424, 423, 421, 420, 454, 468, 469, 470,
        496, 495, 494, 493, 492, 491, 489, 488, 487, 486, 485, 484, 483,
        482, 481, 480, 479, 478, 477, 476, 475, 474, 473, 472, 471, 419,
        418, 416, 415, 381, 380, 379,