# Introduction

The purpose of this notebook is to download the data and process it. The result is a dataset ready for further analysis and modeling.

# Data loading and processing pipeline

In [1]:
# Switch the working directory to the project's src folder. The shell commands
# in the following cells (pytest, run_pipeline.py) are launched from there, and
# the dataset path later in the notebook is built from the current working
# directory.
%cd ../src

C:\Dev\private\wroclawski-rower-miejski\bikerentals\src


In [2]:
# Run the project's test suite before executing the pipeline, to make sure
# everything is working as expected.
! pytest
# Alternative invocation, kept disabled for reference:
#! python -m pytest

platform win32 -- Python 3.6.8, pytest-5.2.2, py-1.8.0, pluggy-0.13.0
rootdir: C:\Dev\private\wroclawski-rower-miejski\bikerentals, inifile: pytest.ini
collected 2 items

tests\features\test_season.py ..                                         [100%]



In [3]:
# Run the data loading and processing pipeline as a shell command. In the
# recorded run below it reports saving the processed dataset to
# data\processed\bike_rentals.csv under the project root.
!python run_pipeline.py

Pipeline execution started
Root folder set to: C:\Dev\private\wroclawski-rower-miejski\bikerentals
* SeasonExtractor *
--> input data shape:  (447409, 10)
--> output data shape:  (447409, 11)
Data saved to: C:\Dev\private\wroclawski-rower-miejski\bikerentals\data\processed\bike_rentals.csv
Pipeline execution completed


# Visual inspection

In [4]:
import os
import pandas as pd

# Path to the processed dataset, resolved from the current working directory:
# one level up, then data/processed/bike_rentals.csv.
filepath = os.path.join(
    os.getcwd(),
    '..',
    'data',
    'processed',
    'bike_rentals.csv',
)

# Load the CSV into the DataFrame inspected in the rest of this section.
bike_rentals_df = pd.read_csv(filepath)

In [5]:
# Convert the timestamp columns of the loaded frame to pandas datetimes.
for datetime_column in ('Rental datetime', 'Return datetime'):
    bike_rentals_df[datetime_column] = pd.to_datetime(bike_rentals_df[datetime_column])

# Convert the rental length to a pandas timedelta.
bike_rentals_df['Duration'] = pd.to_timedelta(bike_rentals_df['Duration'])

In [6]:
# Summarize column dtypes and non-null counts to check the conversions above
# and to spot columns with missing values.
bike_rentals_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 447409 entries, 0 to 447408
Data columns (total 11 columns):
Bike number                 447409 non-null int64
Rental datetime             447409 non-null datetime64[ns]
Return datetime             447409 non-null datetime64[ns]
Rental station              447409 non-null object
Return station              447409 non-null object
Duration                    447409 non-null timedelta64[ns]
Rental station latitude     389570 non-null float64
Rental station longitude    389570 non-null float64
Return station latitude     392435 non-null float64
Return station longitude    392435 non-null float64
Season                      447409 non-null int64
dtypes: datetime64[ns](2), float64(4), int64(2), object(2), timedelta64[ns](1)
memory usage: 37.5+ MB


In [7]:
# Peek at the first five rows of the processed dataset.
bike_rentals_df.head(n=5)

Unnamed: 0,Bike number,Rental datetime,Return datetime,Rental station,Return station,Duration,Rental station latitude,Rental station longitude,Return station latitude,Return station longitude,Season
0,57719,2019-06-26 00:00:10,2019-06-26 00:06:19,Skarbowców / Wietrzna,Krzycka / Aleja Karkonoska (Park Południowy),00:06:09,51.07329,16.99485,51.074992,17.007058,3
1,650480,2019-06-26 00:00:13,2019-06-26 00:06:59,Rynek,Plac Legionów,00:06:46,51.109782,17.030175,51.104413,17.022536,3
2,650988,2019-06-26 00:00:15,2019-06-26 00:13:32,Poza oficjalną stacją,Wałbrzyska - pętla tramwajowa,00:13:17,,,51.065777,16.988575,3
3,57603,2019-06-26 00:00:21,2019-06-26 00:23:53,Plac Uniwersytecki (UWr),Legnicka / Wejherowska,00:23:32,51.113871,17.034484,51.125276,16.984447,3
4,650067,2019-06-26 00:00:40,2019-06-26 00:04:40,Powstańców Śląskich (Arkady Wrocławskie),Powstańców Śląskich (Arkady Wrocławskie),00:04:00,51.099713,17.027905,51.099713,17.027905,3


In [8]:
# Peek at the last five rows of the processed dataset.
bike_rentals_df.tail(n=5)

Unnamed: 0,Bike number,Rental datetime,Return datetime,Rental station,Return station,Duration,Rental station latitude,Rental station longitude,Return station latitude,Return station longitude,Season
447404,57818,2019-10-25 11:56:00,2019-10-25 11:58:00,al. Armii Krajowej / Tarnogajska,al. Armii Krajowej / Tarnogajska,00:02:00,51.083551,17.060519,51.083551,17.060519,4
447405,650926,2019-10-25 11:56:00,2019-10-25 11:57:00,Trymanda / Mińska,Trymanda / Mińska,00:01:00,51.103216,16.9461,51.103216,16.9461,4
447406,650036,2019-10-25 11:57:00,2019-10-25 11:57:00,Plac Powstańców Warszawy (Muzeum Narodowe),Plac Powstańców Warszawy (Muzeum Narodowe),00:00:00,51.110001,17.047736,51.110001,17.047736,4
447407,57435,2019-10-25 11:57:00,2019-10-25 12:00:00,Krucza / Mielecka / Stalowa,Grabiszyńska / Stalowa,00:03:00,51.093426,17.002893,51.099354,17.000697,4
447408,57636,2019-10-25 11:58:00,2019-10-25 12:01:00,Trymanda / Mińska,Trymanda / Mińska,00:03:00,51.103216,16.9461,51.103216,16.9461,4
