In [1]:
import os
import pandas as pd
import glob
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import numpy as np
from datetime import datetime

In [2]:
path_input=os.path.join(os.getcwd(), '../data/input/dresden/')
path_output_weather = os.path.join(os.getcwd(), '../data/output/weather/')
path_output=os.path.join(os.getcwd(), '../data/output/')

## Read Data

In [3]:
directors = glob.glob(path_input+"*/**")

all_files = []
for director in directors:
    all_files=all_files+glob.glob(director+"/*.csv")
    
li = []

for filename in all_files:
    df_read = pd.read_csv(filename, index_col=None, header=0)
    li.append(df_read)

df = pd.concat(li, axis=0, ignore_index=True)

In [4]:
#df["datetime"].sort_values()[1450300:1450350]

## Drop Columns

In [5]:
#Columns including single value
#The following columns only included one value (or NaN values)
#city : ['dresden']
#p_rack_locks : [False]
#b_state : ['ok']
#b_active : [ True]
#b_battery_pack : [nan '{"percentage": 0}']
#p_special_racks : [nan 0.]
#p_free_special_racks : [nan 0.]

#Columns which highly correlate

#The following columns highly correlate - meaning they are complementary booleans or if one is high, the other is too

#p_bike [corr(p_spot)=-0.999501, complementary boolean to p_spot]

df_cleaned = df.drop(["city", 
                      "p_bike", 
                      "p_rack_locks", 
                      "b_active", 
                      "b_state", 
                      "b_battery_pack", 
                      "p_rack_locks", 
                      "p_special_racks", 
                      "p_free_special_racks",
                      "p_address", # These 4 last columns will be dropped because there are too many stored NaN values
                      "p_bike_types",
                      "b_pedelec_battery",
                      "p_terminal_type"], axis=1)
df_cleaned.head()

Unnamed: 0,p_spot,b_lock_types,p_maintenance,p_bike_racks,p_place_type,p_number,p_uid,b_number,p_free_racks,b_boardcomputer,datetime,p_lng,b_electric_lock,p_bikes,trip,p_booked_bikes,p_name,b_bike_type,p_lat
0,True,frame_lock,False,0,0,4505.0,10299865,93153,0,7551006387,2019-03-03 00:00:00,13.767586,True,3,first,0,Altleubitz,0,51.015597
1,True,frame_lock,False,0,0,4505.0,10299865,93153,0,7551006387,2019-03-03 23:59:00,13.767586,True,3,last,0,Altleubitz,0,51.015597
2,False,frame_lock,False,0,12,0.0,13101449,93616,0,7551006429,2019-03-03 00:00:00,13.752801,True,1,first,0,BIKE 93616,15,51.065062
3,False,frame_lock,False,0,12,0.0,13101449,93616,0,7551006429,2019-03-03 01:12:00,13.752801,True,1,start,0,BIKE 93616,15,51.065062
4,False,frame_lock,False,0,12,0.0,13103279,93616,0,7551006429,2019-03-03 01:16:00,13.752313,True,1,end,0,BIKE 93616,15,51.064738


## Create Trips

In [6]:
# No Duplicates
len(df_cleaned)-len(df_cleaned.drop_duplicates(subset=df_cleaned.columns.difference(["p_lat", "p_lng"])))

0

In [7]:
print("Starts:", len(df_cleaned[df_cleaned["trip"]=="start"]))
print("Ends:", len(df_cleaned[df_cleaned["trip"]=="end"]))
print("Difference:", len(df_cleaned[df_cleaned["trip"]=="start"])-len(df_cleaned[df_cleaned["trip"]=="end"]))

Starts: 583206
Ends: 567439
Difference: 15767


In [8]:
# df_2: start and end of trips
df_2 = df_cleaned[(df_cleaned["trip"]=="start") | (df_cleaned["trip"]=="end")]
print("Dropped", len(df_cleaned)-len(df_2), "first and last Values")

Dropped 307709 first and last Values


In [9]:
# df_3: start and end of trips sorted by b_number and datetime
df_3=df_2.sort_values(["b_number", "datetime"])
df_3[["b_number", "datetime", "trip"]].iloc[500010:500030] # two examples for multiple starts

Unnamed: 0,b_number,datetime,trip
446639,93547,2019-02-12 21:52:00,start
446640,93547,2019-02-12 21:54:00,start
446641,93547,2019-02-12 21:56:00,end
446642,93547,2019-02-12 22:11:00,start
446643,93547,2019-02-12 22:14:00,end
446644,93547,2019-02-12 22:27:00,start
446645,93547,2019-02-12 22:30:00,end
446646,93547,2019-02-12 23:24:00,start
446647,93547,2019-02-12 23:27:00,start
446648,93547,2019-02-12 23:31:00,end


In [10]:
# sr_1: True if after start entry comes end entry else False
sr_1 = (df_3['trip'] != df_3['trip'].shift())
df_3["valid_start"]=sr_1

# sr_2: 
#sr_2 = (df_3['trip'] != df_3['trip'].shift(-1))
#df_3['valid_end'] = sr_2


In [11]:
# df_4: start and end of trips sorted by b_number and datetime only valid starts
df_4=df_3[df_3["valid_start"]==True]
print("Deleted entries", len(df_3)-len(df_4))
print("Correct entries", len(df_4))

Deleted entries 15767
Correct entries 1134878


In [12]:
# Tests if there are values with two ends after each other
test = df_4["trip"] != df_4["trip"].shift(-1)
print("Entries where Bike X ends with an end and bike Y starts with an end")
pd.DataFrame(test)[pd.DataFrame(test)["trip"]==False]

Entries where Bike X ends with an end and bike Y starts with an end


Unnamed: 0,trip


In [13]:
df_starts = df_4[df_4["trip"]=="start"].reset_index(drop=True)
df_ends = df_4[df_4["trip"]=="end"].reset_index(drop=True)



In [14]:
print(df_starts.head()[["b_number", "datetime", "trip"]], "\n")
print(df_ends.head()[["b_number", "datetime", "trip"]])

   b_number             datetime   trip
0     11225  2019-04-29 06:31:00  start
1     11225  2019-04-29 11:27:00  start
2     11225  2019-04-29 11:58:00  start
3     11225  2019-04-29 12:22:00  start
4     11225  2019-04-29 13:00:00  start 

   b_number             datetime trip
0     11225  2019-04-29 06:35:00  end
1     11225  2019-04-29 11:33:00  end
2     11225  2019-04-29 12:03:00  end
3     11225  2019-04-29 12:39:00  end
4     11225  2019-04-29 13:14:00  end


In [15]:
df_merged = df_starts.merge(df_ends, left_on=df_starts.index, right_on=df_ends.index, suffixes=("_start", "_end"))
df_merged.drop(["key_0", 
                "valid_start_start", 
                "valid_start_end", 
                "trip_start", 
                "trip_end", 
                "b_number_end", 
                "b_boardcomputer_end", 
                "b_lock_types_end", 
                "b_bike_type_end", 
                "b_electric_lock_end",], axis=1, inplace=True)
df_merged.head()

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,...,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end
0,True,fork_lock,False,0,0,4389.0,264595,11225,0,22919,...,0,4389.0,264595,0,2019-04-29 06:35:00,13.769281,1,0,Gutenbergstraße,51.058771
1,False,fork_lock,False,0,12,0.0,15109859,11225,0,22919,...,12,0.0,15117356,0,2019-04-29 11:33:00,8.770495,1,0,BIKE 11225,50.813655
2,False,fork_lock,False,0,12,0.0,15117356,11225,0,22919,...,12,0.0,15118389,0,2019-04-29 12:03:00,8.772863,1,0,BIKE 11225,50.808976
3,False,fork_lock,False,0,12,0.0,15118389,11225,0,22919,...,12,0.0,15119670,0,2019-04-29 12:39:00,8.77455,1,0,BIKE 11225,50.822621
4,False,fork_lock,False,0,12,0.0,15119670,11225,0,22919,...,12,0.0,15120946,0,2019-04-29 13:14:00,8.763238,1,0,BIKE 11225,50.813241


## Merge Trips with Weather data

In [16]:
df_dwd=pd.read_csv(os.path.join(path_output_weather, "DWD.csv"))
df_dwd['MESS_DATUM'] = pd.to_datetime(df_dwd['MESS_DATUM'].astype(str), format="%Y-%m-%d %H:%M:%S", errors='coerce')

df_merged["datetime_start"] = pd.to_datetime(df_merged["datetime_start"].astype(str), format="%Y-%m-%d %H:%M:%S", errors="coerce")

In [17]:
df_full = pd.merge_asof(df_merged.sort_values('datetime_start'), 
                        df_dwd, left_on='datetime_start', 
                        right_on='MESS_DATUM', 
                        tolerance=pd.Timedelta('30 min'),
                        allow_exact_matches=True, direction='nearest')

In [18]:
df_full.head(5)

Unnamed: 0.1,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,...,p_name_end,p_lat_end,Unnamed: 0,MESS_DATUM,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms
0,False,analog_code_lock,False,0,12,0.0,12095573,93771,0,22532,...,BIKE 93771,51.046234,1752,2019-01-20,-3.3,75.3,0.0,0.0,0.0,4.3
1,True,analog_code_lock,False,0,0,4486.0,10299640,93576,0,0,...,BIKE 93576,51.049069,1752,2019-01-20,-3.3,75.3,0.0,0.0,0.0,4.3
2,True,frame_lock,False,0,0,4483.0,10299584,93440,0,7551004130,...,Wundtstr. / Zellescher Weg,51.02837,1752,2019-01-20,-3.3,75.3,0.0,0.0,0.0,4.3
3,False,analog_code_lock,False,0,12,0.0,12098234,93322,0,1265,...,BIKE 93322,51.041798,1752,2019-01-20,-3.3,75.3,0.0,0.0,0.0,4.3
4,True,analog_code_lock,False,0,0,4373.0,264575,93585,0,0,...,Friedensstraße/Conradstr.,51.07174,1752,2019-01-20,-3.3,75.3,0.0,0.0,0.0,4.3


In [19]:
df_full.drop(["Unnamed: 0", "MESS_DATUM"], inplace=True, axis=1)

## Calculate additional datetime columns

In [20]:
df_full["datetime_start"] = pd.to_datetime(df_full["datetime_start"])
df_full["datetime_end"] = pd.to_datetime(df_full["datetime_end"])

df_full["month_start"] = df_full["datetime_start"].dt.month
df_full["month_end"] = df_full["datetime_end"].dt.month
df_full["day_start"] = df_full["datetime_start"].dt.day
df_full["day_end"] = df_full["datetime_end"].dt.day
df_full["day_of_week_start"] = df_full["datetime_start"].dt.dayofweek
df_full["day_of_week_end"] = df_full["datetime_end"].dt.dayofweek
df_full["hour_start"] = df_full["datetime_start"].dt.hour
df_full["hour_end"] = df_full["datetime_end"].dt.hour
df_full["day_of_year_start"] = df_full["datetime_start"].dt.dayofyear
df_full["day_of_year_end"] = df_full["datetime_end"].dt.dayofyear
df_full["season"] = (df_full["month_start"] % 12 + 3) // 3  # winter: 1, spring: 2, summer: 3, fall: 4
df_full["weekend"] = df_full["datetime_start"].dt.dayofweek > 4
df_full["booking_date_start"] = pd.to_datetime(df_full["datetime_start"].dt.date)

## Calculate trip duration

In [21]:
df_full['trip_duration'] = ((df_full['datetime_end'] - df_full['datetime_start']).dt.total_seconds() / 60.0).round(2)

In [22]:
# Activate parameter to show ALL columns from dataframe
pd.set_option('display.max_columns', None)
df_full.head(10)

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration
0,False,analog_code_lock,False,0,12,0.0,12095573,93771,0,22532,2019-01-20 00:00:00,13.75038,False,1,0,BIKE 93771,15,51.071262,False,False,0,12,0.0,12099518,0,2019-01-20 00:28:00,13.693052,1,0,BIKE 93771,51.046234,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0
1,True,analog_code_lock,False,0,0,4486.0,10299640,93576,0,0,2019-01-20 00:05:00,13.744712,False,5,0,Wohnheim Gret-Palucca.Straße / Lenneplatz,15,51.03821,False,False,0,12,0.0,12099344,0,2019-01-20 00:15:00,13.744122,1,0,BIKE 93576,51.049069,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,10.0
2,True,frame_lock,False,0,0,4483.0,10299584,93440,0,7551004130,2019-01-20 00:07:00,13.69113,True,1,0,Malterstraße (Haltestelle),0,51.04257,True,False,0,0,4458.0,4405670,0,2019-01-20 00:33:00,13.74773,3,0,Wundtstr. / Zellescher Weg,51.02837,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,26.0
3,False,analog_code_lock,False,0,12,0.0,12098234,93322,0,1265,2019-01-20 00:07:00,13.68962,False,1,0,BIKE 93322,0,51.041798,False,False,0,12,0.0,12098234,0,2019-01-20 00:09:00,13.68962,1,0,BIKE 93322,51.041798,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0
4,True,analog_code_lock,False,0,0,4373.0,264575,93585,0,0,2019-01-20 00:07:00,13.741257,False,5,0,Friedensstraße/Conradstr.,15,51.07174,True,False,0,0,4373.0,264575,0,2019-01-20 00:35:00,13.741257,5,0,Friedensstraße/Conradstr.,51.07174,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0
5,True,analog_code_lock,False,0,0,4310.0,45444,93660,0,0,2019-01-20 00:21:00,13.789338,False,1,0,Panometer,15,51.025741,True,False,0,0,4336.0,264532,0,2019-01-20 00:33:00,13.807733,1,0,Altenberger Platz,51.034938,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,12.0
6,False,analog_code_lock,False,0,12,0.0,12098942,93478,0,1206,2019-01-20 00:26:00,13.710755,False,1,0,BIKE 93478,0,51.028163,True,False,0,0,4491.0,10299678,0,2019-01-20 00:28:00,13.742453,1,0,Räcknitzhöhe,51.023416,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0
7,True,analog_code_lock,False,0,0,4392.0,264599,93577,0,0,2019-01-20 00:29:00,13.777725,False,1,0,Mosenstraße,15,51.04474,True,False,0,0,4392.0,264599,0,2019-01-20 00:31:00,13.777725,1,0,Mosenstraße,51.04474,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0
8,True,analog_code_lock,False,0,0,4403.0,38955,93258,0,1322,2019-01-20 00:31:00,13.739294,False,2,0,Palaisplatz,0,51.060231,True,False,0,0,4327.0,121771,0,2019-01-20 01:02:00,13.750328,4,0,Schauburg,51.071144,-3.4,75.0,0.0,0.0,0.0,4.4,1,1,20,20,6,6,0,1,20,20,1,True,2019-01-20,31.0
9,True,analog_code_lock,False,0,0,4348.0,264548,93785,0,22556,2019-01-20 00:33:00,13.722911,False,2,0,Nürnberger Ei,15,51.033767,True,False,0,0,4458.0,4405670,0,2019-01-20 00:49:00,13.74773,5,0,Wundtstr. / Zellescher Weg,51.02837,-3.4,75.0,0.0,0.0,0.0,4.4,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,16.0


In [23]:
df_full.sort_values(by="datetime_start", ascending=False)

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration
567438,False,frame_lock,False,0,12,0.0,27150164,93454,0,7551002419,2020-01-20 23:36:00,13.738484,True,1,0,BIKE 93454,71,51.044302,True,False,0,0,4504.0,10299848,0,2020-01-20 23:50:00,13.729830,17,0,Alexander-Puschkin-Platz,51.070350,-2.4,91.7,0.0,0.0,0.0,3.2,1,1,20,20,0,0,23,23,20,20,1,False,2020-01-20,14.0
567437,True,frame_lock,False,0,0,4332.0,73,93824,0,7551016033,2020-01-20 23:33:00,13.741220,True,1,0,Bf. Dresden-Neustadt,71,51.065184,True,False,0,0,4356.0,264558,0,2020-01-20 23:45:00,13.723367,3,0,Hubertusplatz,51.089747,-2.4,91.7,0.0,0.0,0.0,3.2,1,1,20,20,0,0,23,23,20,20,1,False,2020-01-20,12.0
567436,True,frame_lock,False,0,0,4373.0,264575,93334,0,7551005670,2020-01-20 23:31:00,13.741257,True,4,0,Friedensstraße/Conradstr.,71,51.071740,True,False,0,0,4327.0,121771,0,2020-01-20 23:36:00,13.750328,5,0,Schauburg,51.071144,-2.4,91.7,0.0,0.0,0.0,3.2,1,1,20,20,0,0,23,23,20,20,1,False,2020-01-20,5.0
567435,True,frame_lock,False,0,0,4507.0,11248297,93908,0,7551012906,2020-01-20 23:29:00,13.703979,True,3,0,Tharandter Straße,71,51.043537,True,False,0,0,4458.0,4405670,0,2020-01-20 23:46:00,13.747730,4,0,Wundtstr. / Zellescher Weg,51.028370,-1.6,91.7,0.0,0.0,0.0,3.3,1,1,20,20,0,0,23,23,20,20,1,False,2020-01-20,17.0
567434,True,frame_lock,False,0,0,4438.0,310792,93857,0,7551012762,2020-01-20 23:29:00,13.701679,True,6,0,Bonhoeffer Platz,71,51.038466,True,False,0,0,4504.0,10299848,0,2020-01-20 23:49:00,13.729830,16,0,Alexander-Puschkin-Platz,51.070350,-1.6,91.7,0.0,0.0,0.0,3.3,1,1,20,20,0,0,23,23,20,20,1,False,2020-01-20,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,True,analog_code_lock,False,0,0,4373.0,264575,93585,0,0,2019-01-20 00:07:00,13.741257,False,5,0,Friedensstraße/Conradstr.,15,51.071740,True,False,0,0,4373.0,264575,0,2019-01-20 00:35:00,13.741257,5,0,Friedensstraße/Conradstr.,51.071740,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0
2,True,frame_lock,False,0,0,4483.0,10299584,93440,0,7551004130,2019-01-20 00:07:00,13.691130,True,1,0,Malterstraße (Haltestelle),0,51.042570,True,False,0,0,4458.0,4405670,0,2019-01-20 00:33:00,13.747730,3,0,Wundtstr. / Zellescher Weg,51.028370,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,26.0
3,False,analog_code_lock,False,0,12,0.0,12098234,93322,0,1265,2019-01-20 00:07:00,13.689620,False,1,0,BIKE 93322,0,51.041798,False,False,0,12,0.0,12098234,0,2019-01-20 00:09:00,13.689620,1,0,BIKE 93322,51.041798,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0
1,True,analog_code_lock,False,0,0,4486.0,10299640,93576,0,0,2019-01-20 00:05:00,13.744712,False,5,0,Wohnheim Gret-Palucca.Straße / Lenneplatz,15,51.038210,False,False,0,12,0.0,12099344,0,2019-01-20 00:15:00,13.744122,1,0,BIKE 93576,51.049069,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,10.0


# Calculate average idle time

In [24]:
# Calculate trips number by bike ID
df_full["b_number_start"].value_counts()

93917    1444
93905    1443
93900    1437
93578    1419
93654    1418
         ... 
43414       4
93032       3
93809       3
93787       1
93517       1
Name: b_number_start, Length: 608, dtype: int64

In [25]:
# sort dataset by bike number (= bikeID) and start time of the trip
df_clean_unique_trip = df_full.sort_values(by=["b_number_start", "datetime_start"])
df_clean_unique_trip

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration
125525,True,fork_lock,False,0,0,4389.0,264595,11225,0,22919,2019-04-29 06:31:00,13.769281,True,1,0,Gutenbergstraße,15,51.058771,True,False,0,0,4389.0,264595,0,2019-04-29 06:35:00,13.769281,1,0,Gutenbergstraße,51.058771,7.6,93.7,0.6,1.0,0.0,4.8,4,4,29,29,0,0,6,6,119,119,2,False,2019-04-29,4.0
125683,False,fork_lock,False,0,12,0.0,15109859,11225,0,22919,2019-04-29 11:27:00,8.765875,True,1,0,BIKE 11225,15,50.806704,False,False,0,12,0.0,15117356,0,2019-04-29 11:33:00,8.770495,1,0,BIKE 11225,50.813655,8.1,95.3,4.6,1.0,0.0,5.0,4,4,29,29,0,0,11,11,119,119,2,False,2019-04-29,6.0
125702,False,fork_lock,False,0,12,0.0,15117356,11225,0,22919,2019-04-29 11:58:00,8.770495,True,1,0,BIKE 11225,15,50.813655,False,False,0,12,0.0,15118389,0,2019-04-29 12:03:00,8.772863,1,0,BIKE 11225,50.808976,8.6,93.7,0.2,1.0,0.0,4.8,4,4,29,29,0,0,11,12,119,119,2,False,2019-04-29,5.0
125724,False,fork_lock,False,0,12,0.0,15118389,11225,0,22919,2019-04-29 12:22:00,8.772863,True,1,0,BIKE 11225,15,50.808976,False,False,0,12,0.0,15119670,0,2019-04-29 12:39:00,8.774550,1,0,BIKE 11225,50.822621,8.6,93.7,0.2,1.0,0.0,4.8,4,4,29,29,0,0,12,12,119,119,2,False,2019-04-29,17.0
125771,False,fork_lock,False,0,12,0.0,15119670,11225,0,22919,2019-04-29 13:00:00,8.774550,True,1,0,BIKE 11225,15,50.822621,False,False,0,12,0.0,15120946,0,2019-04-29 13:14:00,8.763238,1,0,BIKE 11225,50.813241,8.7,97.3,0.1,1.0,0.0,4.6,4,4,29,29,0,0,13,13,119,119,2,False,2019-04-29,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
566948,True,frame_lock,False,0,0,4459.0,4405696,93919,0,7551012828,2020-01-20 14:49:00,13.725730,True,10,0,Toeplerbau / Binderbau,71,51.027270,True,False,0,0,4348.0,264548,0,2020-01-20 14:54:00,13.722911,2,0,Nürnberger Ei,51.033767,4.4,69.0,0.0,0.0,11.0,2.8,1,1,20,20,0,0,14,14,20,20,1,False,2020-01-20,5.0
566993,True,frame_lock,False,0,0,4348.0,264548,93919,0,7551012828,2020-01-20 15:35:00,13.722911,True,2,0,Nürnberger Ei,71,51.033767,True,False,0,0,4349.0,264549,0,2020-01-20 15:44:00,13.715690,2,0,Nossener Brücke,51.037127,2.5,76.3,0.0,0.0,9.5,2.2,1,1,20,20,0,0,15,15,20,20,1,False,2020-01-20,9.0
567057,True,frame_lock,False,0,0,4349.0,264549,93919,0,7551012828,2020-01-20 16:25:00,13.715690,True,2,1,Nossener Brücke,71,51.037127,False,False,0,12,0.0,27143281,0,2020-01-20 16:33:00,13.725704,1,0,BIKE 93919,51.050204,2.5,76.3,0.0,0.0,9.5,2.2,1,1,20,20,0,0,16,16,20,20,1,False,2020-01-20,8.0
567372,False,frame_lock,False,0,12,0.0,27143281,93919,0,7551012828,2020-01-20 21:22:00,13.725468,True,1,0,BIKE 93919,71,51.049818,False,False,0,12,0.0,27152329,0,2020-01-20 21:24:00,13.725428,1,0,BIKE 93919,51.049818,-0.5,91.0,0.0,0.0,0.0,2.6,1,1,20,20,0,0,21,21,20,20,1,False,2020-01-20,2.0


In [26]:
# Compare the bike ID of each row with the row below (to check whether the next trip was made with the same bike or not)
# Necessary for computation of idle time
sr_previous_entry_differs = (df_clean_unique_trip["b_number_start"] != df_clean_unique_trip["b_number_start"].shift(-1))
sr_previous_entry_differs

125525    False
125683    False
125702    False
125724    False
125771    False
          ...  
566948    False
566993    False
567057    False
567372    False
567396     True
Name: b_number_start, Length: 567439, dtype: bool

In [27]:
# Compute idle time for each row
df_idle_time_shifted = ((df_clean_unique_trip["datetime_start"].shift(-1)-df_clean_unique_trip["datetime_end"]).dt.total_seconds() / 60.0).round(2)
df_idle_time_shifted

125525    292.0
125683     25.0
125702     19.0
125724     21.0
125771    191.0
          ...  
566948     41.0
566993     41.0
567057    289.0
567372     50.0
567396      NaN
Length: 567439, dtype: float64

In [28]:
# merge both above data frames and rename "idle_time" column
new_df = pd.concat([sr_previous_entry_differs, df_idle_time_shifted], axis=1, join='inner')
new_df.rename(columns = {0: "idle_time"}, inplace=True)
new_df

Unnamed: 0,b_number_start,idle_time
125525,False,292.0
125683,False,25.0
125702,False,19.0
125724,False,21.0
125771,False,191.0
...,...,...
566948,False,41.0
566993,False,41.0
567057,False,289.0
567372,False,50.0


In [29]:
# Necessary because start of next bike (VERY IMPORTANT)
new_df.loc[(new_df["b_number_start"]==True, "idle_time")] = 0
new_df

Unnamed: 0,b_number_start,idle_time
125525,False,292.0
125683,False,25.0
125702,False,19.0
125724,False,21.0
125771,False,191.0
...,...,...
566948,False,41.0
566993,False,41.0
567057,False,289.0
567372,False,50.0


In [30]:
# Shift down the "idle_time" because it is not possible to compute idle time (= bike was rented for the first time)
idle_time_shifted = new_df["idle_time"].shift()
idle_time_shifted.iloc[0] = 0
idle_time_shifted

125525      0.0
125683    292.0
125702     25.0
125724     19.0
125771     21.0
          ...  
566948     10.0
566993     41.0
567057     41.0
567372    289.0
567396     50.0
Name: idle_time, Length: 567439, dtype: float64

In [31]:
# Merge dataframes (1: grouped by bike ID and start of the booking time; 2: computed idle time)
df_with_computed_idle_time = pd.concat([df_clean_unique_trip, idle_time_shifted], axis=1, join='inner')
df_with_computed_idle_time

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration,idle_time
125525,True,fork_lock,False,0,0,4389.0,264595,11225,0,22919,2019-04-29 06:31:00,13.769281,True,1,0,Gutenbergstraße,15,51.058771,True,False,0,0,4389.0,264595,0,2019-04-29 06:35:00,13.769281,1,0,Gutenbergstraße,51.058771,7.6,93.7,0.6,1.0,0.0,4.8,4,4,29,29,0,0,6,6,119,119,2,False,2019-04-29,4.0,0.0
125683,False,fork_lock,False,0,12,0.0,15109859,11225,0,22919,2019-04-29 11:27:00,8.765875,True,1,0,BIKE 11225,15,50.806704,False,False,0,12,0.0,15117356,0,2019-04-29 11:33:00,8.770495,1,0,BIKE 11225,50.813655,8.1,95.3,4.6,1.0,0.0,5.0,4,4,29,29,0,0,11,11,119,119,2,False,2019-04-29,6.0,292.0
125702,False,fork_lock,False,0,12,0.0,15117356,11225,0,22919,2019-04-29 11:58:00,8.770495,True,1,0,BIKE 11225,15,50.813655,False,False,0,12,0.0,15118389,0,2019-04-29 12:03:00,8.772863,1,0,BIKE 11225,50.808976,8.6,93.7,0.2,1.0,0.0,4.8,4,4,29,29,0,0,11,12,119,119,2,False,2019-04-29,5.0,25.0
125724,False,fork_lock,False,0,12,0.0,15118389,11225,0,22919,2019-04-29 12:22:00,8.772863,True,1,0,BIKE 11225,15,50.808976,False,False,0,12,0.0,15119670,0,2019-04-29 12:39:00,8.774550,1,0,BIKE 11225,50.822621,8.6,93.7,0.2,1.0,0.0,4.8,4,4,29,29,0,0,12,12,119,119,2,False,2019-04-29,17.0,19.0
125771,False,fork_lock,False,0,12,0.0,15119670,11225,0,22919,2019-04-29 13:00:00,8.774550,True,1,0,BIKE 11225,15,50.822621,False,False,0,12,0.0,15120946,0,2019-04-29 13:14:00,8.763238,1,0,BIKE 11225,50.813241,8.7,97.3,0.1,1.0,0.0,4.6,4,4,29,29,0,0,13,13,119,119,2,False,2019-04-29,14.0,21.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
566948,True,frame_lock,False,0,0,4459.0,4405696,93919,0,7551012828,2020-01-20 14:49:00,13.725730,True,10,0,Toeplerbau / Binderbau,71,51.027270,True,False,0,0,4348.0,264548,0,2020-01-20 14:54:00,13.722911,2,0,Nürnberger Ei,51.033767,4.4,69.0,0.0,0.0,11.0,2.8,1,1,20,20,0,0,14,14,20,20,1,False,2020-01-20,5.0,10.0
566993,True,frame_lock,False,0,0,4348.0,264548,93919,0,7551012828,2020-01-20 15:35:00,13.722911,True,2,0,Nürnberger Ei,71,51.033767,True,False,0,0,4349.0,264549,0,2020-01-20 15:44:00,13.715690,2,0,Nossener Brücke,51.037127,2.5,76.3,0.0,0.0,9.5,2.2,1,1,20,20,0,0,15,15,20,20,1,False,2020-01-20,9.0,41.0
567057,True,frame_lock,False,0,0,4349.0,264549,93919,0,7551012828,2020-01-20 16:25:00,13.715690,True,2,1,Nossener Brücke,71,51.037127,False,False,0,12,0.0,27143281,0,2020-01-20 16:33:00,13.725704,1,0,BIKE 93919,51.050204,2.5,76.3,0.0,0.0,9.5,2.2,1,1,20,20,0,0,16,16,20,20,1,False,2020-01-20,8.0,41.0
567372,False,frame_lock,False,0,12,0.0,27143281,93919,0,7551012828,2020-01-20 21:22:00,13.725468,True,1,0,BIKE 93919,71,51.049818,False,False,0,12,0.0,27152329,0,2020-01-20 21:24:00,13.725428,1,0,BIKE 93919,51.049818,-0.5,91.0,0.0,0.0,0.0,2.6,1,1,20,20,0,0,21,21,20,20,1,False,2020-01-20,2.0,289.0


In [32]:
# Add "idle_time" to "df_full" dataframe for later save as CSV file
df_full["idle_time"] = df_with_computed_idle_time["idle_time"]
df_full

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration,idle_time
0,False,analog_code_lock,False,0,12,0.0,12095573,93771,0,22532,2019-01-20 00:00:00,13.750380,False,1,0,BIKE 93771,15,51.071262,False,False,0,12,0.0,12099518,0,2019-01-20 00:28:00,13.693052,1,0,BIKE 93771,51.046234,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0
1,True,analog_code_lock,False,0,0,4486.0,10299640,93576,0,0,2019-01-20 00:05:00,13.744712,False,5,0,Wohnheim Gret-Palucca.Straße / Lenneplatz,15,51.038210,False,False,0,12,0.0,12099344,0,2019-01-20 00:15:00,13.744122,1,0,BIKE 93576,51.049069,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,10.0,0.0
2,True,frame_lock,False,0,0,4483.0,10299584,93440,0,7551004130,2019-01-20 00:07:00,13.691130,True,1,0,Malterstraße (Haltestelle),0,51.042570,True,False,0,0,4458.0,4405670,0,2019-01-20 00:33:00,13.747730,3,0,Wundtstr. / Zellescher Weg,51.028370,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,26.0,0.0
3,False,analog_code_lock,False,0,12,0.0,12098234,93322,0,1265,2019-01-20 00:07:00,13.689620,False,1,0,BIKE 93322,0,51.041798,False,False,0,12,0.0,12098234,0,2019-01-20 00:09:00,13.689620,1,0,BIKE 93322,51.041798,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0,0.0
4,True,analog_code_lock,False,0,0,4373.0,264575,93585,0,0,2019-01-20 00:07:00,13.741257,False,5,0,Friedensstraße/Conradstr.,15,51.071740,True,False,0,0,4373.0,264575,0,2019-01-20 00:35:00,13.741257,5,0,Friedensstraße/Conradstr.,51.071740,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567434,True,frame_lock,False,0,0,4438.0,310792,93857,0,7551012762,2020-01-20 23:29:00,13.701679,True,6,0,Bonhoeffer Platz,71,51.038466,True,False,0,0,4504.0,10299848,0,2020-01-20 23:49:00,13.729830,16,0,Alexander-Puschkin-Platz,51.070350,-1.6,91.7,0.0,0.0,0.0,3.3,1,1,20,20,0,0,23,23,20,20,1,False,2020-01-20,20.0,25.0
567435,True,frame_lock,False,0,0,4507.0,11248297,93908,0,7551012906,2020-01-20 23:29:00,13.703979,True,3,0,Tharandter Straße,71,51.043537,True,False,0,0,4458.0,4405670,0,2020-01-20 23:46:00,13.747730,4,0,Wundtstr. / Zellescher Weg,51.028370,-1.6,91.7,0.0,0.0,0.0,3.3,1,1,20,20,0,0,23,23,20,20,1,False,2020-01-20,17.0,27.0
567436,True,frame_lock,False,0,0,4373.0,264575,93334,0,7551005670,2020-01-20 23:31:00,13.741257,True,4,0,Friedensstraße/Conradstr.,71,51.071740,True,False,0,0,4327.0,121771,0,2020-01-20 23:36:00,13.750328,5,0,Schauburg,51.071144,-2.4,91.7,0.0,0.0,0.0,3.2,1,1,20,20,0,0,23,23,20,20,1,False,2020-01-20,5.0,125.0
567437,True,frame_lock,False,0,0,4332.0,73,93824,0,7551016033,2020-01-20 23:33:00,13.741220,True,1,0,Bf. Dresden-Neustadt,71,51.065184,True,False,0,0,4356.0,264558,0,2020-01-20 23:45:00,13.723367,3,0,Hubertusplatz,51.089747,-2.4,91.7,0.0,0.0,0.0,3.2,1,1,20,20,0,0,23,23,20,20,1,False,2020-01-20,12.0,99.0


## Creating temporal resolution features

In [33]:
df_full.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 567439 entries, 0 to 567438
Data columns (total 52 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   p_spot_start           567439 non-null  bool          
 1   b_lock_types_start     567439 non-null  object        
 2   p_maintenance_start    567439 non-null  bool          
 3   p_bike_racks_start     567439 non-null  int64         
 4   p_place_type_start     567439 non-null  int64         
 5   p_number_start         567377 non-null  float64       
 6   p_uid_start            567439 non-null  int64         
 7   b_number_start         567439 non-null  int64         
 8   p_free_racks_start     567439 non-null  int64         
 9   b_boardcomputer_start  567439 non-null  int64         
 10  datetime_start         567439 non-null  datetime64[ns]
 11  p_lng_start            567439 non-null  float64       
 12  b_electric_lock_start  567439 non-null  bool

### How we want to get temporal resolutions
- add 4 columns for each resolution: 24, 6, 2, one hour
- they display # bookings within given time period

### Function to aggregate datasets if needed

In [34]:
#data by timespan 24H, 1H, 4H, 12H
def setDataset(dataset, temp_resol, columnnamegroupby, functions_dic):
    newDataset = dataset.resample(temp_resol, on =columnnamegroupby).agg(functions_dic)
    return newDataset
#requested resolutions
temp_resol = ['24H', '6H', '2H', '1H']

In [35]:
df_full["count"] = 1
setDataset(df_full,'24H','datetime_start',{"count":"count"})

Unnamed: 0_level_0,count
datetime_start,Unnamed: 1_level_1
2019-01-20,265
2019-01-21,529
2019-01-22,477
2019-01-23,530
2019-01-24,1092
...,...
2020-01-16,1520
2020-01-17,1033
2020-01-18,810
2020-01-19,733


### Creating series with # bookings for given time interval

In [36]:
dataset = df_full.copy()

In [37]:
#base -> Shifts the base time to calculate from by some time amount
#shifting the base to the middle value of given time interval solved the NaN problem.
#Why? Because of our timedelta, which is half the amount of time of the resampled time interval 
#--> for time interval x we want to sum up all in!
#example: for daily counts aka 24 hours the base will be 12 am, the sum will be from 00:00 am until 23:59 pm
df_24 = dataset.resample("1D",on="datetime_start").agg({"count":"count"})
df_6 = dataset.resample("6H", base=3, on="datetime_start").agg({"count":"count"})
df_2 = dataset.resample("2H", base=1, on="datetime_start").agg({"count":"count"})
df_1 = dataset.resample("1H", base=0.5, on="datetime_start").agg({"count":"count"})
# df_24 = dataset.resample("1D",  on="datetime_start").agg({"count":"count"})
# df_6 = dataset.resample("6H", on="datetime_start").agg({"count":"count"})
# df_2 = dataset.resample("2H", on="datetime_start").agg({"count":"count"})
# df_1 = dataset.resample("1H",  on="datetime_start").agg({"count":"count"})

In [38]:
display(df_24,df_6,df_2,df_1)
#all NaN lol

Unnamed: 0_level_0,count
datetime_start,Unnamed: 1_level_1
2019-01-20,265
2019-01-21,529
2019-01-22,477
2019-01-23,530
2019-01-24,1092
...,...
2020-01-16,1520
2020-01-17,1033
2020-01-18,810
2020-01-19,733


Unnamed: 0_level_0,count
datetime_start,Unnamed: 1_level_1
2019-01-19 21:00:00,36
2019-01-20 03:00:00,33
2019-01-20 09:00:00,93
2019-01-20 15:00:00,93
2019-01-20 21:00:00,19
...,...
2020-01-19 21:00:00,106
2020-01-20 03:00:00,144
2020-01-20 09:00:00,406
2020-01-20 15:00:00,402


Unnamed: 0_level_0,count
datetime_start,Unnamed: 1_level_1
2019-01-19 23:00:00,19
2019-01-20 01:00:00,17
2019-01-20 03:00:00,8
2019-01-20 05:00:00,4
2019-01-20 07:00:00,21
...,...
2020-01-20 15:00:00,170
2020-01-20 17:00:00,127
2020-01-20 19:00:00,105
2020-01-20 21:00:00,52


Unnamed: 0_level_0,count
datetime_start,Unnamed: 1_level_1
2019-01-19 23:30:00,8
2019-01-20 00:30:00,17
2019-01-20 01:30:00,7
2019-01-20 02:30:00,8
2019-01-20 03:30:00,2
...,...
2020-01-20 19:30:00,61
2020-01-20 20:30:00,31
2020-01-20 21:30:00,21
2020-01-20 22:30:00,33


### Merging those temporal datasets to our Trips.csv

In [39]:
#24_sum without timedelta 
df_full = pd.merge_asof(dataset.sort_values('datetime_start'), 
                    df_24, left_on='booking_date_start', 
                    right_on='datetime_start', 
                    #tolerance=pd.Timedelta('12 hours'),
                    allow_exact_matches=True, direction='nearest')
#                    allow_exact_matches=True, direction='forward')
df_full.rename(columns={'count_x':'counter',
                          'count_y':'24_sum',}, 
                 inplace=True)
df_full.head(5)

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration,idle_time,counter,24_sum
0,False,analog_code_lock,False,0,12,0.0,12095573,93771,0,22532,2019-01-20 00:00:00,13.75038,False,1,0,BIKE 93771,15,51.071262,False,False,0,12,0.0,12099518,0,2019-01-20 00:28:00,13.693052,1,0,BIKE 93771,51.046234,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,1,265
1,True,analog_code_lock,False,0,0,4486.0,10299640,93576,0,0,2019-01-20 00:05:00,13.744712,False,5,0,Wohnheim Gret-Palucca.Straße / Lenneplatz,15,51.03821,False,False,0,12,0.0,12099344,0,2019-01-20 00:15:00,13.744122,1,0,BIKE 93576,51.049069,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,10.0,0.0,1,265
2,True,frame_lock,False,0,0,4483.0,10299584,93440,0,7551004130,2019-01-20 00:07:00,13.69113,True,1,0,Malterstraße (Haltestelle),0,51.04257,True,False,0,0,4458.0,4405670,0,2019-01-20 00:33:00,13.74773,3,0,Wundtstr. / Zellescher Weg,51.02837,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,26.0,0.0,1,265
3,False,analog_code_lock,False,0,12,0.0,12098234,93322,0,1265,2019-01-20 00:07:00,13.68962,False,1,0,BIKE 93322,0,51.041798,False,False,0,12,0.0,12098234,0,2019-01-20 00:09:00,13.68962,1,0,BIKE 93322,51.041798,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0,0.0,1,265
4,True,analog_code_lock,False,0,0,4373.0,264575,93585,0,0,2019-01-20 00:07:00,13.741257,False,5,0,Friedensstraße/Conradstr.,15,51.07174,True,False,0,0,4373.0,264575,0,2019-01-20 00:35:00,13.741257,5,0,Friedensstraße/Conradstr.,51.07174,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,1,265


In [40]:
#6_sum with td of 3 hours
df_full = pd.merge_asof(df_full.sort_values('datetime_start'), 
                    df_6, left_on='datetime_start', 
                    right_on='datetime_start', 
                    tolerance=pd.Timedelta('3 hours'),
                    allow_exact_matches=True, direction='nearest')
#                    allow_exact_matches=True, direction='forward')
df_full.rename(columns={'count':'6_sum',}, 
                 inplace=True)
df_full.head(5)

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration,idle_time,counter,24_sum,6_sum
0,False,analog_code_lock,False,0,12,0.0,12095573,93771,0,22532,2019-01-20 00:00:00,13.75038,False,1,0,BIKE 93771,15,51.071262,False,False,0,12,0.0,12099518,0,2019-01-20 00:28:00,13.693052,1,0,BIKE 93771,51.046234,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,1,265,36
1,True,analog_code_lock,False,0,0,4486.0,10299640,93576,0,0,2019-01-20 00:05:00,13.744712,False,5,0,Wohnheim Gret-Palucca.Straße / Lenneplatz,15,51.03821,False,False,0,12,0.0,12099344,0,2019-01-20 00:15:00,13.744122,1,0,BIKE 93576,51.049069,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,10.0,0.0,1,265,33
2,True,frame_lock,False,0,0,4483.0,10299584,93440,0,7551004130,2019-01-20 00:07:00,13.69113,True,1,0,Malterstraße (Haltestelle),0,51.04257,True,False,0,0,4458.0,4405670,0,2019-01-20 00:33:00,13.74773,3,0,Wundtstr. / Zellescher Weg,51.02837,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,26.0,0.0,1,265,33
3,False,analog_code_lock,False,0,12,0.0,12098234,93322,0,1265,2019-01-20 00:07:00,13.68962,False,1,0,BIKE 93322,0,51.041798,False,False,0,12,0.0,12098234,0,2019-01-20 00:09:00,13.68962,1,0,BIKE 93322,51.041798,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0,0.0,1,265,33
4,True,analog_code_lock,False,0,0,4373.0,264575,93585,0,0,2019-01-20 00:07:00,13.741257,False,5,0,Friedensstraße/Conradstr.,15,51.07174,True,False,0,0,4373.0,264575,0,2019-01-20 00:35:00,13.741257,5,0,Friedensstraße/Conradstr.,51.07174,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,1,265,33


In [41]:
#2_sum with td of 1 hours
df_full = pd.merge_asof(df_full.sort_values('datetime_start'), 
                    df_2, left_on='datetime_start', 
                    right_on='datetime_start', 
                    tolerance=pd.Timedelta('1 hours'),
                    allow_exact_matches=True, direction='nearest')
#                    allow_exact_matches=True, direction='forward')
df_full.rename(columns={
                          'count':'2_sum',}, 
                 inplace=True)
df_full.head(5)

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration,idle_time,counter,24_sum,6_sum,2_sum
0,False,analog_code_lock,False,0,12,0.0,12095573,93771,0,22532,2019-01-20 00:00:00,13.75038,False,1,0,BIKE 93771,15,51.071262,False,False,0,12,0.0,12099518,0,2019-01-20 00:28:00,13.693052,1,0,BIKE 93771,51.046234,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,1,265,36,19
1,True,analog_code_lock,False,0,0,4486.0,10299640,93576,0,0,2019-01-20 00:05:00,13.744712,False,5,0,Wohnheim Gret-Palucca.Straße / Lenneplatz,15,51.03821,False,False,0,12,0.0,12099344,0,2019-01-20 00:15:00,13.744122,1,0,BIKE 93576,51.049069,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,10.0,0.0,1,265,33,17
2,True,frame_lock,False,0,0,4483.0,10299584,93440,0,7551004130,2019-01-20 00:07:00,13.69113,True,1,0,Malterstraße (Haltestelle),0,51.04257,True,False,0,0,4458.0,4405670,0,2019-01-20 00:33:00,13.74773,3,0,Wundtstr. / Zellescher Weg,51.02837,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,26.0,0.0,1,265,33,17
3,False,analog_code_lock,False,0,12,0.0,12098234,93322,0,1265,2019-01-20 00:07:00,13.68962,False,1,0,BIKE 93322,0,51.041798,False,False,0,12,0.0,12098234,0,2019-01-20 00:09:00,13.68962,1,0,BIKE 93322,51.041798,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0,0.0,1,265,33,17
4,True,analog_code_lock,False,0,0,4373.0,264575,93585,0,0,2019-01-20 00:07:00,13.741257,False,5,0,Friedensstraße/Conradstr.,15,51.07174,True,False,0,0,4373.0,264575,0,2019-01-20 00:35:00,13.741257,5,0,Friedensstraße/Conradstr.,51.07174,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,1,265,33,17


In [42]:
#1_sum with td of 0.5 hours
df_full = pd.merge_asof(df_full.sort_values('datetime_start'), 
                    df_1, left_on='datetime_start', 
                    right_on='datetime_start', 
                    tolerance=pd.Timedelta('30 minutes'),
                    allow_exact_matches=True, direction='nearest')
#                    allow_exact_matches=True, direction='forward')
df_full.rename(columns={
                          'count':'1_sum',}, 
                 inplace=True)
df_full.head(5)

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration,idle_time,counter,24_sum,6_sum,2_sum,1_sum
0,False,analog_code_lock,False,0,12,0.0,12095573,93771,0,22532,2019-01-20 00:00:00,13.75038,False,1,0,BIKE 93771,15,51.071262,False,False,0,12,0.0,12099518,0,2019-01-20 00:28:00,13.693052,1,0,BIKE 93771,51.046234,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,1,265,36,19,8
1,True,analog_code_lock,False,0,0,4486.0,10299640,93576,0,0,2019-01-20 00:05:00,13.744712,False,5,0,Wohnheim Gret-Palucca.Straße / Lenneplatz,15,51.03821,False,False,0,12,0.0,12099344,0,2019-01-20 00:15:00,13.744122,1,0,BIKE 93576,51.049069,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,10.0,0.0,1,265,33,17,17
2,True,frame_lock,False,0,0,4483.0,10299584,93440,0,7551004130,2019-01-20 00:07:00,13.69113,True,1,0,Malterstraße (Haltestelle),0,51.04257,True,False,0,0,4458.0,4405670,0,2019-01-20 00:33:00,13.74773,3,0,Wundtstr. / Zellescher Weg,51.02837,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,26.0,0.0,1,265,33,17,17
3,False,analog_code_lock,False,0,12,0.0,12098234,93322,0,1265,2019-01-20 00:07:00,13.68962,False,1,0,BIKE 93322,0,51.041798,False,False,0,12,0.0,12098234,0,2019-01-20 00:09:00,13.68962,1,0,BIKE 93322,51.041798,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0,0.0,1,265,33,17,17
4,True,analog_code_lock,False,0,0,4373.0,264575,93585,0,0,2019-01-20 00:07:00,13.741257,False,5,0,Friedensstraße/Conradstr.,15,51.07174,True,False,0,0,4373.0,264575,0,2019-01-20 00:35:00,13.741257,5,0,Friedensstraße/Conradstr.,51.07174,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,1,265,33,17,17


In [43]:
# Drop unnecessary columns for temporal calculation
df_full.drop("counter", inplace=True, axis=1)
df_full.head(5)

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration,idle_time,24_sum,6_sum,2_sum,1_sum
0,False,analog_code_lock,False,0,12,0.0,12095573,93771,0,22532,2019-01-20 00:00:00,13.75038,False,1,0,BIKE 93771,15,51.071262,False,False,0,12,0.0,12099518,0,2019-01-20 00:28:00,13.693052,1,0,BIKE 93771,51.046234,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,265,36,19,8
1,True,analog_code_lock,False,0,0,4486.0,10299640,93576,0,0,2019-01-20 00:05:00,13.744712,False,5,0,Wohnheim Gret-Palucca.Straße / Lenneplatz,15,51.03821,False,False,0,12,0.0,12099344,0,2019-01-20 00:15:00,13.744122,1,0,BIKE 93576,51.049069,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,10.0,0.0,265,33,17,17
2,True,frame_lock,False,0,0,4483.0,10299584,93440,0,7551004130,2019-01-20 00:07:00,13.69113,True,1,0,Malterstraße (Haltestelle),0,51.04257,True,False,0,0,4458.0,4405670,0,2019-01-20 00:33:00,13.74773,3,0,Wundtstr. / Zellescher Weg,51.02837,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,26.0,0.0,265,33,17,17
3,False,analog_code_lock,False,0,12,0.0,12098234,93322,0,1265,2019-01-20 00:07:00,13.68962,False,1,0,BIKE 93322,0,51.041798,False,False,0,12,0.0,12098234,0,2019-01-20 00:09:00,13.68962,1,0,BIKE 93322,51.041798,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0,0.0,265,33,17,17
4,True,analog_code_lock,False,0,0,4373.0,264575,93585,0,0,2019-01-20 00:07:00,13.741257,False,5,0,Friedensstraße/Conradstr.,15,51.07174,True,False,0,0,4373.0,264575,0,2019-01-20 00:35:00,13.741257,5,0,Friedensstraße/Conradstr.,51.07174,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,265,33,17,17


### Dive into NaNs / no NaNs

In [44]:
#this did not work as expected
#logically, 24_sum should never be nan, while 1_sum could be nan quite often.
df_full.isna().sum()
# 24_sum                   699
# 6_sum                     75
# 2_sum                     23
# 1_sum                      3
#df_full


p_spot_start              0
b_lock_types_start        0
p_maintenance_start       0
p_bike_racks_start        0
p_place_type_start        0
p_number_start           62
p_uid_start               0
b_number_start            0
p_free_racks_start        0
b_boardcomputer_start     0
datetime_start            0
p_lng_start               0
b_electric_lock_start     0
p_bikes_start             0
p_booked_bikes_start      0
p_name_start              0
b_bike_type_start         0
p_lat_start               0
p_spot_end                0
p_maintenance_end         0
p_bike_racks_end          0
p_place_type_end          0
p_number_end             78
p_uid_end                 0
p_free_racks_end          0
datetime_end              0
p_lng_end                 0
p_bikes_end               0
p_booked_bikes_end        0
p_name_end                0
p_lat_end                 0
air_deg                   0
air_hum                   0
rain_mm                   0
rain_yn                   0
sun_hour            

In [45]:
#the merge did not work for 20th of January, 2020! 
#this might be, because no matching datetime_start was found
df = df_full

df_show = df[df['24_sum'].isnull()]
df_show = df_show[['datetime_start','24_sum','6_sum','2_sum','1_sum']]
print(df_show.to_string())

#Whats the reason? maybe merge direction 'nearest' is bad for this task? --> NOPE
#https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.merge_asof.html

# A “backward” search selects the last row in the right DataFrame --> 300.000 NaNs
# whose ‘on’ key is less than or equal to the left’s key.

# A “forward” search selects the first row in the right DataFrame --> 200.000 NaNs
# whose ‘on’ key is greater than or equal to the left’s key.

# A “nearest” search selects the row in the right DataFrame --> 700 NaNs
# whose ‘on’ key is closest in absolute distance to the left’s key

#Maybe resampling method is not entirely correct?
#https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.resample.html

Empty DataFrame
Columns: [datetime_start, 24_sum, 6_sum, 2_sum, 1_sum]
Index: []


### Check if timedeltas caused any issues / no issues

In [46]:
df_full[df_full['booking_date_start']=='2020-01-21']
# keine verschiebung der booking sums

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration,idle_time,24_sum,6_sum,2_sum,1_sum


In [47]:
df_full[df_full['booking_date_start']=='2019-01-19']
#keine verschiebung der booking sums

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration,idle_time,24_sum,6_sum,2_sum,1_sum


In [48]:
df_full.head(10)

Unnamed: 0,p_spot_start,b_lock_types_start,p_maintenance_start,p_bike_racks_start,p_place_type_start,p_number_start,p_uid_start,b_number_start,p_free_racks_start,b_boardcomputer_start,datetime_start,p_lng_start,b_electric_lock_start,p_bikes_start,p_booked_bikes_start,p_name_start,b_bike_type_start,p_lat_start,p_spot_end,p_maintenance_end,p_bike_racks_end,p_place_type_end,p_number_end,p_uid_end,p_free_racks_end,datetime_end,p_lng_end,p_bikes_end,p_booked_bikes_end,p_name_end,p_lat_end,air_deg,air_hum,rain_mm,rain_yn,sun_hour,wind_ms,month_start,month_end,day_start,day_end,day_of_week_start,day_of_week_end,hour_start,hour_end,day_of_year_start,day_of_year_end,season,weekend,booking_date_start,trip_duration,idle_time,24_sum,6_sum,2_sum,1_sum
0,False,analog_code_lock,False,0,12,0.0,12095573,93771,0,22532,2019-01-20 00:00:00,13.75038,False,1,0,BIKE 93771,15,51.071262,False,False,0,12,0.0,12099518,0,2019-01-20 00:28:00,13.693052,1,0,BIKE 93771,51.046234,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,265,36,19,8
1,True,analog_code_lock,False,0,0,4486.0,10299640,93576,0,0,2019-01-20 00:05:00,13.744712,False,5,0,Wohnheim Gret-Palucca.Straße / Lenneplatz,15,51.03821,False,False,0,12,0.0,12099344,0,2019-01-20 00:15:00,13.744122,1,0,BIKE 93576,51.049069,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,10.0,0.0,265,33,17,17
2,True,frame_lock,False,0,0,4483.0,10299584,93440,0,7551004130,2019-01-20 00:07:00,13.69113,True,1,0,Malterstraße (Haltestelle),0,51.04257,True,False,0,0,4458.0,4405670,0,2019-01-20 00:33:00,13.74773,3,0,Wundtstr. / Zellescher Weg,51.02837,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,26.0,0.0,265,33,17,17
3,False,analog_code_lock,False,0,12,0.0,12098234,93322,0,1265,2019-01-20 00:07:00,13.68962,False,1,0,BIKE 93322,0,51.041798,False,False,0,12,0.0,12098234,0,2019-01-20 00:09:00,13.68962,1,0,BIKE 93322,51.041798,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0,0.0,265,33,17,17
4,True,analog_code_lock,False,0,0,4373.0,264575,93585,0,0,2019-01-20 00:07:00,13.741257,False,5,0,Friedensstraße/Conradstr.,15,51.07174,True,False,0,0,4373.0,264575,0,2019-01-20 00:35:00,13.741257,5,0,Friedensstraße/Conradstr.,51.07174,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,28.0,0.0,265,33,17,17
5,True,analog_code_lock,False,0,0,4310.0,45444,93660,0,0,2019-01-20 00:21:00,13.789338,False,1,0,Panometer,15,51.025741,True,False,0,0,4336.0,264532,0,2019-01-20 00:33:00,13.807733,1,0,Altenberger Platz,51.034938,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,12.0,0.0,265,33,17,17
6,False,analog_code_lock,False,0,12,0.0,12098942,93478,0,1206,2019-01-20 00:26:00,13.710755,False,1,0,BIKE 93478,0,51.028163,True,False,0,0,4491.0,10299678,0,2019-01-20 00:28:00,13.742453,1,0,Räcknitzhöhe,51.023416,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0,0.0,265,33,17,17
7,True,analog_code_lock,False,0,0,4392.0,264599,93577,0,0,2019-01-20 00:29:00,13.777725,False,1,0,Mosenstraße,15,51.04474,True,False,0,0,4392.0,264599,0,2019-01-20 00:31:00,13.777725,1,0,Mosenstraße,51.04474,-3.3,75.3,0.0,0.0,0.0,4.3,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,2.0,0.0,265,33,17,17
8,True,analog_code_lock,False,0,0,4403.0,38955,93258,0,1322,2019-01-20 00:31:00,13.739294,False,2,0,Palaisplatz,0,51.060231,True,False,0,0,4327.0,121771,0,2019-01-20 01:02:00,13.750328,4,0,Schauburg,51.071144,-3.4,75.0,0.0,0.0,0.0,4.4,1,1,20,20,6,6,0,1,20,20,1,True,2019-01-20,31.0,0.0,265,33,17,17
9,True,analog_code_lock,False,0,0,4348.0,264548,93785,0,22556,2019-01-20 00:33:00,13.722911,False,2,0,Nürnberger Ei,15,51.033767,True,False,0,0,4458.0,4405670,0,2019-01-20 00:49:00,13.74773,5,0,Wundtstr. / Zellescher Weg,51.02837,-3.4,75.0,0.0,0.0,0.0,4.4,1,1,20,20,6,6,0,0,20,20,1,True,2019-01-20,16.0,0.0,265,33,17,17


## Save df as CSV file

In [49]:
df_full.to_csv(os.path.join(path_output, "Trips.csv"))