# Create new Tripmap

In [22]:
def import_data(source):
    return pd.read_csv(source)


def dic_out_of_df(df01):
    import pandas as pd
    from tqdm import tqdm
    # Dict out of DataFrames of Trips per Bike

    # init DFs for every bike
    myDFs = {}

    for ind in tqdm(df01.index):
        if df01['bike_id'][ind] not in myDFs:
            myDFs[df01['bike_id'][ind]] = pd.DataFrame(
                columns=['bike_id',
                         'start_time',
                         'end_time',
                         'start_lat',
                         'start_lng',
                         'end_lat',
                         'end_lng',
                         'end_station_number'])
    return myDFs


def add_rentals_to_df(df01, myDFs):
    from tqdm import tqdm
    # Add all rentals to their bike_id DF
    for ind in tqdm(df01.index):
        if df01['bike_id'][ind] in myDFs:
            myDFs[df01['bike_id'][ind]].loc[df01.index[ind]] = df01.iloc[ind]
    return myDFs

In [23]:
def sort_dfs(myDFs):
    # sort every DF
    for df in myDFs.values():
        df[['bike_id', 'start_time','end_time']] = df[['bike_id', 'start_time','end_time']].astype(int)
        df[['start_lat','start_lng', 'end_lat', 'end_lng']] = df[['start_lat','start_lng', 'end_lat', 'end_lng']].astype(float)
        df.sort_values(by=['start_time'], inplace=True)
        df.reset_index(drop=True, inplace=True)
    return myDFs

# MAIN

In [24]:
from keplergl import KeplerGl
import pandas as pd
pd.options.mode.chained_assignment = None
import time

ts1 = time.time()

source = '../../data/raw_lendings/raw_lendings_2021.csv'

df = import_data(source)
#df = df.truncate(after=10000)

In [25]:
myDfs = dic_out_of_df(df)
myDfs = add_rentals_to_df(df, myDfs)
myDfs = sort_dfs(myDfs)

100%|██████████| 574838/574838 [00:04<00:00, 134290.53it/s]
100%|██████████| 574838/574838 [13:12<00:00, 725.67it/s]


### Get bike with most rentals

In [40]:
bike_id = 0
a = 0

for bike in myDfs:
    if len(myDfs[bike]) > a:
        a = len(myDfs[bike])
        bike_id = bike

print("bike id: %i" % bike_id)
print("rentals: %i" % a)

bike id: 74456
rentals: 1007


In [None]:
# SAVE
myDfs[bike_id].to_csv('../../results/tripmaps/trips.csv')

### Compute Idle Points from Trips

In [35]:
from tqdm import tqdm
from numpy import cos, sqrt


def get_distance(A_lat, A_lng, B_lat, B_lng):
    x = B_lat - A_lat
    y = (B_lng - A_lng)*cos((B_lat + A_lat)*0.00872664626)
    return 111.138*sqrt(x*x+y*y)*1000

def create_df_points(myDFs,radius):
    # DF of all points with stoodtime
    in_val = 0
    val = 0

    df_points = pd.DataFrame(columns=['bike_id', 'lat', 'lng', 'idle_time', 'time_start', 'time_end', 'end_station_number'])
    #df_points[['bike_id', 'idle_time', 'time_start', 'time_end']] = df_points[['bike_id', 'idle_time', 'time_start', 'time_end']].astype(int)
    #df_points[['lng', 'lat']] = df_points[['lng', 'lat']].astype(float)

    for df in myDFs.values():
        for ind in df.index[1:]:
            #in SECOUNDS
            _idle_time = (df['start_time'][ind] - df['end_time'][ind - 1]).astype(int)
            distance = get_distance(df['end_lat'][ind - 1], df['end_lng'][ind - 1], df['start_lat'][ind], df['start_lng'][ind])
            if distance > radius:
                in_val +=1
                continue
            val +=1
            dict = {'bike_id': df['bike_id'][ind], 'lng': df['start_lng'][ind], 'lat': df['start_lat'][ind],
                    'idle_time': _idle_time, 'time_start': df['end_time'][ind - 1], 'time_end': df['start_time'][ind], 'end_station_number': df['end_station_number'][ind-1]}
            df_points = df_points.append(dict, ignore_index=True)
    print(f"invalides: {in_val} from {val} that is {in_val/val * 100} %")
    return df_points

In [36]:
dic = {bike_id : myDfs[bike_id]}

In [37]:
df_points = create_df_points(dic, radius=20)

100%|██████████| 1/1 [00:01<00:00,  1.55s/it]

invalides: 0 from 1006 that is 0.0 %





In [38]:
# SAVE
df_points.to_csv('../../results/tripmaps/points_valid.csv')