# Analiza rowerowa

### Import modułów

In [25]:
import pandas as pd
import numpy as np

### Definicja funkcji

In [64]:
def count_bikes(bikes):
    """
    Return how many bikes are listed in one raw [bikes] cell.

    The cell is a comma-separated string of bike ids. The literal string '0'
    is the placeholder for "no bikes" and yields 0.
    Is used for creating the column [no_bikes].
    """
    # Any other string holds one more id than it has separating commas.
    return 0 if bikes == '0' else bikes.count(',') + 1

def string_to_intlist(s):
    """
    Convert one raw [bikes] cell into a list of integer bike ids.

    Parameters:
        s: comma-separated string of bike ids, e.g. '24720,24721'.
           The placeholder for "no bikes" ('0', or the int 0) is also accepted.

    Returns:
        A list of ints. The "no bikes" placeholder gives an empty list, so the
        result is always a list and membership tests such as `bike in result`
        never fail on a bare int or report a phantom bike with id 0.
    """
    # count_bikes treats '0' as "no bikes"; stay consistent with it here.
    if s == 0 or s == '0':
        return []
    return [int(bike_id) for bike_id in s.split(',')]
    
def bikes_to_list(df):
    """
    Replace the string column [bikes] with lists of integer bike ids.

    Each cell is converted with string_to_intlist. The column is overwritten
    in place on the DataFrame that was passed in, and that same DataFrame is
    returned.
    """
    # Apply on the single column rather than row by row over the whole frame:
    # it avoids building a Series for every row and does not depend on how
    # DataFrame.apply expands list results.
    df['bikes'] = df['bikes'].apply(string_to_intlist)
    return df

def find_station(station_Id, df):
    """
    Select the observations of one station.

    Returns the rows of df whose [station_ID] equals station_Id. An id that
    does not occur gives an empty DataFrame with the same columns.
    """
    at_station = df['station_ID'] == station_Id
    return df.loc[at_station]

def find_bike(bike_Id, df):
    """
    Select the observations in which a given bike is docked.

    Keeps the rows whose [bikes] collection contains bike_Id and returns them
    without the [bikes], [no_bikes] and [no_racks] columns.
    """
    # Element-wise membership test: one boolean per row.
    has_bike = df['bikes'].apply(lambda docked: bike_Id in docked)
    sightings = df.loc[has_bike]
    unused_columns = ['bikes', 'no_bikes', 'no_racks']
    return sightings.drop(columns=unused_columns)

def bike_movement(df):
    """
    List the station changes of one bike.

    As an input we take the DataFrame which was the result of find_bike:
    it must have a [station_ID] column and its rows are scanned in their
    existing order.

    Returns:
        A DataFrame with a fresh 0..n-1 index containing
          * the second row of the input, followed by
          * for every row whose station differs from the previous row's, that
            previous row and the row itself (last sighting at the old station,
            first sighting at the new one).
        The very first input row is never part of the result.
        If the input has two rows or fewer, a message is printed and the
        int 0 is returned instead of a DataFrame.

    NOTE(review): starting at the second row and always dropping the first
    one reproduces the earlier append/drop based implementation exactly. It
    looks like a side effect of how that version seeded its result, so please
    confirm whether the first row should be reported instead.
    """
    df = df.reset_index(drop=True)
    if len(df) <= 2:
        print('the data frame is too short!')
        return 0

    stations = df['station_ID'].values

    # Collect row positions first and select them in one go. DataFrame.append
    # no longer exists in pandas >= 2.0, and growing a frame row by row is
    # quadratic in the number of rows.
    positions = [1]
    for i in range(1, len(stations)):
        if stations[i] != stations[i - 1]:
            # Position 0 is deliberately left out (see the note above).
            if i - 1 != 0:
                positions.append(i - 1)
            positions.append(i)

    return df.iloc[positions].reset_index(drop=True)

def mean_occupation(station_id, df):
    """
    Mean number of bikes at a station for every hour of the day.

    Parameters:
        station_id: value looked up in the [station_ID] column.
        df: DataFrame with the columns [station_ID], [hour] and [no_bikes].

    Returns:
        An array of the form [[hour], [mean_no_of_bikes]] with shape (2, 24):
        row 0 holds the hours 0..23 and row 1 the mean of [no_bikes] for that
        hour. Hours without any observation for the station are NaN.
    """
    station_rows = find_station(station_id, df)
    hours = np.arange(24)
    # Align the grouped means to all 24 hours. Without this, a station that
    # lacks data for some hour yields fewer than 24 means, which no longer
    # line up with the hour row and make the stacked array ragged.
    hourly_mean = station_rows.groupby('hour')['no_bikes'].mean().reindex(hours)
    return np.array([hours, hourly_mean.values])

### Wczytanie i preprocessing danych

In [3]:
# The column names are passed explicitly, so pandas reads every line of the
# file as data rather than as a header.
df = pd.read_csv(
    'veturilo_statistics.csv',
    names=['date', 'station_ID', 'no_racks', 'bikes'],
)

# Optional: keep only the first 200000 rows for quicker experiments.
#df = df[:200000]

In [4]:
# Parse the raw [date] strings once into a temporary [datetime] column.
# NOTE(review): infer_datetime_format is deprecated in pandas >= 2.0, where
# format inference is the default; it is kept for older pandas versions.
df['datetime'] = pd.to_datetime(df.date, infer_datetime_format=True)

# Split the timestamp into its components with the vectorised .dt accessor
# instead of calling a Python lambda for every row.
df['day'] = df['datetime'].dt.day
df['month'] = df['datetime'].dt.month
df['year'] = df['datetime'].dt.year
df['hour'] = df['datetime'].dt.hour
df['minute'] = df['datetime'].dt.minute

# Only the split components are used from here on.
df = df.drop(columns=['date', 'datetime'])

In [5]:
# Replace missing [bikes] values with the '0' placeholder that count_bikes
# maps to zero bikes.
df['bikes'] = df['bikes'].fillna('0')

# Count the bikes while the column still holds the raw strings. Applying on
# the single column avoids building a Series for every row of the frame.
df['no_bikes'] = df['bikes'].apply(count_bikes)

# Finally turn the raw strings into lists of integer bike ids.
df = bikes_to_list(df)

### Właściwa analiza

In [6]:
# All observations in which bike 24720 is listed at a station.
bike_24720 = find_bike(bike_Id=24720, df=df)

In [12]:
# Show the first five rows of the station changes of bike 24720.
bike_movement(df=bike_24720).head(n=5)

Unnamed: 0,station_ID,day,month,year,hour,minute
0,9402,4,4,2019,20,9
1,9402,5,4,2019,13,4
2,9404,5,4,2019,13,14
3,9404,6,4,2019,19,33
4,9402,6,4,2019,19,43


In [65]:
# Mean number of bikes per hour of the day at station 9534.
# mean_occupation filters the station itself, so the earlier separate
# find_station call, whose result was overwritten immediately, is not needed.
a = mean_occupation(9534, df)