In [8]:
import pandas as pd
import time
import numpy as np

## Bike Share Data

In [2]:
# Maps each supported city name (lower-case, as typed by the user) to the
# CSV file that holds its trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def _prompt_choice(prompt, choices):
    """Keeps asking `prompt` until the answer is one of `choices`.

    The answer is stripped of surrounding whitespace and lower-cased before
    it is compared, so " Chicago " is accepted as "chicago".

    Args:
        (str) prompt - text shown to the user on every attempt
        (tuple) choices - accepted lower-case answers
    Returns:
        (str) the normalized answer, guaranteed to be in `choices`
    """
    while True:
        answer = input(prompt).strip().lower()
        if answer in choices:
            return answer
        print("Enter a Valid Name!!")


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until a valid answer is given. Answers are
    case-insensitive and surrounding whitespace is ignored.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    cities = ('chicago', 'new york city', 'washington')
    months = ('all', 'january', 'february', 'march', 'april', 'may', 'june')
    days = ('all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday')

    # One shared helper replaces three copies of the same prompt/validate loop.
    city = _prompt_choice(
        "Enter the name of the city to analyze (chicago, new york city, washington) : ",
        cities)
    month = _prompt_choice(
        "Enter the name of the month to filter by, or all to apply no month filter (all, january, february, ... , june) : ",
        months)
    day = _prompt_choice(
        "Enter the name of the day of week to filter by, or all to apply no day filter (all, monday, tuesday, ... sunday) : ",
        days)

    print('-' * 40)
    return city, month, day

# Interactive step: prompts for the city, month and day filters used by the cells below.
city,month,day = get_filters()

Hello! Let's explore some US bikeshare data!
Enter the name of the city to analyze (chicago, new york city, washington) : new york city
Enter the name of the month to filter by, or all to apply no month filter (all, january, february, ... , june) : june
Enter the name of the day of week to filter by, or all to apply no day filter (all, monday, tuesday, ... sunday) : monday
----------------------------------------


In [3]:
def load_data(city, month, day):
    """
    Reads the CSV registered for `city` in CITY_DATA and narrows it down to
    the requested month and weekday.

    Two helper columns are added before filtering: 'month' (month number of
    'Start Time') and 'day_of_week' (weekday name of 'Start Time').

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Parse the start timestamps once and derive both helper columns from them.
    start_times = pd.to_datetime(trips['Start Time'])
    trips['Start Time'] = start_times
    trips['month'] = start_times.dt.month
    trips['day_of_week'] = start_times.dt.day_name()

    if month != 'all':
        # Position in this list + 1 gives the calendar month number.
        month_names = ['january', 'february', 'march', 'april', 'may', 'june']
        month_number = month_names.index(month) + 1
        trips = trips[trips['month'] == month_number]

    if day != 'all':
        # day_name() yields capitalised names such as "Monday".
        wanted_day = day.title()
        trips = trips[trips['day_of_week'] == wanted_day]

    return trips

In [5]:
# Load the trips matching the selected filters and preview the first rows.
df = load_data(city, month, day)
df.head()

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week
49,5334757,2017-06-05 12:23:13,2017-06-05 12:29:22,368,E 53 St & Madison Ave,E 58 St & 1 Ave,Subscriber,Male,1990.0,6,Monday
129,5351922,2017-06-05 17:36:56,2017-06-05 18:06:11,1754,South St & Gouverneur Ln,Richards St & Delavan St,Subscriber,Male,1987.0,6,Monday
156,5321258,2017-06-05 08:08:51,2017-06-05 08:39:03,1812,Murray St & West St,W 42 St & 8 Ave,Subscriber,Male,1969.0,6,Monday
247,5768649,2017-06-12 21:10:48,2017-06-12 21:40:25,1777,West St & Chambers St,11 Ave & W 59 St,Subscriber,Female,1958.0,6,Monday
263,5320479,2017-06-05 07:58:07,2017-06-05 09:21:32,5004,Brooklyn Bridge Park - Pier 2,Cadman Plaza E & Tillary St,Customer,,,6,Monday


In [10]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Reads the 'month', 'day_of_week' and 'Start Time' columns of `df` and
    prints the most common value of each (for 'Start Time', the most common
    hour). Also prints how long the calculation took. Returns nothing.
    """
    print('\nCalculating The Most Frequent Times of Travel')
    started_at = time.time()

    # Most common month
    popular_month = df['month'].mode()[0]
    print("The most common month : ", popular_month)

    # Most common day of week
    popular_day = df['day_of_week'].mode()[0]
    print("The most common day of week : ", popular_day)

    # Most common start hour, taken from the hour component of 'Start Time'
    start_hours = df['Start Time'].dt.hour
    print("The most common start hour : ", start_hours.mode()[0])

    elapsed = time.time() - started_at
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)

In [11]:
# Report the most frequent month, weekday and start hour for the loaded trips.
time_stats(df)


Calculating The Most Frequent Times of Travel
The most common month :  6
The most common day of week :  Monday
The most common start hour :  8

This took 0.004004478454589844 seconds.
----------------------------------------


In [12]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station, and
    the most frequent (start station, end station) pair, followed by the
    time the calculation took. Returns nothing.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # Most commonly used start station
    print("The most commonly used start station : ", df['Start Station'].mode()[0])

    # Most commonly used end station
    print("The most commonly used end station : ", df['End Station'].mode()[0])

    # Most frequent trip: count each (start, end) pair as a unit.
    # DataFrame.mode() works column by column, so it would pair the most
    # common start with the most common end even if that trip never occurs.
    trip_counts = df.groupby(['Start Station', 'End Station']).size()
    top_start, top_end = trip_counts.idxmax()
    print("The most most frequent combination of start station and end station trip : \nStart Station : ", top_start, "\nEnd Station : ", top_end)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-' * 40)

In [13]:
# Report the most popular start/end stations and trip for the loaded trips.
station_stats(df)


Calculating The Most Popular Stations and Trip...

The most commonly used start station :  Pershing Square North
The most commonly used end station :  Pershing Square North
The most most frequent combination of start station and end station trip : 
Start Station :  Pershing Square North 
End Station :  Pershing Square North

This took 0.014998912811279297 seconds.
----------------------------------------


In [18]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Prints the sum and the mean of the 'Trip Duration' column, followed by
    the time the calculation took. Returns nothing.

    Args:
        df - Pandas DataFrame with a 'Trip Duration' column
    """

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df["Trip Duration"]

    # Total travel time (computed once; the discarded duplicate call was removed)
    print("The total travel time : ", durations.sum())

    # Mean travel time
    print("The mean travel time : ", durations.mean())

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-' * 40)

In [19]:
# Report total and mean trip duration for the loaded trips.
trip_duration_stats(df)


Calculating Trip Duration...

The total travel time :  8134869
The mean travel time :  856.3921465417412

This took 0.0029997825622558594 seconds.
----------------------------------------


In [22]:
def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the count of each user type, the count of each gender, and the
    earliest, most recent and most common birth year, followed by the time
    the calculation took. Returns nothing.

    When `df` has no 'Gender' column, or no 'Birth Year' column (or that
    column holds no values), a "No ... Data!!!" line is printed in place of
    the corresponding section.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Counts of user types
    print("The counts of user types :")
    for user_type, count in df['User Type'].value_counts().items():
        print(user_type, ":", count)

    # Counts of gender. Check for the column explicitly instead of relying on
    # a bare except, so unrelated errors are not hidden and the section
    # header is not printed when there is nothing to show.
    if 'Gender' in df.columns:
        print("The counts of gender :")
        for gender, count in df['Gender'].value_counts().items():
            print(gender, ":", count)
    else:
        print("No Gender Data!!!")

    # Earliest, most recent, and most common year of birth. Missing values
    # are dropped first so an all-empty column is reported as "no data"
    # rather than printing nan.
    birth_years = df['Birth Year'].dropna() if 'Birth Year' in df.columns else None
    if birth_years is None or birth_years.empty:
        print("No Birth Year Data!!!")
    else:
        print("The earliest year of birth :", birth_years.min())
        print("The most recent year of birth :", birth_years.max())
        print("The most common year of birth :", birth_years.mode()[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-' * 40)

In [23]:
# Report user type, gender and birth-year statistics for the loaded trips.
user_stats(df)


Calculating User Stats...

The counts of user types :
Subscriber : 8617
Customer : 882
The counts of gender :
Male : 6467
Female : 2198
The earliest year of birth : 1893.0
The most recent year of birth : 2001.0
The most common year of birth : 1986.0

This took 0.008001327514648438 seconds.
----------------------------------------


In [24]:
# Inspect column dtypes and non-null counts of the filtered frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9499 entries, 49 to 299962
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Unnamed: 0     9499 non-null   int64         
 1   Start Time     9499 non-null   datetime64[ns]
 2   End Time       9499 non-null   object        
 3   Trip Duration  9499 non-null   int64         
 4   Start Station  9499 non-null   object        
 5   End Station    9499 non-null   object        
 6   User Type      9499 non-null   object        
 7   Gender         8665 non-null   object        
 8   Birth Year     8693 non-null   float64       
 9   month          9499 non-null   int64         
 10  day_of_week    9499 non-null   object        
dtypes: datetime64[ns](1), float64(1), int64(3), object(6)
memory usage: 890.5+ KB


In [27]:
# Gender breakdown; value_counts() leaves out missing values by default.
df.Gender.value_counts()

Male      6467
Female    2198
Name: Gender, dtype: int64