In [323]:
import time
import numpy as np
import pandas as pd

In [324]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

In [325]:
Cities = ['chicago' , 'new york city', 'washington', 'all']
Months = ['january' , 'february', 'march' , 'april' , 'may' , 'june', 'all' ]
Days = ['saturday', 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'all']

In [326]:
def check_user_input(user_input,input_type):
    """
    Check for valid data entered by the user
    Args:
    user_input: Is the Input data from the user.
    input_type: IS the type of the input (city=0, month=1, day=2)
    """
    while True:
        valid_input = input(user_input).lower()
        try:
            if valid_input in Cities and input_type == 1:
                break
            elif valid_input in Months and input_type == 2:
                break
            elif valid_input in Days and input_type == 3:
                break
            else:
                if input_type == 1:
                    print("Your input is not a valid city")
                if input_type == 2:
                    print("Your input is not a valid month")
                if input_type == 3:
                    print("Your input is not a valid day")
        except ValueError:
            print("Invalid input!")
    return valid_input

In [327]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    city = check_user_input("Would you like to see the data for Chicago, New York City or Washington?: ", 1).lower()
    month = check_user_input("Which month? January, February, March, April, May, June, or all: ", 2).lower()
    day = check_user_input("Which day? Saturday, Sunday, Monday, Tuesday, Wednesday, Thursday, or all: ", 3).lower()

    print('-'*40)
    return city, month, day

In [328]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    df = pd.read_csv(CITY_DATA[city])
    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    
    #extract month and day of week from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.weekday

    # filter by month if applicable
    if month != 'all' and month not in Months:
        month = Months.index(month) + 1
        df = df[df['month'] == month]

    # filter by day of week if applicable
    if day != 'all' and day not in Days:
        df = df[df['day_of_week'] == day.title()]


    return df

In [329]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    #display the most common month
    popular_month = df['month'].mode()[0]
    print('Most Popular Month:', popular_month)

    #display the most common day of week
    popular_day_of_week = df['day_of_week'].mode()[0]
    print('Most Day Of Week:', popular_day_of_week)


    # extract hour from the Start Time column to create an hour column
    df['hour'] = df['Start Time'].dt.hour
    # find the most popular hour
    popular_hour = df['hour'].mode()[0]
    print('Most Popular Start Hour:', popular_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [330]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    popular_start_station = df['Start Station'].mode()[0]
    print('Most Start Station:', popular_start_station)

    # display most commonly used end station
    popular_end_station = df['End Station'].mode()[0]
    print('Most End Station:', popular_end_station)


    # display most frequent combination of start station and end station trip
    group_field=df.groupby(['Start Station','End Station'])
    popular_combination_station = group_field.size().sort_values(ascending=False).head(1)
    print('Most frequent combination of Start Station and End Station trip:\n', popular_combination_station)


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [331]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    total_travel_time = df['Trip Duration'].sum()
    print('Total Travel Time:', total_travel_time)

    # display mean travel time
    mean_travel_time = df['Trip Duration'].mean()
    print('Mean Travel Time:', mean_travel_time)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [332]:
def user_stats(df,city):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    print('User Type Statistics:')
    print(df['User Type'].value_counts())
    if city != 'washington':
        # Display counts of gender
        print('Gender Statistics:')
        print(df['Gender'].value_counts())
        # Display earliest, most recent, and most common year of birth
        print('Birth Year Stats:')
        earliest_year = df['Birth Year'].min()
        print('Earliest Year:',earliest_year)
        most_recent_year = df['Birth Year'].max()
        print('Most Recent Year:',most_recent_year)
        most_common_year = df['Birth Year'].mode()[0]
        print('Most Common Year:',most_common_year)
        

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [333]:
def display_raw_data(df):
    """Displays 5 rows of data from the csv files according to the user input.
    Args:
        df: DataFrame that has a city name from the user input and filtered by day and month.
    Returns:
        None.
    """
    i = 0
    user_request = input('Do you want to display five rows of raw data? yes or no? : ').lower()
    print(df.head())
    while True:
        if user_request == 'no':
            break
        else:
            print(df[i:i+5])
            user_request = input('Do you want to display five rows of raw data? yes or no? : ').lower()
            i +=5

In [334]:
def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df,city)
        display_raw_data(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
	main()

Hello! Let's explore some US bikeshare data!
Would you like to see the data for Chicago, New York City or Washington?: chicago
Which month? January, February, March, April, May, June, or all: may
Which day? Saturday, Sunday, Monday, Tuesday, Wednesday, Thursday, or all: monday
----------------------------------------

Calculating The Most Frequent Times of Travel...

Most Popular Month: 6
Most Day Of Week: 1
Most Popular Start Hour: 17

This took 0.02593088150024414 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most Start Station: Streeter Dr & Grand Ave
Most End Station: Streeter Dr & Grand Ave
Most frequent combination of Start Station and End Station trip:
 Start Station              End Station            
Lake Shore Dr & Monroe St  Streeter Dr & Grand Ave    854
dtype: int64

This took 0.10057663917541504 seconds.
----------------------------------------

Calculating Trip Duration...

Total Travel Time: 280871787
Mean Travel 