In [1]:
import time
import numpy as np
import pandas as pd


# Maps each supported city name (lower-case) to the CSV file holding its data.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york': 'new_york_city.csv',
    'washington': 'washington.csv',
}


def _ask_choice(prompt, choices, error_message='Invalid Response'):
    """Keep prompting until the user types one of ``choices``; return it lower-cased."""
    while True:
        # Strip surrounding whitespace so an accidental trailing space is not rejected.
        answer = input(prompt).strip().lower()
        if answer in choices:
            return answer
        print(error_message)


def get_filters():
    """
    Asks user to specify a city, month, and/or day to analyse.

    Every question is repeated until a valid answer is typed; answers are
    matched case-insensitively.

    Returns:
       (str)city - name of the city to analyse (a key of CITY_DATA)
       (str)month - name of the month to filter by, or "all" to apply no month filter
       (str)day - name of the day of week to filter by, or "all" to apply no day filter
    """
    months = ['january', 'february', 'march', 'april', 'may', 'june']
    days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']

    print('Hello! Let\'s explore some US bikeshare data')

    city = _ask_choice(
        'Would you like to see data for Chicago, New York, or Washington? ',
        CITY_DATA,
        'You provided an invalid city name',
    )

    # Which filters does the user want: month, day, both or none?
    pick = _ask_choice(
        'Would you like to filter the data by month, day, both or none? ',
        ['month', 'day', 'both', 'none'],
    )

    # Only ask for a month when a month filter was requested.
    if pick in ('month', 'both'):
        month = _ask_choice(
            'Which month - January, February, March, April, May or June? ',
            months,
        )
    else:
        month = 'all'

    # Only ask for a day when a day filter was requested.
    if pick in ('day', 'both'):
        day = _ask_choice(
            'Which day - Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday? ',
            days,
        )
    else:
        day = 'all'

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Reads the CSV file of the requested city and narrows it down to the
    requested month and/or weekday.

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             with 'Start Time' parsed as datetime and two extra columns:
             'month' (month number) and 'day_of_week' (day name)
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Parse the start timestamps so the .dt accessor can be used below.
    trips['Start Time'] = pd.to_datetime(trips['Start Time'])
    start_times = trips['Start Time']

    # Helper columns the filters (and later statistics) rely on.
    trips['month'] = start_times.dt.month
    trips['day_of_week'] = start_times.dt.day_name()

    wants_month_filter = month != 'all'
    if wants_month_filter:
        # Position in this list + 1 is the calendar month number.
        month_names = ['january', 'february', 'march', 'april', 'may', 'june']
        month_number = month_names.index(month) + 1
        trips = trips[trips['month'] == month_number]

    wants_day_filter = day != 'all'
    if wants_day_filter:
        # day_name() yields title-cased names such as "Monday".
        trips = trips[trips['day_of_week'] == day.title()]

    return trips


def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour, followed by
    the time the calculation took.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column, a 'month'
             column holding month numbers (1-12) and a 'day_of_week' column.
             The frame is not modified.
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element to index.
        print('No trip data available for the selected filters.')
    else:
        # All twelve names so a month number above 6 cannot raise IndexError.
        months = ['january', 'february', 'march', 'april', 'may', 'june',
                  'july', 'august', 'september', 'october', 'november', 'december']

        # display the most common month
        month = int(df['month'].mode()[0])
        print('The most common month is: {}'.format(months[month-1]))

        # display the most common day of week
        day = df['day_of_week'].mode()[0]
        print('The most common day of week is: {}'.format(day))

        # display the most common start hour; computed on the fly rather than
        # stored as a new column, so the caller's DataFrame stays untouched
        popular_hour = df['Start Time'].dt.hour.mode()[0]
        print('The most common start hour is: {}'.format(popular_hour))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and
    the most common start/end combination, followed by the time the
    calculation took.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns.
    """

    print('\nCalculating the most popular stations and trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element to index.
        print('No trip data available for the selected filters.')
    else:
        # display most commonly used start station
        popular_start_station = df['Start Station'].mode()[0]
        print('The most popular start station is: {}'.format(popular_start_station))

        # display most commonly used end station
        popular_end_station = df['End Station'].mode()[0]
        print('The most popular end station is: {}'.format(popular_end_station))

        # display most frequent combination of start station and end station trip;
        # the separator is padded with spaces so the two names stay readable
        popular_trip = df['Start Station'] + ' to ' + df['End Station']
        print('The most popular trip is: from {}'.format(popular_trip.mode()[0]))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

def _split_duration(duration):
    """Break a Timedelta into whole (days, hours, minutes, seconds)."""
    hours, remainder = divmod(duration.seconds, 60*60)
    minutes, seconds = divmod(remainder, 60)
    return duration.days, hours, minutes, seconds


def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    A trip's duration is its 'End Time' minus its 'Start Time'. Prints the
    sum and the mean of those durations as days/hours/minutes/seconds,
    followed by the time the calculation took.

    Args:
        df - Pandas DataFrame with 'Start Time' and 'End Time' columns that
             pd.to_datetime can parse.
    """

    print('\nCalculating trip duration...\n')
    start_time = time.time()

    # Compute the per-trip durations once and reuse them for both statistics.
    # Rows with a missing timestamp are dropped; sum() and mean() would skip
    # them anyway.
    durations = (pd.to_datetime(df['End Time']) - pd.to_datetime(df['Start Time'])).dropna()

    if durations.empty:
        # The mean of no durations is NaT, which cannot be split into parts.
        print('No trip data available for the selected filters.')
    else:
        # display total travel time
        days, hours, minutes, seconds = _split_duration(durations.sum())
        print('Total travel time is: {}days, {}hours, {}minutes, {}seconds'.format(days,hours,minutes,seconds))

        # display mean travel time
        days, hours, minutes, seconds = _split_duration(durations.mean())
        print('Average travel time is: {}days, {}hours, {}minutes, {}seconds'.format(days,hours,minutes,seconds))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    
def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the counts per user type, the counts per gender and the earliest,
    most recent and most common year of birth, followed by the time the
    calculation took. The 'Gender' and 'Birth Year' columns are optional; a
    notice is printed when one of them is absent.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns.
    """

    print('\nCalculating Users Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print(df['User Type'].value_counts())

    # Display counts of gender
    if 'Gender' in df.columns:
        print(df['Gender'].value_counts())
    else:
        print('Sorry, this dataset has no gender data')

    # Display earliest, most recent, and most common year of birth
    if 'Birth Year' in df.columns:
        # Drop missing values instead of replacing them with 0: a filled-in 0
        # would be reported as the earliest birth year.
        year = df['Birth Year'].dropna().astype('int64')
        if year.empty:
            print('Sorry, no birth year values are available')
        else:
            print('Earliest birth year is: {}\nmost recent is: {}\nand most common birth year is: {}'.format(year.min(),year.max(),year.mode()[0]))
    else:
        print('Sorry, this dataset has no birth year data')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

    
def main():
    """Runs the interactive bikeshare exploration loop.

    Each round asks for the filters, loads the matching data, prints the four
    statistics reports, optionally pages through the raw rows five at a time
    and finally asks whether to start over. The loop ends when the answer to
    the restart question is anything other than "yes".
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        view_data = input('\nWould you like to view 5 rows of individual trip data? Enter yes or no\n').strip().lower()
        start_loc = 0
        # Show rows only after an explicit "yes", and stop once every row has
        # been displayed instead of printing empty frames.
        while view_data == 'yes' and start_loc < len(df):
            print(df.iloc[start_loc:start_loc+5])
            start_loc += 5
            if start_loc < len(df):
                view_data = input("Do you wish to continue?: ").strip().lower()

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.strip().lower() != 'yes':
            break


# Start the interactive session only when executed as a script, not on import.
if __name__ == '__main__':
    main()
    

Hello! Let's explore some US bikeshare data
Would you like to see data for Chicago, New York, or Washington? chicago
Would you like to filter the data by month, day, both or none? month
Which month - January, February, March, April, May or Junemarch
----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month is: march
The most common day of week is: Friday
The most common start hour is: 17

This took 0.039655208587646484 seconds.
----------------------------------------

Calculating the most popular stations and trip...

The most popular start station is: Clinton St & Washington Blvd
The most popular end station is: Clinton St & Washington Blvd
The most popular trip is: from Calumet Ave & 33rd SttoState St & 33rd St

This took 0.11252379417419434 seconds.
----------------------------------------

Calculating trip duration...

Total travel time is: 242days, 19hours, 40minutes, 20seconds
Average travel time is: 0days, 0hours, 11minute