In [1]:
import time
import pandas as pd
import numpy as np

pd.set_option ("display.max_columns", 200)

CITY_DATA = { 'Chicago': 'chicago.csv',
              'New York City': 'new_york_city.csv',
              'Washington': 'washington.csv' }


In [2]:
cities = ["Chicago", "New York City", "Washington"]
months = ["All", "January", "February", "March", "April", "May", "June"]
days = ["All", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

def get_filters():
    print('Hello! Let\'s explore some US bikeshare data!')
    city = str(input("Enter city you want to analyze (Chicago, New York City or Washington): ").lower().title())
    while city not in cities:
        print("Please enter the city as either Chicago, New York City or Washington")
        city = str(input("Enter city you want to analyze (Chicago, New York City or Washington): ").lower().title())
    month = str(input("Enter month (up to June) you want to analyze or type All for all months: ").lower().title())
    while month not in months:
        print("Please enter the month (up to June) as a calendar month or type All e.g. January, February")
        month = str(input("Enter month (up to June) you want to analyze or type All for all months: ").lower().title())
    day = str(input("Enter day you want to analyze or type All for all days: ").lower().title())
    while day not in days:
        print("Please enter the day as a weekday or weekend or type All e.g. Monday, Tuesday")  
        day = str(input("Enter day you want to analyze or type All for all days: ").lower().title())
    return city, month, day

city, month, day = get_filters()


Hello! Let's explore some US bikeshare data!
Enter city you want to analyze (Chicago, New York City or Washington): Washington
Enter month (up to June) you want to analyze or type All for all months: May
Enter day you want to analyze or type All for all days: Friday


In [3]:
def load_data (city, month, day):
    df = pd.read_csv(CITY_DATA[city])
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df.insert(1, 'Month', df['Start Time'].dt.strftime("%B"))
    df.insert(2, 'Day', df['Start Time'].dt.strftime("%A"))
    df.insert(3,'Hour', df['Start Time'].dt.strftime("%H"))
    df.insert(4,'Station Combination', df['Start Station'] + " - " + df['End Station'])
    if month != 'All':
        df= df[df['Month'] == month]
    if day != 'All':
        df = df[df['Day'] == day]
    print("Displaying data for the city of: {}".format(city))
    print("Displaying data for the following month/s: {}".format(month))
    print("Displaying data for the following day/s: {}".format(day))
    return df

df = load_data(city,month,day)


def raw_data (df):
    x = 1
    raw_data = input("Would you like to see 5 rows of raw data (yes or no)?".lower())
    while raw_data == 'yes':
        print(df.iloc[(x-1)*5:x*5])
        x += 1
        raw_data = input("Would you like to see 5 rows of raw data (yes or no)?".lower())
        
raw_data(df)
        

Displaying data for the city of: Washington
Displaying data for the following month/s: May
Displaying data for the following day/s: Friday
would you like to see 5 rows of raw data (yes or no)?yes
     Unnamed: 0 Month     Day Hour  \
43      1126235   May  Friday   02   
110     1207200   May  Friday   03   
157     1215865   May  Friday   09   
159     1210885   May  Friday   06   
173     1067125   May  Friday   08   

                                   Station Combination          Start Time  \
43                     22nd & P ST NW - 10th & K St NW 2017-05-12 02:15:53   
110   Columbus Circle / Union Station - 11th & H St NE 2017-05-19 03:53:18   
157           N Lynn St & Fairfax Dr - 23rd & E St NW  2017-05-19 09:50:38   
159  5th St & Massachusetts Ave NW - 10th St & Cons... 2017-05-19 06:42:57   
173       15th & P St NW - New Hampshire Ave & T St NW 2017-05-05 08:12:45   

                End Time  Trip Duration                    Start Station  \
43   2017-05-12 02:33:15      

In [4]:
def time_stats():
    """Displays statistics on the most frequent times of travel"""
    print('\nCalculating The Most Frequent Times of Travel for your city of choice. Note that these statistics are not filtered by month or weekday...\n')
    df = pd.read_csv(CITY_DATA[city])
    start_time = time.time()
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df.insert(1, 'Month', df['Start Time'].dt.strftime("%B"))
    df.insert(2, 'Day', df['Start Time'].dt.strftime("%A"))
    df.insert(3,'Hour', df['Start Time'].dt.strftime("%H"))
    most_common_month = df['Month'].value_counts().idxmax()
    most_common_day = df['Day'].value_counts().idxmax()
    most_common_hour = df['Hour'].value_counts().idxmax()
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    return 'The most popular month for travel is: {}'.format(most_common_month), 'The most popular day for travel is: {}'.format(most_common_day), 'The most popular start time hour for travel is: {}:00'.format(most_common_hour) 

    # TO DO: display the most common month


    # TO DO: display the most common day of week


    # TO DO: display the most common start hour


    

time_stats()




Calculating The Most Frequent Times of Travel for your city of choice. Note that these statistics are not filtered by month or weekday...


This took 9.27512001991272 seconds.
----------------------------------------


('The most popular month for travel is: June',
 'The most popular day for travel is: Wednesday',
 'The most popular start time hour for travel is: 08:00')

In [5]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip for your choice of city, month/s and day/s...\n')
    start_time = time.time()
    most_common_start_station = df['Start Station'].value_counts().idxmax()
    most_common_end_station = df['End Station'].value_counts().idxmax()
    most_common_station_combination = df['Station Combination'].value_counts().idxmax()

    # TO DO: display most commonly used start station


    # TO DO: display most commonly used end station


    # TO DO: display most frequent combination of start station and end station trip


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    return 'The most popular start station is: {}'.format(most_common_start_station), 'The most popular end station is: {}'.format(most_common_end_station), 'The most popular station combination for travel is: {}'.format(most_common_station_combination) 
    


station_stats(df)



Calculating The Most Popular Stations and Trip for your choice of city, month/s and day/s...


This took 0.006887912750244141 seconds.
----------------------------------------


('The most popular start station is: Lincoln Memorial',
 'The most popular end station is: Columbus Circle / Union Station',
 'The most popular station combination for travel is: Lincoln Memorial - Smithsonian-National Mall / Jefferson Dr & 12th St SW')

In [6]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration for your choice of city, month/s and day/s...\n')
    start_time = time.time()
    total_travel_time_hours = int((df['Trip Duration'].sum())//60)
    mean_travel_time_hours = int((df['Trip Duration'].mean())//60)
    total_travel_time_mins = int((df['Trip Duration'].sum())%60)
    mean_travel_time_mins = int((df['Trip Duration'].mean())%60)

    # TO DO: display total travel time


    # TO DO: display mean travel time


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    return 'The total travel time during your analysis period is: {} hours and {} mins'.format(total_travel_time_hours,total_travel_time_mins), 'The mean travel time during your analysis period is: {} hours and {} mins'.format(mean_travel_time_hours, mean_travel_time_mins )


trip_duration_stats(df)




Calculating Trip Duration for your choice of city, month/s and day/s...


This took 0.0023298263549804688 seconds.
----------------------------------------


('The total travel time during your analysis period is: 141275 hours and 22 mins',
 'The mean travel time during your analysis period is: 19 hours and 32 mins')

In [7]:
def user_stats(df,city):
    """Displays statistics on bikeshare users."""
    
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    #counts of user types
    print('User type counts:\n{}'.format(df['User Type'].value_counts()) + '\n')
    
    if city != 'washington':
        #  counts of gender
        print('Counts of gender:\n{}'.format(df['Gender'].value_counts())+ '\n') 
        # earliest, most recent, and most common year of birth
        print('Earliest birth date: {}'.format(df['Birth Year'].min()))
        print('Most recent birth date: {}'.format(df['Birth Year'].max()))
        print('Most common year of birth: {}'.format(df['Birth Year'].mode()))


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)






Calculating User Stats for your choice of city, month/s and day/s...


This took 0.0 seconds.
----------------------------------------


('The count of user types is as follows: Subscriber    5305\nCustomer      1925\nName: User Type, dtype: int64',
 'There are no user statistics on gender and date of birth for the State of Washington')

In [None]:
def main():
    while True:
        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break
        
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats()
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        

if __name__ == "__main__":
	main()



Would you like to restart? Enter yes or no.
yes
Hello! Let's explore some US bikeshare data!
Enter city you want to analyze (Chicago, New York City or Washington): New York City
Enter month (up to June) you want to analyze or type All for all months: June
Enter day you want to analyze or type All for all days: Monday
Displaying data for the city of: New York City
Displaying data for the following month/s: June
Displaying data for the following day/s: Monday

Calculating The Most Frequent Times of Travel for your city of choice. Note that these statistics are not filtered by month or weekday...


This took 6.5308403968811035 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip for your choice of city, month/s and day/s...


This took 0.013336896896362305 seconds.
----------------------------------------

Calculating Trip Duration for your choice of city, month/s and day/s...


This took 0.002008199691772461 seconds.
-------------------------