In [1]:
import time
import pandas as pd
import numpy as np

# Lower-case city name -> CSV file that load_data() reads for that city.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

In [2]:
def _ask_choice(question, retry_prompt, valid_answers, normalise):
    """Print ``question`` and keep reading input until it is one of ``valid_answers``.

    Each reply is stripped of surrounding whitespace and passed through
    ``normalise`` (e.g. ``str.lower``) before being compared, so answers are
    accepted regardless of case or stray spaces.
    """
    print(question)
    answer = normalise(input().strip())
    while answer not in valid_answers:
        answer = normalise(input(retry_prompt).strip())
    return answer


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Input is case-insensitive and surrounding whitespace is ignored; the user
    is re-prompted until a valid value is entered.

    Returns:
        (str) city - lower-case name of the city to analyze
        (str) month - title-cased month name to filter by, or "All" to apply no month filter
        (str) day - title-cased day of week to filter by, or "All" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    city = _ask_choice(
        '\nWhich city do you want to see its data (Chicago, New York City or Washington)?',
        '\nPlease write the correct city!',
        ('chicago', 'new york city', 'washington'),
        str.lower,
    )

    month = _ask_choice(
        '\nPlease write the month : January, February, March, April, May, June or all',
        '\nPlease write the correct month!',
        ('January', 'February', 'March', 'April', 'May', 'June', 'All'),
        str.title,
    )

    day = _ask_choice(
        '\nPlease write the day : Sunday, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday or all',
        '\nPlease write the correct day!',
        ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'All'),
        str.title,
    )

    print('-'*40)
    return city, month, day

In [3]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The filter arguments are matched case-insensitively, so both "all" and
    "All" (or "may" and "May") are accepted.

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             with added 'month' (1-12) and 'day_of_week' (English day name) columns
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of January..June
    """
    df = pd.read_csv(CITY_DATA[city.lower()])

    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['End Time'] = pd.to_datetime(df['End Time'])

    # Derive both helper columns on the full frame *before* filtering, so we
    # never assign a column into a filtered slice (SettingWithCopyWarning).
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()

    month = month.title()
    day = day.title()

    if month != 'All':
        months = ['January', 'February', 'March', 'April', 'May', 'June']
        # .dt.month is 1-based, list positions are 0-based.
        df = df[df['month'] == months.index(month) + 1]

    if day != 'All':
        df = df[df['day_of_week'] == day]

    # Hand back an independent frame so callers can add columns safely.
    return df.copy()

In [4]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour of the trips in
    ``df``, followed by how long the calculation took.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column plus the
             'month' and 'day_of_week' columns. It is only read, never modified.
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is empty, so [0] would raise.
        print('\nNo trips match the selected filters.')
    else:
        print('\nThe most common month is : {}'.format(df['month'].mode()[0]))

        print('\nThe most common day of week is : {}'.format(df['day_of_week'].mode()[0]))

        # Work on a temporary Series instead of adding an 'Hour' column to the
        # caller's frame.
        start_hours = df['Start Time'].dt.hour
        print('\nThe most common start hour is : {}'.format(start_hours.mode()[0]))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [5]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, end station and start/end
    combination of the trips in ``df``, followed by how long the calculation
    took.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns.
             It is only read, never modified.
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is empty, so [0] would raise.
        print('\nNo trips match the selected filters.')
    else:
        print('\nThe most commonly used start station is : {}'.format(df['Start Station'].mode()[0]))

        print('\nThe most commonly used end station is : {}'.format(df['End Station'].mode()[0]))

        # Build the start/end pairs as a temporary Series instead of adding a
        # 'Trip' column to the caller's frame.
        trips = df['Start Station'] + ' ' + df['End Station']
        print('\nThe most frequent combination of start station and end station trip is: {}'.format(trips.mode()[0]))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [6]:
def trip_duration_stats(df):
    """Print the summed and the average value of the 'Trip Duration' column.

    Args:
        df - Pandas DataFrame with a numeric 'Trip Duration' column.
    """
    print('\nCalculating Trip Duration...\n')
    started_at = time.time()

    durations = df['Trip Duration']

    # Overall time spent travelling across every trip in the frame.
    total_duration = durations.sum()
    print('\nTotal travel time is : {}'.format(total_duration))

    # Average length of a single trip.
    mean_duration = durations.mean()
    print('\nMean travel time is : {}'.format(mean_duration))

    elapsed = time.time() - started_at
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [7]:
def _ask_yes_no(prompt):
    """Ask ``prompt`` until the reply is Yes or No (any case); return True for Yes."""
    reply = input(prompt).strip().title()
    while reply not in ('Yes', 'No'):
        reply = input('\nPlease write Yes or No!').strip().title()
    return reply == 'Yes'


def _page_through_rows(df, page_size=5):
    """Offer to print ``df`` ``page_size`` rows at a time until the user declines
    or the rows run out."""
    if not _ask_yes_no('\nDo you want to see trip details Yes or No? \n'):
        return

    start = 0
    while True:
        print(df.iloc[start:start + page_size])
        start += page_size
        if start >= len(df):
            # Nothing left: stop instead of printing empty frames forever.
            print('\nNo more trip data to display.')
            return
        if not _ask_yes_no('\nDo you want to see more information Yes or No? \n'):
            return


def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints counts per user type and, when the columns exist, counts per gender
    and the earliest / most recent / most common birth year. Afterwards the
    user is offered the trip rows five at a time.

    Args:
        df - Pandas DataFrame with a 'User Type' column and optionally
             'Gender' and 'Birth Year' columns.
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    print('\nCounts of user types : {}'.format(df['User Type'].value_counts()))

    if 'Gender' not in df:
        print ('\nNo Data for users gender in this city')
    else:
        print('\nCounts of gender : {}'.format(df['Gender'].value_counts()))

    # Missing birth years are ignored; a column holding only missing values is
    # reported the same way as an absent column instead of crashing.
    birth_years = df['Birth Year'].dropna() if 'Birth Year' in df else None
    if birth_years is None or birth_years.empty:
        print ('\nNo Data for users birth year in this city')
    else:
        earliest_year = int(birth_years.min())
        recent_year = int(birth_years.max())
        common_year = int(birth_years.mode()[0])
        print('\nThe earliest year of birth is : {}. \n The most recent year of birth is : {}.\n The most common year of birth is : {}.'.format(earliest_year, recent_year, common_year))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

    _page_through_rows(df)

    print ('Thanks for your time!')

In [8]:
def main():
    """Run the interactive report repeatedly until the user declines to restart.

    Each round asks for the filters, loads the matching data and runs the four
    statistics reports on it in a fixed order.
    """
    reports = (time_stats, station_stats, trip_duration_stats, user_stats)

    keep_going = True
    while keep_going:
        chosen_filters = get_filters()
        df = load_data(*chosen_filters)

        for report in reports:
            report(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        # Anything other than "yes" (in any letter case) ends the session.
        keep_going = restart.lower() == 'yes'


if __name__ == "__main__":
    main()

Hello! Let's explore some US bikeshare data!

Which city do you want to see its data (Chicago, New York City or Washington)?
chicago

Please write the month : January, February, March, April, May, June or all
may

Please write the day : Sunday, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday or all
monday
----------------------------------------

Calculating The Most Frequent Times of Travel...


The most common month is : 5

The most common day of week is : Monday

The most common start hour is : 17

This took 0.006289482116699219 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...


The most commonly used start station is : Streeter Dr & Grand Ave

The most commonly used end station is : Streeter Dr & Grand Ave

The most frequent combination of start station and end station trip is: Lake Shore Dr & Monroe St Streeter Dr & Grand Ave

This took 0.013689756393432617 seconds.
----------------------------------------

Calculating Tri