In [2]:
import time
import pandas as pd
import numpy as np

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }


def get_filters():
    print('Hello! Let\'s explore some US bikeshare data!')
    # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    country=['chicago', 'new york city', 'washington']
    city=input("would you like to see data for chicago, new york city, washington").lower()           
    while city not in country:
        print("please choose city from list")
        city=input( ).lower()

    # get user input for month (all, january, february, ... , june)
    months=['all', 'january','february','march','april','may','june']
    print('choose month from list: january,february,march,april,may,june')
    month=input('enter month or all').lower()
    while month not in months:
        print("please choose month from list")
        month=input( ).lower()


    # get user input for day of week (all, monday, tuesday, ... sunday)
    days=['all','sunday','monday','tuesday','wednesday','thursday','friday','saturday']
    print('choose day from list:sunday,monday,tuesday,wednesday,thursday,friday,saturday')
    day=input("enter day or all").lower()
    while day not in days:
        print("please choose day from list")
        day=input( ).lower()
    

    print('-'*40)
    return city, month, day


def load_data(city, month, day):

    df = pd.read_csv(CITY_DATA[city])
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.weekday_name
    
    
    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month) + 1
        df = df[df['month'] == month]
        
        
    if day != 'all':
        df = df[df['day_of_week'] == day.title()]
        
        
    #to add new 2 columns in washington (gender and birth year)
    #to deal with func user_stats 
    if city=='washington':
        df['Gender']=np.nan
        df['Birth Year']=np.nan


    return df


def time_stats(df):

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # display the most common month
    most_month = df['month'].mode()
    print("most common month",most_month)

    # display the most common day of week
    most_day = df['day_of_week'].mode()
    print("most common day",most_day)

    # display the most common start hour
    df['hour'] = df['Start Time'].dt.hour
    popular_hour = df['hour'].mode()[0]
    print("most common time",popular_hour)


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    start_station=df['Start Station'].mode()    
    print("most common start station",start_station)
    

    # display most commonly used end station
    end_station=df['End Station'].mode()
    print("most common end station",end_station)
    

    # display most frequent combination of start station and end station trip
    print("most common combination of start station and end station trip")
    print(df.groupby(['Start Station','End Station']).size().idxmax())
    

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    travel_time=df['Trip Duration'].sum()
    print("the total time",travel_time)
    

    # display mean travel time
    avg_time=df['Trip Duration'].mean()
    print("the avg time ",avg_time)


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def user_stats(df):

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    user_types = df['User Type'].value_counts()
    print("user type ",user_types)


    # Display counts of gender
    df['Gender']=df['Gender'].fillna(0)
    user_gender = df['Gender'].value_counts()
    print("user gender",user_gender)


    # Display earliest, most recent, and most common year of birth
    
    early=df['Birth Year'].min()
    recent=df['Birth Year'].max()
    most_common=df['Birth Year'].mode()
    print("early year",early)
    print("recent year",recent)
    print("most used year",most_common)


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def display_raw_data(df):
    i=0
    display=input("do you want to see 5 rows of data yes/no").lower()
    answer=["yes","no"]
    while display not in answer:
        print("please write yes or no")
        display=input( ).lower()
    pd.set_option('display.max_columns',200) #but already the columns not more than 200  
    
        
    while True:
        if display=="no" :
            break
        print(df[i:i+5])
        display=input("do you want to see 5 more rows of data yes/no").lower()
        while display not in answer:
            print("please write yes or no")
            display=input( ).lower()
        i+=5
    
    
def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        display_raw_data(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
	main()


Hello! Let's explore some US bikeshare data!
would you like to see data for chicago, new york city, washingtonchicago
choose month from list: january,february,march,april,may,june
enter month or allmay
choose day from list:sunday,monday,tuesday,wednesday,thursday,friday,saturday
enter day or allmonday
----------------------------------------

Calculating The Most Frequent Times of Travel...

most common month 0    5
dtype: int64
most common day 0    Monday
dtype: object
most common time 17

This took 0.005984783172607422 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

most common start station 0    Streeter Dr & Grand Ave
dtype: object
most common end station 0    Streeter Dr & Grand Ave
dtype: object
most common combination of start station and end station trip
('Lake Shore Dr & Monroe St', 'Streeter Dr & Grand Ave')

This took 0.011968374252319336 seconds.
----------------------------------------

Calculating Trip Duration...

the

do you want to see 5 rows of data yes/noyes
     Unnamed: 0          Start Time             End Time  Trip Duration  \
1        482740 2017-03-11 10:40:00  2017-03-11 10:46:00        402.549   
38       582231 2017-03-25 14:16:00  2017-03-25 14:35:00       1127.911   
71       590016 2017-03-25 20:42:00  2017-03-25 21:18:00       2171.291   
285      521787 2017-03-18 17:27:00  2017-03-18 17:30:00        201.276   
305      429803 2017-03-04 20:27:00  2017-03-04 20:37:00        591.502   

                                         Start Station  \
1                           Yuma St & Tenley Circle NW   
38                                      18th & R St NW   
71   Smithsonian-National Mall / Jefferson Dr & 12t...   
285                                     15th & P St NW   
305                          New York Ave & 15th St NW   

                      End Station   User Type  month day_of_week  Gender  \
1    Connecticut Ave & Yuma St NW  Subscriber      3    Saturday     0.0   
38  