In [1]:
import time
import pandas as pd
import numpy as np
import math

In [2]:
CITY_DATA = { 'chicago': 'C:/My Python/all-project-files/chicago.csv',
              'new york city': 'C:/My Python/all-project-files/new_york_city.csv',
              'washington': 'C:/My Python/all-project-files/washington.csv' }

In [3]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    while True: 

        city = input('Please enter the city you would like to anayze:').strip().lower()
        
        if city in CITY_DATA:
            print('You have selected {}'.format(city.title()))
            break
        else : 
            print('Please type valid city name like chicago , new york city, washington')
    # get user input for month (all, january, february, ... , june)
 
    while True : 
        x = list(range(1,7))
        months = list(map(str, x))
        months.append('All')   
        month = input('Enter any one of the first 6 months or enter All to select all 6 months :').title()
        if month in months: 
            print('You have selected {} st/nd/rd/th month'.format(month))
            break
        else : 
            print('Please type the valid month digit as shown in below: ')
            print(months)

    # get user input for day of week (all, monday, tuesday, ... sunday)
    while True: 
        days= ['Wednesday', 'Saturday', 'Tuesday', 'Sunday', 'Monday', 'Friday','Thursday', 'All']

        day =input('Please enter the day name you want to filter, else write all :').title()
        if day in days:
            print('You have selected :{} day/days'.format(day.title()))
            break
        else : 
            print('Please type valid day name from the day list: ')
            print(days)

    print('-'*40)
    return city, month, day


In [4]:
def load_data(city,month,day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    citypath = CITY_DATA[city]
    df = pd.read_csv(citypath)
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['Month'] = pd.DatetimeIndex(df['Start Time']).month
    df['Day of Week'] = df['Start Time'].dt.day_name()
    df['Start Hour'] = df['Start Time'].dt.hour
    if month !='All' and day !='All':
        df = df[(df['Month']==int(month)) & (df['Day of Week']==day.title())]   
    elif month =='All' and day != 'All':
        df['Month'] =df['Month']
        df = df[df['Day of Week']==day]
    elif month != 'All' and day =='All':
        df['Day of Week'] = df['Day of Week']

    return df


In [5]:
def display_raw_data(df):
    #Displays the raw data of bikeshare
    row=0
    pd.set_option('display.max_columns',200)
    raw = input('Do you want to see the raw data, please proceed with (yes/no) :').strip().lower()
    while True : 
        if raw =='no': 
            break
        elif raw =='yes':
            end = row+5
            for i in range(row,end): 
                print(df.iloc[i])
                print('--'*25)
        raw_again = input('Do you want to see next 5 rows (yes/no):').strip().lower()
        if raw_again !='yes': 
            break 
        row+=5

In [6]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""
    while True: 

        show_stats = input("Do you want to see statistics on time stats? (yes/no): ").strip().lower()
        if show_stats == 'yes':
            print('\nCalculating The Most Frequent Times of Travel...\n')
            start_time = time.time()

            # display the most common month
            month = df['Month'].mode()[0]
            print('The most common month is : {} th'.format(month))
            # display the most common day of week
            day = df['Day of Week'].mode()[0]
            print('The most common day is : {}'.format(day))
            # display the most common start hour
            hour = df['Start Hour'].mode()[0]
            print('The most common start hour is : {} th '.format(hour))
            print("\nThis took %s seconds." % (time.time() - start_time))
            print('-'*40)
            break
        elif show_stats == 'no':
            # Exit the function
            return
        else:
            # Handle invalid input
            print("Invalid input. Please enter 'yes' or 'no'.")

In [7]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""
    while True:

        show_stats = input("Do you want to see statistics on station stats? (yes/no): ").strip().lower()

        if show_stats == 'yes':
            print('\nCalculating The Most Popular Stations and Trip...\n')
            start_time = time.time()

            # display most commonly used start station
            Station_start = df['Start Station'].mode()[0]
            print('The most common start station is : {}'.format(Station_start))
            # display most commonly used end station
            Station_end = df['End Station'].mode()[0]
            print('The most common end station is : {}'.format(Station_end))
            # display most frequent combination of start station and end station trip
            Station_common = (df['Start Station'] + ' TO ' + df['End Station']).mode()[0]
            print('The most common commuting station is : {}'.format(Station_common))
            
            print("\nThis took %s seconds." % (time.time() - start_time))
            print('-'*40)
            break
        elif show_stats == 'no':
            # Exit the function
            return
        else:
            # Handle invalid input
            print("Invalid input. Please enter 'yes' or 'no'.")


In [8]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""
    while True:
        show_stats = input("Do you want to see statistics on trip duration? (yes/no): ").strip().lower()
        if show_stats == 'yes':
            print('\nCalculating Trip Duration...\n')
            start_time = time.time()

            # display total travel time
            Total_travel =  math.ceil(df['Trip Duration'].sum() /3600)
            print('The total travel time is : {} hours'.format(Total_travel))
            # display mean travel time
            Mean_travel =df['Trip Duration'].mean() 
            print('The mean travel time is : {} seconds'.format(Mean_travel))
            
            print("\nThis took %s seconds." % (time.time() - start_time))
            print('-'*40)
            break
        elif show_stats == 'no':
            # Exit the function
            return
        else:
            # Handle invalid input
            print("Invalid input. Please enter 'yes' or 'no'.")

In [9]:
def user_stats(df):
    """Displays statistics on bikeshare users."""
    while True: 

        show_stats = input("Do you want to see statistics on bikeshare users? (yes/no): ").strip().lower()
        if show_stats == 'yes':

            print('\nCalculating User Stats...\n')
            start_time = time.time()

            # Display counts of user types
            usercount = df['User Type'].value_counts()
            print('Here are user types and their counts : {}'.format(usercount))
            # Display counts of gender
            try:

                gen_count = df['Gender'].value_counts()
                print('Here are the gender counts : {}'.format(gen_count))
                # Display earliest, most recent, and most common year of birth
                earliest = df['Birth Year'].min()
                most_rescent = df['Birth Year'].max()
                most_common = df['Birth Year'].mode()[0]
                print('--'*40)
                print('The oldest to travel is {:.0f} born , youngest is {:.0f} born, most common is {:.0f} born'.format(earliest, most_rescent,most_common))
                print('--'*40)
            except KeyError: 
                print("The gender and birth year column is not in this file")
            print("\nThis took %s seconds." % (time.time() - start_time))
            print('-'*40)
            break
        elif show_stats == 'no':
            # Exit the function
            return
        else:
            # Handle invalid input
            print("Invalid input. Please enter 'yes' or 'no'.")

In [144]:
city, month, day = get_filters()
df = load_data(city, month, day)
display_raw_data(df)
time_stats(df)
station_stats(df)
trip_duration_stats(df)
user_stats(df)

Hello! Let's explore some US bikeshare data!
You have selected Washington
You have selected :all st/nd/th/rd month/months
You have selected :All day/days
----------------------------------------


In [145]:
df = load_data(city, month, day)
df

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Month,Day of Week,Start Hour
0,1621326,2017-06-21 08:36:34,2017-06-21 08:44:43,489.066,14th & Belmont St NW,15th & K St NW,Subscriber,6,Wednesday,8
1,482740,2017-03-11 10:40:00,2017-03-11 10:46:00,402.549,Yuma St & Tenley Circle NW,Connecticut Ave & Yuma St NW,Subscriber,3,Saturday,10
2,1330037,2017-05-30 01:02:59,2017-05-30 01:13:37,637.251,17th St & Massachusetts Ave NW,5th & K St NW,Subscriber,5,Tuesday,1
3,665458,2017-04-02 07:48:35,2017-04-02 08:19:03,1827.341,Constitution Ave & 2nd St NW/DOL,M St & Pennsylvania Ave NW,Customer,4,Sunday,7
4,1481135,2017-06-10 08:36:28,2017-06-10 09:02:17,1549.427,Henry Bacon Dr & Lincoln Memorial Circle NW,Maine Ave & 7th St SW,Subscriber,6,Saturday,8
...,...,...,...,...,...,...,...,...,...,...
299995,945535,2017-04-26 03:12:14,2017-04-26 03:41:19,1745.528,Lincoln Memorial,Jefferson Dr & 14th St SW,Customer,4,Wednesday,3
299996,1495781,2017-06-11 09:48:52,2017-06-11 10:22:31,2018.450,Key Blvd & N Quinn St,5th & K St NW,Subscriber,6,Sunday,9
299997,12860,2017-01-04 14:33:00,2017-01-04 14:43:00,583.897,17th & K St NW / Farragut Square,7th & F St NW/Portrait Gallery,Subscriber,1,Wednesday,14
299998,977621,2017-04-28 07:17:47,2017-04-28 07:56:31,2324.170,Jefferson Dr & 14th St SW,Washington & Independence Ave SW/HHS,Customer,4,Friday,7


In [128]:
time_stats(df)


Calculating The Most Frequent Times of Travel...

The most common month is : 6 th
The most common day is : Tuesday
The most common start hour is : 17 th 

This took 0.03200173377990723 seconds.
----------------------------------------


In [146]:
df['Day of Week'].unique()

array(['Wednesday', 'Saturday', 'Tuesday', 'Sunday', 'Monday', 'Friday',
       'Thursday'], dtype=object)

In [154]:
a = list(range(1,13))
a

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

In [155]:
while True : 
        months = list(range(1,13))
        months.append('all')   
        month = input('Please enter the month number you want to filter, else write all :')
        if month in months: 
            print('You have selected {} st/nd/rd/th month'.format(month))
            break
        else : 
            print('Please type the valid month digit as shown in below: ')
            print(months)

Please type the valid month digit as shown in below: 
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 'all']
Please type the valid month digit as shown in below: 
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 'all']
You have selected all st/nd/rd/th month


In [166]:
def display_raw_data(df):
    #Displays the raw data of bikeshare
    row=0
    pd.set_option('display.max_columns',200)
    raw = input('Do you want to see the raw data, please proceed with (yes/no) :').strip().lower()
    while True : 
        if raw =='no': 
            break
        elif raw =='yes':
            end = row+5
            for i in range(row,end): 
                print(df.iloc[i])
        raw_again = input('Do you want to see next 5 rows (yes/no):').strip().lower()
        if raw_again !='yes': 
            break 
        row+=5
        

In [168]:
display_raw_data(df)

Unnamed: 0                    1621326
Start Time        2017-06-21 08:36:34
End Time          2017-06-21 08:44:43
Trip Duration                 489.066
Start Station    14th & Belmont St NW
End Station            15th & K St NW
User Type                  Subscriber
Month                               6
Day of Week                 Wednesday
Start Hour                          8
Name: 0, dtype: object
Unnamed: 0                             482740
Start Time                2017-03-11 10:40:00
End Time                  2017-03-11 10:46:00
Trip Duration                         402.549
Start Station      Yuma St & Tenley Circle NW
End Station      Connecticut Ave & Yuma St NW
User Type                          Subscriber
Month                                       3
Day of Week                          Saturday
Start Hour                                 10
Name: 1, dtype: object
Unnamed: 0                              1330037
Start Time                  2017-05-30 01:02:59
End Time          