In [1]:
import time
import pandas as pd
import numpy as np
# Loice
# Maps each supported city name (lower case, as typed by the user after
# normalisation) to the CSV file that load_data() reads for that city.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv'
}


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is stripped of surrounding whitespace and lower-cased before
    it is validated, and each question is repeated until a valid answer is
    entered.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    def _ask(prompt, valid_answers, error_message):
        """Prompt until the normalised answer is in valid_answers, then return it."""
        while True:
            # strip() so that accidental leading/trailing spaces are not
            # reported as an invalid choice.
            answer = input(prompt).strip().lower()
            if answer in valid_answers:
                return answer
            print(error_message)

    print('Hello! Let\'s explore some US bikeshare data!')

    city = _ask(
        'Enter the name of the city (Chicago, New York City, Washington): ',
        CITY_DATA,
        'Invalid city name. Please try again.',
    )
    month = _ask(
        'Enter the month (all, January, February, ..., June): ',
        ('all', 'january', 'february', 'march', 'april', 'may', 'june'),
        'Invalid month. Please try again.',
    )
    day = _ask(
        'Enter the day of the week (all, Monday, Tuesday, ..., Sunday): ',
        ('all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
         'saturday', 'sunday'),
        'Invalid day. Please try again.',
    )

    print('-' * 40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Besides the columns read from the CSV file, the returned frame carries two
    derived columns: 'Month' (month number of 'Start Time') and 'Day of Week'
    (day name of 'Start Time').

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of january..june
    """
    df = pd.read_csv(CITY_DATA[city])

    # Convert the 'Start Time' column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # Derive both helper columns on the full frame, before any filtering.
    # Assigning a column to an already-filtered selection is what triggers
    # pandas' SettingWithCopyWarning.
    df['Month'] = df['Start Time'].dt.month
    df['Day of Week'] = df['Start Time'].dt.day_name()

    # Filter by month if applicable
    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month_num = months.index(month) + 1
        # copy() makes the result an independent frame, so later column
        # assignments by callers do not warn either.
        df = df[df['Month'] == month_num].copy()

    # Filter by day if applicable
    if day != 'all':
        df = df[df['Day of Week'] == day.title()].copy()

    return df


def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month (as a month number, when a 'Month' column is
    present), the most common day of week and the most common start hour.
    When df has no rows a "no data" message is printed instead.

    The DataFrame passed in is left unmodified.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column and a
             'Day of Week' column
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    # perf_counter() is a high-resolution clock meant for measuring short
    # durations; time.time() can report 0.0 for very fast runs.
    start_time = time.perf_counter()

    if not df.empty:
        if 'Month' in df:
            # Display the most common month
            print('Most Common Month:', df['Month'].mode().iloc[0])

        # Display the most common day of the week
        print('Most Common Day:', df['Day of Week'].mode().iloc[0])

        # Display the most common start hour. The hours live in a temporary
        # Series rather than a new column, so the caller's frame (later shown
        # as "raw data") is not altered.
        start_hours = df['Start Time'].dt.hour
        print('Most Common Start Hour:', start_hours.mode().iloc[0])
    else:
        print('No data available for the specified filters.')

    print("\nThis took %s seconds." % (time.perf_counter() - start_time))
    print('-' * 40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and the
    most common "start to end" combination. When df has no rows a "no data"
    message is printed instead of raising.

    The DataFrame passed in is left unmodified.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    # perf_counter() is a high-resolution clock meant for measuring short
    # durations; time.time() can report 0.0 for very fast runs.
    start_time = time.perf_counter()

    if df.empty:
        # mode() of an empty column is empty, so taking its first element
        # would raise IndexError.
        print('No data available for the specified filters.')
    else:
        # Display most commonly used start station
        print('Most Common Start Station:', df['Start Station'].mode().iloc[0])

        # Display most commonly used end station
        print('Most Common End Station:', df['End Station'].mode().iloc[0])

        # Display most frequent combination of start station and end station
        # trip. The combined labels are kept in a temporary Series so no
        # 'Trip' column is added to the caller's frame.
        trips = df['Start Station'] + ' to ' + df['End Station']
        print('Most Common Trip:', trips.mode().iloc[0])

    print("\nThis took %s seconds." % (time.perf_counter() - start_time))
    print('-' * 40)


def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Args:
        df - Pandas DataFrame with a 'Trip Duration' column
    """
    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # Each statistic is computed right before it is printed: total first,
    # then the mean.
    for label, compute in (
        ('Total Travel Time:', durations.sum),
        ('Mean Travel Time:', durations.mean),
    ):
        print(label, compute())

    elapsed = time.time() - began
    print(f"\nThis took {elapsed} seconds.")
    print('-' * 40)


def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the counts per user type and, when the corresponding columns
    exist, the counts per gender and the earliest, most recent and most
    common year of birth. When df has no rows, or no row has a birth year,
    a message is printed instead of raising.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """
    print('\nCalculating User Stats...\n')
    # perf_counter() is a high-resolution clock meant for measuring short
    # durations; time.time() can report 0.0 for very fast runs.
    start_time = time.perf_counter()

    if df.empty:
        print('No data available for the specified filters.')
    else:
        # Display counts of user types
        user_types = df['User Type'].value_counts()
        print('User Types:\n', user_types)

        # Display counts of gender if the 'Gender' column is available
        if 'Gender' in df:
            gender_counts = df['Gender'].value_counts()
            print('\nGender Counts:\n', gender_counts)

        # Display earliest, most recent, and most common year of birth if the
        # 'Birth Year' column is available
        if 'Birth Year' in df:
            # Drop missing values first: min()/max() of an all-NaN column is
            # NaN, and int(NaN) raises ValueError.
            birth_years = df['Birth Year'].dropna()
            if birth_years.empty:
                print('\nNo birth year data available.')
            else:
                print('\nEarliest Year of Birth:', int(birth_years.min()))
                print('Most Recent Year of Birth:', int(birth_years.max()))
                print('Most Common Year of Birth:',
                      int(birth_years.mode().iloc[0]))

    print("\nThis took %s seconds." % (time.perf_counter() - start_time))
    print('-' * 40)


def display_raw_data(df):
    """Displays raw data upon user request.

    Shows df five rows at a time, asking before each batch. Stops when the
    user answers anything other than "yes" (case and surrounding whitespace
    are ignored) or when every row has been shown.

    Args:
        df - Pandas DataFrame whose rows are to be displayed
    """
    start_row = 0
    total_rows = len(df)
    while True:
        # Past the last row an iloc slice is simply empty, so stop here
        # instead of prompting again and printing an empty frame.
        if start_row >= total_rows:
            print('\nNo more raw data to display.')
            break
        display = input('\nWould you like to see raw data? Enter yes or no.\n')
        if display.strip().lower() != 'yes':
            break
        print(df.iloc[start_row:start_row + 5])
        start_row += 5


def main():
    """Runs the interactive bikeshare session.

    Each round asks for the filters, loads the matching data, prints the four
    statistics reports, offers the raw data, and then asks whether to start
    over. The loop ends on any answer other than "yes" (case-insensitive).
    """
    keep_going = True
    while keep_going:
        selection = get_filters()
        df = load_data(*selection)

        # Reports run in a fixed order: times, stations, durations, users.
        for report in (time_stats, station_stats, trip_duration_stats, user_stats):
            report(df)

        display_raw_data(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'


# Start the interactive session only when this file is executed as a script.
if __name__ == "__main__":
    main()
    # NOTE(review): this runs once main() returns and prints the number of
    # rows in the Washington data for May. It looks like a leftover
    # debugging/verification line; confirm it is still wanted.
    print(load_data('washington','may','all').shape[0])

Hello! Let's explore some US bikeshare data!
Enter the name of the city (Chicago, New York City, Washington): chicago
Enter the month (all, January, February, ..., June): all
Enter the day of the week (all, Monday, Tuesday, ..., Sunday): all
----------------------------------------

Calculating The Most Frequent Times of Travel...

Most Common Month: 6
Most Common Day: Tuesday
Most Common Start Hour: 17

This took 0.06247901916503906 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most Common Start Station: Streeter Dr & Grand Ave
Most Common End Station: Streeter Dr & Grand Ave
Most Common Trip: Lake Shore Dr & Monroe St to Streeter Dr & Grand Ave

This took 0.2199993133544922 seconds.
----------------------------------------

Calculating Trip Duration...

Total Travel Time: 280871787
Mean Travel Time: 936.23929

This took 0.0 seconds.
----------------------------------------

Calculating User Stats...

User Types:
 Subscriber    2