In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read chicago.csv into a pandas DataFrame; the exploratory cells below use it as `df`.
df = pd.read_csv('chicago.csv')

In [None]:
# Sanity check: show the type of the object that read_csv returned.
print(type(df))

In [None]:
# Parse the 'Start Time' column into datetimes, derive the hour of day for
# each row, and report the hour value that occurs most often.
start_times = pd.to_datetime(df['Start Time'])
df['Start Time'] = start_times
df['hour'] = start_times.dt.hour

hour_modes = df['hour'].mode()
popular_hour = hour_modes[0]
print('Most popular start hour:', popular_hour)

In [None]:
# Count how many rows fall under each distinct 'User Type' value.
user_type_column = df['User Type']
user_types = user_type_column.value_counts()
print(user_types)

In [None]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (lower case) to the CSV file with its data.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Accepted (lower-case) answers for the interactive prompts. In the month and
# day lists 'all' comes first and means "apply no filter".
cities = list(CITY_DATA)
months = [
    'all',
    'january', 'february', 'march',
    'april', 'may', 'june',
]
day_of_week = [
    'all',
    'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
    'saturday', 'sunday',
]

def _prompt_for_choice(prompt, choices, retry_hint):
    """Ask `prompt` repeatedly until the reply is one of `choices`.

    The reply is stripped of surrounding whitespace and lower-cased before it
    is compared, so an answer such as "Chicago " is accepted as "chicago".

    Args:
        (str) prompt - text shown to the user by input()
        (list) choices - accepted lower-case answers
        (str) retry_hint - message printed after an answer that is not accepted
    Returns:
        (str) the accepted, normalised answer
    """
    while True:
        answer = input(prompt).strip().lower()
        if answer in choices:
            print(f'Gathering data for {answer}')
            return answer
        print(retry_hint)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until the answer is one of the entries of the
    module-level `cities`, `months` or `day_of_week` lists respectively.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # get user input for city (chicago, new york city, washington)
    city = _prompt_for_choice(
        'Would you like to see data from Chicago, New York City or Washington?\n',
        cities,
        'Choose between Chicago, New York City, Washington',
    )

    # get user input for month (all, january, february, ... , june)
    month = _prompt_for_choice(
        'Which month would you like to see data for?\n',
        months,
        'Choose between January - June',
    )

    # get user input for day of week (all, monday, tuesday, ... sunday)
    day = _prompt_for_choice(
        'Which day of the week would you like to see data for?\n',
        day_of_week,
        'Choose one day of the week',
    )

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The file named by CITY_DATA[city] is read, its 'Start Time' column is
    parsed into datetimes, and three helper columns are added: 'Month'
    (month number), 'Day of Week' (weekday number, Monday=0 ... Sunday=6) and
    'Hour' (hour of day).

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor january..june, or day is
            neither "all" nor a weekday name
    """
    month_names = ['january', 'february', 'march', 'april', 'may', 'june']
    day_names = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']

    df = pd.read_csv(CITY_DATA[city])
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['Month'] = df['Start Time'].dt.month
    df['Day of Week'] = df['Start Time'].dt.weekday
    df['Hour'] = df['Start Time'].dt.hour

    if month != "all":
        # 'Month' holds month numbers starting at 1, list positions start at 0.
        month_number = month_names.index(month.lower()) + 1
        df = df[df['Month'] == month_number]

    if day != "all":
        # 'Day of Week' holds dt.weekday numbers, which match the positions
        # in day_names (Monday=0 ... Sunday=6).
        day_number = day_names.index(day.lower())
        df = df[df['Day of Week'] == day_number]

    return df


def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month and day of week by name, and the most
    common start hour, followed by the time the calculation took.

    Args:
        df - Pandas DataFrame with a 'Month' column of month numbers (1-12),
            a 'Day of Week' column of weekday numbers (Monday=0 ... Sunday=6;
            values that are already strings are printed unchanged) and an
            'Hour' column
    """
    month_names = ('January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November', 'December')
    day_names = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                 'Saturday', 'Sunday')

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # display the most common month
    common_month = df['Month'].mode()[0]
    # Month numbers start at 1, tuple positions at 0.
    print('\nThe most common month is {}'.format(month_names[common_month - 1]))

    # display the most common day of week
    common_day = df['Day of Week'].mode()[0]
    if not isinstance(common_day, str):
        # Translate the weekday number into a readable name.
        common_day = day_names[common_day]
    print('\nThe most common day is {}'.format(common_day))

    # display the most common start hour
    popular_hour = df['Hour'].mode()[0]
    print('\nThe most common start hour is {}'.format(popular_hour))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station, and
    the start/end pair that occurs most often as a single trip.

    Args:
        df - Pandas DataFrame with string columns 'Start Station' and
            'End Station'
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    common_start_station = df['Start Station'].mode()[0]
    print('The most commonly used start station is: ', common_start_station)

    # display most commonly used end station
    common_end_station = df['End Station'].mode()[0]
    print('The most commonly used end station is: ', common_end_station)

    # display most frequent combination of start station and end station trip
    # The pair must be counted row by row: the most common start and the most
    # common end station on their own need not occur together in any trip.
    trip_pairs = df['Start Station'] + ' & ' + df['End Station']
    freq_stations = trip_pairs.mode()[0]
    print('The most frequent combination of start and end station is:', freq_stations)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Prints the sum and the mean of the 'Trip Duration' column of `df`,
    followed by the time the calculation took.
    """
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration']

    # total travel time
    print('Total trip duration in seconds:\n', durations.sum())
    print()

    # mean travel time
    print('Average trip duration in seconds:\n', durations.mean())

    elapsed = time.time() - start_time
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the count of each 'User Type' value. When `df` has a 'Gender'
    column its value counts are printed too, and when it has a 'Birth Year'
    column the earliest, most recent and most common birth year are printed;
    otherwise a "not available" message is shown for each missing statistic.
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # counts of user types
    print('User_types:\n', df['User Type'].value_counts())
    print()

    # counts of gender (column is optional)
    if 'Gender' not in df.columns:
        print('Gender type not available')
        print()
    else:
        print('Gender Type:\n', df['Gender'].value_counts())

    # earliest, most recent, and most common year of birth (column is optional)
    if 'Birth Year' not in df.columns:
        print('Earliest birth year not available')
        print()
        print('Most recent birth year not available')
        print()
        print('Most common birth year not available')
    else:
        birth_years = df['Birth Year']

        oldest = birth_years.min()
        print()
        print('The earliest birth year is:\n', int(oldest))

        youngest = birth_years.max()
        print('The most recent birth year is:\n', int(youngest))

        typical = birth_years.mode()[0]
        print('The most common year of birth is:\n', int(typical))

    elapsed = time.time() - start_time
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def main():
    """Runs the interactive bikeshare explorer until the user declines to restart.

    Each round asks for the filters, loads the matching data, prints the four
    statistics reports, optionally pages through the raw rows five at a time,
    and finally asks whether to start over.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        view_data = input("Would you like to view 5 rows of individual trip data? Enter yes or no?\n")
        start_loc = 0
        while view_data.strip().lower() == 'yes':
            print(df.iloc[start_loc:start_loc + 5])
            start_loc += 5
            if start_loc >= len(df):
                # Stop here instead of offering (and printing) empty pages.
                print('No more rows to display.')
                break
            view_data = input("Would you like to see the next 5 rows?:\n")

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.strip().lower() != 'yes':
            break


# Run the interactive explorer only when this file is executed as a script.
if __name__ == "__main__":
    main()
    