In [1]:
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 15 11:06:31 2021

@author: Eng. Mohamed Abd-Elhak
"""

import pandas as pd
import numpy as np
import time


# Maps every supported city name to the CSV file that holds its trip data.
city_files_directory = {
    'Chicago': './chicago.csv',
    'New York City': './new_york_city.csv',
    'Washington': './washington.csv',
}

# City names offered to the user, in the order they are listed above.
cities = list(city_files_directory)

# Kinds of filtering the user may choose between.
filters = ['Month', 'Day', 'Not at all']

# Accepted values for the day-of-week filter.
days_filters = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]

# Accepted values for the month filter.
months_filters = [
    'January', 'February', 'March', 'April', 'May', 'June',
]

def get_user_inputs(identifier, options_list):
    """Prompt the Python console user until the input selects exactly one
        element of options_list, then return that element.

     The entered text is stripped of surrounding whitespace and compared,
     case-insensitively, with the beginning of every option. Empty input or
     input matching zero or several options is rejected and the prompt is
     shown again.

     Inputs:
         identifier: is a list with two strings (singular name, plural name)
         that are used in the user input prompt messages

         options_list: is a list with the accepted options by the program for
         the needed variable

     Outputs:
         The selected value from options_list

     Raises:
         EOFError: when the input stream is closed, because no selection can
         ever be made (re-prompting would loop forever)
         KeyboardInterrupt: when the user presses Ctrl-C at the prompt
    """
    print("------------>'Selecting {}'<------------\n".format(identifier[0]))
    while True:
        print("Available {} are {}".format(identifier[1], options_list))
        entered_text = input(
            'Enter the first unique letter/s of your selected option: '
        ).strip().lower()

        # An empty string is a prefix of every option, so it is never a
        # meaningful selection and is rejected explicitly.
        matches = [option for option in options_list
                   if entered_text and option.lower().startswith(entered_text)]

        if len(matches) == 1:
            selected_option = matches[0]
            print('--->> Selected {} is: "{}"\n'.format(identifier[0], selected_option))
            break
        print('      *********Error or not a unique input, choose again*********\n')
    print("_" * 78 + "\n")
    return selected_option

def welcome_mess_print():
    """Show the boxed welcome banner of the module in the Python console"""
    banner = """
 ┌────────────────────────────────────────────────────────────────────────────┐
 │              **U.S. Bike Share Statistical Analysis Module**               │
 │                             ****Welcome!!****                              │
 └────────────────────────────────────────────────────────────────────────────┘
    """
    print(banner)
    
def get_filters():
    """Ask the user for the city and for the optional month or day filter.

     Outputs:
         A (city, month_filter, day_filter) tuple; a filter that was not
         chosen is returned as an empty string
    """
    city = get_user_inputs(['city', 'cities'], cities)
    a_filter = get_user_inputs(['filter option', 'filter options'], filters)

    # Start from "no filtering" and override only the filter that was chosen.
    month_filter = ''
    day_filter = ''
    if a_filter == 'Month':
        month_filter = get_user_inputs(['month filter', 'month filters'], months_filters)
    elif a_filter == 'Day':
        day_filter = get_user_inputs(['day filter', 'day filters'], days_filters)

    return city, month_filter, day_filter
        
def loading_csv_data_file(city_name):
    """Loading the 'City_name.csv' file from HDD, using the path registered
        for the city in city_files_directory.

        Only file-system errors (OSError) and parsing/decoding errors
        (ValueError, which covers the pandas parser errors) are treated as a
        failed load; anything else, such as Ctrl-C, is left to propagate
        instead of being reported as a broken file.

        Inputs: city_name string
        Outputs: a Pandas DataFrame with the loaded data, or "" in case of
                loading errors or of a city that has no registered file
    """
    file_path = city_files_directory.get(city_name)
    if file_path is None:
        # Looking the path up again while reporting the error would raise
        # a second KeyError, so unknown cities are handled up front.
        print("Error: no data file is registered for city '{}'\n".format(city_name))
        return ""

    try:
        return pd.read_csv(file_path)
    except (OSError, ValueError):
        print("""Error in loading '{}' file, try one of the followings:
              1- The file name is the same as it appears in the above path
              2- The file is in the same directory of Python file
              3- The file is not corrupted\n
              """.format(file_path))
        return ""
    
def restarting_program():
    """Ask the user whether to restart or exit the program.

     Any non-empty, case-insensitive prefix of 'restart' or 'exit' is
     accepted. Empty input is rejected, because an empty string is a prefix
     of both words and would otherwise silently mean "restart".

     Outputs:
         True to restart the program, False to exit it. A closed input
         stream or Ctrl-C at the prompt is treated as a request to exit.
    """
    while True:
        try:
            user_input = input("Do you want to restart or exit (R/E): ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # No answer can be obtained any more; asking again would loop
            # forever, so leave the program.
            print("\nProgram exiting...\n")
            return False

        if user_input and 'restart'.startswith(user_input):
            print("\nProgram restarting...\n")
            return True
        if user_input and 'exit'.startswith(user_input):
            print("\nProgram exiting...\n")
            return False
        print('      *********Invalid input, choose again*********\n')

def val_fil_data_file(data_file, month, day):
    """Validating and filtering data_file.

     Rows with missing values in the mandatory columns are removed, the time
     and duration columns are cast to proper dtypes, the helper columns
     's_month', 's_day_of_week' and 's_hour' are derived from 'Start Time',
     and the month and/or day filter is applied. The passed DataFrame itself
     is not modified.

     Inputs:
         data_file: DataFrame with the 'Start Time', 'End Time',
         'Trip Duration', 'Start Station', 'End Station' and 'User Type'
         columns, and optionally 'Gender', 'Birth Year' and 'Unnamed: 0'

         month: month name to keep, or '' for no month filtering

         day: day-of-week name to keep, or '' for no day filtering

     Outputs:
         The validated and filtered DataFrame, or '' when no rows are left
    """
    required_columns = ['Start Time', 'End Time', 'Trip Duration',
                        'Start Station', 'End Station', 'User Type']

    #Removing the first unneeded column if it exists; drop() returns a new
    # DataFrame, so the caller's object is left untouched
    data_file = data_file.drop(columns=['Unnamed: 0'], errors='ignore')

    print('\n>> Number of rows before filtering missing values: {} rows'.format(data_file.shape[0]))

    #Casting the time columns; missing values become NaT and are dropped below
    data_file = data_file.astype({'Start Time': 'datetime64[ns]',
                                  'End Time': 'datetime64[ns]'})

    #Casting 'Birth Year' to object
    #No need to remove NaN values in 'Birth Year' column
    if 'Birth Year' in data_file.columns:
        data_file = data_file.astype({'Birth Year': 'object'})

    #Replacing NaN values in 'Gender' to be counted
    if 'Gender' in data_file.columns:
        data_file = data_file.fillna(value={'Gender': 'Missing Value'})

    #remove any rows with NaN values in all columns except 'Gender', and 'Birth Year'
    data_file = data_file.dropna(axis=0, subset=required_columns)
    print('\n>> Number of rows after filtering missing values: {} rows'.format(data_file.shape[0]))

    #The integer cast must come after dropping NaN rows, because NaN cannot
    # be represented in an int32 column
    data_file = data_file.astype({'Trip Duration': 'int32'})

    #Creating new columns for start time month, day, & hour fields to apply
    # filters on them
    data_file['s_month'] = data_file['Start Time'].dt.month_name()
    data_file['s_day_of_week'] = data_file['Start Time'].dt.day_name()
    data_file['s_hour'] = data_file['Start Time'].dt.hour
    data_file = data_file.astype({'s_hour': 'object'})

    #Filtering data according to user preferences; each filter is applied
    # on its own so that passing both narrows the data by both
    if month != '':
        data_file = data_file[data_file['s_month'] == month]
        print('\n>> Number of rows after filtering values by {} month: {} rows\n'.format(month, data_file.shape[0]))
    if day != '':
        data_file = data_file[data_file['s_day_of_week'] == day]
        print('\n>> Number of rows after filtering values by {}: {} rows\n'.format(day, data_file.shape[0]))

    if data_file.shape[0] == 0:
        print("\n>> your filters returned an empty rows")
        return ''
    return data_file

def statistics_otuputs(data_file, month, day):
    """Printing all the needed statistics from the data_file.

     Inputs:
         data_file: validated DataFrame that holds the 's_month',
         's_day_of_week', 's_hour', 'Start Station', 'End Station',
         'Trip Duration' and 'User Type' columns, and optionally 'Gender'
         and 'Birth Year'

         month: the month filter that was applied, or '' for none

         day: the day filter that was applied, or '' for none

     Outputs:
         None, the statistics are printed in the Python console
    """

    #Answering first Question, 'Popular times of travel'
    print('1- Popular times of travel:\n')
    if month != '' and day != '':
        print("------------>Used filters month: {}, day: {}<------------".format(month, day))
    elif day != '':
        print("------------>Used filter day: {}<------------".format(day))
    elif month != '':
        print("------------>Used filter month: {}<------------".format(month))
    else:
        print("------------>No used filter<------------")
    #A most common month/day is only meaningful when the data was not
    # already filtered down to a single month/day
    if month == '':
        print("\nMost common month: {}".format(data_file['s_month'].mode().values))
    if day == '':
        print("\nMost common day of the week: {}".format(data_file['s_day_of_week'].mode().values))
    print("\nMost popular hour/s of day: {} O'clock".format(data_file['s_hour'].mode().values))
    print('_'*40)

    #Answering Second Question, 'Popular stations and trip'
    trips = 'from \'' + data_file['Start Station'] + '\' to \'' + data_file['End Station'] + '\''
    print('\n\n2- Popular stations and trip:\n')
    print("\nMost common Start Station: {}".format(data_file['Start Station'].mode().values))
    print("\nMost common End Station: {}".format(data_file['End Station'].mode().values))
    print("\nMost common Trip: {}".format(trips.mode().values))
    print('_'*40)

    #Answering Third Question, 'Trip duration'
    print('\n\n3- Trip duration:\n')
    print("\nTotal travel time: {} seconds".format(data_file['Trip Duration'].sum()))
    print("\nAverage travel time: {} seconds".format(np.around(data_file['Trip Duration'].mean(),decimals=2)))
    print('_'*40)

    #Answering Fourth Question, 'User info'
    print('\n\n4- User information:\n')
    print("\nCounts of each user type:\n")
    print(data_file['User Type'].value_counts())
    if 'Gender' in data_file.columns:
        print("\nCounts of each gender:\n")
        print(data_file['Gender'].value_counts())
    if 'Birth Year' in data_file.columns:
        birth_years = data_file['Birth Year'].dropna()
        if birth_years.empty:
            #min()/max() of an all-missing column is NaN, which int()
            # cannot convert, so this case is reported instead
            print("\nNo 'Birth Year' values are available in the selected data")
        else:
            print("\nEarliest year of birth: {}".format(int(birth_years.min())))
            print("\nMost Recent year of birth: {}".format(int(birth_years.max())))
            print("\nMost common year of birth: {}".format(birth_years.mode().values.astype(int)))
        print("\nCounts of Missed Data in 'Birth Year' column: {}".format(data_file['Birth Year'].isnull().sum()))
    print('_'*40)
    
def Printing(data_file):
    """Print the rows of data_file in the Python console, 5 rows at a time.

     Before every group of rows the user is asked whether to continue. Any
     non-empty, case-insensitive prefix of 'yes' or 'no' is accepted. The
     function returns as soon as the user answers no, the last row has been
     printed, or no more input can be read (closed input stream or Ctrl-C).

     Inputs:
         data_file: the DataFrame to be printed

     Outputs:
         None
    """
    chunk_size = 5
    total_rows = data_file.shape[0]
    next_row = 0    #position of the first row that was not printed yet
    input_msg = "Do you want to print the first 5 lines of data file (Yes/No): "
    print("\nPrinting Data File:\n")
    while next_row < total_rows:
        try:
            user_input = input(input_msg).strip().lower()
        except (EOFError, KeyboardInterrupt):
            #No answer can be read any more, asking again would never end
            break

        if user_input and 'yes'.startswith(user_input):
            #iloc clips the slice end, so the last group may be shorter
            print(data_file.iloc[next_row:next_row + chunk_size])
            next_row += chunk_size
            input_msg = "Do you want to print the next 5 lines of data file (Yes/No): "
        elif user_input and 'no'.startswith(user_input):
            break
        else:
            print('      *********Invalid input, choose again*********\n')
            
            

def main():
    """Run the interactive analysis until the user chooses to exit.

     Every round prints the welcome message, asks for the city and filters,
     loads and validates the city data, prints the statistics, offers to
     print the raw rows, and finally asks whether to restart or exit.
     Unexpected errors are reported with their details and followed by the
     same restart/exit question; Ctrl-C or a closed input stream ends the
     program.
    """
    while True:
        try:
            #print welcome message
            welcome_mess_print()

            #get user prefered data_file filters
            city, month_filter, day_filter = get_filters()

            start_calc_time = time.time()

            #loading city_data_file.csv to our DataFrame; a string result
            #means that the loading failed
            data_file = loading_csv_data_file(city)

            if not isinstance(data_file, str):
                #Validate (deal with NaN values), and apply user filters to
                #our DataFrame; a string result means that no rows are left
                data_file = val_fil_data_file(data_file, month_filter, day_filter)

            if not isinstance(data_file, str):
                print("------------>'Statistics data for {}'<------------\n".format(city))
                statistics_otuputs(data_file, month_filter, day_filter)

                print("\nAll Calculations from loading data files to statistical analysis tooks {} seconds.".format(np.around((time.time() - start_calc_time),decimals=3)))
                print("_"*78+ "\n")

                #printing data lines 5 by 5
                Printing(data_file)
        except (KeyboardInterrupt, EOFError):
            #The user interrupted the program or no more input can be read
            print("\nProgram exiting...\n")
            break
        except Exception as error:
            print("\n      *********Exception Error*********\n")
            #Show what went wrong instead of hiding it
            print("      {}: {}\n".format(type(error).__name__, error))

        #checking if the user want to exit or restart program again
        if not restarting_program():
            break

# Start the interactive program only when the file is executed directly.
if __name__ == "__main__":
    main()


 ┌────────────────────────────────────────────────────────────────────────────┐
 │              **U.S. Bike Share Statistical Analysis Module**               │
 │                             ****Welcome!!****                              │
 └────────────────────────────────────────────────────────────────────────────┘
    
------------>'Selecting city'<------------

Available cities are ['Chicago', 'New York City', 'Washington']
Enter the first unique letter/s of your selected option: w
--->> Selected city is: "Washington"

______________________________________________________________________________

------------>'Selecting filter option'<------------

Available filter options are ['Month', 'Day', 'Not at all']
Enter the first unique letter/s of your selected option: n
--->> Selected filter option is: "Not at all"

______________________________________________________________________________


>> Number of rows before filtering missing values: 300000 rows

>> Number of rows after f


>> Number of rows before filtering missing values: 300000 rows

>> Number of rows after filtering missing values: 300000 rows

>> Number of rows after filtering values by April month: 51659 rows

------------>'Statistics data for Chicago'<------------

1- Popular times of travel:

------------>Used filter month: April<------------

Most common day of the week: ['Saturday']

Most popular hour/s of day: [17] O'clock
________________________________________


2- Popular stations and trip:


Most common Start Station: ['Streeter Dr & Grand Ave']

Most common End Station: ['Streeter Dr & Grand Ave']

Most common Trip: ["from 'Lake Shore Dr & Monroe St' to 'Streeter Dr & Grand Ave'"]
________________________________________


3- Trip duration:


Total travel time: 50699234 seconds

Average travel time: 981.42 seconds
________________________________________


4- User information:


Counts of each user type:

Subscriber    39829
Customer      11830
Name: User Type, dtype: int64

Counts of ea