# Bike Share Data:
* The Datasets
Randomly selected data for the first six months of 2017 are provided for all three cities. All three of the data files contain the same core six (6) columns:
•	Start Time (e.g., 2017-01-01 00:07:57)
•	End Time (e.g., 2017-01-01 00:20:53)
•	Trip Duration (in seconds - e.g., 776)
•	Start Station (e.g., Broadway & Barry Ave)
•	End Station (e.g., Sedgwick St & North Ave)
•	User Type (Subscriber or Customer)
The Chicago and New York City files also have the following two columns:
•	Gender
•	Birth Year


1- Statistics Computed:
    #1 Popular times of travel (i.e., occurs most often in the start time)
•	most common month
•	most common day of week
•	most common hour of day
#2 Popular stations and trip
•	most common start station
•	most common end station
•	most common trip from start to end (i.e., most frequent combination of start station and end station)
#3 Trip duration
•	total travel time
•	average travel time
#4 User info
•	counts of each user type
•	counts of each gender (only available for NYC and Chicago)
•	earliest, most recent, most common year of birth (only available for NYC and Chicago)


The Files:
dataset files 
•	chicago.csv
•	new_york_city.csv
•	washington.csv


# 3: Load and Filter the Dataset

1.	Load the dataset for the specified city. Index the global CITY_DATA dictionary object to get the corresponding filename for the given city name.
2.	Create month and day_of_week columns. Convert the "Start Time" column to datetime and extract the month number and weekday name into separate columns using the pandas datetime accessor (`.dt`).
3.	Filter by month. Since the month parameter is given as the name of the month, you'll need to first convert this to the corresponding month number. Then, select rows of the dataframe that have the specified month and reassign this as the new dataframe.
4.	Filter by day of week. Select rows of the dataframe that have the specified day of week and reassign this as the new dataframe. (Note: Capitalize the day parameter with the title() method to match the title case used in the day_of_week column!)


# the project

In [2]:
import time
import pandas as pd
import numpy as np

In [3]:
# Maps each supported city name (lower case) to the CSV file holding its data.
city_dict = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Valid month filters. 'all' sits at index 0 so that a month's position in the
# list equals its calendar number (january -> 1 ... june -> 6).
month_list = ['all', 'january', 'february', 'march', 'april', 'may', 'june']

# Valid day-of-week filters; 'all' means no day filter. Every weekday must be
# listed here, otherwise the user cannot select it.
day_list = ['all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday',
            'saturday', 'sunday']

###### get_filters function:
Asks the user to specify a city, month, and day to analyze.

How the input is collected in a loop:
- keep a collection (dictionary or list) of the valid input values
- user_input = start with an empty string to receive the input
- loop while user_input.lower() is not one of the valid values
- user_input = input('') reads the next answer
- if user_input.lower() is a valid value, store the choice and the loop ends; otherwise print an error message
- the loop continues until a valid value is entered


In [None]:
def _prompt_until_valid(ask, valid_choices):
    """Call ask() repeatedly until the answer is one of valid_choices.

    Args:
        ask - zero-argument callable that prompts the user and returns the raw answer
        valid_choices - container of accepted lower-case answers
    Returns:
        (str) the accepted answer, stripped of surrounding whitespace and lower-cased
    """
    while True:
        # Strip so that an accidental leading/trailing space is not rejected.
        answer = ask().strip().lower()
        if answer in valid_choices:
            return answer
        print("Error, please select a correct option from above\n")


def _ask_with_options(question, options):
    """Print the question followed by one option per line and read the answer."""
    print(question)
    for option in options:
        print('- ', option)
    return input("Enter your choice: ")


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until a valid answer is given; answers are
    matched case-insensitively and surrounding whitespace is ignored.

    Returns:
        (str) city - CSV file name of the chosen city, looked up in city_dict
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # get user input for city (chicago, new york city, washington)
    city_name = _prompt_until_valid(
        lambda: input("\nWhoud you like to analyse data for: \n 1- Chicago \n 2- New york city\n 3- Washington\n Enter Your choise: "),
        city_dict,
    )
    city = city_dict[city_name]

    # get user input for month (all, january, february, ... , june)
    month = _prompt_until_valid(
        lambda: _ask_with_options('\nWhat is the name of the month to filter data?', month_list),
        month_list,
    )

    # get user input for day of week (all, monday, tuesday, ... sunday)
    day = _prompt_until_valid(
        lambda: _ask_with_options('\nWhat is the name of the day to filter data?', day_list),
        day_list,
    )

    print('-'*40)
    return city, month, day

### Load data Function:
Loads data for the specified city and filters by month and day if applicable.
#### Tricks in it:
1- convert the 'Start Time' column from object to datetime using the pd.to_datetime function

2- make a new 'month' column from the 'Start Time' column with .dt.month

3- make a new 'day_of_week' column from the 'Start Time' column holding the weekday name (.dt.day_name() in pandas 0.23+; the older .dt.weekday_name was removed in pandas 1.0)

4- make a new 'hour' column from the 'Start Time' column with .dt.hour

#### Apply the filters to the data:
1- build a filter from the month and day chosen by the user

2- apply the month filter only when the user's choice is not 'all' (condition: != 'all')

3- get the month as a number from month_list with month_list.index(month)

4- use the loc indexer with the condition df['month'] == month to select the matching rows, then
do the same for the day filter



In [None]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Besides the columns read from the file, the returned frame carries three
    columns derived from "Start Time": 'month' (number), 'day_of_week'
    (weekday name, e.g. "Monday") and 'hour'.

    Args:
        (str) city - path of the CSV file to read (get_filters() returns the
                     file name taken from city_dict)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    df = pd.read_csv(city)

    # convert start time from object to datetime so the .dt accessor is usable
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    start_times = df['Start Time'].dt
    df['month'] = start_times.month
    # .dt.weekday_name was removed in pandas 1.0; day_name() is its replacement
    df['day_of_week'] = start_times.day_name()
    df['hour'] = start_times.hour

    # filter by month if applicable
    if month != 'all':
        # 'all' occupies index 0 of month_list, so the index is the month number
        month_number = month_list.index(month)
        df = df.loc[df['month'] == month_number]

    # filter by day of week if applicable
    if day != 'all':
        # day names in the column are title-cased ("Monday")
        df = df.loc[df['day_of_week'] == day.title()]

    return df


### Time statistics:
Displays statistics on the most frequent times of travel.
#### Trick notes:
1- mode()[0]: apply it to a column to find its most common value.

2- groupby method: apply it to the df to find the most common combination of two columns


In [None]:
def time_stats(df,month_list=month_list):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour found in the
    "Start Time" column, followed by the elapsed computation time.

    The frame passed in is not modified: the statistics are computed on a
    local datetime Series, which also avoids pandas' SettingWithCopyWarning
    when df is a filtered slice.

    Args:
        df - Pandas DataFrame with a "Start Time" column
        month_list - list of month names indexed by month number (index 0 is
                     the 'all' placeholder); used to print the month's name
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()
    start_times = pd.to_datetime(df['Start Time'])

    # display the most common month
    popular_month_index = int(start_times.dt.month.mode()[0])
    print('Most Frequent common month:', month_list[popular_month_index])

    # display the most common day of week
    # (.dt.weekday_name was removed in pandas 1.0; day_name() replaces it)
    popular_day = start_times.dt.day_name().mode()[0]
    print('Most Frequent day of week:', popular_day)

    # display the most common start hour
    popular_hour = start_times.dt.hour.mode()[0]
    print('Most Frequent Start Hour:', popular_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


### Station statistics:
Displays statistics on the most popular stations and trip

#### Trick notes:
1- apply groupby to the df to find the most common combination of two columns

   df.groupby(['Start Station', 'End Station']).size().idxmax()
   
2- by mode method:

(df['Start Station'] + "," + df['End Station']).mode()[0]

In [None]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and the
    most frequent (start station, end station) pair, then the elapsed time.

    Args:
        df - Pandas DataFrame with "Start Station" and "End Station" columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    began = time.time()

    # One report line per station column: (message template, column name).
    reports = (
        ('most commonly used start station: {}', 'Start Station'),
        ('most commonly used end station: {}', 'End Station'),
    )
    for template, column in reports:
        print(template.format(df[column].mode()[0]))

    # Count the trips per (start, end) pair and pick the pair seen most often.
    trips_per_pair = df.groupby(['Start Station', 'End Station']).size()
    busiest_pair = trips_per_pair.idxmax()
    print("The most frequent combination of start station and end station trip is : " + str(busiest_pair))

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

### user_stats function

In [None]:
def user_stats(df,city):
    """Displays statistics on bikeshare users.

    Always prints the count of each user type. Gender counts and birth-year
    statistics are printed only when the corresponding columns exist in df
    (the file description says Washington lacks them). The timing footer is
    printed for every city.

    Args:
        df - Pandas DataFrame with a "User Type" column and, optionally,
             "Gender" and "Birth Year" columns
        city - city file name; kept for backward compatibility, the available
               columns now decide which statistics are shown
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print('The count of user types',df['User Type'].value_counts())

    # Display counts of gender
    if 'Gender' in df.columns:
        print(df['Gender'].value_counts())

    # Display earliest, most recent, and most common year of birth
    if 'Birth Year' in df.columns:
        # Missing values would make the int() conversions below fail.
        birth_years = df['Birth Year'].dropna()
        if not birth_years.empty:
            # The column is read as float; cast so years print as 1980, not 1980.0
            earliest = int(birth_years.min())
            recent = int(birth_years.max())
            most_common = int(birth_years.mode()[0])

            print('Earlist year of birth is: {}'.format(earliest))
            print('most recent year of birth is: {}'.format(recent))
            print('most common year of birth is: {}'.format(most_common))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

### display_raw_data

In [None]:
def display_raw_data(df):
    """Displays the raw data five rows at a time.

    Prints the first five rows, then keeps asking whether to show the next
    five. Stops when the user answers anything other than "yes" or when there
    are no rows left to show.

    Input:
        (DataFrame) df - Pandas DataFrame containing city data filtered by month and day
    Return:
        None; the rows are printed
    """
    page_size = 5
    print(df.head())

    # Index of the first row not shown yet.
    offset = page_size
    while offset < len(df):
        view_raw_data = input('\nwould you like to view next five row of raw data? Enter: yes | no.\n')
        if view_raw_data.strip().lower() != 'yes':
            return
        print(df.iloc[offset:offset + page_size])
        offset += page_size

    # Past the last row there is nothing left to page through.
    print('\nNo more raw data to display.')

## main

In [None]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    main() calls this function, but no definition of it exists anywhere in the
    file, so running the script ended in a NameError. It reports the two
    figures listed under "Trip duration" in the project description.

    Args:
        df - Pandas DataFrame with a "Trip Duration" column (in seconds)
    """
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    total_travel_time = df['Trip Duration'].sum()
    print('Total travel time: {} seconds'.format(total_travel_time))

    # display mean travel time
    mean_travel_time = df['Trip Duration'].mean()
    print('Average travel time: {} seconds'.format(mean_travel_time))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def main():
    """Runs the interactive loop: ask for filters, print statistics, repeat.

    Each round asks for the city, month and day, loads the filtered data,
    prints the time, station, trip-duration and user statistics, offers to
    page through the raw data, and finally asks whether to start over.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        if df.empty:
            # The statistics rely on mode()[0] / idxmax(), which fail on an
            # empty frame, so skip them when the filters match no trips.
            print('\nNo trips match the selected filters.')
        else:
            time_stats(df)
            station_stats(df)
            trip_duration_stats(df)
            user_stats(df,city)

            # Asked once; display_raw_data handles paging through further rows.
            view_raw_data = input('\nwould you like to view first 5 row of data? Enter: yes | no.\n')
            if view_raw_data.lower() == 'yes':
                display_raw_data(df)

        restart = input('\nWould you like to restart? Enter: yes | no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
    main()
