# US Bike Share 

## Import modules

In [39]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

## Read CSV file
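* `read_csv()` reads the data from a CSV file into a DataFrame.
* Pass the file path to `read_csv()` carefully: a relative path is resolved against the notebook's working directory, and backslashes inside an ordinary Python string are treated as escape characters.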

In [40]:
# Load the 2016 trip data of each city into its own DataFrame.
# The paths are relative to the notebook's working directory. Forward slashes
# are used on purpose: they work on Windows, macOS and Linux, whereas a
# backslash inside a plain string literal starts an escape sequence
# ('\N' is a malformed escape and a SyntaxError on Python 3).
nyc_df = pd.read_csv('./Data/NYC-CitiBike-2016.csv')
chicago_df = pd.read_csv('./Data/Chicago-Divvy-2016.csv')
washington_df = pd.read_csv('./Data/Washington-CapitalBikeshare-2016.csv')

### Check the data

* Use `head()` to see the data in the data frame
* Pass a number to `head()` (for example `head(10)`) to change how many rows are shown; use `tail()` or `sample()` to look at other parts of the data.

In [41]:
# Preview the first five NYC trips (five is also head()'s default).
nyc_df.head(n=5)

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,bikeid,usertype,birth year,gender
0,839,1/1/2016 00:09:55,1/1/2016 00:23:54,532,S 5 Pl & S 4 St,40.710451,-73.960876,401,Allen St & Rivington St,40.720196,-73.989978,17109,Customer,,0
1,686,1/1/2016 00:21:17,1/1/2016 00:32:44,3143,5 Ave & E 78 St,40.776829,-73.963888,3132,E 59 St & Madison Ave,40.763505,-73.971092,23514,Subscriber,1960.0,1
2,315,1/1/2016 00:33:11,1/1/2016 00:38:26,3164,Columbus Ave & W 72 St,40.777057,-73.978985,3178,Riverside Dr & W 78 St,40.784145,-73.983625,14536,Subscriber,1971.0,1
3,739,1/1/2016 00:40:51,1/1/2016 00:53:11,223,W 13 St & 7 Ave,40.737815,-73.999947,276,Duane St & Greenwich St,40.717488,-74.010455,24062,Subscriber,1969.0,1
4,1253,1/1/2016 00:44:16,1/1/2016 01:05:09,484,W 44 St & 5 Ave,40.755003,-73.980144,151,Cleveland Pl & Spring St,40.722104,-73.997249,16380,Customer,,0


In [42]:
# Preview the first five Chicago trips (five is also head()'s default).
chicago_df.head(n=5)

Unnamed: 0,trip_id,starttime,stoptime,bikeid,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear
0,9080545,3/31/2016 23:30,3/31/2016 23:46,2295,926,156,Clark St & Wellington Ave,166,Ashland Ave & Wrightwood Ave,Subscriber,Male,1990.0
1,9080521,3/31/2016 22:59,3/31/2016 23:02,3439,198,259,California Ave & Francis Pl,276,California Ave & North Ave,Subscriber,Male,1974.0
2,9080479,3/31/2016 22:24,3/31/2016 22:26,4337,124,344,Ravenswood Ave & Lawrence Ave,242,Damen Ave & Leland Ave,Subscriber,Female,1992.0
3,9080475,3/31/2016 22:22,3/31/2016 22:41,3760,1181,318,Southport Ave & Irving Park Rd,458,Broadway & Thorndale Ave,Subscriber,Female,1979.0
4,9080443,3/31/2016 22:08,3/31/2016 22:19,1270,656,345,Lake Park Ave & 56th St,426,Ellis Ave & 60th St,Subscriber,Female,1997.0


In [43]:
# Preview the first five Washington trips (five is also head()'s default).
washington_df.head(n=5)

Unnamed: 0,Duration (ms),Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member Type
0,427387,3/31/2016 22:57,3/31/2016 23:04,31602,Park Rd & Holmead Pl NW,31207,Georgia Ave and Fairmont St NW,W20842,Registered
1,587551,3/31/2016 22:46,3/31/2016 22:56,31105,14th & Harvard St NW,31266,11th & M St NW,W21385,Registered
2,397979,3/31/2016 22:46,3/31/2016 22:53,31634,3rd & Tingey St SE,31108,4th & M St SW,W00773,Registered
3,444282,3/31/2016 22:42,3/31/2016 22:50,31200,Massachusetts Ave & Dupont Circle NW,31201,15th & P St NW,W21397,Registered
4,780875,3/31/2016 22:21,3/31/2016 22:34,31203,14th & Rhode Island Ave NW,31604,3rd & H St NW,W00213,Registered


### Checking the data types of every column.
* Use `dtypes` to check the data types of the DataFrame.

In [44]:
# List the dtype of every NYC column. In the output below, starttime and
# stoptime show up with the generic object dtype rather than a datetime dtype.
nyc_df.dtypes

tripduration                 int64
starttime                   object
stoptime                    object
start station id             int64
start station name          object
start station latitude     float64
start station longitude    float64
end station id               int64
end station name            object
end station latitude       float64
end station longitude      float64
bikeid                       int64
usertype                    object
birth year                 float64
gender                       int64
dtype: object

## Parse the data

### Trip Duration

* Convert the trip duration into minutes. Note: the cities record trip duration in different units (seconds for NYC and Chicago, milliseconds for Washington).
* Use `apply()` to convert each duration into minutes.

In [45]:
# Number of raw duration units that make up one minute, per supported city:
# NYC and Chicago record seconds, Washington records milliseconds.
_UNITS_PER_MINUTE = {
    'nyc': 60,
    'chicago': 60,
    'washington': 60000,
}


def duration_in_minutes(data_point, city):
    """
    Convert one raw trip duration into minutes.

    Parameters
    ----------
    data_point : int, float or numeric str
        Raw duration in the city's own unit; it is passed through float().
    city : str
        'nyc' or 'chicago' (durations in seconds) or 'washington'
        (durations in milliseconds).

    Returns
    -------
    float
        The duration expressed in minutes.

    Raises
    ------
    ValueError
        If `city` is not one of the supported cities, or if `data_point`
        cannot be converted to a float.
    """
    # Reject unknown cities explicitly: silently falling back to the
    # millisecond divisor would make a misspelt city name wrong by a
    # factor of 1000 without any visible error.
    if city not in _UNITS_PER_MINUTE:
        raise ValueError(
            "Unknown city {!r}; expected one of {}".format(
                city, sorted(_UNITS_PER_MINUTE)
            )
        )
    # float() first so the division is a true division for int and str input.
    return float(data_point) / _UNITS_PER_MINUTE[city]

In [46]:
# Run every raw duration through duration_in_minutes() with apply(); the city
# name is forwarded to the function as a keyword argument and each result is
# a float number of minutes.
#
# NOTE: each column is overwritten in place, so re-running this cell converts
# the already-converted values a second time. Washington's column keeps its
# original "Duration (ms)" label even though it holds minutes afterwards.

# NYC
nyc_df['tripduration'] = nyc_df['tripduration'].apply(duration_in_minutes, city='nyc')

# Chicago
chicago_df['tripduration'] = chicago_df['tripduration'].apply(duration_in_minutes, city='chicago')

# Washington
washington_df["Duration (ms)"] = washington_df["Duration (ms)"].apply(duration_in_minutes, city='washington')

* You can check the info of the data frame by `df.info()`

This prints a summary of the data frame, such as the number of **rows** and **columns**, each column's data type and non-null count, and the memory usage.