# Data Analysis with Pandas
## Cycle Share Data
### Logan Jones | Jan 19, 2021

In [1]:
import math

import pandas as pd

In [2]:
# Load the three Cycle Share tables from CSV files in the working directory.
df_station = pd.read_csv("station.csv")

# Parse "Date" while reading so the weather rows can be grouped by calendar month later.
df_weather = pd.read_csv("weather.csv", parse_dates=["Date"])

# trip.csv contains a row with the wrong number of fields. `error_bad_lines` was
# deprecated in pandas 1.3 and removed in 2.0; `on_bad_lines="warn"` is its
# replacement and keeps the same behaviour: skip the bad row and report it.
df_trip = pd.read_csv("trip.csv", on_bad_lines="warn")

b'Skipping line 50794: expected 12 fields, saw 20\n'


### 1. What is the average trip duration for a borrowed bicycle?


In [3]:
# Arithmetic mean of the "tripduration" column over every loaded trip.
average_trip = df_trip.tripduration.mean()
average_trip

1178.2956753399776

### 2. What’s the most common age of a bicycle-sharer?

In [4]:
# Year that ages are measured against, named instead of buried in the expression.
REFERENCE_YEAR = 2021

# Series.mode() returns *all* tied values, sorted ascending. Taking the first entry
# stays well-defined when several birth years tie, whereas .item() raises ValueError
# for anything but a single-element result. On a tie this picks the earliest year.
most_common_birthyear = df_trip["birthyear"].mode().iloc[0]

# float() keeps the result a plain Python float, as the .item()-based version produced.
most_common_age = float(REFERENCE_YEAR - most_common_birthyear)
most_common_age

34.0

### 3. Given all the weather data here, find the average precipitation per month, and the median precipitation.

In [5]:
# Daily precipitation indexed by date. set_index() returns a new frame, so the
# column subset taken from df_weather is not mutated in place.
df = df_weather[["Date", "Precipitation_In"]].set_index("Date")

# Group the daily readings by calendar month (same month of different years is
# pooled together). Build the grouping once and reuse it for both statistics.
precip_by_month = df.groupby(df.index.month)

# Mean of the per-month means, and median of the per-month medians.
avg_precip_month = precip_by_month.mean().mean().item()
median_precip_month = precip_by_month.median().median().item()

print(avg_precip_month)
print(median_precip_month)

0.1039669873608753
0.01


### 4. What’s the average number of bikes at a given bike station?

In [6]:
# Mean of "current_dockcount" across all stations; the dock count is what this
# notebook uses as the per-station bike figure.
avg_bikes = df_station.current_dockcount.mean()
avg_bikes

16.517241379310345

### 5. When a bike station is modified, is it more likely that it’ll lose bikes or gain bikes? How do you know?

In [7]:
# Only stations with a modification date have a "before" and an "after" to compare.
modified_stations = df_station[df_station["modification_date"].notna()]

# Per-station change in dock count: positive means the station gained docks,
# negative means it lost docks, zero means the count did not change.
# Comparing install_dockcount between modified and unmodified stations (the
# previous approach) does not answer the question, because it never looks at
# what a station had before versus after its modification.
dock_change = modified_stations["current_dockcount"] - modified_stations["install_dockcount"]

stations_gained = int((dock_change > 0).sum())
stations_lost = int((dock_change < 0).sum())

# True when more modified stations gained docks than lost them.
# NOTE(review): the recorded output and the expected value in the test cell came
# from the old comparison; re-run the notebook to confirm they still hold.
modification_gain = stations_gained > stations_lost
modification_gain

False

### 6. Come up with 3 more questions that can be answered with this data set.
#### 6a. What is the average precipitation per day?

In [8]:
# Mean of the "Precipitation_In" column over every weather row (one value per row).
df_weather["Precipitation_In"].mean()

0.10506531204644412

#### 6b. How many bike stations have been modified in total?

In [9]:
# A station counts as modified when its modification_date is present; count the
# non-null station_id values among those rows.
df_station.loc[df_station["modification_date"].notna(), "station_id"].count()

17

#### 6c. Which bike is used most often?

In [10]:
# value_counts() orders bike ids by number of trips, most frequent first, so the
# leading index label is the most-used bike.
rides_per_bike = df_trip["bikeid"].value_counts()
rides_per_bike.index[0]

'SEA00281'

## Tests

In [11]:
def test():
    """Check the notebook's computed answers against their expected values.

    Reads the module-level results produced by the earlier cells
    (average_trip, most_common_age, avg_precip_month, median_precip_month,
    avg_bikes, modification_gain) and prints "Success!!!" when all checks pass.

    Raises:
        AssertionError: if any answer does not match its expected value.
    """

    def assert_equal(actual, expected):
        # Exact comparison, used for integer-valued and boolean answers.
        assert actual == expected, f"Expected {expected} but got {actual}"

    def assert_close(actual, expected):
        # Floating-point answers are compared with a relative tolerance: exact
        # equality on computed floats can break on last-bit differences between
        # pandas/numpy versions even when the analysis is unchanged.
        assert math.isclose(actual, expected, rel_tol=1e-9), f"Expected {expected} but got {actual}"

    assert_close(average_trip, 1178.2956753399776)
    assert_equal(most_common_age, 34)
    assert_close(avg_precip_month, 0.1039669873608753)
    assert_close(median_precip_month, 0.01)
    assert_close(avg_bikes, 16.517241379310345)
    assert_equal(modification_gain, False)

    print("Success!!!")

test()

Success!!!
