In [None]:
import numpy as np
import pandas as pd
import zipfile
import requests

## Brief
Use data from the last 12 months of Cyclistic bike-share activity to understand how casual riders and annual members use Cyclistic bikes differently. These insights will feed data visualizations for the marketing team and executives, informing a marketing strategy aimed at converting casual riders into annual members. I am also interested in combining this dataset with weather data to see how ridership correlates with weather conditions.

## Data Sources
Cyclistic is a fictional company created for the purposes of this case study. The datasets are real bike-share data made publicly available by [Motivate International Inc.](https://divvy-tripdata.s3.amazonaws.com/index.html) under this [license](https://divvybikes.com/data-license-agreement).

Weather data was aggregated and downloaded from *(TODO: name and link the weather data source)*.

## Process
### Download and unzip files
Rather than downloading, unzipping, and moving each individual file, I'll use the Python `requests` and `zipfile` modules to download and unzip the files within the project directory. 

In [None]:
# Monthly trip-data archives to analyse, newest first (April 2023 back to May 2022).
# Each entry is the YYYYMM prefix of a "<YYYYMM>-divvy-tripdata" file.
files = ["202304", "202303", "202302", "202301", "202212", "202211", "202210", "202209", "202208", "202207", "202206", "202205"]

# Set to True to (re)download and unzip the archives into the working directory.
# Left False by default so that "Restart Kernel -> Run All" does not hit the
# network once the data is already on disk.
DOWNLOAD_DATA = False

if DOWNLOAD_DATA:
    for file in files:
        url = "https://divvy-tripdata.s3.amazonaws.com/" + file + "-divvy-tripdata.zip"
        r = requests.get(url, timeout=60)
        # Fail loudly on an HTTP error instead of saving the error body as a
        # .zip, which would only surface later as a confusing BadZipFile.
        r.raise_for_status()
        fn = file + ".zip"
        with open(fn, 'wb') as f:
            f.write(r.content)
        with zipfile.ZipFile(fn, 'r') as fl:    # unzip next to the downloaded archive
            fl.extractall()

In [6]:
# ** Optional one-time cleanup: uncomment the three `!` lines below and run once. **
# A leading `!` makes IPython/Jupyter hand the rest of the line to the system shell
# (it is a notebook feature, not Python syntax). The commands are POSIX shell
# commands, so they need a Unix-like environment.
# The load cell further down reads the CSVs from the 'data/' subdirectory.
# !mkdir data # << creates new subdirectory called 'data'
# !mv *.csv data/ # << moves all .csv files into the new data directory
# !rm *.zip # << deletes all of the .zip files

In [7]:
# Load every monthly trip CSV from the data/ folder into a dict of DataFrames,
# keyed by the same YYYYMM label used in `files`.
df = {}
for file in files:
    csv_path = f"data/{file}-divvy-tripdata.csv"
    df[file] = pd.read_csv(csv_path)

In [10]:
# Report (rows, columns) for each monthly frame, in the same order as `files`.
for file in files:
    monthly_shape = df[file].shape
    print(monthly_shape)

(426590, 13)
(258678, 13)
(190445, 13)
(190301, 13)
(181806, 13)
(337735, 13)
(558685, 13)
(701339, 13)
(785932, 13)
(823488, 13)
(769204, 13)
(634858, 13)


In [9]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() only adds a stray "None" line
# after each frame's summary.
for file in files:
    df[file].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 426590 entries, 0 to 426589
Data columns (total 13 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   ride_id             426590 non-null  object 
 1   rideable_type       426590 non-null  object 
 2   started_at          426590 non-null  object 
 3   ended_at            426590 non-null  object 
 4   start_station_name  362776 non-null  object 
 5   start_station_id    362776 non-null  object 
 6   end_station_name    357960 non-null  object 
 7   end_station_id      357960 non-null  object 
 8   start_lat           426590 non-null  float64
 9   start_lng           426590 non-null  float64
 10  end_lat             426155 non-null  float64
 11  end_lng             426155 non-null  float64
 12  member_casual       426590 non-null  object 
dtypes: float64(4), object(9)
memory usage: 42.3+ MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 258678 entries, 0 to 258677
Dat