# Data Analysis of the Ford GoBike's trip data

In [1]:
import requests
import os, sys
import re
import pandas as pd
import numpy as np
import zipfile
import json
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [5]:
# Directory that downloaded archives are written to and later read from.
DOWNLOADS_DIR = 'downloads'
# Default target directory for files extracted from the archives.
DATA_DIR = 'data'
# Directory that downloaded images are written to.
IMAGES_DIR = 'images'

def ensure_dir(file_path=DOWNLOADS_DIR):
    """
    Ensure directory exists or create it (including missing parents).
    :param file_path: directory path, defaults to DOWNLOADS_DIR
    :return: None
    :raises FileExistsError: if file_path exists but is not a directory
    """
    # exist_ok=True makes this a single call, avoiding the race between a
    # separate existence check and the directory creation.
    os.makedirs(file_path, exist_ok=True)

        
def download(*urls):
    """
    Download files from the provided URLs into DOWNLOADS_DIR.

    The local file name is the last path segment of the URL with '-'
    replaced by '_'. URLs whose target file already exists are skipped.
    A '.' is written to stdout for every file actually downloaded and a
    newline once all URLs have been processed.
    :param urls: variable number of URL
    :return: None
    :raises requests.HTTPError: if the server responds with an error status
    """
    ensure_dir()
    for url in urls:
        # URLs always use '/' as separator; os.path.sep is '\\' on Windows
        # and would leave the whole URL as the "file name" there.
        file_name = url.rsplit('/', 1)[-1].replace('-', '_')
        url_file = os.path.join(DOWNLOADS_DIR, file_name)
        if os.path.exists(url_file):
            continue
        response = requests.get(url, allow_redirects=True)
        # Fail loudly rather than saving an error page under the archive name,
        # which would then be treated as "already downloaded" on the next run.
        response.raise_for_status()
        with open(url_file, 'wb') as handle:
            handle.write(response.content)
        sys.stdout.write('.')
    sys.stdout.write('\n')


def download_img(name, url):
    """
    Download image from the provided URL into IMAGES_DIR.

    The image is stored as ``<name><extension>``, where the extension is
    taken from the path component of the URL. Nothing is downloaded when
    the target file already exists.
    :param name: name of image file (without extension)
    :param url: URL for image
    :return: None
    :raises requests.HTTPError: if the server responds with an error status
    """
    from urllib.parse import urlsplit

    ensure_dir(IMAGES_DIR)
    # Look only at the URL path so a query string or fragment never leaks into
    # the file name; splitext yields '' when the path has no extension.
    extension = os.path.splitext(urlsplit(url).path)[1]
    image_file = os.path.join(IMAGES_DIR, f"{name}{extension}")
    if os.path.exists(image_file):
        return
    response = requests.get(url, allow_redirects=True)
    # Do not store an error page as if it were the image.
    response.raise_for_status()
    with open(image_file, 'wb') as handle:
        handle.write(response.content)


def extract_zip(file, path=DATA_DIR):
    """
    Extract all files from a zip archive located in DOWNLOADS_DIR.
    :param file: file name of the archive inside DOWNLOADS_DIR.
    :param path: target directory for unzipped files, defaults to DATA_DIR.
    :return: a list of file names in the archive.
    """
    archive_path = os.path.join(DOWNLOADS_DIR, file)
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(path)
        members = archive.namelist()
    return members

def rename(file_from, file_to, directory=DOWNLOADS_DIR):
    """
    Rename a file inside the given directory; do nothing if it is missing.
    :param file_from: Existing file to rename
    :param file_to: Target file name
    :param directory: Source directory, defaults to DOWNLOADS_DIR
    :return: None
    """
    old_path = os.path.join(directory, file_from)
    if not os.path.exists(old_path):
        # Nothing to rename.
        return
    new_path = os.path.join(directory, file_to)
    os.rename(old_path, new_path)


def file_exists(filename, directory=DOWNLOADS_DIR):
    """
    Check whether a file exists in an optionally provided directory.
    :param filename: name of file
    :param directory: Source directory, defaults to DOWNLOADS_DIR
    :return: True if file exists.
    """
    candidate = os.path.join(directory, filename)
    return os.path.exists(candidate)

## Gather

In [19]:
# Monthly trip-data archives, 201801 through 201902.
tripdata_urls = [
    'https://s3.amazonaws.com/fordgobike-data/201801-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201802-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201803-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201804-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201805-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201806-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201807-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201808-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201809-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201810-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201811-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201812-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201901-fordgobike-tripdata.csv.zip',
    'https://s3.amazonaws.com/fordgobike-data/201902-fordgobike-tripdata.csv.zip',
]
download(*tripdata_urls)

..............


In [20]:
# Print every entry of the downloads directory, in the order os.listdir returns them.
for file in os.listdir(DOWNLOADS_DIR):
    print(file)

201801_fordgobike_tripdata.csv.zip
201812_fordgobike_tripdata.csv.zip
201804_fordgobike_tripdata.csv.zip
201805_fordgobike_tripdata.csv.zip
201901_fordgobike_tripdata.csv.zip
201809_fordgobike_tripdata.csv.zip
201902_fordgobike_tripdata.csv.zip
201803_fordgobike_tripdata.csv.zip
201810_fordgobike_tripdata.csv.zip
201806_fordgobike_tripdata.csv.zip
201808_fordgobike_tripdata.csv.zip
201811_fordgobike_tripdata.csv.zip
201807_fordgobike_tripdata.csv.zip
201802_fordgobike_tripdata.csv.zip


In [22]:
# Names of the regular files in the downloads directory that end in '.zip'.
zip_files = [
    name
    for name in os.listdir(DOWNLOADS_DIR)
    if name.endswith('.zip')
    and os.path.isfile(os.path.join(DOWNLOADS_DIR, name))
]

In [23]:
# Unpack every downloaded archive into the data directory.
for file in zip_files:
    extract_zip(file, DATA_DIR)
# Print the entries of the data directory to check the extraction result.
for file in os.listdir(DATA_DIR):
    print(file)

201802-fordgobike-tripdata.csv
201811-fordgobike-tripdata.csv
201809-fordgobike-tripdata.csv
201806-fordgobike-tripdata.csv
201805-fordgobike-tripdata.csv
201812-fordgobike-tripdata.csv
201801-fordgobike-tripdata.csv
201901-fordgobike-tripdata.csv
201807-fordgobike-tripdata.csv
201808-fordgobike-tripdata.csv
201810-fordgobike-tripdata.csv
201803-fordgobike-tripdata.csv
201804-fordgobike-tripdata.csv
201902-fordgobike-tripdata.csv


In [17]:
# os.listdir returns entries in arbitrary order; sort the names so the files
# are always handled in the same order (the YYYYMM prefix of the extracted
# files makes that order chronological).
tripdata_files = sorted(
    file
    for file in os.listdir(DATA_DIR)
    if file.endswith('.csv') and os.path.isfile(os.path.join(DATA_DIR, file))
)

## Assess
ok

## Clean

## Exploratory Data Analysis

In [None]:
## Explanatory Data Analysis