In [11]:
import os

from google.cloud import bigquery

%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

# os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="PATH_to_your_big_query_key.json"

# or set the environment variable in your shell before launching the notebook:
# export GOOGLE_APPLICATION_CREDENTIALS="PATH_to_your_big_query_key.json"

In [10]:
class Chicago_Taxi_Trips:
    """
    Class to pull data from the Chicago Taxi Trips dataset

    Short information about this dataset:

    Chicago taxi trips from 2013 to present

    The dataset has 70.4 GB

    Every query method stores the submitted job in ``self.query_job`` and
    the fetched rows in ``self.results``, replacing whatever the previous
    query left there.
    """

    def __init__(self, client=None, limit=None):
        """
        Init function constructing the client, authenticating with the
        credentials set in the environment, and performing the SQL query
        pulling all columns from the dataset.

        Parameters
        ----------
        client: optional, an already-constructed BigQuery client (or any
            object exposing a compatible ``query(sql)`` method). When
            omitted, ``bigquery.Client()`` is created.
        limit: optional, maximum number of rows to pull. When omitted the
            query is an unbounded ``SELECT *`` over the whole table, as
            before -- be aware of the dataset size noted on the class.

        Raises
        ----------
        ValueError
            If ``limit`` is negative.
        """

        self.client = bigquery.Client() if client is None else client

        self.query = """
        SELECT
              *
          FROM
            `bigquery-public-data.chicago_taxi_trips.taxi_trips`
         """
        if limit is not None:
            # int() guarantees only a plain number is spliced into the SQL.
            row_cap = int(limit)
            if row_cap < 0:
                raise ValueError("limit must be non-negative")
            self.query += "LIMIT " + str(row_cap) + "\n"

        self._run_query(self.query)

    def _run_query(self, sql):
        """Submit ``sql`` and store the job and its rows on the instance."""
        self.query_job = self.client.query(sql)
        self.results = self.query_job.result()

    def to_dataframe(self, data=None):
        """
        Outputs the results as a Pandas dataframe

        Parameters
        ----------
        data: optional. ``None`` converts the most recent query job;
            an object with a ``to_dataframe()`` method (a query job or
            its results) is converted through that method; a DataFrame
            is returned unchanged; anything else is passed to
            ``pd.DataFrame``.

        Returns
        ----------
        The data as a Pandas dataframe
        """

        if data is None:
            data = self.query_job
        if hasattr(data, "to_dataframe"):
            return data.to_dataframe()
        if isinstance(data, pd.DataFrame):
            return data
        return pd.DataFrame(data)

    def highest_average_tips(self):
        """
        Query the 10 drop-off community areas with the highest average tip.

        The result columns are ``dropoff_community_area``, ``average_tip``
        and ``max_tip``; the two tip columns are strings formatted with
        two decimals.

        Returns
        ----------
        None
        """

        # Order by the numeric aggregate: ordering by the formatted string
        # alias would compare text, ranking '9.50' above '10.20'.
        self.highest_tips_query = """
        SELECT
            dropoff_community_area,
            FORMAT('%3.2f',
                AVG(tips)) AS average_tip,
            FORMAT('%3.2f',
                MAX(tips)) AS max_tip
            FROM
              `bigquery-public-data.chicago_taxi_trips.taxi_trips`
            WHERE
              dropoff_community_area IS NOT NULL
            GROUP BY
              dropoff_community_area
            ORDER BY
              AVG(tips) DESC
            LIMIT
                10
        """

        self._run_query(self.highest_tips_query)

    def busiest_time_of_day(self):
        """
        Query the number of trips started in each hour of the day.

        The result columns are ``trip_hour`` and ``trip_count``, ordered
        by hour.

        Returns
        ----------
        None
        """

        # BigQuery standard SQL has no date_part(); EXTRACT is the
        # equivalent. NOTE(review): EXTRACT on a TIMESTAMP uses UTC unless
        # a time zone is given -- confirm that is the intended hour.
        self.busiest_time_query = """
        SELECT
            EXTRACT(HOUR FROM trip_start_timestamp) AS trip_hour,
            COUNT(*) AS trip_count
        FROM
            `bigquery-public-data.chicago_taxi_trips.taxi_trips`
        GROUP BY trip_hour
        ORDER BY trip_hour
        """

        self._run_query(self.busiest_time_query)

    def plot_the_data(self, data, x, y):

        """
        Function to plot the data

        Parameters
        ----------
        data: data pulled from the googleApi (anything accepted by
            ``to_dataframe``)
        x: parameter plotted on the X axis
        y: parameter plotted on the Y axis

        Returns
        ----------
        None
        """
        mpl.style.use('ggplot')

        df = self.to_dataframe(data)
        df.plot(x=x, y=y, kind='line')

        plt.show()
        
        
        
        