In [None]:
# @title Setup
from google.colab import auth
from google.cloud import bigquery
from google.colab import data_table

project = 'paulkamau' # Project ID inserted based on the query results selected to explore
location = 'US' # Location inserted based on the query results selected to explore

# Authenticate the Colab user BEFORE constructing the BigQuery client, so the
# client is created with the signed-in user's credentials available rather
# than being built first and authenticated afterwards.
auth.authenticate_user()

# Client bound to the project and location above; later cells reuse it to
# look up the job by its ID.
client = bigquery.Client(project=project, location=location)

# Switch on Colab's data-table formatter for DataFrame cell output.
data_table.enable_dataframe_formatter()

## Reference SQL syntax from the original job
Use the ```jobs.query```
[method](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query) to
return the SQL syntax from the job. This can be copied from the output cell
below to edit the query now or in the future. Alternatively, you can use
[this link](https://console.cloud.google.com/bigquery?j=paulkamau:US:bquxjob_7a279b63_1847ee09f25)
back to BigQuery to edit the query within the BigQuery user interface.

In [None]:
# Look up the earlier BigQuery job by its ID and print the SQL text it ran.

job_id = 'bquxjob_7a279b63_1847ee09f25'  # Job ID inserted based on the query results selected to explore
job = client.get_job(job_id)
print(job.query)

SELECT
  station_number,
  COUNT(*) AS num_days,
  AVG(mean_temp) AS avg_mean_temp,
  MIN(mean_temp) AS min_mean_temp,
  MAX(mean_temp) AS max_mean_temp
FROM
  `bigquery-public-data.samples.gsod`
WHERE
  year >= 2000
GROUP BY
  station_number
HAVING
  num_days >= 400
ORDER BY
  avg_mean_temp DESC
;


# Result set loaded from BigQuery job as a DataFrame
Query results are read using the ID of the job that was run in BigQuery, so the
query does not need to be re-run to explore the results. The ```to_dataframe```
[method](https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.job.QueryJob.html#google.cloud.bigquery.job.QueryJob.to_dataframe)
downloads the results to a Pandas DataFrame by using the BigQuery Storage API.

To edit the query syntax, use the BigQuery SQL editor or the
```Optional:``` sections below.

In [None]:
# Fetch the previous job again and load its result rows into a DataFrame.

job = client.get_job(job_id='bquxjob_7a279b63_1847ee09f25')  # Job ID inserted based on the query results selected to explore
results = job.to_dataframe()
results  # final expression of the cell, so the notebook displays the DataFrame

Unnamed: 0,station_number,num_days,avg_mean_temp,min_mean_temp,max_mean_temp
0,691134,401,94.086035,43.900002,110.000000
1,631600,665,91.140902,72.500000,109.300003
2,632100,538,90.307621,76.300003,106.699997
3,647560,1143,89.712774,58.500000,107.000000
4,612350,1815,88.514986,67.000000,108.199997
...,...,...,...,...,...
11183,890090,2631,-56.399544,-103.000000,7.700000
11184,897440,2134,-56.985754,-101.599998,-6.800000
11185,896250,739,-57.832612,-103.900002,-11.400000
11186,895770,1869,-63.360353,-108.300003,-7.600000


## Show descriptive statistics using describe()
Use the ```pandas DataFrame.describe()```
[method](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.describe.html)
to generate descriptive statistics. Descriptive statistics include those that
summarize the central tendency, dispersion and shape of a dataset’s
distribution, excluding ```NaN``` values. You may also use other Python methods
to interact with your data.

In [None]:
# Summarize each column of the results DataFrame: count, mean, std, min,
# quartiles (25%/50%/75%) and max. Left as the last expression so the
# notebook renders the summary table.
results.describe()

Unnamed: 0,station_number,num_days,avg_mean_temp,min_mean_temp,max_mean_temp
count,11188.0,11188.0,11188.0,11188.0,11188.0
mean,542301.506972,2776.825438,54.257652,14.655631,84.676198
std,310093.36697,1130.172789,17.542371,32.831238,11.416843
min,10010.0,400.0,-66.106333,-113.0,-11.4
25%,261432.5,1856.0,43.609797,-9.0,78.900002
50%,677800.0,3354.0,53.130309,16.299999,86.300003
75%,726119.5,3741.0,66.056666,38.200001,91.300003
max,999999.0,3759.0,94.086035,77.699997,110.0
