# About
This notebook looks at the correlation between the number of bike rides and the temperature on a given day.

# 0. Prerequisites

In [None]:
# Only applicable for Linux distros
!sudo apt install libm-devel, openssl-devel

In [None]:
!pip install snowflake-connector-python
!pip install "snowflake-connector-python[pandas]"

# 1. Connect to Snowflake

In [None]:
import decimal
import getpass

import snowflake.connector
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Open the Snowflake session that the query cells below obtain cursors from.
# getpass.getpass() prompts without echoing the typed password, so it is not
# left visible in the cell output the way input() would leave it.
ctx = snowflake.connector.connect(
    user='kristianaars',
    account='avsdods-gh85649',
    password=getpass.getpass('Please enter password:')
)

## 1.1 Check version

In [None]:
# Sanity check: ask the server for its version to confirm the session works.
with ctx.cursor() as cursor:
    cursor.execute('SELECT current_version()')
    version_row = cursor.fetchone()
    print(version_row[0])

# 2 Pulling data

## 2.1 Selecting Database

In [None]:
# Switch the session to my_sf_db and print the first column of the row that
# the statement returns.
with ctx.cursor() as cursor:
    cursor.execute('USE DATABASE my_sf_db')
    status_row = cursor.fetchone()
    print(status_row[0])

## 2.2 Fetch Data
Notice that we are collecting the data as a pandas DataFrame by using `cs.fetch_pandas_all()`.

In [None]:
# Load every row of the users table into a pandas DataFrame.
with ctx.cursor() as cs:
    users_query = 'SELECT * FROM users'
    cs.execute(users_query)
    data = cs.fetch_pandas_all()

In [None]:
# Preview only the first rows: the frame holds the whole users table, and
# rendering all of it would store every displayed row in the saved notebook.
data.head()

# 3 Processing Data
Let's look at how the temperature correlates with the number of bike rides. First we must collect the data.

In [None]:
# One row per ride date: the number of joined ride rows (N_TRIPS) and the
# average feels-like temperature (TEMPERATURE), using weather rows whose
# postal code matches '10257'.
sql = """
SELECT
    TO_DATE(citibike_data.started_at) as ride_date,
    COUNT(ride_date) as n_trips,
    AVG(weather_data.avg_temperature_feelslike_2m_f) as temperature
FROM CITIBIKE.PUBLIC.RIDES citibike_data
INNER JOIN WEATHER_DATA.STANDARD_TILE.HISTORY_DAY weather_data
ON weather_data.date_valid_std = ride_date
WHERE weather_data.postal_code LIKE '10257'
GROUP BY ride_date
"""

# Run the query and replace `data` with the result as a pandas DataFrame.
with ctx.cursor() as cursor:
    cursor.execute(sql)
    data = cursor.fetch_pandas_all()

In [None]:
# The date is not needed for calculating the correlation, so it is removed.
# errors='ignore' keeps this cell re-runnable: on a second run RIDE_DATE is
# already gone and a plain drop would raise KeyError.
# astype(float) converts the two remaining columns to float before corr()/polyfit.
data = data.drop(columns=['RIDE_DATE'], errors='ignore').astype(float)
data.head()

Let's see what the correlation between the number of rides and the temperature is.

In [None]:
# Pairwise correlation matrix of the columns in `data`; 'pearson' is the
# pandas default, spelled out here for the reader.
data.corr(method='pearson')

# 4 Plot the data in a correlation graph

In [None]:
# Scatter plot of trips per ride date against temperature, with a
# least-squares straight line drawn on top.
x = data['TEMPERATURE']
y = data['N_TRIPS']

fig, ax = plt.subplots()
ax.scatter(x, y)

# Fit a degree-1 polynomial and evaluate it at the sorted unique temperatures,
# so the line is drawn from left to right in a single pass.
trend = np.poly1d(np.polyfit(x, y, 1))
x_sorted = np.unique(x)
ax.plot(x_sorted, trend(x_sorted), color='red')

# NOTE(review): the source column name ends in "_f", presumably Fahrenheit —
# confirm before adding a unit to the axis label.
ax.set_xlabel('Temperature')
ax.set_ylabel('Number of trips')
ax.set_title('Number of trips vs. temperature')

plt.show()