# Module 7 Project
- **Author:** [Julia Fangman](https://github.com/juliafangman)
- **Repository:** [datafun-07-ml-predictive](https://github.com/juliafangman/datafun-07-ml-predictive)

# Part 1 - Linear Regression

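To follow along in an interactive IPython session with Matplotlib support, start IPython with:

```
ipython --matplotlib
```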

#### Given a Fahrenheit temperature, we can calculate the corresponding Celsius temperature using the following formula:

In [None]:
# Formula (kept as a comment: evaluating it as a bare statement fails
# because `f` is not defined at that point):
#     c = 5 / 9 * (f - 32)
def c(f):
    """Convert a Fahrenheit temperature ``f`` to Celsius."""
    return 5 / 9 * (f - 32)


# (Fahrenheit, Celsius) pairs for 0, 10, ..., 100 degrees Fahrenheit.
temps = [(f, c(f)) for f in range(0, 101, 10)]

import pandas as pd
# Tabulate the (Fahrenheit, Celsius) pairs so pandas can plot them.
temps_df = pd.DataFrame(data=temps, columns=['Fahrenheit', 'Celsius'])

# Line plot with a dot marker at every sampled temperature.
axes = temps_df.plot.line(x='Fahrenheit', y='Celsius', style='.-')

# The return value is captured so the cell does not echo it.
y_label = axes.set_ylabel('Celsius')

## Section 1 - Load

In [21]:
import pandas as pd
# Bind the loaded frame to `nyc`: the following sections (View, Clean,
# Describe, ...) all read `nyc`, so the result must not be discarded.
nyc = pd.read_csv('ave_hi_nyc_jan_1895-2018.csv')
nyc

Unnamed: 0,Date,Value,Anomaly
0,189501,34.2,-3.2
1,189601,34.7,-2.7
2,189701,35.5,-1.9
3,189801,39.6,2.2
4,189901,36.4,-1.0
...,...,...,...
119,201401,35.5,-1.9
120,201501,36.1,-1.3
121,201601,40.8,3.4
122,201701,42.8,5.4


## Section 2 - View

In [None]:
# A cell only displays its last expression, so showing head() and tail()
# as two bare statements drops the head. Stack them into a single frame
# so the first and last five rows are both visible.
pd.concat([nyc.head(), nyc.tail()])

## Section 3 - Clean


In [None]:
# Replace the column labels with descriptive names.
nyc.columns = ['Date', 'Temperature', 'Anomaly']
nyc.head(n=3)

nyc['Date'].dtype

# Integer-divide by 100 to drop the last two digits of each Date value
# (e.g. 189501 -> 1895).
nyc['Date'] = nyc['Date'] // 100
nyc.head(n=3)

## Section 4 - Describe

In [None]:
# Use the fully qualified option name. The short pattern 'precision' can
# match more than one registered option in recent pandas releases, which
# makes set_option raise OptionError.
pd.set_option('display.precision', 2)
# Summary statistics for the Temperature column.
nyc['Temperature'].describe()

## Section 5 - Calculate Line

In [None]:
from scipy import stats
# Fit a simple linear regression of Temperature on Date with SciPy.
linear_regression = stats.linregress(x=nyc.Date, y=nyc.Temperature)

# Display slope and intercept together as a tuple: as two bare statements
# only the last one would be shown and the slope would be lost.
linear_regression.slope, linear_regression.intercept

## Section 6 - Predict

In [None]:
# Evaluate the fitted line y = slope * x + intercept for 2019 and 1890.
# Both values are returned in one tuple so neither result is discarded
# (a cell only displays its last expression).
(
    linear_regression.slope * 2019 + linear_regression.intercept,
    linear_regression.slope * 1890 + linear_regression.intercept,
)

## Section 7 - Plot

In [None]:
import seaborn as sns
sns.set_style('whitegrid')

# Scatter of Temperature against Date with seaborn's fitted regression line.
axes = sns.regplot(data=nyc, x='Date', y='Temperature')

# Fix the y-axis range to 10-70.
axes.set_ylim(bottom=10, top=70)

# Part 2 - Machine Learning

## Loading the Average High Temperatures into a DataFrame

In [None]:
import pandas as pd
# Reload the data so Part 2 starts from a freshly read frame.
nyc = pd.read_csv('ave_hi_nyc_jan_1895-2018.csv')

# Descriptive column labels.
nyc.columns = ['Date', 'Temperature', 'Anomaly']

# Integer-divide by 100 to drop the last two digits of each Date value.
nyc['Date'] = nyc['Date'] // 100
nyc.head(n=3)

## Splitting the Data for Training & Testing

In [None]:
from sklearn.model_selection import train_test_split
# Date is selected as a one-column frame so the feature array is 2-D
# (n_samples, 1); Temperature is the 1-D target. random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    nyc[['Date']].values,
    nyc['Temperature'].values,
    random_state=11,
)

X_train.shape

X_test.shape

## Training the Model

In [None]:
from sklearn.linear_model import LinearRegression

# scikit-learn estimator. Note that this rebinds `linear_regression`, the
# same name Part 1 used for the SciPy linregress result.
linear_regression = LinearRegression()

linear_regression.fit(X=X_train, y=y_train)

# Display both learned parameters as a tuple: as two bare statements only
# the last one would be shown and coef_ would be lost.
linear_regression.coef_, linear_regression.intercept_

## Testing the Model

In [None]:
predicted = linear_regression.predict(X_test)
expected = y_test

# Spot-check every fifth (predicted, expected) pair from the test set.
for p, e in list(zip(predicted, expected))[::5]:
    print(f'predicted: {p:.2f}, expected: {e:.2f}')


## Predicting Future Temperatures & Estimating Past Temperatures

In [None]:
def predict(x):
    """Evaluate the fitted line ``coef_ * x + intercept_`` at ``x``.

    ``x`` may be a scalar or a NumPy array. scikit-learn stores ``coef_``
    as an array, so the result is array-valued even for a scalar ``x``.
    """
    return linear_regression.coef_ * x + linear_regression.intercept_


# Return both results as a tuple so the 2019 prediction is not discarded
# (a cell only displays its last expression).
predict(2019), predict(1890)

## Visualizing the Dataset with the Regression Line

In [None]:
import seaborn as sns
# Scatter of the dataset; points are coloured by their Temperature value.
axes = sns.scatterplot(
    x='Date',
    y='Temperature',
    hue='Temperature',
    data=nyc,
    palette='winter',
    legend=False,
)

axes.set_ylim(bottom=10, top=70)

import numpy as np

# A straight line needs only two points: the smallest and largest Date.
x = np.array([nyc['Date'].min(), nyc['Date'].max()])
y = predict(x)

import matplotlib.pyplot as plt

# Draw the regression line through pyplot's current axes.
line = plt.plot(x, y)