# Notebook trend from Chap 9 Train, Validate, Test

In [None]:
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from typing import Sequence

# Palette of named hex colors used to style the plot lines.
# Insertion order is crimson ... purple.
bookcolors = {
    'crimson': '#a50026',
    'red': '#d73027',
    'redorange': '#f46d43',
    'orange': '#fdae61',
    'yellow': '#fee090',
    'sky': '#e0f3f8',
    'babyblue': '#abd9e9',
    'lightblue': '#74add1',
    'blue': '#4575b4',
    'purple': '#313695',
}

def clean_dates(df):
    """Repair implausible ``YearMade`` values of ``df`` in place.

    Two fixes are applied, in order:

    1. ``YearMade`` values below 1950 are replaced with NaN.
    2. Where the year of ``saledate`` is earlier than ``YearMade``,
       ``YearMade`` is set to that sale year.

    Rows whose ``YearMade`` is NaN stay NaN in step 2, because a
    comparison against NaN is False.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a numeric ``YearMade`` column and a datetime ``saledate``
        column (it is accessed through ``.dt``). Modified in place.

    Returns
    -------
    None
    """
    year_made = df['YearMade']
    # Series.mask upcasts an integer column to float only when it actually
    # has to insert NaN. Writing NaN through ``df.loc[mask, col] = np.nan``
    # instead relies on an implicit setitem upcast that recent pandas
    # versions deprecate.
    year_made = year_made.mask(year_made < 1950)

    sale_year = df['saledate'].dt.year
    # A sale year earlier than YearMade is inconsistent; use the sale year.
    df['YearMade'] = year_made.mask(sale_year < year_made, sale_year)

def df_split_dates(df, colname):
    """Expand the datetime column ``colname`` of ``df`` into date parts, in place.

    Adds the columns ``saleyear``, ``salemonth``, ``saleday``,
    ``saledayofweek`` and ``saledayofyear``, then replaces ``df[colname]``
    with its int64 representation, and finally adds ``age`` as
    ``saleyear - YearMade``. ``df`` must already have a ``YearMade``
    column. Returns None.
    """
    stamps = df[colname].dt
    # Target column -> attribute of the .dt accessor that fills it.
    date_parts = {
        "saleyear": "year",
        "salemonth": "month",
        "saleday": "day",
        "saledayofweek": "dayofweek",
        "saledayofyear": "dayofyear",
    }
    for target, attr in date_parts.items():
        df[target] = getattr(stamps, attr)

    # Replace the timestamps with a plain integer epoch value.
    # NOTE(review): astype(np.int64) yields the raw count in the column's
    # datetime unit (nanoseconds for datetime64[ns]), not seconds since
    # 1970 -- confirm which one downstream code expects.
    df[colname] = df[colname].astype(np.int64)

    # Synthesize age; it is NaN wherever YearMade is NaN.
    df['age'] = df['saleyear'] - df['YearMade']

# Load the sales data and derive the sale-date columns (saleyear, ...)
# used for grouping below.
df = pd.read_feather("data/bulldozer-train-all.feather")
df_split_dates(df, 'saledate')

# Mean sale price per sale year. SalePrice is selected *before*
# aggregating: mean() over the whole frame would try to average every
# column, which raises TypeError on pandas >= 2.0 when any column is
# non-numeric and is wasted work otherwise.
P = df.groupby('saleyear')['SalePrice'].mean().reset_index()
P = P.sort_values('saleyear')

# Fit a straight line to the yearly means to capture the overall trend.
lm = LinearRegression()
lm.fit(P[['saleyear']], P['SalePrice'])
print(lm.coef_, lm.intercept_)
trend = lm.predict(P[['saleyear']])

# Residuals around the trend, shifted up by the overall mean so the
# detrended curve is drawn in the same y-range as the raw prices.
detrended = P['SalePrice'].mean() + (P['SalePrice'] - trend)

fig, ax = plt.subplots(figsize=(7, 3))
ax.plot(P['saleyear'], P['SalePrice'], "o-",
        markersize=3, linewidth=1, label='Price',
        c=bookcolors['blue'])
ax.plot(P['saleyear'], trend, linewidth=1.2, label='Trend',
        c=bookcolors['orange'])
ax.plot(P['saleyear'], detrended,
        ":", label="Detrended", linewidth=1.5, c=bookcolors['crimson'])
ax.set_xlabel("Sale year")
ax.set_ylabel("Sale price (dollars)")
ax.set_ylim(25_000, 35_000)
ax.legend(loc="lower right", fontsize="small", labelspacing=0)