In [20]:
#Data
import pandas as pd
data = pd.read_csv('seattleWeather_1948-2017.csv', parse_dates=['date'])
filtered_data=data[[d.month == 1 for d in data.date]].copy()


data.tail()
filtered_data.tail()

Unnamed: 0,date,amount_of_precipitation,max_temperature,min_temperature,rain
25229,2017-01-27,0.0,54,37,False
25230,2017-01-28,0.0,52,37,False
25231,2017-01-29,0.03,48,37,True
25232,2017-01-30,0.02,45,40,True
25233,2017-01-31,0.0,44,34,False


In [3]:
len(filtered_data)


2170

In [4]:
import graphing

# # Test 
# speed=[1,2,3,5,6,8,9]
# distance=[1,3,8,15,23]

graphing.scatter_2D(filtered_data,label_x='date',label_y='min_temperature',title='January Temps in F')


In [5]:
import numpy as np
# This block of code scales and offsets the data slightly, which helps the training process
# The Purpose of NOrmalization is to transform data in a way that they are either dimensionless and or have similar distribution.
# This process is called **Standardization** or Feature Scaling
# Normalization is an essential step in data pre-processing in any machine learning application and model fitting.
# it dramatically improves model accuracy
# https://towardsdatascience.com/data-normalization-in-machine-learning-395fdec69d02 

standardized_temps=filtered_data["min_temperature"]- np.mean(filtered_data["min_temperature"])/np.std(filtered_data["min_temperature"])
standardized_temps

# print("Mean Temperatures: ",np.mean(filtered_data["min_temperature"]))
# print("Standard Deviation: ",np.std(filtered_data["min_temperature"]))


0        37.214866
1        31.214866
2        30.214866
3        29.214866
4        27.214866
           ...    
25229    32.214866
25230    32.214866
25231    32.214866
25232    35.214866
25233    29.214866
Name: min_temperature, Length: 2170, dtype: float64

In [21]:
for each_date in filtered_data.date:

    # print(each_date)
    # print("Year: ",each_date.year)
    # print(each_date.timetuple())
    print((each_date.year + each_date.timetuple().tm_yday / 365.25))

1948.002737850787
1948.0054757015744
1948.0082135523614
1948.0109514031485
1948.0136892539356
1948.0164271047229
1948.01916495551
1948.021902806297
1948.024640657084
1948.0273785078714
1948.0301163586585
1948.0328542094455
1948.0355920602328
1948.03832991102
1948.041067761807
1948.043805612594
1948.0465434633813
1948.0492813141684
1948.0520191649555
1948.0547570157426
1948.0574948665299
1948.060232717317
1948.062970568104
1948.065708418891
1948.0684462696784
1948.0711841204654
1948.0739219712525
1948.0766598220398
1948.0793976728269
1948.082135523614
1948.084873374401
1949.002737850787
1949.0054757015744
1949.0082135523614
1949.0109514031485
1949.0136892539356
1949.0164271047229
1949.01916495551
1949.021902806297
1949.024640657084
1949.0273785078714
1949.0301163586585
1949.0328542094455
1949.0355920602328
1949.03832991102
1949.041067761807
1949.043805612594
1949.0465434633813
1949.0492813141684
1949.0520191649555
1949.0547570157426
1949.0574948665299
1949.060232717317
1949.062970568104

In [7]:
import numpy as np




# Offset date into number of years since 1982
filtered_data["years_since_1982"] = [(d.year + d.timetuple().tm_yday / 365.25) - 1982 for d in filtered_data.date]
# https://www.geeksforgeeks.org/timetuple-function-of-datetime-date-class-in-python/

# Scale and offset temperature so that it has a smaller range of values
# In this case, the data normalization technique used is Z-Score normalization or Standardization
# It is calculated by dividing the difference of the (datapoint and(--) mean) / with the standard deviation
filtered_data["normalised_temperature"] = (filtered_data["min_temperature"] - np.mean(filtered_data["min_temperature"])) / np.std(filtered_data["min_temperature"])

# Graph
graphing.scatter_2D(filtered_data, label_x="years_since_1982", label_y="normalised_temperature", title="January Temperatures (Normalised)")


In [8]:
from model import MyModel

tempModel=MyModel()

In [9]:
print(f"Model parameters before training: {tempModel.intercept}, {tempModel.slope}")

# Look at how well the model does before training
print("Model visualised before training:")
graphing.scatter_2D(filtered_data, "years_since_1982", "normalised_temperature", trendline=tempModel.predict)

Model parameters before training: 0, 0
Model visualised before training:


In [10]:
# Cost Function
from obj_fx import cost_function

In [11]:
# The Optimizer
from m0b_optimizer import MyOptimizer
optimizer = MyOptimizer()

In [12]:
# Training Loop
from trainingLoop import train_one_iteration
from model import MyModel

tempModel=MyModel()

Training method ready


In [19]:
import math

print(f"Model parameters before training:\t\t{tempModel.intercept:.8f},\t{tempModel.slope:.8f}")

continue_loop, cost = train_one_iteration(tempModel,optimizer,model_inputs = filtered_data["years_since_1982"],
                                                    true_temperatures = filtered_data["normalised_temperature"],
                                                    last_cost = math.inf)

print(f"Model parameters after 1 iteration of training:\t{tempModel.intercept:.8f},\t{tempModel.slope:.8f}")
# graphing.scatter_2D(filtered_data, "years_since_1982", "normalised_temperature", trendline=tempModel.predict)  


Model parameters before training:		-0.00648853,	0.01193327
The Variance: 0       -1.335168
1       -0.519690
2       -0.383750
3       -0.247810
4        0.024038
           ...   
25229    0.168615
25230    0.168647
25231    0.168680
25232   -0.239010
25233    0.576468
Length: 2170, dtype: float64
Model parameters after 1 iteration of training:	-0.00648853,	0.01193327


In [14]:
filtered_data['normalised_temperature'].tail()

25229    0.243444
25230    0.243444
25231    0.243444
25232    0.651166
25233   -0.164279
Name: normalised_temperature, dtype: float64

In [18]:

# Start the loop
print("Training beginning...")
last_cost = math.inf
i = 0
continue_loop = True
while continue_loop:

    # Run one iteration of training
    # This will tell us whether to stop training, and also what
    # the cost was for this iteration
    continue_loop, last_cost = train_one_iteration(tempModel,optimizer,model_inputs = filtered_data["years_since_1982"],
                                                    true_temperatures = filtered_data["normalised_temperature"],
                                                    last_cost = last_cost)
   
    # Print the status
    if i % 400 == 0:
        print("Iteration:", i)

    i += 1

    
print("Training complete!")
print(f"Model parameters after training:\t{tempModel.intercept:.8f},\t{tempModel.slope:.8f}")
graphing.scatter_2D(filtered_data, "years_since_1982", "normalised_temperature", trendline=tempModel.predict)    

Training beginning...
The Variance: 0       -1.335168
1       -0.519690
2       -0.383750
3       -0.247810
4        0.024038
           ...   
25229    0.168615
25230    0.168647
25231    0.168680
25232   -0.239010
25233    0.576468
Length: 2170, dtype: float64
Iteration: 0
The Variance: 0       -1.335168
1       -0.519690
2       -0.383750
3       -0.247810
4        0.024038
           ...   
25229    0.168615
25230    0.168647
25231    0.168680
25232   -0.239010
25233    0.576468
Length: 2170, dtype: float64
The Variance: 0       -1.335168
1       -0.519690
2       -0.383750
3       -0.247810
4        0.024038
           ...   
25229    0.168615
25230    0.168647
25231    0.168680
25232   -0.239010
25233    0.576468
Length: 2170, dtype: float64
The Variance: 0       -1.335168
1       -0.519690
2       -0.383750
3       -0.247810
4        0.024038
           ...   
25229    0.168615
25230    0.168647
25231    0.168680
25232   -0.239010
25233    0.576468
Length: 2170, dtype: float64
T

In [None]:
filtered_data

Unnamed: 0,date,amount_of_precipitation,max_temperature,min_temperature,rain,years_since_1982,normalised_temperature
0,1948-01-01,0.47,51,42,True,-33.997262,0.922981
1,1948-01-02,0.59,45,36,True,-33.994524,0.107536
2,1948-01-03,0.42,45,35,True,-33.991786,-0.028371
3,1948-01-04,0.31,45,34,True,-33.989049,-0.164279
4,1948-01-05,0.17,45,32,True,-33.986311,-0.436094
...,...,...,...,...,...,...,...
25229,2017-01-27,0.00,54,37,False,35.073922,0.243444
25230,2017-01-28,0.00,52,37,False,35.076660,0.243444
25231,2017-01-29,0.03,48,37,True,35.079398,0.243444
25232,2017-01-30,0.02,45,40,True,35.082136,0.651166
