In [1]:
# 5 Bayesian Linear Regression

In [2]:
# Task 10

In [25]:
import pymc as pm
import arviz as az
import pytensor.tensor as at

In [4]:
# Loads data
dat_file = open('seoul+bike+sharing+demand/SeoulBikeData.csv', 'r')
file_lines = dat_file.read().split("\n")
dat_file.close
file_lines = file_lines[:-1] # Last line of file is empty with split method so removed
features = file_lines[0].split(",")
label_name = "Rented Bike Count"
features = ["Date", "Hour", "Temperature", "Humidity", "Wind speed", "Visibility", "Dew point temperature",
           "Solar Radiation", "Rainfall", "Snowfall", "Seasons", "Holiday", "Functioning Day"]
file_lines = file_lines[1:] # Removes feature info

In [5]:
from datetime import datetime
import numpy as np
# Loads data into nd array
no_features = len(features)
no_data_items = len(file_lines)
X = np.ones((no_data_items, no_features))
# Loads date into array. Date is encoded as day number of year (1 = 1st January, 365 = 31st December).
# Note this ignores the year variable from dataset, however given that the dataset spans exatcly one year,
# no meaningful inference can be drawn from it.
for idx, record in enumerate(file_lines):
    date_str = record.split(",")[0]
    date_object = datetime.strptime(date_str, "%d/%m/%Y")
    day_number = date_object.timetuple().tm_yday
    X[idx, 0] = day_number

In [6]:
# Loads y labels (i.e. bikes rented)
Y = np.zeros(no_data_items)
for idx, record in enumerate(file_lines):
    Y[idx] = record.split(",")[1]

In [7]:
# Loads numerical objects into x that do not need any conversion
# Loads hour, temperature, humidity, wind speed, visibility, dew point, solar radiation, rainfall and snowfall
for idx, record in enumerate(file_lines):
    split_line = record.split(",")
    for feature_idx in range(2, 10 + 1):
        X[idx, feature_idx - 1] = split_line[feature_idx]

In [8]:
# Loads season data in. 1 = Spring, 2 = Summer, 3 = Autumn, 4 = Winter
for idx, record in enumerate(file_lines):
    season_str = record.split(",")[11]
    if season_str == "Spring": X[idx, 10] = 1
    elif season_str == "Summer": X[idx, 10] = 2
    elif season_str == "Autumn": X[idx, 10] = 3
    elif season_str == "Winter": X[idx, 10] = 4     

In [9]:
# Encodes holiday. 0 = No holiday, 1 = Holiday
for idx, record in enumerate(file_lines):
    hol_str = record.split(",")[12]
    if hol_str == "No Holiday": X[idx, 11] = 0
    elif hol_str == "Holiday": X[idx, 11] = 1

In [10]:
# Encodes function day. 0 = not functioning day, 1 = Functioning day
for idx, record in enumerate(file_lines):
    func_str = record.split(",")[13]
    if func_str == "Yes": X[idx, 12] = 1
    elif func_str == "No" : X[idx, 12] = 0

In [11]:
# Removes all non functioning days. Prediction for this is as simple as
# no bikes are sold on non-functioning days. Including this would needelessly skew the model
X_cpy = []
for idx in range(len(X)):
    if X[idx,12] == 1:
        X_cpy.append(X[idx])
# Removes functioning day column as it's now always true
X = np.delete(X, 12, 1)
features.remove("Functioning Day")

In [12]:
# Removes season data as weather readings encode most of this information well
X = np.delete(X, 10, 1)
features.remove("Seasons")

In [13]:
# Removes date from model as other information encodes this well enough
X = np.delete(X, 0, 1)
features.remove("Date")

In [26]:
# Task 11 - Makes model and chooses priors
with pm.Model(coords={"predictors": features}) as model:
    hour_coff = pm.Normal("Hour", mu=0, sigma=20)
    temp_coff = pm.Normal("Temperature", mu=10, sigma=2)
    humid_coff = pm.Normal("Humidity", mu=-4, sigma=20)
    wind_coff = pm.Normal("Wind speed", mu=-10, sigma=2)
    vis_coff = pm.Normal("Visibility", mu=5, sigma=10)
    dew_coff = pm.Normal("Dew point temperature", mu=-4, sigma=20)
    sol_coff = pm.Normal("Solar radiation", mu=10, sigma=10)
    rain_coff = pm.Normal("Rainfall", mu=-20, sigma=2)
    snow_coff = pm.Normal("Snowfall", mu=-50, sigma=1)
    hol_coff = pm.Normal("Holiday", mu=-1, sigma=3)
    sig = pm.HalfNormal("Sigma", sigma=2)
    intercept = pm.Normal("Intercept", mu=0, sigma=20)
    coff_arr = np.array([hour_coff, temp_coff, humid_coff, wind_coff, vis_coff, dew_coff, sol_coff,
                rain_coff, snow_coff, hol_coff])
    
    bikes_pred_mu =  (coff_arr * X) + intercept
    bikes = pm.Normal('bikes', mu=bikes_pred_mu, sigma=sig, observed=Y)

TypeError: Unsupported dtype for TensorType: object