## **TODO:** Set the value of `URL` to the URL from your learning materials

In [None]:
import os

# Placeholder: replace None with the dataset URL string from your learning
# materials before running this cell.
URL = None

# Fail fast with a readable message when URL is unset, empty, or not a string.
assert isinstance(URL, str) and URL, "Be sure to initialize URL using the value from your learning materials"

# Export the URL as an environment variable so shell cells can read it as $URL.
os.environ['URL'] = URL

In [None]:
%%bash
# Install the training framework used by the rest of the notebook.
pip install pytorch-lightning
# Download the dataset archive. $URL is quoted so that whitespace or glob
# characters (such as ?) in the address are passed to wget unchanged.
wget -q "$URL" -O ./data.zip
mkdir -p data
# Extract every archive in the working directory into data/, one unzip call
# per archive: unzip treats extra positional arguments as member names to
# extract, not as further archives.
for archive in *.zip; do
  unzip -o "$archive" -d data/
done

## Demo: PyTorch Lightning

In [None]:
import pandas as pd
import torch as pt
from torch.utils.data import DataLoader, TensorDataset

# Make float64 the default floating-point dtype for tensors and module
# parameters created from here on.
pt.set_default_dtype(pt.float64)

In [None]:
from pathlib import Path

# Read every CSV part file under data/ and stack them into one DataFrame.
#  - sorted(): Path.glob() yields files in arbitrary order; a fixed order keeps
#    the row order (and therefore any seeded sampling) reproducible.
#  - ignore_index=True: each part file is read with its own 0..n-1 index, so
#    without renumbering the combined frame would contain duplicate labels and
#    a label-based drop (e.g. drop(index=test_df.index)) would remove the
#    matching rows of *every* part file instead of only the sampled ones.
df = pd.concat(
    (pd.read_csv(part_file) for part_file in sorted(Path('data/').glob('part-*.csv'))),
    ignore_index=True,
)

In [None]:
# Drop the 'origindatetime_tr' column; drop() returns a new DataFrame and
# leaves df itself unchanged.
working_df = df.drop(columns='origindatetime_tr')
# Displayed as the cell output: (rows, columns).
working_df.shape

In [None]:
# Hold out a random 10% of the rows as the test set; the fixed seed makes the
# draw repeatable for the same input frame.
test_df = working_df.sample(frac=0.1, random_state=42)
# Displayed as the cell output: (rows, columns).
test_df.shape

In [None]:
# The training set is every row whose index label was not drawn for test_df.
# NOTE(review): rows are removed by label, so this presumes the index labels
# of working_df are unique — confirm.
train_df = working_df.drop(test_df.index, axis='index')
# Displayed as the cell output: (rows, columns).
train_df.shape

In [None]:
# Input columns fed to the model, and the column it is trained to predict.
FEATURES = [
    'origin_block_latitude',
    'origin_block_longitude',
    'destination_block_latitude',
    'destination_block_longitude',
]
# Kept as a one-element list so that train_df[TARGET].values is 2-D
# (rows, 1), matching the (rows, 1) output of a single-output linear layer.
TARGET = ['fareamount']

BATCH_SIZE = 2 ** 18
# When True, the tensors below are copied into pinned (page-locked) host
# memory and the DataLoader is asked to pin the batches it produces.
PIN_MEMORY = True

X_train = pt.tensor(train_df[FEATURES].values)
X_train = X_train.pin_memory() if PIN_MEMORY else X_train

y_train = pt.tensor(train_df[TARGET].values)
y_train = y_train.pin_memory() if PIN_MEMORY else y_train

# Each dataset item is a (target, features) pair — in that order, so batches
# must be unpacked as `y, X = batch`.
train_ds = TensorDataset(y_train, X_train)

# os.cpu_count() returns None when the CPU count cannot be determined, which
# DataLoader does not accept; fall back to 0 (load batches in the main process).
train_dl = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    pin_memory=PIN_MEMORY,
    num_workers=os.cpu_count() or 0,
)

# Displayed as the cell output: (number of training rows, batch size).
len(train_ds), BATCH_SIZE

In [None]:
import pytorch_lightning as pl
from pytorch_lightning import Trainer

class TaxiFareLinearRegressor(pl.LightningModule):
    """Bias-free linear regression model trained with mean squared error.

    Args:
        train_dl: DataLoader handed back by ``train_dataloader()``. Its
            batches must unpack as ``(target, features)`` because
            ``training_step`` reads them in that order.
        in_features: Number of input feature columns. Defaults to 4, the
            width that was previously hard-coded.
    """

    def __init__(self, train_dl, in_features=4):
        super().__init__()

        self.train_dl = train_dl

        # Single linear layer with one output and no intercept term.
        self.model = pt.nn.Linear(in_features, 1, bias=False)

    def forward(self, X):
        """Return the linear layer's prediction for the feature batch ``X``."""
        return self.model(X)

    def training_step(self, batch, batch_idx):
        """Compute the MSE loss for one batch and log its square root.

        Args:
            batch: ``(y, X)`` pair of target and feature tensors.
            batch_idx: Index of the batch within the epoch (unused).

        Returns:
            The mean squared error between the predictions and ``y``; this is
            the value that gets optimized.
        """
        y, X = batch

        y_pred = self.forward(X)

        loss = pt.nn.functional.mse_loss(y_pred, y)

        # RMSE is only reported (progress bar and logger); the optimizer
        # still receives the MSE returned below.
        rmse = pt.sqrt(loss)
        self.log('rmse', rmse, prog_bar=True, on_step=True, logger=True)

        return loss

    def configure_optimizers(self):
        """Return an AdamW optimizer over all parameters, default settings."""
        return pt.optim.AdamW(self.parameters())

    def train_dataloader(self):
        """Return the DataLoader supplied at construction time."""
        return self.train_dl

In [None]:
# Build the model around the training DataLoader; the model hands this loader
# back from its train_dataloader() method.
model = TaxiFareLinearRegressor(train_dl)

In [None]:
from pytorch_lightning import loggers as pl_loggers

# Send logged metrics to TensorBoard event files under lightning_logs/.
tb_logger = pl_loggers.TensorBoardLogger('lightning_logs/')

# Train on a single GPU for 10 epochs. The device is requested through
# accelerator/devices because the older `gpus` argument was deprecated and
# then removed from Trainer, and this notebook installs pytorch-lightning
# without pinning a version.
trainer = pl.Trainer(
    accelerator='gpu',
    devices=1,
    max_epochs=10,
    default_root_dir='./checkpoints',
    logger=tb_logger,
)
# No dataloader is passed here: the model supplies one via train_dataloader().
trainer.fit(model)

# most basic trainer, uses good defaults
# trainer = pl.Trainer(gpus=1, progress_bar_refresh_rate=10)    
# from pytorch_lightning.profiler import AdvancedProfiler
# profiler = AdvancedProfiler()

# trainer = pl.Trainer(gpus = 1, max_epochs = 1, profiler = True)
# trainer = pl.Trainer(gpus = 1, max_epochs = 10, gradient_clip_val=0.5)
# trainer = pl.Trainer(gpus = 1, max_epochs = 1, train_percent_check = 0.005, gradient_clip_val=0.5)

In [None]:
# Load (or reload) the TensorBoard notebook extension, then open TensorBoard
# on the event files written under lightning_logs/.
%reload_ext tensorboard
%tensorboard --logdir lightning_logs/

In [None]:
# Write a checkpoint of the trainer's current state to the path 'checkpoints'.
# NOTE(review): the Trainer was created with default_root_dir='./checkpoints';
# if a directory of that name exists, this file path would collide with it —
# confirm, and consider a file name such as 'checkpoints/<name>.ckpt'.
trainer.save_checkpoint('checkpoints')

In [None]:
# Report the mean squared error of the trained model over the full training
# set, without building an autograd graph.
with pt.no_grad():
    # Send the data to whichever device the model currently lives on rather
    # than assuming CUDA: depending on the Lightning version, the Trainer may
    # move the module back to the CPU once fit() has finished, and mixing
    # devices raises a runtime error.
    eval_device = model.device
    print(pt.nn.functional.mse_loss(model(X_train.to(eval_device)), y_train.to(eval_device)))

Copyright 2020 CounterFactual.AI LLC. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.