#### Tracking 


#### Initializing your tracker (PyPads)
First, install `pypads-padre`, which pulls in `pypads` as a dependency:

    pip install pypads-padre

In [1]:
from pypads.app.base import PyPads
# Create the tracker object shared by the example cells of this notebook.
# NOTE(review): autostart=False presumably means no run is started here; the
# example cells call tracker.start_track(...) themselves — confirm.
tracker = PyPads(autostart=False)

Trying to configure padre plugin for pypads...


#### 3D MNIST example

In [None]:
# Start tracking under the experiment name "3D MNIST". This call comes before
# the imports below; the log output of this cell reports that tracking is
# activated "by extending importlib", so the order is presumably intentional.
tracker.start_track(experiment_name= "3D MNIST")
import h5py
import numpy as np


# Loading and tracking your dataset: relative location of the 3D MNIST HDF5 file
path = "data/3d-mnist/full_dataset_vectors.h5"
@tracker.decorators.dataset(name="3DMNIST", target_columns=[-1])
def load_3d_mnist(path):
    """
    The aim of this dataset is to provide a simple way to get started with 3D computer vision problems such as 3D shape recognition.

    Accurate 3D point clouds can (easily and cheaply) be acquired nowadays from different sources:

     - RGB-D devices: Google Tango, Microsoft Kinect, etc.
     - Lidar.
     - 3D reconstruction from multiple images.

    However there is a lack of large 3D datasets (you can find a good one here based on triangular meshes); it's especially hard to find datasets based on point clouds (which is the raw output from every 3D sensing device).

    This dataset contains 3D point clouds generated from the original images of the MNIST dataset to bring a familiar introduction to 3D to people used to work with 2D datasets (images).

    The full dataset is split into arrays:

    X_train (10000, 4096)
    y_train (10000)
    X_test (2000, 4096)
    y_test (2000)

    data is the concatenation of X_train, X_test, y_train, y_test (12000, 4097)

    :param path: path of the HDF5 file holding the four arrays listed above
    :return: data, the training rows followed by the test rows, with the label as last column
    """
    def _with_label_column(features, labels):
        # Turn the label vector into a single column and append it to the features.
        return np.hstack((features, labels.reshape(len(labels), 1)))

    with h5py.File(path, "r") as archive:
        train_rows = _with_label_column(archive["X_train"][:], archive["y_train"][:])
        test_rows = _with_label_column(archive["X_test"][:], archive["y_test"][:])

    # Training rows first, test rows after them.
    return np.vstack((train_rows, test_rows))


# Load the full array through the decorated loader; the label is the last column.
data = load_3d_mnist(path)

# Tracking your train and test splits: in this simple case we follow the original data train : (10000) test: (2000)

@tracker.decorators.splitter()
def splitter(data, index=10000):
    """
    Cut the row indices of ``data`` into a leading and a trailing part.

    :param data: object exposing ``shape``; only ``shape[0]`` is read
    :param index: cut position; indices before it go to the first part
    :return: tuple ``(leading_indices, trailing_indices)`` of index arrays
    """
    import numpy as np

    row_ids = np.arange(data.shape[0])
    leading = row_ids[:index]
    trailing = row_ids[index:]
    return leading, trailing

# Index arrays for the train and test rows.
train, test = splitter(data)
# Do and log stuff
from sklearn.linear_model import LogisticRegression
# Import f1_score from the public sklearn.metrics namespace; the private
# sklearn.metrics.classification module was deprecated and later removed.
from sklearn.metrics import f1_score

# The last column holds the label, every column before it is a feature.
X_train, y_train = data[train, :-1], data[train, -1]
X_test, y_test = data[test, :-1], data[test, -1]

model = LogisticRegression()

model.fit(X_train, y_train)

preds = model.predict(X_test)

# scikit-learn metrics expect (y_true, y_pred) in that order.
f1 = f1_score(y_test, preds, average="macro")

print("F1_score: ", str(f1))

# end_run
tracker.api.end_run()

2020-11-12 11:39:17.437 | INFO     | pypads.app.base:activate_tracking:566 - Activating tracking by extending importlib...


  and should_run_async(code)
Exception in thread Thread-8:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 349, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py"

Exception in thread Thread-14:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 349, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re

2020-11-12 11:39:18.940 | INFO     | pypads.injections.setup.misc_setup:_call:74 - Tracking execution to run with id c35525d5359b460ba03ec75ef2826d61


Exception in thread Thread-16:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re

Exception in thread Thread-20:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re

Exception in thread Thread-25:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re

Exception in thread Thread-31:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re


Exception in thread Thread-35:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    r



Exception in thread Thread-39:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re

Exception in thread Thread-43:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re

Exception in thread Thread-47:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re



Exception in thread Thread-52:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re

2020-11-12 11:39:30.183 | INFO     | pypads.injections.loggers.debug:__pre__:27 - Pypads tracked class <class 'sklearn.linear_model.logistic.LogisticRegression'> initialized.


Exception in thread Thread-54:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re



Exception in thread Thread-56:
Traceback (most recent call last):
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/pypads_onto/injections/converter.py", line 291, in parse
    graph.parse(data=entry.json(by_alias=True), format="json-ld")
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib/graph.py", line 1078, in parse
    parser.parse(source, self, **args)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 95, in parse
    to_rdf(data, conj_sink, base, context_data)
  File "/home/mehdi/anaconda3/envs/pypads-padre-3.7/lib/python3.7/site-packages/rdflib_jsonld/parser.py", line 107, in to_rdf
    re

#### PyTorch example of 3D MNIST

In [None]:
# Activate tracking of pypads
# A new tracker replaces the previous one; autostart is given the name
# "3D-MNIST-Torch" and setup_fns is an empty list.
# NOTE(review): presumably this starts a run under that experiment name right
# away and runs no setup functions — confirm against the PyPads documentation.
tracker = PyPads(autostart="3D-MNIST-Torch", setup_fns=[])

# These imports come after the tracker has been created.
import torch
from torch import nn
from torch.autograd import Variable
import h5py
import numpy as np

@tracker.decorators.dataset(name="3D-MNIST", target_columns=[-1])
def load_3d_mnist(path):
    """
    The aim of this dataset is to provide a simple way to get started with 3D computer vision problems such as 3D shape recognition.

    Accurate 3D point clouds can (easily and cheaply) be acquired nowadays from different sources:

     - RGB-D devices: Google Tango, Microsoft Kinect, etc.
     - Lidar.
     - 3D reconstruction from multiple images.

    However there is a lack of large 3D datasets (you can find a good one here based on triangular meshes); it's especially hard to find datasets based on point clouds (which is the raw output from every 3D sensing device).

    This dataset contains 3D point clouds generated from the original images of the MNIST dataset to bring a familiar introduction to 3D to people used to work with 2D datasets (images).

    The full dataset is split into arrays:

    X_train (10000, 4096)
    y_train (10000)
    X_test (2000, 4096)
    y_test (2000)

    :param path: path of the HDF5 file holding the four arrays listed above
    :return: one array with the training rows followed by the test rows, the label being the last column
    """
    def _with_label_column(features, labels):
        # Turn the label vector into a single column and append it to the features.
        return np.hstack((features, labels.reshape(len(labels), 1)))

    with h5py.File(path, "r") as archive:
        train_rows = _with_label_column(archive["X_train"][:], archive["y_train"][:])
        test_rows = _with_label_column(archive["X_test"][:], archive["y_test"][:])

    # Training rows first, test rows after them.
    return np.vstack((train_rows, test_rows))

class CNNModel(nn.Module):
    """3D convolutional network: two conv/pool stages followed by two linear layers."""

    def __init__(self, dim_output):
        """
        Build all layers.

        :param dim_output: number of output units of the last linear layer
        """
        super().__init__()

        # Convolutional stages: 3 -> 32 -> 64 channels.
        self.conv_layer1 = self._conv_layer_set(3, 32)
        self.conv_layer2 = self._conv_layer_set(32, 64)
        # Linear head; the first layer takes 2 ** 3 * 64 flattened features.
        self.fc1 = nn.Linear(2 ** 3 * 64, 128)
        self.fc2 = nn.Linear(128, dim_output)
        # Activation, normalisation and regularisation applied between fc1 and fc2.
        self.relu = nn.LeakyReLU()
        self.batch = nn.BatchNorm1d(128)
        self.drop = nn.Dropout(p=0.15)

    def _conv_layer_set(self, in_c, out_c):
        """
        Create one stage: 3x3x3 convolution, LeakyReLU, 2x2x2 max pooling.

        :param in_c: number of input channels of the convolution
        :param out_c: number of output channels of the convolution
        :return: the stage as an ``nn.Sequential``
        """
        return nn.Sequential(
            nn.Conv3d(in_c, out_c, kernel_size=(3, 3, 3), padding=0),
            nn.LeakyReLU(),
            nn.MaxPool3d((2, 2, 2)),
        )

    @tracker.decorators.track(event=['pypads_predict'])
    def forward(self, x):
        """
        Run a batch through the network.

        :param x: input batch handed to the first convolutional stage
        :return: output of the last linear layer
        """
        # Both convolutional stages back to back.
        features = self.conv_layer2(self.conv_layer1(x))
        # Keep the batch dimension and flatten everything else.
        flat = features.view(features.size(0), -1)
        # fc1 -> activation -> batch norm -> dropout.
        hidden = self.drop(self.batch(self.relu(self.fc1(flat))))
        return self.fc2(hidden)

# Sample shape
sample_shape = (16, 16, 16, 3)

# Load 3d Mnist data
# Relative path, as used by the other examples in this notebook, instead of an
# absolute path that only exists on a single machine.
path = "data/3d-mnist/full_dataset_vectors.h5"
data = load_3d_mnist(path)
# Small subset: the first 100 rows for training and the rows from 11900 on for
# testing; the last column is the label.
X_train, y_train = data[:100, :-1], data[:100, -1]
X_test, y_test = data[11900:, :-1], data[11900:, -1]

# Reshape data into 3D format (16,16,16)
# NOTE(review): data_transform is not defined in the visible part of this
# notebook; it has to be defined or imported before this cell is run.
X_train = data_transform(X_train, sample_shape)
X_test = data_transform(X_test, sample_shape)

# Features become float tensors, labels become long tensors.
train_x = torch.from_numpy(X_train).float()
train_y = torch.from_numpy(y_train).long()
test_x = torch.from_numpy(X_test).float()
test_y = torch.from_numpy(y_test).long()

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(train_x, train_y)
test = torch.utils.data.TensorDataset(test_x, test_y)

# Definition of hyperparameters (each one is also logged through the tracker)
batch_size = 10
tracker.api.log_param('batch_size', batch_size, description="Batch size")
num_epochs = 10
tracker.api.log_param('num_epochs', num_epochs, description="Number of training epochs")

# Data Loader
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

# Create CNN with 10 output units
# NOTE(review): presumably one output per digit class — confirm the label range.
model = CNNModel(10)
# Moving the model to the GPU is left disabled:
# model.cuda()
print(model)

# Cross Entropy Loss
error = nn.CrossEntropyLoss()

# SGD Optimizer; the learning rate is logged through the tracker before use
learning_rate = 0.001
tracker.api.log_param('learning_rate', learning_rate, description="Learning rate of the optimizer")
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# CNN model training
count = 0
loss_list = []
iteration_list = []
accuracy_list = []
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Let view() infer the batch dimension (-1) instead of hard-coding 100:
        # the loader yields batch_size samples per batch, so a fixed 100 fails.
        # NOTE(review): view() reinterprets the memory layout and does not move
        # a trailing channel axis to the front — confirm this is intended.
        train = images.view(-1, 3, 16, 16, 16)
        # Clear gradients
        optimizer.zero_grad()
        # Forward propagation
        outputs = model(train)
        # Calculate softmax and cross entropy loss
        loss = error(outputs, labels)
        # Calculating gradients
        loss.backward()
        # Update parameters
        optimizer.step()

        count += 1
        if count % 50 == 0:
            # Calculate Accuracy
            correct = 0
            total = 0
            # Evaluation mode: dropout is disabled and batch norm uses its
            # running statistics; no_grad() skips gradient bookkeeping.
            model.eval()
            with torch.no_grad():
                # Iterate through test dataset (separate names so the training
                # batch is not overwritten)
                for test_images, test_labels in test_loader:
                    # Same layout as for training: the model expects 3 channels.
                    test = test_images.view(-1, 3, 16, 16, 16)
                    # Forward propagation
                    outputs = model(test)

                    # Get predictions from the maximum value
                    predicted = torch.max(outputs, 1)[1]

                    # Total number of labels
                    total += len(test_labels)
                    correct += (predicted == test_labels).sum().item()
            # Back to training mode for the next optimisation step.
            model.train()

            accuracy = 100 * correct / float(total)

            # store loss and iteration (as plain Python numbers)
            loss_list.append(loss.item())
            iteration_list.append(count)
            accuracy_list.append(accuracy)

            # NOTE(review): with 100 training samples, batch_size 10 and
            # 10 epochs there are only 100 iterations, so this branch never
            # fires — consider a smaller reporting interval.
            if count % 500 == 0:
                # Print Loss
                print('Iteration: {}  Loss: {}  Accuracy: {} %'.format(count, loss.item(), accuracy))

# end_run, as in the other examples, so the next experiment starts from a closed run
tracker.api.end_run()


#### Covid-19 tweets example

In [None]:
# Start tracking under the experiment name "Covid-19 Tweets"; as in the other
# cells this happens before the imports below.
tracker.start_track(experiment_name= "Covid-19 Tweets")
import pandas as pd
from pandas_profiling import ProfileReport

# Loading and tracking your dataset: relative location of the tweets CSV file
path = "data/covid-19-tweets/covid19_tweets.csv"
@tracker.decorators.dataset(name="Covid-19 Tweets")
def load_covid_tweets(path):
    """
    These tweets are collected using Twitter API and a Python script.
    A query for this high-frequency hashtag (#covid19) is run on a daily basis for a certain time period, to collect a larger number of tweets samples.
    The collection script can be found here: https://github.com/gabrielpreda/covid-19-tweets

    :param path: location of the CSV file to read
    :return: dataframe with the content of the CSV file
    """
    # The file is read with the pandas defaults.
    return pd.read_csv(path)


# Read the tweets through the decorated loader.
df = load_covid_tweets(path)

# Do and log stuff: build a pandas-profiling report for the tweets frame
profile = ProfileReport(df, title="Covid tweets")

# end_run
tracker.api.end_run()


In [None]:
# Bare expression at the end of the cell: the notebook renders the report as cell output.
profile

#### Corona Virus 2019 Dataset

In [None]:
# Start tracking under the experiment name "Corona Virus 2019 Dataset"; as in
# the other cells this happens before the imports below.
tracker.start_track(experiment_name = "Corona Virus 2019 Dataset")
import pandas as pd
from pandas_profiling import ProfileReport

# Loading: relative location of the Covid-19 cases CSV file
path = "data/covid-19/covid_19_data.csv"

@tracker.decorators.dataset(name="Covid-19 cases Dataset")
def load_covid_dataset(path):
    """
    Content

    2019 Novel Coronavirus (2019-nCoV) is a virus (more specifically, a coronavirus) identified as the cause of an outbreak of respiratory illness first detected in Wuhan, China. Early on, many of the patients in the outbreak in Wuhan, China reportedly had some link to a large seafood and animal market, suggesting animal-to-person spread. However, a growing number of patients reportedly have not had exposure to animal markets, indicating person-to-person spread is occurring. At this time, it’s unclear how easily or sustainably this virus is spreading between people - CDC

    This dataset has daily level information on the number of affected cases, deaths and recovery from 2019 novel coronavirus. Please note that this is a time series data and so the number of cases on any given day is the cumulative number.

    The data is available from 22 Jan, 2020.

    Column Description

    Main file in this dataset is covid_19_data.csv and the detailed descriptions of the features are below.

        - Sno - Serial number
        - ObservationDate - Date of the observation in MM/DD/YYYY
        - Province/State - Province or state of the observation (Could be empty when missing)
        - Country/Region - Country of observation
        - Last Update - Time in UTC at which the row is updated for the given province or country. (Not standardised and so please clean before using it)
        - Confirmed - Cumulative number of confirmed cases till that date
        - Deaths - Cumulative number of deaths till that date
        - Recovered - Cumulative number of recovered cases till that date

    :param path: location of the CSV file to read
    :return: dataframe with the content of the CSV file
    """
    # The file is read with the pandas defaults.
    return pd.read_csv(path)

# Read the Covid-19 cases file through the decorated loader.
data = load_covid_dataset(path)


# Do and log stuff
# Profile the frame loaded in this cell; the name `df` still refers to the
# tweets frame of the previous example and would produce the wrong report.
profile = ProfileReport(data, title="Covid Dataset")

# end_run
tracker.api.end_run()

In [None]:
# Bare expression at the end of the cell: the notebook renders the report as cell output.
profile