# Trane Quickstart Guide - COVID-19 Example

In [1]:
import trane
import pandas as pd
import json
from datetime import datetime
from urllib.request import urlopen


# Base URL of the example dataset and its column metadata in the Trane repository.
data_url = "https://raw.githubusercontent.com/HDI-Project/Trane/main/Examples/covid/"

# Load the raw records, parse the "Date" strings (month/day/two-digit year) in a
# single vectorized call, order the rows chronologically and replace missing
# values with 0.
df = pd.read_csv(f"{data_url}covid19.csv")
df["Date"] = pd.to_datetime(df["Date"], format="%m/%d/%y")
df = df.sort_values(by=["Date"])
df = df.fillna(0)

# Download the column metadata. The context manager closes the HTTP response as
# soon as the JSON payload has been read, instead of leaving the connection open.
with urlopen(f"{data_url}meta_covid.json") as meta_covid_response:
    meta_covid = trane.TableMeta(json.loads(meta_covid_response.read()))

In [2]:
# Columns that identify each entity and hold the timestamp of each record.
entity_col, time_col = "Country/Region", "Date"

# Cutoff configuration: the window length plus the two date bounds passed to the
# strategy as minimum_data / maximum_data.
window_size = "2d"
cutoff_base, cutoff_end = "2020-01-22", "2020-03-29"

cutoff_strategy = trane.CutoffStrategy(
    entity_col,
    window_size=window_size,
    minimum_data=cutoff_base,
    maximum_data=cutoff_end,
)

In [3]:
import trane
import json
from urllib.request import urlopen

# Reuse the TableMeta that was built when the dataset was loaded rather than
# downloading and parsing meta_covid.json a second time. entity_col, time_col and
# cutoff_strategy are the values already defined in the earlier cells.
table_meta = meta_covid

problem_generator = trane.PredictionProblemGenerator(
    entity_col=entity_col,
    time_col=time_col,
    cutoff_strategy=cutoff_strategy,
    table_meta=table_meta,
)

# Generate the candidate prediction problems for the loaded dataframe.
problems = problem_generator.generate(df, generate_thresholds=True)

  0%|          | 0/1044 [00:00<?, ?it/s]

Success/Attempt = 515/1044


In [4]:
# Preview a handful of the generated problems. Indexes past the end of `problems`
# are skipped so the preview still runs when fewer problems are generated than
# the largest hard-coded index.
picked_indexes = [i for i in (1, 50, 200, 300, 400) if i < len(problems)]
for picked_index in picked_indexes:
    problem = problems[picked_index]
    problem_sentence = str(problem)
    print(problem_sentence)
    print("----")

print(f"\nTotal Number of Prediction Problems = {len(problems)}")

For each <Country/Region> predict the number of records with <Lat> greater than 41.1533 in next 2d days
----
For each <Country/Region> predict the total <Long> in all related records with <Long> greater than -23.0418 in next 2d days
----
For each <Country/Region> predict the average <Confirmed> in all related records with <Recovered> greater than 0 in next 2d days
----
For each <Country/Region> predict the maximum <Deaths> in all related records with <Long> greater than -19.0208 in next 2d days
----
For each <Country/Region> predict the minimum <Long> in all related records with <Lat> greater than 41.3775 in next 2d days
----

Total Number of Prediction Problems = 515


In [5]:
# Take the first generated problem and compute its label times on the dataframe.
problem = problems[0]
problem_sentence = str(problem)
# NOTE(review): the positional -1 presumably asks for every available example
# per entity -- confirm against trane's execute() signature.
label_times = problem.execute(df, -1, verbose=False)

# Show the problem statement followed by the first five label rows.
print(problem_sentence, "\n")
print(label_times.head())

For each <Country/Region> predict the number of records in next 2d days 

  Country/Region       time  _execute_operations_on_df
0    Afghanistan 2020-01-22                          2
1    Afghanistan 2020-01-24                          2
2    Afghanistan 2020-01-26                          2
3    Afghanistan 2020-01-28                          2
4    Afghanistan 2020-01-30                          2


In [6]:
# Wrap the dataframe for feature generation, then compute a feature matrix for
# the label times using the same window size as the cutoff strategy.
ft_wrapper = trane.FeaturetoolsWrapper(
    df=df,
    entity_col=entity_col,
    time_col=time_col,
    name="covid",
)
feature_matrix, features = ft_wrapper.compute_features(label_times, window_size)

Built 41 features
Elapsed: 00:03 | Progress: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████


In [7]:
# Print the first five generated feature definitions, one per line.
for feature in features[:5]:
    print(feature)

<Feature: COUNT(covid)>
<Feature: MAX(covid.Confirmed)>
<Feature: MAX(covid.Deaths)>
<Feature: MAX(covid.Lat)>
<Feature: MAX(covid.Long)>


In [8]:
# Move the index levels back into ordinary columns and display the first five rows.
feature_matrix.reset_index().head()

Unnamed: 0,Country/Region,time,COUNT(covid),MAX(covid.Confirmed),MAX(covid.Deaths),MAX(covid.Lat),MAX(covid.Long),MAX(covid.Recovered),MEAN(covid.Confirmed),MEAN(covid.Deaths),...,SUM(covid.Recovered),MODE(covid.DAY(Date)),MODE(covid.MONTH(Date)),MODE(covid.WEEKDAY(Date)),MODE(covid.YEAR(Date)),NUM_UNIQUE(covid.DAY(Date)),NUM_UNIQUE(covid.MONTH(Date)),NUM_UNIQUE(covid.WEEKDAY(Date)),NUM_UNIQUE(covid.YEAR(Date)),_execute_operations_on_df
0,Afghanistan,2020-01-22,1,0.0,0.0,33.0,65.0,0.0,0.0,0.0,...,0.0,22,1,2,2020,1,1,1,1,2
1,Afghanistan,2020-01-24,3,0.0,0.0,33.0,65.0,0.0,0.0,0.0,...,0.0,22,1,2,2020,3,1,3,1,2
2,Afghanistan,2020-01-26,5,0.0,0.0,33.0,65.0,0.0,0.0,0.0,...,0.0,22,1,2,2020,5,1,5,1,2
3,Afghanistan,2020-01-28,7,0.0,0.0,33.0,65.0,0.0,0.0,0.0,...,0.0,22,1,0,2020,7,1,7,1,2
4,Afghanistan,2020-01-30,9,0.0,0.0,33.0,65.0,0.0,0.0,0.0,...,0.0,22,1,2,2020,9,1,7,1,2


In [9]:
# Encode the feature matrix through the wrapper.
feature_matrix_encoded, features_encoded = ft_wrapper.encode_features(
    feature_matrix,
    features,
)

# Keep the label column as the target vector and remove it from the encoded
# matrix so it is not used as an input feature.
y = feature_matrix_encoded["_execute_operations_on_df"]
feature_matrix_encoded = feature_matrix_encoded.drop(columns="_execute_operations_on_df")