In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory recursively and print the full path of
# every file found (one path per line).
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the file name to get its path.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
! pip install pmdarima

In [None]:
import pandas as pd


class DataProcessor:
    """
    Loads the competition CSV files and turns them into train/test frames
    indexed by a geography identifier.

    The constructor reads ``train.csv``, ``test.csv`` and ``submission.csv``
    and converts the ``Date`` column of the train and test frames to datetime.

    Parameters
    ----------
    data_dir: str, optional=None
        Directory containing the three CSV files. When None, the Kaggle
        input folder of the week-4 competition is used.

    Attributes
    ----------
    train, test, submission: pd.DataFrame
        Frames read from disk (``Date`` already parsed for train and test).
    """

    # Kaggle location of the competition files, used when no data_dir is given.
    DEFAULT_DATA_DIR = "../input/covid19-global-forecasting-week-4"

    def __init__(self, data_dir=None):
        self.data_dir = self.DEFAULT_DATA_DIR if data_dir is None else data_dir
        self.train, self.test, self.submission = self._read_data()
        self._convert_to_datetime()

    def _read_data(self):
        """
        Reads the train, test and submission CSV files from ``self.data_dir``.
        Returns
        -------
        train, test, submission: pd.DataFrame
        """
        train = pd.read_csv(f"{self.data_dir}/train.csv")
        test = pd.read_csv(f"{self.data_dir}/test.csv")
        submission = pd.read_csv(f"{self.data_dir}/submission.csv")
        return train, test, submission

    def _convert_to_datetime(self):
        """
        Converts the ``Date`` column of the train and test frames to datetime,
        in place on ``self.train`` and ``self.test``.
        """
        self.train["Date"] = pd.to_datetime(self.train["Date"])
        self.test["Date"] = pd.to_datetime(self.test["Date"])

    def group(self, dataset, by):
        """Groups ``dataset`` by ``by`` and sums each group."""
        return dataset.groupby(by).sum()

    def run_pipeline(self, start_train_date, end_train_date):
        """
        Builds the train and test frames, both indexed by ``Geo_id``
        ("<Country_Region>_<Province_State>").
        Parameters
        ----------
        start_train_date: datetime64 [ns]
            First date (inclusive) of the training set
        end_train_date: datetime64 [ns]
            Last date (inclusive) of the training set; the test set holds
            every later date
        Returns
        -------
        y_train, y_test: pd.DataFrame
        """
        y_train = self._make_train_set(self.train, start_train_date, end_train_date)
        y_test = self._make_test_set(self.test, end_train_date)
        return self._index_by_geo(y_train), self._index_by_geo(y_test)

    def _index_by_geo(self, dataset):
        """
        Returns a copy of ``dataset`` indexed by ``Geo_id`` with the two
        geography columns removed.
        """
        geo_columns = ["Country_Region", "Province_State"]
        # Only the geography columns get the "None" placeholder; numeric
        # columns must keep their NaN values instead of turning into strings.
        filled = dataset.fillna({column: "None" for column in geo_columns})
        geo_id = filled["Country_Region"] + "_" + filled["Province_State"]
        return (
            filled.assign(Geo_id=geo_id)
            .set_index("Geo_id")
            .drop(columns=geo_columns)
        )

    def _make_train_set(self, dataset, start_train_date, end_train_date):
        """
        Makes the train set based on the
        dates.
        Parameters
        ----------
        dataset: pd.DataFrame
            Dataframe with the time series to be split
        start_train_date: datetime64 [ns]
            Start date of the training set (inclusive)
        end_train_date: datetime64 [ns]
            End date of the training set (inclusive)
        Returns
        -------
        y_train: pd.DataFrame
            Dataframe with the train set, re-indexed from 0 and without
            the ``Id`` column
        """
        # Both bounds are inclusive so the first requested day is not dropped.
        train_mask = (
            (dataset["Date"] >= start_train_date)
            & (dataset["Date"] <= end_train_date)
        )
        y_train = dataset.loc[train_mask].reset_index(drop=True)
        return y_train.drop(columns="Id")

    def _make_test_set(self, dataset, end_train_date):
        """
        Makes the test set based on the
        dates.
        Parameters
        ----------
        dataset: pd.DataFrame
            Dataframe with the time series to be split
        end_train_date: datetime64 [ns]
            End date of the training set; only strictly later rows are kept
        Returns
        -------
        y_test: pd.DataFrame
            Dataframe with the test set
        """
        test_mask = dataset["Date"] > end_train_date
        return dataset.loc[test_mask]


In [None]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.statespace.varmax import VARMAX
from statsmodels.tsa.statespace.dynamic_factor import DynamicFactor
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.exponential_smoothing.ets import ETSModel
from statsmodels.tsa.forecasting.stl import STLForecast
from statsmodels.tsa.statespace.dynamic_factor_mq import DynamicFactorMQ
from statsmodels.tsa.api import VAR


class Model:
    """
    This class wraps the initialization, fit and forecast with the
    user-specified model of the statsmodels library
    Parameters
    ----------
    model_name: str
        Supports {"VAR", "VARMAX", "SARIMAX", "ARIMA", "DFM", "DFMMQ",
                  "ExponentialSmoothing", "ETSModel", "STLF"}.
        Any other name raises a KeyError.
    parameters: dict[str]
        Keyword arguments forwarded to the constructor of the selected
        model. They depend on the model.
        Ex for VARMAX:
            parameters = {"endog": y_train, "exog": X_train, "order": (1, 0)}
        For "STLF" the "model" entry may be the string "ARIMA" or
        "ExponentialSmoothing"; it is replaced by the matching class.
        The dictionary passed by the caller is never modified.
    """
    def __init__(self, model_name, parameters):
        _models_dict = {
            "VAR": VAR,
            "VARMAX": VARMAX,
            "SARIMAX": SARIMAX,
            "ARIMA": ARIMA,
            "DFM": DynamicFactor,
            "DFMMQ": DynamicFactorMQ,
            "ExponentialSmoothing": ExponentialSmoothing,
            "ETSModel": ETSModel,
            "STLF": STLForecast
        }
        self.model_name = model_name
        # Shallow copy: the STLF substitution below must not leak into the
        # dictionary owned by the caller.
        parameters = dict(parameters)
        if model_name == "STLF":
            if parameters["model"] == "ARIMA":
                parameters["model"] = ARIMA
            elif parameters["model"] == "ExponentialSmoothing":
                parameters["model"] = ExponentialSmoothing
        self.model = _models_dict[self.model_name](**parameters)

    def fit(self, parameters):
        """
        Fits the model and stores the results in ``self.results`` and the
        in-sample values in ``self.y_fitted``.
        Parameters
        ----------
        parameters: dict[str]
            Model-dependent-fit-parameters.
        """
        # The registry key for DynamicFactorMQ is "DFMMQ"; that model is
        # fitted through its EM routine.
        if self.model_name == "DFMMQ":
            self.results = self.model.fit_em(**parameters)
        else:
            self.results = self.model.fit(**parameters)
        # For STLF the ``result`` attribute of the fit is stored instead of
        # ``fittedvalues``.
        if self.model_name == "STLF":
            self.y_fitted = self.results.result
        else:
            self.y_fitted = self.results.fittedvalues

    def forecast(self, parameters):
        """
        Makes a forecast with the fitted model and stores it in ``self.y_hat``.
        Parameters
        ----------
        parameters: dict[str]
            Model-dependent-forecast-parameters.
        """
        # Every model this class can build exposes ``forecast`` on its
        # results object, so no per-model branching is required.
        self.y_hat = self.results.forecast(**parameters)

    def get_info_criteria(self, ic):
        """Returns ``self.results.info_criteria(ic)`` for the fitted model."""
        return self.results.info_criteria(ic)

    def get_var_order(self):
        """Returns the fitted lag order (``k_ar``) for VAR models, else None."""
        if self.model_name == "VAR":
            return self.results.k_ar
        return None

In [None]:
class ModelParametersMaker:
    """
        Maps model parameters into class dictionaries.

        After construction ``self.parameters`` holds three dictionaries:
        "init_parameters", "fit_parameters" and "forecast_parameters".

        Parameters
        ----------
        num_forecast_observations: int
            Defines number of forecast observations.
        model_name: str
            One of {"VAR", "VARMAX", "DFM", "DFMMQ", "SARIMAX", "ARIMA",
            "ExponentialSmoothing", "ETSModel", "STLF", "AUTO_ARIMA",
            "AUTO_SARIMAX"}.
        y_train: pd.DataFrame()
            DataFrame containing the train set.
        parameters: dict
            Keyword arguments of the ``make_*_parameters`` method that
            matches ``model_name``.
        X_train, X_test: pd.DataFrame
            Exogenous variables dataframes

        Raises
        ------
        ValueError
            If ``model_name`` is not one of the supported names.
    """

    def __init__(self, num_forecast_observations, model_name, y_train,
                 parameters, X_train=None, X_test=None):
        self.num_forecast_observations = num_forecast_observations
        self.y_train = y_train
        self.X_train = X_train
        self.X_test = X_test
        builders = {
            "VAR": self.make_var_parameters,
            "VARMAX": self.make_varmax_parameters,
            "DFM": self.make_dfm_parameters,
            "DFMMQ": self.make_dfm_mq_parameters,
            "SARIMAX": self.make_sarimax_parameters,
            "ARIMA": self.make_arima_parameters,
            "ExponentialSmoothing": self.make_exponential_smoothing_parameters,
            "ETSModel": self.make_ets_model_parameters,
            "STLF": self.make_stlf_parameters,
            "AUTO_ARIMA": self.make_auto_arima_parameters,
            "AUTO_SARIMAX": self.make_auto_sarimax_parameters,
        }
        if model_name not in builders:
            # Fail here rather than leave the instance without ``parameters``
            # and crash later with an unrelated AttributeError.
            raise ValueError(f"Model doesn't exist: {model_name!r}")
        builders[model_name](**parameters)

    def make_var_parameters(self, maxlags, ic="aic", trend="c"):
        """Builds the parameter dictionaries for the VAR model."""
        self.parameters = {
            "init_parameters": {"endog": self.y_train,
                                "exog": self.X_train},
            "fit_parameters": {"maxlags": maxlags,
                               "ic": ic,
                               "trend": trend},
            "forecast_parameters": {"y": self.y_train.values,
                                    "steps": self.num_forecast_observations,
                                    "exog_future": self.X_test}
        }

    def make_varmax_parameters(self, order, trend="c", measurement_error=False,
                               ic="aic"):
        """Builds the parameter dictionaries for the VARMAX model."""
        self.parameters = {
            "init_parameters": {"endog": self.y_train,
                                "exog": self.X_train,
                                "order": order,
                                "trend": trend,
                                "measurement_error": measurement_error},
            "fit_parameters": {"ic": ic},
            "forecast_parameters": {"y": self.y_train.values,
                                    "steps": self.num_forecast_observations,
                                    "exog": self.X_test}
        }

    def make_dfm_parameters(self, k_factors, factor_order,
                            error_order):
        """Builds the parameter dictionaries for the DynamicFactor model."""
        self.parameters = {
            "init_parameters": {"endog": self.y_train,
                                "exog": self.X_train,
                                "k_factors": k_factors,
                                "factor_order": factor_order,
                                "error_order": error_order},
            "fit_parameters": {},
            "forecast_parameters": {"steps": self.num_forecast_observations,
                                    "exog": self.X_test}
        }

    def make_dfm_mq_parameters(self, factors, factor_orders,
                               factor_multiplicities, standardize):
        """Builds the parameter dictionaries for the DynamicFactorMQ model."""
        self.parameters = {
            "init_parameters": {"endog": self.y_train,
                                "k_endog_monthly": None,
                                "factors": factors,
                                "factor_orders": factor_orders,
                                "factor_multiplicities": factor_multiplicities,
                                "idiosyncratic_ar1": True,
                                "standardize": standardize,
                                "endog_quarterly": None,
                                "init_t0": False,
                                "obs_cov_diag": False},
            "fit_parameters": {},
            "forecast_parameters": {"steps": self.num_forecast_observations}
        }

    def make_sarimax_parameters(self, order, seasonal_order):
        """Builds the parameter dictionaries for the SARIMAX model."""
        self.parameters = {
            "init_parameters": {"endog": self.y_train,
                                "order": order,
                                "seasonal_order": seasonal_order,
                                "enforce_stationarity": False,
                                "enforce_invertibility": False},
            "fit_parameters": {},
            "forecast_parameters": {"steps": self.num_forecast_observations}
        }

    def make_arima_parameters(self, order):
        """Builds the parameter dictionaries for the ARIMA model."""
        self.parameters = {
            "init_parameters": {"endog": self.y_train,
                                "order": order,
                                "enforce_stationarity": False,
                                "enforce_invertibility": False},
            "fit_parameters": {},
            "forecast_parameters": {"steps": self.num_forecast_observations}
        }

    def make_exponential_smoothing_parameters(self, trend, damped, seasonal,
                                              seasonal_periods, use_boxcox,
                                              remove_bias):
        """
        Builds the parameter dictionaries for ExponentialSmoothing.

        ``damped`` is forwarded as ``damped_trend`` and ``use_boxcox`` is set
        at construction time: the statsmodels releases that ship the other
        models used in this notebook deprecate the ``damped`` keyword and
        setting ``use_boxcox`` during ``fit``.
        """
        self.parameters = {
            "init_parameters": {
                "endog": self.y_train,
                "trend": trend,
                "damped_trend": damped,
                "seasonal": seasonal,
                "seasonal_periods": seasonal_periods,
                "use_boxcox": use_boxcox
            },
            "fit_parameters": {
                "optimized": True,
                "remove_bias": remove_bias
            },
            "forecast_parameters": {"steps": self.num_forecast_observations}
        }

    def make_ets_model_parameters(self, error, trend, damped_trend, seasonal,
                                  seasonal_periods, freq="MS"):
        """
        Builds the parameter dictionaries for ETSModel.

        ``freq`` is the frequency string attached to the ``y_train`` index;
        it defaults to "MS" to keep the previous behaviour.
        """
        self.parameters = {
            "init_parameters": {
                "endog": self.y_train.values.flatten(),
                "error": error,
                "trend": trend,
                "damped_trend": damped_trend,
                "seasonal": seasonal,
                "seasonal_periods": seasonal_periods,
                "freq": freq,
                "dates": self.y_train.index
            },
            "fit_parameters": {},
            "forecast_parameters": {"steps": self.num_forecast_observations}
        }

    def make_stlf_parameters(self, model, model_kwargs, robust):
        """Builds the parameter dictionaries for STLForecast."""
        self.parameters = {
            "init_parameters": {
                "endog": self.y_train,
                "model": model,
                "model_kwargs": model_kwargs,
                "robust": robust
            },
            "fit_parameters": {},
            "forecast_parameters": {"steps": self.num_forecast_observations}
        }

    def make_auto_arima_parameters(self, start_p, start_q, max_p, max_d,
                                   max_q, start_P, start_Q, max_P, D,
                                   max_Q, m, max_order, trend, stepwise, include_D):
        """
        Builds the parameter dictionaries for pmdarima's ``auto_arima``.

        There are no init parameters: "fit_parameters" are the keyword
        arguments of ``auto_arima`` and "forecast_parameters" those of the
        ``predict`` method of the model it returns.
        """
        self.parameters = {
            "init_parameters": {},
            "fit_parameters": {"y": self.y_train,
                               "start_p": start_p,
                               "start_q": start_q,
                               "max_p": max_p,
                               "max_d": max_d,
                               "max_q": max_q,
                               "start_P": start_P,
                               "start_Q": start_Q,
                               "max_P": max_P,
                               "D": D,
                               "max_Q": max_Q,
                               "m": m,
                               "max_order": max_order,
                               "trend": trend,
                               "stepwise": stepwise,
                               "include_D": include_D},
            # pmdarima's predict() names the horizon ``n_periods``, not ``steps``.
            "forecast_parameters": {"n_periods": self.num_forecast_observations}
        }

    def make_auto_sarimax_parameters(self, parameters):
        """
        Builds the parameter dictionaries for AUTO_SARIMAX.

        ``parameters`` is used as the fit dictionary with the training series
        added under "y". A copy is made so the caller's dict is not modified.
        """
        fit_parameters = dict(parameters)
        fit_parameters["y"] = self.y_train
        self.parameters = {
            "init_parameters": {},
            "fit_parameters": fit_parameters,
            "forecast_parameters": {"steps": self.num_forecast_observations}
        }

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_log_error
from sklearn.model_selection import TimeSeriesSplit
from pmdarima.arima import auto_arima


INIT_PARAMETERS = "init_parameters"
FIT_PARAMETERS = "fit_parameters"
FORECAST_PARAMETERS = "forecast_parameters"


class CVOptimizer:
    """
    Implements Cross Validation for Time Series wrapping the
    sklearn.model_selection.TimeSeriesSplit method. The cross
    validation score is the mean of the RMSLE of each fold.
    Parameters
    ----------
    model_name: str
        Name of the model to evaluate, e.g. 'ExponentialSmoothing' or
        'AUTO_ARIMA'. 'AUTO_ARIMA' is fitted through pmdarima's auto_arima;
        every other name goes through the Model wrapper.
    train_set: pd.DataFrame or pd.Series
        Univariate or Multivariate Time Series
    country_name: str
        Country name (only used in debugging messages)
    test_fold_size: int
        Size of the test fold of the cross validation
    verbose: bool
        Whether or not to print debugging info
    cv: int
        Number of folds
    """
    def __init__(self, model_name, train_set,
                 country_name, test_fold_size,
                 verbose=False, cv=3):
        self.model_name = model_name
        self.train_set = train_set
        self.country_name = country_name
        self.test_fold_size = test_fold_size
        self.verbose = verbose
        self.cv = cv

    def cross_val_score(self, param_combination):
        """
        Calculates the cross validation score for the given number of folds.
        Stores the tuple (score, param_combination) in ``self.model_scores``,
        where score is the mean of the RMSLE of each fold.
        Parameters
        ----------
        param_combination: dict
            Keys: parameter names of the model
            Values: value of the parameters
            Example:
                "param_combination" = {
                    "maxlags": maxlags,
                    "trend": trend
                }
        Exception
        ---------
        A fold whose model cannot be built, fitted, forecast or scored does
        not raise: it is scored as infinite and then handled by
        ``_handle_inf_scores``.
        """
        tscv = TimeSeriesSplit(n_splits=self.cv)
        model_rmsle = []
        folds = enumerate(tscv.split(self.train_set), start=1)
        for k_fold, (train_index, test_index) in folds:
            if self.verbose:
                print(f"Training fold number {k_fold}")

            y_train, y_test = self._make_sets(train_index, test_index)
            try:
                forecast = self._forecast_fold(y_train, param_combination)
                model_rmsle.append(self._measure_rmsle(y_test, forecast))
                if self.verbose:
                    print("Success")
            except Exception as e:
                if self.verbose:
                    print(f"Error for series {self.country_name}")
                    print(e)
                    import traceback
                    traceback.print_exc()
                model_rmsle.append(np.inf)
        model_rmsle = self._handle_inf_scores(model_rmsle)
        mean_score = np.mean(model_rmsle)
        self.model_scores = (mean_score, param_combination)

    def _forecast_fold(self, y_train, param_combination):
        """
        Fits the configured model on ``y_train`` and returns a forecast of
        ``self.test_fold_size`` observations.
        """
        num_forecast_observations = self.test_fold_size
        model_parameters_maker = ModelParametersMaker(
            num_forecast_observations, self.model_name, y_train,
            param_combination
        )
        parameters = model_parameters_maker.parameters
        if self.model_name == "AUTO_ARIMA":
            model = auto_arima(**parameters[FIT_PARAMETERS])
            # pmdarima names the forecast horizon ``n_periods``; pass it
            # explicitly rather than relying on the forecast dictionary.
            return model.predict(n_periods=num_forecast_observations)
        return self._run_model_pipeline(parameters).y_hat

    def _make_sets(self, train_index, test_index):
        """
        Turns one TimeSeriesSplit fold into train and test sets.

        Only the last ``test_fold_size`` positions of ``test_index`` are kept
        for testing; the earlier ones are appended to the train positions.
        """
        train_index_ = np.concatenate(
            (train_index, test_index[:-self.test_fold_size]),
            axis=None
        )
        test_index_ = test_index[-self.test_fold_size:]
        y_train = self.train_set.iloc[train_index_]
        y_test = self.train_set.iloc[test_index_]
        return y_train, y_test

    def _run_model_pipeline(self, model_parameters):
        """
        Runs the model pipeline: initialization, fitting, forecast
        Parameters
        ----------
        model_parameters: dict[str : dict]
            Keys: {"init_parameters", "fit_parameters", "forecast_parameters"}
            Values: depend on the model
        Returns
            Model after running pipeline
        """
        model = Model(
            self.model_name, model_parameters[INIT_PARAMETERS]
        )
        model.fit(model_parameters[FIT_PARAMETERS])
        model.forecast(model_parameters[FORECAST_PARAMETERS])
        return model

    def _measure_rmsle(self, actual, predicted):
        """
        Computes the root mean squared logarithmic error (RMSLE) between
        the actual and the predicted values.
        Parameters
        ----------
        actual : pd.DataFrame
            DataFrame containing the observed values
        predicted : pd.DataFrame
            DataFrame containing the predicted values
        Returns
        -------
            RMSLE between the actual and predicted values
        """
        return np.sqrt(mean_squared_log_error(actual, predicted))

    def _handle_inf_scores(self, model_rmsle):
        """
        Replaces non-finite fold scores by twice the score of the last fold.

        If the last fold itself is non-finite the replacement is non-finite
        too, so the mean score of that parameter combination stays infinite.
        """
        np_array = np.array(model_rmsle)
        inf_mask = ~np.isfinite(np_array)
        # Penalizes models that scored inf. It will double
        # the last fold's score and attribute it to the
        # folds that scored infinite
        np_array[inf_mask] = np_array[self.cv-1] * 2
        return list(np_array)

In [None]:
import warnings

import numpy as np
import pandas as pd
from pmdarima.arima import auto_arima


INIT_PARAMETERS = "init_parameters"
FIT_PARAMETERS = "fit_parameters"
FORECAST_PARAMETERS = "forecast_parameters"


def get_exp_param_combinations():
    """
    Builds the hyper-parameter grid searched for ExponentialSmoothing.

    Returns
    -------
    param_combinations: list[dict]
        One dictionary per element of the cartesian product of the candidate
        values below, with keys "trend", "damped", "seasonal",
        "seasonal_periods", "use_boxcox" and "remove_bias". The first key
        varies slowest and the last key fastest.
    """
    from itertools import product

    # Insertion order fixes both the key order of every combination and the
    # iteration order of the product.
    grid = {
        "trend": ["add", "mul", None],
        "damped": [True, False],
        "seasonal": ["add", "mul", None],
        "seasonal_periods": [30],
        "use_boxcox": [True, False],
        "remove_bias": [True, False],
    }
    names = list(grid)
    return [dict(zip(names, values)) for values in product(*grid.values())]


def get_auto_param_combinations():
    """
    Returns the auto-ARIMA search configurations to evaluate.

    Returns
    -------
    param_combinations: list[dict]
        A list holding a single configuration dictionary.
    """
    search_space = {
        "start_p": 0,
        "start_q": 0,
        "max_p": 5,
        "max_d": 2,
        "max_q": 5,
        "start_P": 0,
        "start_Q": 0,
        "max_P": 5,
        "D": 1,
        "max_Q": 5,
        "m": 30,
        "max_order": 12,
        "trend": "t",
        "stepwise": True,
        # include_D: whether or not to force a seasonal differentiation
        "include_D": False,
    }
    return [search_space]



def optimize(model_name, y_train, param_combinations, geo, test_fold_size, verbose=False, cv=3):
    """
    Cross-validates every parameter combination and ranks the results.

    A CVOptimizer is created for each entry of ``param_combinations`` and
    its ``model_scores`` tuple (score, param_combination) is collected.

    Returns
    -------
    list[tuple]
        The collected tuples sorted by ascending score.
    """
    scored = []
    for candidate in param_combinations:
        optimizer = CVOptimizer(
            model_name, y_train, geo, test_fold_size, verbose=verbose, cv=cv
        )
        optimizer.cross_val_score(candidate)
        scored.append(optimizer.model_scores)
    # sorted() is stable, like list.sort(): ties keep their evaluation order.
    return sorted(scored, key=lambda pair: pair[0])


def best_exp_model_forecast(exp_model_name, best_exp_score,
                            num_forecast_observations, y_train):
    """
    Fits ``exp_model_name`` on ``y_train`` with the winning parameters and
    returns its forecast.

    Parameters
    ----------
    exp_model_name: str
        Model name understood by ModelParametersMaker and Model.
    best_exp_score: tuple
        (score, param_combination); only the parameters at index 1 are used.
    num_forecast_observations: int
        Number of steps to forecast.
    y_train:
        Training series handed to the parameter maker.

    Returns
    -------
    The ``y_hat`` attribute of the model after forecasting.
    """
    parameter_sets = ModelParametersMaker(
        num_forecast_observations, exp_model_name, y_train, best_exp_score[1]
    ).parameters
    fitted = Model(exp_model_name, parameter_sets[INIT_PARAMETERS])
    fitted.fit(parameter_sets[FIT_PARAMETERS])
    fitted.forecast(parameter_sets[FORECAST_PARAMETERS])
    return fitted.y_hat


def best_auto_model_forecast(auto_model_name, best_auto_score,
                             num_forecast_observations, y_train):
    """
    Runs ``auto_arima`` on ``y_train`` with the winning search configuration
    and returns its forecast.

    Parameters
    ----------
    auto_model_name: str
        Model name understood by ModelParametersMaker.
    best_auto_score: tuple
        (score, param_combination); only the parameters at index 1 are used.
    num_forecast_observations: int
        Number of periods to predict.
    y_train:
        Training series handed to the parameter maker.

    Returns
    -------
    The value returned by ``predict`` for ``num_forecast_observations`` periods.
    """
    fit_kwargs = ModelParametersMaker(
        num_forecast_observations, auto_model_name, y_train, best_auto_score[1]
    ).parameters[FIT_PARAMETERS]
    fitted = auto_arima(**fit_kwargs)
    return fitted.predict(n_periods=num_forecast_observations)

def run_pipeline(y_train, geo, test_fold_size, num_forecast_observations,
                 exp_weight=0.75):
    """
    Forecasts one series by blending exponential smoothing with auto-ARIMA.

    The ExponentialSmoothing grid is cross-validated and the best
    combination is refitted on ``y_train`` to forecast. The same is then
    attempted with AUTO_ARIMA. If that succeeds the two forecasts are
    averaged with weights ``exp_weight`` and ``1 - exp_weight``; if it raises
    an Exception, a warning is emitted and the exponential smoothing forecast
    is returned alone.

    Parameters
    ----------
    y_train:
        Training series.
    geo: str
        Identifier of the series, forwarded to the optimizer and used in the
        warning message.
    test_fold_size: int
        Size of the test fold used in cross validation.
    num_forecast_observations: int
        Number of observations to forecast.
    exp_weight: float, optional=0.75
        Weight given to the exponential smoothing forecast in the blend.

    Returns
    -------
    y_hat:
        The blended forecast, or the exponential smoothing forecast alone.
    """
    exp_model_name = "ExponentialSmoothing"
    exp_scores = optimize(
        exp_model_name, y_train, get_exp_param_combinations(), geo,
        test_fold_size, verbose=False, cv=3
    )
    y_hat_exp = best_exp_model_forecast(
        exp_model_name, exp_scores[0], num_forecast_observations, y_train
    )
    if isinstance(y_hat_exp, pd.Series):
        y_hat_exp = y_hat_exp.values

    auto_model_name = "AUTO_ARIMA"
    try:
        auto_scores = optimize(
            auto_model_name, y_train, get_auto_param_combinations(), geo,
            test_fold_size, verbose=False, cv=3
        )
        y_hat_auto = best_auto_model_forecast(
            auto_model_name, auto_scores[0], num_forecast_observations, y_train
        )
        if isinstance(y_hat_auto, pd.Series):
            y_hat_auto = y_hat_auto.values
        return y_hat_exp * exp_weight + y_hat_auto * (1 - exp_weight)
    except Exception as error:
        # Best effort: keep the exponential smoothing forecast, but report the
        # failure. KeyboardInterrupt/SystemExit are deliberately not caught.
        warnings.warn(
            f"AUTO_ARIMA failed for {geo}; "
            f"using exponential smoothing only ({error!r})"
        )
        return y_hat_exp


if __name__ == "__main__":
    # Script entry point: forecast ConfirmedCases and Fatalities for every
    # geography and write the results to submission.csv.

    data_processor = DataProcessor()
    START_TRAIN_DATE = pd.to_datetime("2020-01-22")
    END_TRAIN_DATE = pd.to_datetime("2020-04-14")
    # Number of trailing observations held out in each cross-validation fold.
    test_fold_size = 10
    train, test = data_processor.run_pipeline(START_TRAIN_DATE, END_TRAIN_DATE)
    unique_geos = train.index.unique()
    submission = data_processor.submission
    # Forecasts are floats; make the target columns float up front so the
    # row-wise assignments below never write into an integer column.
    target_columns = ["ConfirmedCases", "Fatalities"]
    submission[target_columns] = submission[target_columns].astype(float)

    for geo in unique_geos:
        y_train = train.loc[geo, :].set_index("Date")
        # clip() returns new series, so no slice of ``y_train`` is mutated;
        # negative counts are floored at 0.
        y_train_cc = y_train["ConfirmedCases"].clip(lower=0)
        y_train_f = y_train["Fatalities"].clip(lower=0)
        y_test = test.loc[geo].set_index("Date")
        num_forecast_observations = len(y_test)

        y_hat_cc = run_pipeline(y_train_cc, geo, test_fold_size, num_forecast_observations)
        y_hat_f = run_pipeline(y_train_f, geo, test_fold_size, num_forecast_observations)
        # Rows of the submission that belong to this geography's test dates.
        geo_rows = submission["ForecastId"].isin(y_test["ForecastId"])
        submission.loc[geo_rows, "ConfirmedCases"] = y_hat_cc
        submission.loc[geo_rows, "Fatalities"] = y_hat_f
        print(f"geo {geo}")
        print(submission.loc[geo_rows, "Fatalities"])
        print(submission.loc[geo_rows, "ConfirmedCases"])
    submission.to_csv("submission.csv", index=False)