# Cherry Blossom Peak Bloom Prediction 2026 — Python Pipeline

## Abstract

I model cherry blossom peak bloom timing as a climate-sensitive phenology problem where long-term warming, geography, and local site history all matter. To keep the approach interpretable and reproducible, I use a two-model ensemble with explicit backtest-based calibration.

**Model A** captures site-level momentum using a recency-weighted quadratic trend fitted independently for Kyoto, Washington D.C., Liestal, Vancouver, and New York City.

**Model B** captures transferable structure using a pooled Gradient Boosting Regressor (Huber loss) over time, geography, and observation depth, trained on competition records plus auxiliary bloom series and NYC USA-NPN enrichment.

I additionally integrate NOAA GHCN daily climate data from the NOAA CDO datasets portal (https://www.ncdc.noaa.gov/cdo-web/datasets) to derive winter and spring temperature features and a GDD proxy.

**Ensemble blending** is learned from rolling-origin backtests with site-specific dynamic weights. **Prediction intervals** are split-conformal in style: each forecast is widened by the location-wise 90th percentile of the absolute backtest residuals.

---

This notebook builds a reproducible 2026 forecasting pipeline:

- **Model A**: location-level recency-weighted trend.
- **Model B**: pooled nonlinear regressor using time + geography + NOAA climate features.
- **Ensemble**: site-wise dynamic blending from rolling-origin backtesting.
- **Intervals**: conformal location-wise residual quantiles.

In [1]:
import os
import numpy as np
import pandas as pd
import requests
from pathlib import Path
from typing import Optional
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

np.random.seed(5103)



In [2]:
# Base directory; every data path below is resolved relative to it.
ROOT = Path('.')

# Bloom histories for the five competition sites.
competition_files = [
    ROOT / relative
    for relative in (
        'data/kyoto.csv',
        'data/washingtondc.csv',
        'data/liestal.csv',
        'data/vancouver.csv',
        'data/nyc.csv',
    )
]

# Additional bloom series used only by the pooled model.
aux_files = [
    ROOT / relative
    for relative in (
        'data/japan.csv',
        'data/meteoswiss.csv',
        'data/south_korea.csv',
    )
]

def read_bloom_file(path: Path, source: str) -> pd.DataFrame:
    """Load one bloom CSV and reduce it to the shared seven-column schema.

    Args:
        path: CSV file providing at least the columns ``location``, ``lat``,
            ``long``, ``alt``, ``year`` and ``bloom_doy``.
        source: Label written to the ``source`` column of every row.

    Returns:
        A frame with the columns ``source``, ``location``, ``lat``, ``long``,
        ``alt``, ``year`` (nullable ``Int64``) and ``bloom_doy``. Numeric
        fields that cannot be parsed become missing values.
    """
    raw = pd.read_csv(path)

    def as_number(column: str) -> pd.Series:
        # Unparseable entries are coerced to NaN instead of raising.
        return pd.to_numeric(raw[column], errors='coerce')

    columns = {
        'source': source,
        'location': raw['location'].astype(str),
    }
    for name in ('lat', 'long', 'alt'):
        columns[name] = as_number(name)
    # Nullable integer dtype keeps years integral even when some are missing.
    columns['year'] = as_number('year').astype('Int64')
    columns['bloom_doy'] = as_number('bloom_doy')
    return pd.DataFrame(columns)

# Stack the per-file frames; the second argument becomes the 'source' label.
competition_raw = pd.concat([read_bloom_file(p, 'competition') for p in competition_files], ignore_index=True)
aux_raw = pd.concat([read_bloom_file(p, 'auxiliary') for p in aux_files], ignore_index=True)

# NYC enrichment from USA-NPN status observations (site 32789, species 228, Open flowers)
npn = pd.read_csv(ROOT / 'data/USA-NPN_status_intensity_observations_data.csv')
npn = npn[(npn['Site_ID'] == 32789) & (npn['Species_ID'] == 228) & (npn['Phenophase_ID'] == 501)].copy()
# Dates are parsed as month/day/two-digit-year; anything else becomes NaT.
npn['Observation_Date'] = pd.to_datetime(npn['Observation_Date'], format='%m/%d/%y', errors='coerce')
npn['year'] = npn['Observation_Date'].dt.year
# Earliest observation per year with Phenophase_Status == 1.
# NOTE(review): GroupBy.first() takes the first non-null value of each column
# independently, so this is the earliest row only where those columns are
# populated — confirm Day_of_Year is never missing.
npn_yes = (
    npn[npn['Phenophase_Status'] == 1]
    .sort_values('Observation_Date')
    .groupby('year', as_index=False)
    .first()
)

# Re-shape the yearly first-"yes" rows into the bloom schema with fixed site
# coordinates; rows lacking a year or a day-of-year are dropped.
nyc_npn_status = pd.DataFrame({
    'source': 'npn',
    'location': 'newyorkcity',
    'lat': 40.73040,
    'long': -73.99809,
    'alt': 8.5,
    'year': npn_yes['year'].astype('Int64'),
    'bloom_doy': pd.to_numeric(npn_yes['Day_of_Year'], errors='coerce')
}).dropna(subset=['year', 'bloom_doy'])

# Also use USA-NPN individual phenometrics (pre-computed first-flower DOY)
pheno = pd.read_csv(ROOT / 'data/USA-NPN_individual_phenometrics_data.csv')
pheno = pheno[(pheno['Site_ID'] == 32789) & (pheno['Species_ID'] == 228) & (pheno['Phenophase_ID'] == 501)].copy()
# One row per year: the smallest First_Yes_DOY among that year's records.
nyc_npn_pheno = (
    pheno.groupby('First_Yes_Year', as_index=False)['First_Yes_DOY']
    .min()
    .rename(columns={'First_Yes_Year': 'year', 'First_Yes_DOY': 'bloom_doy'})
)
nyc_npn_pheno = nyc_npn_pheno[nyc_npn_pheno['bloom_doy'].notna()].copy()
# Same constant site metadata as the status-based frame above.
nyc_npn_pheno['source'] = 'npn'
nyc_npn_pheno['location'] = 'newyorkcity'
nyc_npn_pheno['lat'] = 40.73040
nyc_npn_pheno['long'] = -73.99809
nyc_npn_pheno['alt'] = 8.5
nyc_npn_pheno['year'] = nyc_npn_pheno['year'].astype('Int64')

# Merge both NPN sources (status takes priority where years overlap)
status_years = set(nyc_npn_status['year'].dropna().astype(int))
nyc_npn_pheno_new = nyc_npn_pheno[~nyc_npn_pheno['year'].astype(int).isin(status_years)]
nyc_npn = pd.concat([nyc_npn_status, nyc_npn_pheno_new], ignore_index=True)

# Keep only NPN years that the competition NYC series does not already cover.
existing_nyc_years = set(competition_raw.loc[competition_raw['location'] == 'newyorkcity', 'year'].dropna().astype(int))
nyc_npn = nyc_npn[~nyc_npn['year'].astype(int).isin(existing_nyc_years)]

# 'competition' = competition files plus the NPN-derived NYC rows.
competition = pd.concat([competition_raw, nyc_npn], ignore_index=True)
# 'all_data' additionally holds the auxiliary series, restricted to complete
# rows from 1880 onwards. Note that 'competition' itself is NOT filtered here.
all_data = (
    pd.concat([competition, aux_raw], ignore_index=True)
    .dropna(subset=['year', 'bloom_doy'])
    .query('year >= 1880')
    .copy()
)
all_data['year'] = all_data['year'].astype(int)
# Site key combines source and location, so NPN-derived NYC rows
# ('npn::newyorkcity') are a different site from 'competition::newyorkcity'.
all_data['site_id'] = all_data['source'] + '::' + all_data['location']

# Forecast targets: the locations in the competition files, predicted for the
# year after the latest year present in those files.
competition_sites = sorted(competition_raw['location'].unique())
target_year = int(competition_raw['year'].max()) + 1

def fetch_noaa_features(cache_file='data/noaa_features.csv', force_refresh=False):
    """Return yearly NOAA temperature features per location, backed by a CSV cache.

    The cache at ``ROOT / cache_file`` is returned as-is when it exists and
    ``force_refresh`` is false. Otherwise GHCN-Daily records are downloaded
    from the NOAA CDO v2 API for one fixed station per competition site,
    aggregated, written to the cache and returned.

    Args:
        cache_file: Cache location relative to ``ROOT``.
        force_refresh: When true, ignore an existing cache and download again.

    Returns:
        DataFrame with columns ``location``, ``year``, ``winter_tavg``
        (mean TAVG over Dec-Feb, December counted towards the following year),
        ``spring_tavg`` (mean TAVG over Feb-Mar) and ``gdd_proxy`` (sum of
        ``max(TAVG - 5, 0)`` over Jan-Mar). An empty frame with these columns
        is returned, and nothing is cached, when the ``NOAA_WEB_API_TOKEN``
        environment variable is unset or no usable TAVG rows were obtained.

    Warns:
        RuntimeWarning: if any request failed or returned a non-OK status, so
            that gaps in the features are not silent.
    """
    import warnings  # local import: keeps this block self-contained

    feature_cols = ['location', 'year', 'winter_tavg', 'spring_tavg', 'gdd_proxy']

    cache_path = ROOT / cache_file
    if (not force_refresh) and cache_path.exists():
        return pd.read_csv(cache_path)

    token = os.getenv('NOAA_WEB_API_TOKEN', '')
    if not token:
        return pd.DataFrame(columns=feature_cols)

    stations = {
        'washingtondc': 'GHCND:USW00013743',
        'vancouver': 'GHCND:CA001108395',
        'newyorkcity': 'GHCND:USW00014732',
        'liestal': 'GHCND:SZ000001940',
        'kyoto': 'GHCND:JA000047759',
    }

    start_year = max(1880, int(all_data['year'].min()) - 1)
    end_year = int(all_data['year'].max())

    # Half-open 300-day request windows covering [Jan 1 start_year, Dec 31 end_year].
    # The exclusive end boundary is appended explicitly; without it the last
    # partial window (the most recent months) would never be requested.
    first_day = pd.Timestamp(f'{start_year}-01-01')
    stop = pd.Timestamp(f'{end_year}-12-31') + pd.Timedelta(days=1)
    edges = list(pd.date_range(first_day, stop, freq='300D'))
    if edges[-1] < stop:
        edges.append(stop)
    windows = list(zip(edges[:-1], edges[1:]))  # identical for every station

    rows = []
    failed_requests = 0
    for loc, sid in stations.items():
        for a, b in windows:
            params = {
                'datasetid': 'GHCND',
                'stationid': sid,
                # A list is encoded as repeated datatypeid=... parameters, the
                # chaining form described in the CDO v2 documentation.
                # TMAX is still requested, but only TAVG is aggregated below.
                'datatypeid': ['TAVG', 'TMAX'],
                'startdate': a.strftime('%Y-%m-%d'),
                'enddate': (b - pd.Timedelta(days=1)).strftime('%Y-%m-%d'),
                'units': 'metric',
                # 300 days x 2 datatypes stays below this page size.
                'limit': 1000,
            }
            try:
                r = requests.get(
                    'https://www.ncei.noaa.gov/cdo-web/api/v2/data',
                    params=params,
                    headers={'token': token},
                    timeout=30,
                )
            except requests.RequestException:
                # Best effort: skip this window but remember the failure.
                failed_requests += 1
                continue
            if not r.ok:
                failed_requests += 1
                continue
            try:
                payload = r.json()
            except ValueError:  # body was not valid JSON
                failed_requests += 1
                continue
            results = payload.get('results', []) if isinstance(payload, dict) else []
            for z in results:
                z['location'] = loc
                rows.append(z)

    if failed_requests:
        warnings.warn(
            f'{failed_requests} NOAA request(s) failed; climate features may be incomplete.',
            RuntimeWarning,
        )

    raw = pd.DataFrame(rows)
    if raw.empty:
        return pd.DataFrame(columns=feature_cols)

    raw['date'] = pd.to_datetime(raw['date'], errors='coerce')
    raw['value'] = pd.to_numeric(raw['value'], errors='coerce')
    # Rows without a parseable date cannot be assigned to a year; dropping
    # them also keeps the derived year an integer.
    raw = raw.dropna(subset=['date']).copy()

    # Readings with magnitude above 80 are divided by ten.
    # NOTE(review): presumably these are values reported in tenths of a
    # degree despite units=metric — confirm against the API output.
    implausible = raw['value'].abs() > 80
    raw.loc[implausible, 'value'] = raw.loc[implausible, 'value'] / 10.0

    # December counts towards the following year's bloom season.
    m = raw['date'].dt.month
    y = raw['date'].dt.year
    raw['target_year'] = np.where(m == 12, y + 1, y)

    tavg = raw[raw['datatype'] == 'TAVG'].copy()
    if tavg.empty:
        # Nothing to aggregate; do not write a cache without feature columns.
        return pd.DataFrame(columns=feature_cols)
    tavg['month'] = tavg['date'].dt.month

    def agg_fn(g):
        winter = g[g['month'].isin([12, 1, 2])]['value'].mean()
        spring = g[g['month'].isin([2, 3])]['value'].mean()
        gdd = np.maximum(g[g['month'].isin([1, 2, 3])]['value'] - 5.0, 0).sum()
        return pd.Series({'winter_tavg': winter, 'spring_tavg': spring, 'gdd_proxy': gdd})

    # Only the columns read by agg_fn are handed to apply().
    feat = (
        tavg.groupby(['location', 'target_year'])[['month', 'value']]
        .apply(agg_fn)
        .reset_index()
        .rename(columns={'target_year': 'year'})
    )
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    feat.to_csv(cache_path, index=False)
    return feat

# Load the NOAA-derived climate features without forcing a refresh.
climate_features = fetch_noaa_features(force_refresh=False)

# Row counts per data source (displayed as the cell output).
all_data.groupby('source').size()

source
auxiliary      14209
competition      385
npn                4
dtype: int64

In [3]:
def add_features(df: pd.DataFrame, reference_df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
    """Return a copy of ``df`` extended with the model's engineered columns.

    Site observation counts and the climate fallback values are computed from
    ``reference_df`` (or from ``df`` itself when it is omitted), so a test
    frame can be featurised using statistics of its training frame only.

    Args:
        df: Rows to featurise; needs ``site_id``, ``location``, ``year``,
            ``lat`` and ``alt``.
        reference_df: Optional frame supplying the statistics. It is copied,
            never modified.

    Returns:
        New frame with ``site_obs``, the three climate columns (missing values
        filled by the location median, then the overall median, then 0.0),
        their ``*_loc`` medians, ``year_c``, ``year_c2``, ``decade``,
        ``lat_abs`` and ``alt_log1p``.
    """
    climate_cols = ['winter_tavg', 'spring_tavg', 'gdd_proxy']

    frame = df.copy()
    base = frame if reference_df is None else reference_df.copy()

    # Number of reference rows per site; merged before any column is added.
    obs_per_site = base.groupby('site_id').size().rename('site_obs').reset_index()
    frame = frame.merge(obs_per_site, on='site_id', how='left')

    # Guarantee the climate columns exist on both frames.
    for table in (frame, base):
        for col in climate_cols:
            if col not in table.columns:
                table[col] = np.nan

    # Per-location medians, attached under '<name>_loc'.
    loc_medians = (
        base.groupby('location', as_index=False)[climate_cols]
        .median()
        .rename(columns={col: f'{col}_loc' for col in climate_cols})
    )
    frame = frame.merge(loc_medians, on='location', how='left')

    # Fill order: location median -> overall median -> 0.0.
    for col in climate_cols:
        overall = base[col].median(skipna=True)
        if pd.isna(overall):
            overall = 0.0
        frame[col] = frame[col].fillna(frame[f'{col}_loc']).fillna(overall)

    year_offset = frame['year'] - 1950
    frame['year_c'] = year_offset
    frame['year_c2'] = year_offset ** 2
    frame['decade'] = (frame['year'] // 10) * 10
    frame['lat_abs'] = frame['lat'].abs()
    # Altitudes below zero are clipped to zero before the log transform.
    frame['alt_log1p'] = np.log1p(frame['alt'].clip(lower=0))
    # Sites absent from the reference frame count as a single observation.
    frame['site_obs'] = frame['site_obs'].fillna(1)
    return frame

def build_global_model() -> Pipeline:
    """Build the unfitted pooled regressor (Model B).

    Returns:
        A two-step pipeline: a column transformer that median-imputes the
        numeric features, one-hot encodes ``source`` (unknown categories are
        ignored) and drops every other column, followed by a Huber-loss
        gradient boosting regressor.
    """
    numeric_features = [
        'year', 'year_c', 'year_c2', 'lat', 'long', 'alt_log1p', 'site_obs',
        'winter_tavg', 'spring_tavg', 'gdd_proxy'
    ]
    categorical_features = ['source']

    numeric_branch = Pipeline([('imputer', SimpleImputer(strategy='median'))])
    categorical_branch = OneHotEncoder(handle_unknown='ignore')
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_branch, numeric_features),
            ('cat', categorical_branch, categorical_features),
        ],
        remainder='drop',
    )

    # Fixed random_state keeps repeated fits reproducible.
    booster = GradientBoostingRegressor(
        loss='huber',
        n_estimators=700,
        learning_rate=0.02,
        max_depth=3,
        random_state=5103,
    )

    return Pipeline([('pre', preprocessor), ('model', booster)])

def predict_local_trend(train_comp: pd.DataFrame, new_comp: pd.DataFrame) -> pd.DataFrame:
    """Extrapolate each location's own bloom trend to the requested years (Model A).

    For every location present in ``new_comp`` the training rows of that
    location are fitted separately:

    * 4 or more usable rows: quadratic fit with weights that decay
      exponentially with the observation's rank counted back from the most
      recent one;
    * 2 or 3 usable rows: unweighted straight line;
    * 1 usable row: that row's value;
    * no usable rows: NaN.

    Args:
        train_comp: Training rows with ``location``, ``year`` and
            ``bloom_doy``. Rows missing ``year`` or ``bloom_doy`` are ignored.
        new_comp: Rows to predict, with ``location`` and ``year``.

    Returns:
        Frame with columns ``location``, ``year`` and ``pred_local`` (float),
        one row per row of ``new_comp``, grouped by location. Empty (but with
        those columns) when ``new_comp`` has no rows.
    """
    out_cols = ['location', 'year', 'pred_local']
    rows = []
    for loc in new_comp['location'].unique():
        tr = (
            train_comp[train_comp['location'] == loc]
            .dropna(subset=['year', 'bloom_doy'])  # NaNs would poison the fit
            .sort_values('year')
        )
        nd = new_comp[new_comp['location'] == loc].copy()

        # Plain float arrays: nullable Int64 years would otherwise reach
        # polyfit/polyval as extension/object arrays.
        x = tr['year'].to_numpy(dtype=float, na_value=np.nan)
        y = tr['bloom_doy'].to_numpy(dtype=float, na_value=np.nan)
        x_new = nd['year'].to_numpy(dtype=float, na_value=np.nan)

        n = len(tr)
        if n >= 2:
            # Shift years so the latest training year is 0. The fitted curve
            # is mathematically unchanged, but powers of ~2000 no longer
            # degrade the conditioning of the least-squares problem.
            origin = x[-1]
            if n >= 4:
                w = np.exp(np.arange(-n + 1, 1) / 6.0)
                coef = np.polyfit(x - origin, y, deg=2, w=w)
            else:
                coef = np.polyfit(x - origin, y, deg=1)
            pred = np.polyval(coef, x_new - origin)
        elif n == 1:
            pred = np.repeat(y[0], len(nd))
        else:
            pred = np.full(len(nd), np.nan)

        nd['pred_local'] = pred
        rows.append(nd[out_cols])

    if not rows:
        # pd.concat([]) raises; return an empty frame with the same schema.
        return pd.DataFrame(columns=out_cols)
    return pd.concat(rows, ignore_index=True)

In [4]:
# Rolling-origin backtest: for each year y, fit on data strictly before y and
# predict the competition rows observed in y.
backtest_start = max(1900, int(competition_raw['year'].min()) + 20)
backtest_years = list(range(backtest_start, int(competition_raw['year'].max()) + 1))

rolling_rows = []
for y in backtest_years:
    # Local model trains on competition + NPN rows; targets come from the
    # competition files only.
    train_comp = competition[competition['year'] < y].copy()
    test_comp = competition_raw[competition_raw['year'] == y].copy()

    # NOTE(review): a fold is skipped unless every competition site already
    # has at least one training row before y, so evaluation only begins once
    # the latest-starting site has history — confirm this is intended.
    if test_comp.empty or train_comp['location'].nunique() < len(competition_sites):
        continue

    # Pooled model trains on all sources before y.
    train_all = all_data[all_data['year'] < y].copy()
    train_all = train_all.merge(climate_features, on=['location', 'year'], how='left')
    train_all = add_features(train_all)

    test_comp['source'] = 'competition'
    test_comp['site_id'] = test_comp['source'] + '::' + test_comp['location']
    test_comp = test_comp.merge(climate_features, on=['location', 'year'], how='left')
    # Test features use statistics of the training frame only.
    test_feat = add_features(test_comp.copy(), reference_df=train_all)

    local_pred = predict_local_trend(train_comp, test_feat)

    g_model = build_global_model()
    g_model.fit(train_all, train_all['bloom_doy'])
    pred_g = g_model.predict(test_feat)

    # pred_g is assigned positionally, so 'fold' must keep test_feat's row
    # order and length (the left merge is expected to preserve both).
    fold = test_feat[['location', 'year', 'bloom_doy']].merge(local_pred, on=['location', 'year'], how='left')
    fold['pred_global'] = pred_g
    rolling_rows.append(fold)

rolling = pd.concat(rolling_rows, ignore_index=True)

mae_local = mean_absolute_error(rolling['bloom_doy'], rolling['pred_local'])
mae_global = mean_absolute_error(rolling['bloom_doy'], rolling['pred_global'])

# Candidate weights for the local model: 0.00, 0.02, ..., 1.00.
grid = np.arange(0.0, 1.0001, 0.02)
def mae_w(df, w):
    # MAE of the blend w * local + (1 - w) * global on the rows of df.
    pred = w * df['pred_local'] + (1.0 - w) * df['pred_global']
    return mean_absolute_error(df['bloom_doy'], pred)

# Pooled weight over all backtest rows; used as fallback for sites without
# their own weight.
w_local_global = min(grid, key=lambda w: mae_w(rolling, w))
w_global_global = 1.0 - w_local_global

# Site-specific weight: grid point with the lowest MAE on that site's rows.
site_w = []
for loc, df_loc in rolling.groupby('location'):
    w_star = min(grid, key=lambda w: mae_w(df_loc, w))
    site_w.append({'location': loc, 'w_local': w_star, 'w_global': 1.0 - w_star})
site_w = pd.DataFrame(site_w)

rolling = rolling.merge(site_w, on='location', how='left')
rolling['w_local'] = rolling['w_local'].fillna(w_local_global)
rolling['w_global'] = rolling['w_global'].fillna(w_global_global)

rolling['pred_ensemble'] = rolling['w_local'] * rolling['pred_local'] + rolling['w_global'] * rolling['pred_global']
rolling['abs_err'] = (rolling['bloom_doy'] - rolling['pred_ensemble']).abs()

# Interval half-widths: 90th percentile of absolute ensemble error, per site
# and pooled.
# NOTE(review): these residuals come from the same backtest rows on which the
# blend weights were selected, so they may understate out-of-sample error.
site_q90 = rolling.groupby('location', as_index=False)['abs_err'].quantile(0.90).rename(columns={'abs_err': 'q90'})
global_q90 = rolling['abs_err'].quantile(0.90)

# Backtest MAE of each model (displayed as the cell output).
pd.DataFrame({
    'model': ['local', 'global', 'ensemble'],
    'mae': [
        mae_local,
        mae_global,
        mean_absolute_error(rolling['bloom_doy'], rolling['pred_ensemble'])
    ]
})

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, ou

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, ou

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, ou

Unnamed: 0,model,mae
0,local,7.468177
1,global,5.086136
2,ensemble,4.697999


In [5]:
# Refit on the complete history: pooled data for the global model,
# competition + NPN rows for the local trend.
train_all = add_features(
    all_data.copy().merge(climate_features, on=['location', 'year'], how='left')
)
train_comp = competition.copy()

# One row per competition site for the target year, reusing the coordinates
# of that site's most recent record.
newdata = (
    competition_raw.sort_values('year')
    .groupby('location', as_index=False)
    .tail(1)
    .loc[:, ['location', 'lat', 'long', 'alt']]
    .copy()
    .assign(
        source='competition',
        year=target_year,
        bloom_doy=np.nan,
        site_id=lambda d: d['source'] + '::' + d['location'],
    )
)
newdata = newdata.merge(climate_features, on=['location', 'year'], how='left')
new_feat = add_features(newdata, reference_df=train_all)

local_pred = predict_local_trend(train_comp, new_feat)
global_model = build_global_model()
global_model.fit(train_all, train_all['bloom_doy'])
pred_global = global_model.predict(new_feat)

# Blend with the backtest weights; sites without their own weight fall back
# to the pooled one.
final_pred = (
    new_feat[['location', 'year']]
    .merge(local_pred, on=['location', 'year'], how='left')
    .assign(pred_global=pred_global)
    .merge(site_w, on='location', how='left')
)
final_pred = final_pred.fillna({'w_local': w_local_global, 'w_global': w_global_global})
final_pred['prediction_raw'] = (
    final_pred['w_local'] * final_pred['pred_local']
    + final_pred['w_global'] * final_pred['pred_global']
)

# Interval half-width: site-level q90, pooled q90 where a site has none.
final_pred = final_pred.merge(site_q90, on='location', how='left')
final_pred['q90'] = final_pred['q90'].fillna(global_q90)

# Integer day-of-year outputs, limited to 1..366; bounds are rounded outwards.
final_pred['prediction'] = final_pred['prediction_raw'].round().clip(1, 366).astype(int)
final_pred['lower'] = np.floor(final_pred['prediction_raw'] - final_pred['q90']).clip(1, 366).astype(int)
final_pred['upper'] = np.ceil(final_pred['prediction_raw'] + final_pred['q90']).clip(1, 366).astype(int)

final_pred = final_pred.loc[:, ['location', 'year', 'prediction', 'lower', 'upper']].sort_values('location')
final_pred

  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, ou

Unnamed: 0,location,year,prediction,lower,upper
2,kyoto,2026,91,83,98
0,liestal,2026,87,79,95
4,newyorkcity,2026,89,82,95
3,vancouver,2026,90,82,99
1,washingtondc,2026,84,76,92


In [6]:
def doy_to_date(year: int, doy: int) -> pd.Timestamp:
    """Convert a year and an integer day-of-year into a timestamp.

    The pair is rendered as ``YYYY-DDD`` (day zero-padded to three digits) and
    parsed with the ``%Y-%j`` format; text pandas cannot parse yields ``NaT``
    instead of raising.
    """
    stamp = '{}-{:03d}'.format(year, doy)
    return pd.to_datetime(stamp, errors='coerce', format='%Y-%j')

# Add a calendar-date column next to each day-of-year column.
submission = final_pred.copy().assign(**{
    date_col: [doy_to_date(y, d) for y, d in zip(final_pred['year'], final_pred[doy_col])]
    for doy_col, date_col in (
        ('prediction', 'predicted_date'),
        ('lower', 'lower_date'),
        ('upper', 'upper_date'),
    )
})
submission

Unnamed: 0,location,year,prediction,lower,upper,predicted_date,lower_date,upper_date
2,kyoto,2026,91,83,98,2026-04-01,2026-03-24,2026-04-08
0,liestal,2026,87,79,95,2026-03-28,2026-03-20,2026-04-05
4,newyorkcity,2026,89,82,95,2026-03-30,2026-03-23,2026-04-05
3,vancouver,2026,90,82,99,2026-03-31,2026-03-23,2026-04-09
1,washingtondc,2026,84,76,92,2026-03-25,2026-03-17,2026-04-02


In [7]:
# Competition CSV schema: location plus the three day-of-year columns.
submission.to_csv(
    'cherry-predictions-python.csv',
    columns=['location', 'prediction', 'lower', 'upper'],
    index=False,
)
submission.loc[:, ['location', 'prediction', 'lower', 'upper']]

Unnamed: 0,location,prediction,lower,upper
2,kyoto,91,83,98
0,liestal,87,79,95
4,newyorkcity,89,82,95
3,vancouver,90,82,99
1,washingtondc,84,76,92
