diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 35a6cde9953..57151563165 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -6,7 +6,7 @@ Contributors -[![All Contributors](https://img.shields.io/badge/all_contributors-275-orange.svg)](#contributors) +[![All Contributors](https://img.shields.io/badge/all_contributors-278-orange.svg)](#contributors) This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! @@ -75,6 +75,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Carlos Ramos CarreΓ±o
Carlos Ramos CarreΓ±o

πŸ“– + Cedric DoniΓ©
Cedric DoniΓ©

πŸ› πŸ’» Chang Wei Tan
Chang Wei Tan

πŸ’» Cheuk Ting Ho
Cheuk Ting Ho

πŸ’» Christian Kastner
Christian Kastner

πŸ’» πŸ› @@ -82,9 +83,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Christopher Dahlin
Christopher Dahlin

πŸ’» Christopher Lo
Christopher Lo

πŸ’» πŸ€” Chung-Fan Tsai
Chung-Fan Tsai

⚠️ - Ciaran Gilbert
Ciaran Gilbert

πŸ› πŸ’» πŸ“– ⚠️ πŸ€” + Ciaran Gilbert
Ciaran Gilbert

πŸ› πŸ’» πŸ“– ⚠️ πŸ€” ClaudiaSanches
ClaudiaSanches

πŸ’» ⚠️ Colin Fallon
Colin Fallon

πŸ“– Corvin Paul
Corvin Paul

πŸ“– @@ -93,9 +94,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Daniel MartΓ­n MartΓ­nez
Daniel MartΓ­n MartΓ­nez

πŸ“– πŸ› Darya Petrashka
Darya Petrashka

πŸ“– Dave Hirschfeld
Dave Hirschfeld

πŸš‡ - David Buchaca Prats
David Buchaca Prats

πŸ’» + David Buchaca Prats
David Buchaca Prats

πŸ’» David Gilbertson
David Gilbertson

πŸ’» πŸ› David Guijo Rubio
David Guijo Rubio

πŸ’» πŸ€” David Manowitz
David Manowitz

πŸ› 🚧 @@ -104,9 +105,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Drishti Bhasin
Drishti Bhasin

πŸ’» Dylan Sherry
Dylan Sherry

πŸš‡ Eddy Oyieko
Eddy Oyieko

πŸ’» πŸ“– - Emilia Rose
Emilia Rose

πŸ’» ⚠️ + Emilia Rose
Emilia Rose

πŸ’» ⚠️ Er Jie Yong
Er Jie Yong

πŸ› πŸ’» Evan Miller
Evan Miller

βœ… Eyal Shafran
Eyal Shafran

πŸ’» @@ -115,9 +116,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Felipe Angelim
Felipe Angelim

πŸ’» πŸ› Felix Claessen
Felix Claessen

πŸ’» πŸ“– ⚠️ πŸ› Felix Hirwa Nshuti
Felix Hirwa Nshuti

πŸ’» 🚧 - Florian Stinner
Florian Stinner

πŸ’» ⚠️ + Florian Stinner
Florian Stinner

πŸ’» ⚠️ Francesco Spinnato
Francesco Spinnato

πŸ’» Franz Kiraly
Franz Kiraly

πŸ“ πŸ› πŸ’Ό πŸ’» πŸ“– 🎨 πŸ“‹ πŸ’‘ πŸ’΅ πŸ” πŸ€” 🚧 πŸ§‘β€πŸ« πŸ“† πŸ’¬ πŸ‘€ πŸ“’ ⚠️ βœ… πŸ“Ή Freddy A Boulton
Freddy A Boulton

πŸš‡ ⚠️ @@ -126,9 +127,9 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Geronimo Bergk
Geronimo Bergk

πŸ› πŸ’» Grace Gao
Grace Gao

πŸ’» πŸ› Guzal Bulatova
Guzal Bulatova

πŸ› πŸ’» πŸ“‹ πŸ§‘β€πŸ« πŸ“† πŸ‘€ ⚠️ - HYang1996
HYang1996

πŸ’» ⚠️ πŸ“– βœ… + HYang1996
HYang1996

πŸ’» ⚠️ πŸ“– βœ… Hamza Benslimane
Hamza Benslimane

πŸ› πŸ’» Hazrul Akmal
Hazrul Akmal

πŸ’» πŸ“– πŸ› ⚠️ Helge Liebert
Helge Liebert

πŸ› πŸ’» @@ -137,20 +138,21 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Ian Spektor
Ian Spektor

πŸ’» πŸ“– Ifeanyi30
Ifeanyi30

πŸ’» Ilja Maurer
Ilja Maurer

πŸ’» - Ilyas Moutawwakil
Ilyas Moutawwakil

πŸ’» πŸ“– + Ilyas Moutawwakil
Ilyas Moutawwakil

πŸ’» πŸ“– Ireoluwatomiwa
Ireoluwatomiwa

πŸ“– Ishan Nangia
Ishan Nangia

πŸ€” + Ishan Paidhungat
Ishan Paidhungat

πŸ’» πŸ“– Jack Russon
Jack Russon

πŸ’» James Large
James Large

πŸ’» πŸ“– ⚠️ πŸš‡ 🚧 James Morrill
James Morrill

πŸ’» Jan Pipek
Jan Pipek

πŸ’» Jasmine Liaw
Jasmine Liaw

πŸ’» - Jason Lines
Jason Lines

πŸ’» πŸ’Ό πŸ“– 🎨 πŸ“‹ πŸ” πŸ€” πŸ“† πŸ’¬ πŸ‘€ πŸ“’ πŸ’‘ - Jason Pong
Jason Pong

πŸ’» ⚠️ + Jason Lines
Jason Lines

πŸ’» πŸ’Ό πŸ“– 🎨 πŸ“‹ πŸ” πŸ€” πŸ“† πŸ’¬ πŸ‘€ πŸ“’ πŸ’‘ + Jason Pong
Jason Pong

πŸ’» ⚠️ Jaume Mateu
Jaume Mateu

πŸ’» Javier Berneche
Javier Berneche

πŸ’» πŸ“– Jonas Pirner
Jonas Pirner

πŸ“– @@ -158,197 +160,198 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Joren Hammudoglu
Joren Hammudoglu

πŸš‡ Juan Orduz
Juan Orduz

βœ… πŸ“– Julia Kraus
Julia Kraus

πŸ“– πŸ’» ⚠️ - Julian Cooper
Julian Cooper

πŸ’» πŸ€” - Julian Nowak
Julian Nowak

πŸ› πŸ’» + Julian Cooper
Julian Cooper

πŸ’» πŸ€” + Julian Haderlein
Julian Haderlein

πŸ“– + Julian Nowak
Julian Nowak

πŸ› πŸ’» Juliana
Juliana

πŸ’» Justin Shenk
Justin Shenk

πŸ“– Kai Lion
Kai Lion

πŸ’» ⚠️ πŸ“– Kavin Anand
Kavin Anand

πŸ“– Kejsi Take
Kejsi Take

πŸ’» Kevin Lam
Kevin Lam

πŸ’» πŸ’‘ ⚠️ + + Kirstie Whitaker
Kirstie Whitaker

πŸ€” πŸ” Kishan Manani
Kishan Manani

πŸ’» πŸ“– ⚠️ πŸ› πŸ€” Krum Arnaudov
Krum Arnaudov

πŸ› πŸ’» - - Kutay Koralturk
Kutay Koralturk

πŸ’» πŸ› Leonidas Tsaprounis
Leonidas Tsaprounis

πŸ’» πŸ› πŸ§‘β€πŸ« πŸ‘€ Lielle Ravid
Lielle Ravid

πŸ’» πŸ“– Logan Duffy
Logan Duffy

πŸ’» πŸ“– ⚠️ πŸ› πŸ€” Lorena Pantano
Lorena Pantano

πŸ€” Lorenzo Toniazzi
Lorenzo Toniazzi

πŸ’» + + Lovkush
Lovkush

πŸ’» ⚠️ πŸ€” πŸ§‘β€πŸ« πŸ“† Luca Miniati
Luca Miniati

πŸ’» πŸ“– Luis Ventura
Luis Ventura

πŸ’» - - Luis Zugasti
Luis Zugasti

πŸ“– Lukasz Mentel
Lukasz Mentel

πŸ’» πŸ“– πŸš‡ ⚠️ πŸ› 🚧 πŸ§‘β€πŸ« Manuel MuΓ±oz Aguirre
Manuel MuΓ±oz Aguirre

πŸ“– Marc Rovira
Marc Rovira

πŸ“– Marcelo Trylesinski
Marcelo Trylesinski

πŸ“– Marco Gorelli
Marco Gorelli

πŸš‡ + + Margaret Gorlin
Margaret Gorlin

πŸ’» πŸ’‘ ⚠️ Mariam Jabara
Mariam Jabara

πŸ’» Marielle
Marielle

πŸ“– πŸ’» πŸ€” - - Markus LΓΆning
Markus LΓΆning

πŸ’» ⚠️ 🚧 πŸ“¦ πŸ‘€ πŸš‡ πŸ’‘ πŸ› βœ… πŸ’Ό πŸ“– 🎨 πŸ“‹ πŸ” πŸ€” πŸ“† πŸ’¬ πŸ“’ πŸ§‘β€πŸ« πŸ“Ή Martin Walter
Martin Walter

πŸ’» πŸ› πŸ“† πŸ” πŸ§‘β€πŸ« πŸ€” 🎨 πŸ‘€ πŸ“– πŸ“’ Martina G. Vilas
Martina G. Vilas

πŸ‘€ πŸ€” Mathias Creemers
Mathias Creemers

πŸ› πŸ’» Matthew Middlehurst
Matthew Middlehurst

πŸ’» πŸ“– ⚠️ βœ… πŸ‘€ πŸ› Mavs
Mavs

πŸ’» - Max Frohlich
Max Frohlich

πŸ’» πŸ€” 🚧 - Max Patzelt
Max Patzelt

πŸ’» - Meraldo Antonio
Meraldo Antonio

πŸ“– + Max Frohlich
Max Frohlich

πŸ’» πŸ€” 🚧 + Max Patzelt
Max Patzelt

πŸ’» + Meraldo Antonio
Meraldo Antonio

πŸ“– πŸ› Miao Cai
Miao Cai

πŸ› πŸ’» Michael Feil
Michael Feil

πŸ’» ⚠️ πŸ€” Michael Gaziani
Michael Gaziani

πŸ“– Michael Mwimali
Michael Mwimali

πŸ’» Michal Chromcak
Michal Chromcak

πŸ’» πŸ“– ⚠️ βœ… Mirae Parker
Mirae Parker

πŸ’» ⚠️ + + Mirko Bristle
Mirko Bristle

πŸ› ⚠️ πŸ“– Mohammed Saif Kazamel
Mohammed Saif Kazamel

πŸ› Morad :)
Morad :)

πŸ’» ⚠️ πŸ“– - - Multivin12
Multivin12

πŸ’» ⚠️ MΓ‘rcio A. Freitas Jr
MΓ‘rcio A. Freitas Jr

πŸ“– Niek van der Laan
Niek van der Laan

πŸ’» Nikhil Gupta
Nikhil Gupta

πŸ’» πŸ› πŸ“– Nikola Shahpazov
Nikola Shahpazov

πŸ“– Nilesh Kumar
Nilesh Kumar

πŸ’» + + Ninnart Fuengfusin
Ninnart Fuengfusin

πŸ’» Noa Ben Ami
Noa Ben Ami

πŸ’» ⚠️ πŸ“– Oleksandr Shchur
Oleksandr Shchur

πŸ› πŸ’» - - Oleksii Kachaiev
Oleksii Kachaiev

πŸ’» ⚠️ Oliver Matthews
Oliver Matthews

πŸ’» Patrick Rockenschaub
Patrick Rockenschaub

πŸ’» 🎨 πŸ€” ⚠️ Patrick SchΓ€fer
Patrick SchΓ€fer

πŸ’» βœ… Paul
Paul

πŸ“– Paul Yim
Paul Yim

πŸ’» πŸ’‘ ⚠️ + + Philipp Kortmann
Philipp Kortmann

πŸ’» πŸ“– Piyush Gade
Piyush Gade

πŸ’» πŸ‘€ Poruri Sai Rahul
Poruri Sai Rahul

πŸ“– - - Pranav Prajapati
Pranav Prajapati

πŸ’» ⚠️ πŸ› Pulkit Verma
Pulkit Verma

πŸ“– Quaterion
Quaterion

πŸ› Rakshitha Godahewa
Rakshitha Godahewa

πŸ’» πŸ“– Ramon Bussing
Ramon Bussing

πŸ“– πŸ’» πŸ› ⚠️ RavenRudi
RavenRudi

πŸ’» + + Rick van Hattem
Rick van Hattem

πŸš‡ Rishabh Bali
Rishabh Bali

πŸ’» Rishi Kumar Ray
Rishi Kumar Ray

πŸš‡ - - Riya Elizabeth John
Riya Elizabeth John
πŸ’» Riya Elizabeth John
Riya Elizabeth John

πŸ’» ⚠️ πŸ“– Roman Lutz
Roman Lutz

πŸ“– Ronnie Llamado
Ronnie Llamado

πŸ“– Ryan Kuhns
Ryan Kuhns

πŸ’» πŸ“– βœ… πŸ’‘ πŸ€” πŸ‘€ ⚠️ Sagar Mishra
Sagar Mishra

πŸ› πŸ’» ⚠️ + + Sajaysurya Ganesh
Sajaysurya Ganesh

πŸ’» πŸ“– 🎨 πŸ’‘ πŸ€” ⚠️ βœ… Sami Alavi
Sami Alavi

πŸ’» 🚧 Samruddhi Navale
Samruddhi Navale

πŸ“– - - Sanjay Kumar
Sanjay Kumar

⚠️ Sanjeeb Dey
Sanjeeb Dey

🚧 Santiago Smith Silva
Santiago Smith Silva

πŸ’» Saransh Chopra
Saransh Chopra

πŸ“– πŸš‡ Satya Prakash Pattnaik
Satya Prakash Pattnaik

πŸ“– Saurabh Dasgupta
Saurabh Dasgupta

πŸ’» + + Sebastiaan Koel
Sebastiaan Koel

πŸ’» πŸ“– Sebastian Hagn
Sebastian Hagn

πŸ“– Shivam Pathak
Shivam Pathak

πŸ“– - - Shivansh Subramanian
Shivansh Subramanian

πŸ“– πŸ’» Shlok Sabarwal
Shlok Sabarwal
πŸ’» Shreesha M
Shreesha M

πŸ› πŸ’» ⚠️ Simon B.
Simon B.

πŸ’» Slava Shpitalny
Slava Shpitalny

🚧 Solomon Botchway
Solomon Botchway

🚧 + + Stanislav Khrapov
Stanislav Khrapov

πŸ’» Stijn J. Rotman
Stijn J. Rotman

πŸ’» πŸ“– Svea Marie Meyer
Svea Marie Meyer

πŸ“– πŸ’» - - TNTran92
TNTran92

πŸ’» Taisei Yamamoto
Taisei Yamamoto

πŸ’» Taiwo Owoseni
Taiwo Owoseni

πŸ’» Thach Le Nguyen
Thach Le Nguyen

πŸ’» ⚠️ TheMathcompay Widget Factory Team
TheMathcompay Widget Factory Team

πŸ“– Thomas Buckley-Houston
Thomas Buckley-Houston

πŸ› + + Tom Xu
Tom Xu

πŸ’» πŸ“– Tomas P. de Vasconcelos
Tomas P. de Vasconcelos

πŸ› πŸ’» Tomasz Chodakowski
Tomasz Chodakowski

πŸ’» πŸ“– πŸ› - - Tony Bagnall
Tony Bagnall

πŸ’» πŸ’Ό πŸ“– 🎨 πŸ“‹ πŸ” πŸ€” πŸ“† πŸ’¬ πŸ‘€ πŸ“’ πŸ”£ Utsav Kumar Tiwari
Utsav Kumar Tiwari

πŸ’» πŸ“– Vandit Tyagi
Vandit Tyagi

πŸ“– Vasudeva Kilaru
Vasudeva Kilaru

πŸ’» πŸ“– Viktor Dremov
Viktor Dremov

πŸ’» ViktorKaz
ViktorKaz

πŸ’» πŸ“– 🎨 + + Vincent Nicholson
Vincent Nicholson

πŸ’» Vyomkesh Vyas
Vyomkesh Vyas

πŸ’» πŸ“– πŸ’‘ ⚠️ Wayne Adams
Wayne Adams

πŸ“– - - William Templier
William Templier

πŸ“– William Zheng
William Zheng

πŸ’» ⚠️ Xinyu Wu
Xinyu Wu

πŸ› πŸ’» ⚠️ Yair Beer
Yair Beer

πŸ’» Yann Hallouard
Yann Hallouard

πŸ’» ⚠️ Yash Edake
Yash Edake

🚧 πŸ› + + Yash Khare
Yash Khare

πŸ’» πŸ“– Yash Lamba
Yash Lamba

πŸ’» Yi-Xuan Xu
Yi-Xuan Xu

πŸ’» ⚠️ 🚧 πŸ“– - - Zhen Shao
Zhen Shao

πŸ’» Ziyao Wei
Ziyao Wei

πŸ’» aa25desh
aa25desh

πŸ’» πŸ› abandus
abandus

πŸ€” πŸ’» adoherty21
adoherty21

πŸ› bethrice44
bethrice44

πŸ› πŸ’» πŸ‘€ ⚠️ + + big-o
big-o

πŸ’» ⚠️ 🎨 πŸ€” πŸ‘€ βœ… πŸ§‘β€πŸ« bobbys
bobbys

πŸ’» brett koonce
brett koonce

πŸ“– - - btrtts
btrtts

πŸ“– chizzi25
chizzi25

πŸ“ chrisholder
chrisholder

πŸ’» ⚠️ πŸ“– 🎨 πŸ’‘ ctl
ctl

πŸ› danbartl
danbartl

πŸ› πŸ’» πŸ‘€ πŸ“’ ⚠️ βœ… πŸ“Ή hamzahiqb
hamzahiqb

πŸš‡ + + hiqbal2
hiqbal2

πŸ“– jesellier
jesellier

πŸ’» jschemm
jschemm

πŸ’» - - kkoziara
kkoziara

πŸ’» πŸ› matteogales
matteogales

πŸ’» 🎨 πŸ€” oleskiewicz
oleskiewicz

πŸ’» πŸ“– ⚠️ pabworks
pabworks

πŸ’» ⚠️ patiently pending world peace
patiently pending world peace

πŸ’» raishubham1
raishubham1

πŸ“– + + simone-pignotti
simone-pignotti

πŸ’» πŸ› sophijka
sophijka

πŸ“– 🚧 sri1419
sri1419

πŸ’» - - tensorflow-as-tf
tensorflow-as-tf

πŸ’» vedazeren
vedazeren

πŸ’» ⚠️ vincent-nich12
vincent-nich12

πŸ’» diff --git a/sktime/base/_base_panel.py b/sktime/base/_base_panel.py index 0c04f3eaa2d..76dd7230a65 100644 --- a/sktime/base/_base_panel.py +++ b/sktime/base/_base_panel.py @@ -102,7 +102,15 @@ def _vectorize(self, methodname, **kwargs): return y_pred - def _fit_predict_boilerplate(self, X, y, cv, change_state, method): + def _fit_predict_boilerplate( + self, + X, + y, + cv, + change_state, + method, + return_type="single_y_pred", + ): """Boilerplate logic for fit_predict and fit_predict_proba.""" from sklearn.model_selection import KFold @@ -147,31 +155,86 @@ def _fit_predict_boilerplate(self, X, y, cv, change_state, method): X = convert( X, from_type=X_mtype, - to_type="nested_univ", + to_type=["pd-multiindex", "nested_univ"], as_scitype="Panel", store_behaviour="freeze", ) - if method == "predict_proba": - y_pred = np.empty([len(y), len(np.unique(y))]) + y_preds = [] + tt_ixx = [] + + if isinstance(X.index, pd.MultiIndex): + X_ix = X.index.get_level_values(0).unique() else: - y_pred = np.empty_like(y) - y_pred[:] = -1 - if isinstance(X, np.ndarray): - for tr_idx, tt_idx in cv.split(X): - X_train = X[tr_idx] - X_test = X[tt_idx] - y_train = y[tr_idx] - fitted_est = self.clone().fit(X_train, y_train) - y_pred[tt_idx] = getattr(fitted_est, method)(X_test) + X_ix = np.arange(len(X)) + + for tr_idx, tt_idx in cv.split(X_ix): + X_train = self._subset(X, tr_idx) + X_test = self._subset(X, tt_idx) + y_train = self._subset(y, tr_idx) + fitted_est = self.clone().fit(X_train, y_train) + y_preds.append(getattr(fitted_est, method)(X_test)) + tt_ixx.append(tt_idx) + + if return_type == "single_y_pred": + return self._pool(y_preds, tt_ixx, y) + else: + return y_preds + + def _subset(self, obj, ix): + """Subset input data by ix, for use in fit_predict_boilerplate. + + Parameters + ---------- + obj : pd.DataFrame or np.ndarray + if pd.DataFrame, instance index = first level of pd.MultiIndex + if np.ndarray, instance index = 0-th axis + ix : sklearn splitter index, e.g., ix, _ from KFold.split(X) + + Returns + ------- + obj_ix : obj subset by ix + """ + if isinstance(obj, np.ndarray): + return obj[ix] + if not isinstance(obj, (pd.DataFrame, pd.Series)): + raise ValueError("obj must be a pd.DataFrame, pd.Series, or np.ndarray") + if not isinstance(obj.index, pd.MultiIndex): + return obj.iloc[ix] else: - for tr_idx, tt_idx in cv.split(X): - X_train = X.iloc[tr_idx] - X_test = X.iloc[tt_idx] - y_train = y[tr_idx] - fitted_est = self.clone().fit(X_train, y_train) - y_pred[tt_idx] = getattr(fitted_est, method)(X_test) + ix_loc = obj.index.get_level_values(0).unique()[ix] + return obj.loc[ix_loc] + + def _pool(self, y_preds, tt_ixx, y): + """Pool predictions from cv splits, for use in fit_predict_boilerplate. + + Parameters + ---------- + y_preds : list of np.ndarray or pd.DataFrame + list of predictions from cv splits + tt_ixx : list of np.ndarray or pd.DataFrame + list of test indices from cv splits + Returns + ------- + y_pred : np.ndarray, pooled predictions + """ + y_pred = y_preds[0] + if isinstance(y_pred, (pd.DataFrame, pd.Series)): + for i in range(1, len(y_preds)): + y_pred = y_pred.combine_first(y_preds[i]) + y_pred = y_pred.reindex(y.index).fillna(-1) + else: + if y_pred.ndim == 1: + sh = y.shape + else: + sh = (y.shape[0], y_pred.shape[1]) + y_pred = -np.ones(sh, dtype=y.dtype) + for i, ix in enumerate(tt_ixx): + y_preds_i = y_preds[i] + if y_pred.ndim == 1: + y_preds_i = y_preds_i.reshape(-1) + y_pred[ix] = y_preds_i return y_pred def _check_convert_X_for_predict(self, X): diff --git a/sktime/classification/base.py b/sktime/classification/base.py index 4f09ca6da9e..032fc8a94b1 100644 --- a/sktime/classification/base.py +++ b/sktime/classification/base.py @@ -25,10 +25,9 @@ class name: BaseClassifier import time import numpy as np -import pandas as pd from sktime.base import BasePanelMixin -from sktime.datatypes import VectorizedDF, check_is_scitype, convert +from sktime.datatypes import VectorizedDF, check_is_scitype from sktime.utils.sklearn import is_sklearn_transformer from sktime.utils.validation import check_n_jobs from sktime.utils.validation._dependencies import _check_estimator_deps @@ -374,6 +373,7 @@ def fit_predict(self, X, y, cv=None, change_state=True): or of any other supported Panel mtype for list of mtypes, see datatypes.SCITYPE_REGISTER for specifications, see examples/AA_datatypes_and_datasets.ipynb + y : sktime compatible tabular data container, Table scitype 1D iterable, of shape [n_instances] or 2D iterable, of shape [n_instances, n_dimensions] @@ -381,20 +381,26 @@ class labels for fitting 0-th indices correspond to instance indices in X 1-st indices (if applicable) correspond to multioutput vector indices in X supported sktime types: np.ndarray (1D, 2D), pd.Series, pd.DataFrame + cv : None, int, or sklearn cross-validation object, optional, default=None - None : predictions are in-sample, equivalent to fit(X, y).predict(X) - cv : predictions are equivalent to fit(X_train, y_train).predict(X_test) - where multiple X_train, y_train, X_test are obtained from cv folds - returned y is union over all test fold predictions - cv test folds must be non-intersecting - int : equivalent to cv=KFold(cv, shuffle=True, random_state=x), - i.e., k-fold cross-validation predictions out-of-sample - random_state x is taken from self if exists, otherwise x=None + + * None : predictions are in-sample, equivalent to ``fit(X, y).predict(X)`` + * cv : predictions are equivalent to + ``fit(X_train, y_train).predict(X_test)``, where multiple + ``X_train``, ``y_train``, ``X_test`` are obtained from ``cv`` folds. + returned ``y`` is union over all test fold predictions, + ``cv`` test folds must be non-intersecting + * int : equivalent to ``cv=KFold(cv, shuffle=True, random_state=x)``, + i.e., k-fold cross-validation predictions out-of-sample, and where + ``random_state`` ``x`` is taken from ``self`` if exists, + otherwise ``x=None`` + change_state : bool, optional (default=True) - if False, will not change the state of the classifier, - i.e., fit/predict sequence is run with a copy, self does not change - if True, will fit self to the full X and y, - end state will be equivalent to running fit(X, y) + + * if False, will not change the state of the classifier, + i.e., fit/predict sequence is run with a copy, self does not change + * if True, will fit self to the full X and y, + end state will be equivalent to running fit(X, y) Returns ------- @@ -411,136 +417,6 @@ class labels for fitting X=X, y=y, cv=cv, change_state=change_state, method="predict" ) - def _fit_predict_boilerplate( - self, - X, - y, - cv, - change_state, - method, - return_type="single_y_pred", - ): - """Boilerplate logic for fit_predict and fit_predict_proba.""" - from sklearn.model_selection import KFold - - if isinstance(cv, int): - random_state = getattr(self, "random_state", None) - cv = KFold(cv, random_state=random_state, shuffle=True) - - if change_state: - self.reset() - est = self - else: - est = self.clone() - - if cv is None: - return getattr(est.fit(X, y), method)(X) - elif change_state: - self.fit(X, y) - - # we now know that cv is an sklearn splitter - X, y = self._internal_convert(X, y) - X_metadata = self._check_input( - X, y, return_metadata=self.METADATA_REQ_IN_CHECKS - ) - X_mtype = X_metadata["mtype"] - # Check this classifier can handle characteristics - self._check_capabilities(X_metadata) - - # handle single class case - if len(self._class_dictionary) == 1: - return self._single_class_y_pred(X) - - # Convert data to format easily usable for applying cv - if isinstance(X, np.ndarray): - X = convert( - X, - from_type=X_mtype, - to_type="numpy3D", - as_scitype="Panel", - store_behaviour="freeze", - ) - else: - X = convert( - X, - from_type=X_mtype, - to_type=["pd-multiindex", "nested_univ"], - as_scitype="Panel", - store_behaviour="freeze", - ) - - y_preds = [] - tt_ixx = [] - - for tr_idx, tt_idx in cv.split(X): - X_train = self._subset(X, tr_idx) - X_test = self._subset(X, tt_idx) - y_train = self._subset(y, tr_idx) - fitted_est = self.clone().fit(X_train, y_train) - y_preds.append(getattr(fitted_est, method)(X_test)) - tt_ixx.append(tt_idx) - - if return_type == "single_y_pred": - return self._pool(y_preds, tt_ixx, y) - else: - return y_preds - - def _subset(self, obj, ix): - """Subset input data by ix, for use in fit_predict_boilerplate. - - Parameters - ---------- - obj : pd.DataFrame or np.ndarray - if pd.DataFrame, instance index = first level of pd.MultiIndex - if np.ndarray, instance index = 0-th axis - ix : sklearn splitter index, e.g., ix, _ from KFold.split(X) - - Returns - ------- - obj_ix : obj subset by ix - """ - if isinstance(obj, np.ndarray): - return obj[ix] - if not isinstance(obj, (pd.DataFrame, pd.Series)): - raise ValueError("obj must be a pd.DataFrame, pd.Series, or np.ndarray") - if not isinstance(obj.index, pd.MultiIndex): - return obj.iloc[ix] - else: - ix_loc = obj.index.get_level_values(0).unique()[ix] - return obj.loc[ix_loc] - - def _pool(self, y_preds, tt_ixx, y): - """Pool predictions from cv splits, for use in fit_predict_boilerplate. - - Parameters - ---------- - y_preds : list of np.ndarray or pd.DataFrame - list of predictions from cv splits - tt_ixx : list of np.ndarray or pd.DataFrame - list of test indices from cv splits - - Returns - ------- - y_pred : np.ndarray, pooled predictions - """ - y_pred = y_preds[0] - if isinstance(y_pred, (pd.DataFrame, pd.Series)): - for i in range(1, len(y_preds)): - y_pred = y_pred.combine_first(y_preds[i]) - y_pred = y_pred.reindex(y.index).fillna(-1) - else: - if y_pred.ndim == 1: - sh = y.shape - else: - sh = (y.shape[0], y_pred.shape[1]) - y_pred = -np.ones(sh, dtype=y.dtype) - for i, ix in enumerate(tt_ixx): - y_preds_i = y_preds[i] - if y_pred.ndim == 1: - y_preds_i = y_preds_i.reshape(-1) - y_pred[ix] = y_preds_i - return y_pred - def fit_predict_proba(self, X, y, cv=None, change_state=True): """Fit and predict labels probabilities for sequences in X. @@ -564,6 +440,7 @@ def fit_predict_proba(self, X, y, cv=None, change_state=True): or of any other supported Panel mtype for list of mtypes, see datatypes.SCITYPE_REGISTER for specifications, see examples/AA_datatypes_and_datasets.ipynb + y : sktime compatible tabular data container, Table scitype 1D iterable, of shape [n_instances] or 2D iterable, of shape [n_instances, n_dimensions] @@ -571,18 +448,26 @@ class labels for fitting 0-th indices correspond to instance indices in X 1-st indices (if applicable) correspond to multioutput vector indices in X supported sktime types: np.ndarray (1D, 2D), pd.Series, pd.DataFrame + cv : None, int, or sklearn cross-validation object, optional, default=None - None : predictions are in-sample, equivalent to fit(X, y).predict(X) - cv : predictions are equivalent to fit(X_train, y_train).predict(X_test) - where multiple X_train, y_train, X_test are obtained from cv folds - returned y is union over all test fold predictions - cv test folds must be non-intersecting - int : equivalent to cv=Kfold(int), i.e., k-fold cross-validation predictions + + * None : predictions are in-sample, equivalent to ``fit(X, y).predict(X)`` + * cv : predictions are equivalent to + ``fit(X_train, y_train).predict(X_test)``, where multiple + ``X_train``, ``y_train``, ``X_test`` are obtained from ``cv`` folds. + returned ``y`` is union over all test fold predictions, + ``cv`` test folds must be non-intersecting + * int : equivalent to ``cv=KFold(cv, shuffle=True, random_state=x)``, + i.e., k-fold cross-validation predictions out-of-sample, and where + ``random_state`` ``x`` is taken from ``self`` if exists, + otherwise ``x=None`` + change_state : bool, optional (default=True) - if False, will not change the state of the classifier, - i.e., fit/predict sequence is run with a copy, self does not change - if True, will fit self to the full X and y, - end state will be equivalent to running fit(X, y) + + * if False, will not change the state of the classifier, + i.e., fit/predict sequence is run with a copy, self does not change + * if True, will fit self to the full X and y, + end state will be equivalent to running fit(X, y) Returns ------- diff --git a/sktime/classification/tests/test_all_classifiers.py b/sktime/classification/tests/test_all_classifiers.py index cd16cdfb552..f42b1637f4a 100644 --- a/sktime/classification/tests/test_all_classifiers.py +++ b/sktime/classification/tests/test_all_classifiers.py @@ -109,6 +109,11 @@ def test_classifier_output(self, estimator_instance, scenario): ) X_train_len = X_train_metadata["n_instances"] + # temp hack until _get_train_probs is implemented for all mtypes + if hasattr(X_train_len, "index"): + if isinstance(X_train_len.index, pd.MultiIndex): + return None + train_proba = estimator_instance._get_train_probs(X_train, y_train) assert isinstance(train_proba, np.ndarray) diff --git a/sktime/utils/_testing/scenarios_classification.py b/sktime/utils/_testing/scenarios_classification.py index 2274bdb06c5..796be310981 100644 --- a/sktime/utils/_testing/scenarios_classification.py +++ b/sktime/utils/_testing/scenarios_classification.py @@ -16,7 +16,11 @@ from sktime.base import BaseObject from sktime.registry import scitype from sktime.utils._testing.hierarchical import _make_hierarchical -from sktime.utils._testing.panel import _make_classification_y, _make_panel_X +from sktime.utils._testing.panel import ( + _make_classification_y, + _make_panel, + _make_panel_X, +) from sktime.utils._testing.scenarios import TestScenario # random seed for generating data to keep scenarios exactly reproducible @@ -117,6 +121,31 @@ def args(self): default_arg_sequence = ["fit", "predict", "predict", "predict"] +class ClassifierFitPredictThreeClasses(ClassifierTestScenario): + """Fit/predict with univariate panel X, pd-multiindex mtype, and three classes.""" + + _tags = { + "X_univariate": True, + "X_unequal_length": False, + "is_enabled": True, + "n_classes": 3, + } + + @property + def args(self): + y = _make_classification_y(n_instances=18, n_classes=3, random_state=RAND_SEED) + X = _make_panel(n_instances=18, n_timepoints=20, random_state=RAND_SEED, y=y) + X_test = _make_panel_X(n_instances=5, n_timepoints=20, random_state=RAND_SEED) + + return { + "fit": {"y": y, "X": X}, + "predict": {"X": X_test}, + } + + default_method_sequence = ["fit", "predict", "predict_proba", "decision_function"] + default_arg_sequence = ["fit", "predict", "predict", "predict"] + + class ClassifierFitPredictNumpy(ClassifierTestScenario): """Fit/predict with univariate panel X, numpy3D mtype, and labels y.""" @@ -216,6 +245,7 @@ def args(self): scenarios_classification = [ ClassifierFitPredict, + ClassifierFitPredictThreeClasses, ClassifierFitPredictNumpy, ClassifierFitPredictMultivariate, ClassifierFitPredictUnequalLength, @@ -224,6 +254,7 @@ def args(self): # same scenarios used for early classification scenarios_early_classification = [ ClassifierFitPredict, + ClassifierFitPredictThreeClasses, ClassifierFitPredictNumpy, ClassifierFitPredictMultivariate, ClassifierFitPredictUnequalLength,