diff --git a/_doc/examples/README.txt b/_doc/examples/README.txt new file mode 100644 index 0000000..566f0e8 --- /dev/null +++ b/_doc/examples/README.txt @@ -0,0 +1,3 @@ +==================== +Galeries d'exemples +==================== diff --git a/_doc/examples/lectures/README.txt b/_doc/examples/lectures/README.txt new file mode 100644 index 0000000..962ff70 --- /dev/null +++ b/_doc/examples/lectures/README.txt @@ -0,0 +1,6 @@ + +.. _l-gallery-examples: + +Exemples longs +-------------- + diff --git a/_doc/examples/lectures/plot_nuage.py b/_doc/examples/lectures/plot_nuage.py new file mode 100644 index 0000000..6c86f96 --- /dev/null +++ b/_doc/examples/lectures/plot_nuage.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +""" +Corrélations +============ + +Dessine les corrélations pour un jeu de données. +""" + +############### +# A remplacer. + +import os +import sys +this = os.path.abspath(os.path.dirname(__file__)) +if "papierstat" in this: + this = this.split("papierstat")[0].rstrip("\\/") +for module in ["papierstat"]: + try: + exec("import %s" % module) + except ImportError: + p = os.path.join(this, module, "src") + sys.path.append(p) + exec("import %s" % module) + + +######################### +# Récupération des données + +from papierstat.datasets import load_wines_dataset +df = load_wines_dataset() +print(df.head(n=2).T) + +#################### +# Les corrélations avec :epkg:`seaborn`. 
+ +from seaborn import clustermap + +clustermap(df.corr(), center=0, cmap="vlag", + linewidths=.75, figsize=(13, 13)) + +import matplotlib.pyplot as plt +plt.show() diff --git a/_doc/notebooks/lectures/wines_knn.ipynb b/_doc/notebooks/lectures/wines_knn.ipynb index 4c3d1d7..36f5b18 100644 --- a/_doc/notebooks/lectures/wines_knn.ipynb +++ b/_doc/notebooks/lectures/wines_knn.ipynb @@ -175,8 +175,8 @@ } ], "source": [ - "from papierstat.datasets import load_wines_datasets\n", - "df = load_wines_datasets()\n", + "from papierstat.datasets import load_wines_dataset\n", + "df = load_wines_dataset()\n", "df.head()" ] }, diff --git a/_doc/notebooks/lectures/wines_knn_eval.ipynb b/_doc/notebooks/lectures/wines_knn_eval.ipynb index 0ee6299..44710df 100644 --- a/_doc/notebooks/lectures/wines_knn_eval.ipynb +++ b/_doc/notebooks/lectures/wines_knn_eval.ipynb @@ -25,8 +25,8 @@ "metadata": {}, "outputs": [], "source": [ - "from papierstat.datasets import load_wines_datasets\n", - "df = load_wines_datasets()\n", + "from papierstat.datasets import load_wines_dataset\n", + "df = load_wines_dataset()\n", "X = df.drop(['quality', 'color'], axis=1)\n", "y = df['quality']" ] diff --git a/_doc/notebooks/lectures/wines_knn_split.ipynb b/_doc/notebooks/lectures/wines_knn_split.ipynb index 950c0a0..d6701ed 100644 --- a/_doc/notebooks/lectures/wines_knn_split.ipynb +++ b/_doc/notebooks/lectures/wines_knn_split.ipynb @@ -24,8 +24,8 @@ "metadata": {}, "outputs": [], "source": [ - "from papierstat.datasets import load_wines_datasets\n", - "df = load_wines_datasets()\n", + "from papierstat.datasets import load_wines_dataset\n", + "df = load_wines_dataset()\n", "X = df.drop(['quality', 'color'], axis=1)\n", "y = df['quality']" ] diff --git a/_doc/sphinxdoc/source/biblio.rst b/_doc/sphinxdoc/source/biblio.rst new file mode 100644 index 0000000..8a4e812 --- /dev/null +++ b/_doc/sphinxdoc/source/biblio.rst @@ -0,0 +1,15 @@ + +Bibliographie +============= + +*Anti-sèches* + +* `Anti-sèches de machine 
learning `_ + +*Blog* + +* `Freakonometrics `_ + +*Livres* + +* `The Elements of Statistical Learning `_ diff --git a/_doc/sphinxdoc/source/index.rst b/_doc/sphinxdoc/source/index.rst index 69fc278..4cd9389 100644 --- a/_doc/sphinxdoc/source/index.rst +++ b/_doc/sphinxdoc/source/index.rst @@ -18,6 +18,7 @@ Petit voyage au pays du machine learning introduction rappel lectures/index + biblio api/index all_notebooks i_ex diff --git a/_doc/sphinxdoc/source/lectures/step1.rst b/_doc/sphinxdoc/source/lectures/step1.rst index f2f09c5..e69c9ad 100644 --- a/_doc/sphinxdoc/source/lectures/step1.rst +++ b/_doc/sphinxdoc/source/lectures/step1.rst @@ -36,8 +36,8 @@ ci-dessous, pour deux vins. :rst: from pyquickhelper.pandashelper import df2rst - from papierstat.datasets import load_wines_datasets - df = load_wines_datasets() + from papierstat.datasets import load_wines_dataset + df = load_wines_dataset() print(df2rst(df.head(n=2).T.reset_index(drop=False))) On part du principe que si deux vins différents ont la même @@ -75,14 +75,14 @@ Il peut être également obtenu avec le code suivant : :rst: from pyquickhelper.pandashelper import df2rst - from papierstat.datasets import load_wines_datasets - df = load_wines_datasets() + from papierstat.datasets import load_wines_dataset + df = load_wines_dataset() print(df2rst(df.head())) .. 
plot:: - from papierstat.datasets import load_wines_datasets - df = load_wines_datasets() + from papierstat.datasets import load_wines_dataset + df = load_wines_dataset() import matplotlib.pyplot as plt plt.close('all') diff --git a/_unittests/ut_datasets/test_wines.py b/_unittests/ut_datasets/test_wines.py index 33313d2..ddb508c 100644 --- a/_unittests/ut_datasets/test_wines.py +++ b/_unittests/ut_datasets/test_wines.py @@ -39,7 +39,7 @@ from pyquickhelper.loghelper import fLOG from pyquickhelper.pycode import ExtTestCase -from src.papierstat.datasets import load_wines_datasets +from src.papierstat.datasets import load_wines_dataset class TestWines(ExtTestCase): @@ -50,7 +50,7 @@ def test_wines_download(self): self._testMethodName, OutputPrint=__name__ == "__main__") - df = load_wines_datasets(download=True) + df = load_wines_dataset(download=True) self.assertEqual(df.shape, (6497, 13)) def test_wines_local(self): @@ -59,7 +59,7 @@ def test_wines_local(self): self._testMethodName, OutputPrint=__name__ == "__main__") - df = load_wines_datasets(download=False) + df = load_wines_dataset(download=False) self.assertEqual(df.shape, (6497, 13)) diff --git a/requirements.txt b/requirements.txt index 34a1e23..a2e4c9f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ openpyxl pandas_streaming pycodestyle>=2.0.0 pydocstyle -pyquickhelper>=1.6.2283 +pyquickhelper>=1.6.2284 sphinx-bootstrap-theme sphinx-gallery sphinx-redactor-theme diff --git a/src/papierstat/datasets/__init__.py b/src/papierstat/datasets/__init__.py index 0579412..b677644 100644 --- a/src/papierstat/datasets/__init__.py +++ b/src/papierstat/datasets/__init__.py @@ -16,7 +16,7 @@ def get_data_folder(): return os.path.abspath(this) -def load_wines_datasets(download=False): +def load_wines_dataset(download=False): """ Retourne le jeu de données `wines quality `_.