### 1) Définissons un whyqd Schéma pour décrire et spécifier les critères de validation de notre jeu de données.

In [1]:
import whyqd as _w
schema = _w.Schema()

In [2]:
details = {
        "name": "Carbon_dioxide_emissions_and_stocks",
        "title": "Carbon dioxide emissions and stocks in the world",
        "description": """
        Le dioxyde de carbone, aussi appelé gaz carbonique ou anhydride carbonique. À partir d'une certaine
        concentration dans l'air, ce gaz s'avère dangereux voire mortel à cause du risque d'asphyxie ou d'acidose
        Les propriétés du dioxyde de carbone furent étudiées plus en détail dans les années 1750 par le chimiste 
        et physicien écossais Joseph Black. Le dioxyde de carbone est l'un des premiers gaz (avec la vapeur d'eau) 
        à avoir été décrit comme étant une substance distincte de l'air."""
}
schema = _w.Schema()
schema.set_details(**details)

In [3]:
schema

Schema: `carbon_dioxide_emissions_and_stocks`

In [4]:
fields = [
    {
        "name": "Country Name",
        "title": "Country Name",
        "type": "string",
        "description": "Official country names.",
        "constraints": {
            "required": True
        }
    },
    {
        "name": "HDI Category",
        "title": "HDI Category",
        "type": "string",
        "description": "Human Development Index Category derived from the HDI Rank.",
    },
    {
        "name": "Indicator Name",
        "title": "Indicator Name",
        "type": "string",
        "description": "Indicator described in the data series.",
    },
    {
        "name": "Reference",
        "title": "Reference",
        "type": "string",
        "description": "Reference to data source.",
    },
    {
        "name": "Values",
        "title": "Values",
        "type": "number",
        "description": "Value for the Year and Indicator Name.",
        "constraints": {
            "required": True
        }
    },
]

In [5]:
for field in fields:
    schema.set_field(**field)

In [6]:
schema.default_field_types

['string',
 'number',
 'integer',
 'boolean',
 'object',
 'array',
 'date',
 'datetime',
 'year']

In [7]:
schema.field("country_name")

{'name': 'country_name',
 'type': 'string',
 'constraints': {'required': True},
 'title': 'Country Name',
 'description': 'Official country names.'}

In [8]:
# Enregistrons notre schéma dans un répertoire spécifique

directory = "C:/Users/HP 840 G2/Desktop/DCDJ/Cours_Boyera/Exercice_Leçon_2_2"

In [9]:
# Spécification d'un nom de fichier facultatif
filename = "Table_24-schema"

In [10]:
# Verification de l'existence du fichier
schema.save(directory, filename=filename, overwrite=True)

True

In [11]:
schema.all_field_names

['country_name', 'hdi_category', 'indicator_name', 'reference', 'values']

### 2) Morph et méthode pour transformer les données sources

In [12]:
# Les importations et paramètres suivants vous permettent d'obtenir un large éventail de résultats pour vos tableaux

from IPython.core.display import HTML
display(HTML("<style>pre { white-space: pre !important; }</style>"))

import numpy as np
import whyqd as _w

SCHEMA_SOURCE = "C:/Users/HP 840 G2/Desktop/DCDJ/Cours_Boyera/Exercice_Leçon_2_2Table_24-schema.json"
DIRECTORY = "C:/Users/HP 840 G2/Desktop/DCDJ/Cours_Boyera/Exercice_Leçon_2_2/"
INPUT_DATA = [
    "HDR 2007-2008 Table 24.xlsx"
]
method = _w.Method(SCHEMA_SOURCE, directory=DIRECTORY, input_data=INPUT_DATA)

In [13]:
print(method.print_input_data())



Data id: 33e7fdb1-4eff-4934-9004-b43f400591d4
Original source: HDR 2007-2008 Table 24.xlsx

  ..  Unnamed: 0                                Unnamed: 1  Unnamed: 2                                      Unnamed: 3    Unnamed: 4    Unnamed: 5    Unnamed: 6    Unnamed: 7    Unnamed: 8    Unnamed: 9    Unnamed: 10    Unnamed: 11    Unnamed: 12    Unnamed: 13    Unnamed: 14    Unnamed: 15    Unnamed: 16    Unnamed: 17    Unnamed: 18    Unnamed: 19    Unnamed: 20    Unnamed: 21    Unnamed: 22    Unnamed: 23    Unnamed: 24    Unnamed: 25    Unnamed: 26    Unnamed: 27
   0  nan                                              nan  …while preserving it for future generations…           nan           nan           nan           nan           nan           nan           nan            nan            nan            nan            nan            nan            nan            nan            nan            nan            nan            nan            nan            nan            nan            nan      

In [14]:
method.status

'Ready to Merge'

In [15]:
method.default_morph_types

['CATEGORISE', 'DEBLANK', 'DEDUPE', 'DELETE', 'MELT', 'REBASE', 'RENAME']

In [16]:
_id = method.input_data[0]["id"]
df = method.input_dataframe(_id)
df.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27
0,,,…while preserving it for future generations…,,,,,,,,...,,,,,,,,,,
1,24 Carbon dioxide emissions and stocks,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,Carbon dioxide emissionsa,,,,,,,,...,,,,,,,,,,
4,,,Total,,,,Annual change,,Share of \nworld totalb,,...,,,Carbon intensity \nof growth\nCO2 emissions \n...,,,,Carbon dioxide emissions from forest biomassc,,Carbon stocks in forest biomassd,


In [17]:
df.head(10)

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27
0,,,…while preserving it for future generations…,,,,,,,,...,,,,,,,,,,
1,24 Carbon dioxide emissions and stocks,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,Carbon dioxide emissionsa,,,,,,,,...,,,,,,,,,,
4,,,Total,,,,Annual change,,Share of \nworld totalb,,...,,,Carbon intensity \nof growth\nCO2 emissions \n...,,,,Carbon dioxide emissions from forest biomassc,,Carbon stocks in forest biomassd,
5,,,(Mt CO2),,,,(%),,\n(%),,...,,,(kt of CO2 per million\n2000 PPP US$),,,,(Mt CO2 / year),,(Mt Carbon),
6,HDI rank,,1990,,2004.0,,1990-2004,,1990,,...,2004.0,,1990,,2004.0,,1990-2005,,2005,
7,HIGH HUMAN DEVELOPMENT,,,,,,,,,,...,,,,,,,,,,
8,1,Iceland,2.0,,2.2,,0.7,,(.),,...,0.64,,0.32,,0.24,,-0.1,,1.5,
9,2,Norway,33.2,,87.5,,11.7,,0.1,,...,3.17,,0.31,,0.53,,-15.6,,344.0,


In [18]:
method.add_input_data_morph(_id, ["REBASE", 7])

In [19]:
rows = [int(i) for i in np.arange(188, df.index[-1]+1)]
method.add_input_data_morph(_id, ["DELETE", rows])

In [20]:
df.tail()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27
214,NOTES,,,,,,,,,,...,,,,,,,,,,
215,,,,,,,,,,,...,,,,,,,,,,
216,a. Refers to carbon dioxide emissions stemming...,,,,,,,,,,...,,,,,,,,,,
217,,,,,,,,,,,...,,,,,,,,,,
218,"SOURCES\nColumns 1, 2 and 4—7: calculated base...",,,,,,,,,,...,,,,,,,,,,


In [21]:
df = method.input_dataframe(_id)
df.tail(20)

Unnamed: 0,HIGH HUMAN DEVELOPMENT,NaN,Unnamed: 3,NaN.1,NaN.2,NaN.3,NaN.4,NaN.5,NaN.6,NaN.7,...,NaN.8,NaN.9,NaN.10,NaN.11,NaN.12,NaN.13,NaN.14,NaN.15,NaN.16,NaN.17
167,158,Nigeria,45.3,,114.0,,10.8,,0.2,,...,0.5,,0.9,0.64,1.15,0.59,,0.92,181.6,1401.5
168,159,Tanzania (United Republic of),2.3,,4.3,,6.2,,(.),,...,0.1,,0.1,0.24,0.23,0.17,,0.18,167.3,2254.0
169,160,Guinea,1.0,,1.3,,2.3,,(.),,...,0.2,,0.1,..,..,0.09,,0.07,15.9,636.0
170,161,Rwanda,0.5,,0.6,,0.6,,(.),,...,0.1,,0.1,..,..,0.07,,0.06,-2.1,44.1
171,162,Angola,4.6,,7.9,,5.0,,(.),,...,0.5,,0.7,0.74,0.83,0.25,,0.29,37.6,4829.3
172,163,Benin,0.7,,2.4,,16.7,,(.),,...,0.1,,0.3,0.43,0.96,0.16,,0.29,..,..
173,164,Malawi,0.6,,1.0,,5.3,,(.),,...,0.1,,0.1,..,..,0.13,,0.14,5.6,161.0
174,165,Zambia,2.4,,2.3,,-0.5,,(.),,...,0.3,,0.2,0.45,0.33,0.31,,0.23,44.4,1156.1
175,166,Côte d'Ivoire,5.4,,5.2,,-0.3,,(.),,...,0.5,,0.3,1.22,0.74,0.26,,0.2,-9.0,1864.0
176,167,Burundi,0.2,,0.2,,0.9,,(.),,...,(.),,(.),..,..,0.04,,0.05,..,..


In [22]:
columns = [
    "HDI rank",
    "Country",
    "Total (Mt CO2)_1990",
    "Reference 1",
    "Total (Mt CO2)_2004",
    "Reference 2",
    "Annual change (%)_1990-2004",
    "Reference 3",
    "Share of world total (%)_1990",
    "Reference 4",
    "Share of world total (%)_2004",
    "Per capita (t CO2)_1990",
    "Reference 5",
    "Per capita (t CO2)_2004",
    "Carbon intensity of energy (kt of CO2 per kt of oil equivalent)_1990",
    "Carbon intensity of energy (kt of CO2 per kt of oil equivalent)_2004",
    "Carbon intensity of growth (kt of CO2 per million 2000 PPP US$)_1990",
    "Reference 6",
    "Carbon intensity of growth (kt of CO2 per million 2000 PPP US$)_2004",
    "Carbon dioxide emissions from forest biomass (Mt CO2 / year)_1990-2005",
    "Carbon stocks in forest biomass (Mt Carbon)_2005"
]

In [23]:
method.add_input_data_morph(_id, ["RENAME", columns])

In [24]:
df = method.input_dataframe(_id)
df.head()

Unnamed: 0,HDI rank,Country,Total (Mt CO2)_1990,Reference 1,Total (Mt CO2)_2004,Reference 2,Annual change (%)_1990-2004,Reference 3,Share of world total (%)_1990,Reference 4,...,Per capita (t CO2)_1990,Reference 5,Per capita (t CO2)_2004,Carbon intensity of energy (kt of CO2 per kt of oil equivalent)_1990,Carbon intensity of energy (kt of CO2 per kt of oil equivalent)_2004,Carbon intensity of growth (kt of CO2 per million 2000 PPP US$)_1990,Reference 6,Carbon intensity of growth (kt of CO2 per million 2000 PPP US$)_2004,Carbon dioxide emissions from forest biomass (Mt CO2 / year)_1990-2005,Carbon stocks in forest biomass (Mt Carbon)_2005
8,1,Iceland,2.0,,2.2,,0.7,,(.),,...,7.9,,7.6,0.93,0.64,0.32,,0.24,-0.1,1.5
9,2,Norway,33.2,,87.5,,11.7,,0.1,,...,7.8,,19.1,1.54,3.17,0.31,,0.53,-15.6,344.0
10,3,Australia,278.5,,326.6,,1.2,,1.2,,...,16.3,,16.2,3.18,2.82,0.81,,0.58,..,8339.0
11,4,Canada,415.8,,639.0,,3.8,,1.8,,...,15.0,,20.0,1.99,2.38,0.66,,0.69,..,..
12,5,Ireland,30.6,,42.3,,2.7,,0.1,,...,8.8,,10.5,2.94,2.78,0.55,,0.31,-1.0,19.8


In [25]:
hdi_categories = ["HIGH HUMAN DEVELOPMENT", "MEDIUM HUMAN DEVELOPMENT", "LOW HUMAN DEVELOPMENT"]
rows = df[df["HDI rank"].isin(hdi_categories)].index

In [26]:
rows

Int64Index([78, 164], dtype='int64')

In [27]:
# df.fillna(method='ffill')

In [28]:
# df.fillna(method='ffill', inplace=True)

In [29]:
method.default_morph_settings("CATEGORISE")

{'name': 'CATEGORISE',
 'title': 'Categorise',
 'type': 'morph',
 'description': 'Convert row-level categories into column categorisations.',
 'structure': ['rows', 'column_names']}

In [30]:
method.default_morph_settings("DEBLANK")

{'name': 'DEBLANK',
 'title': 'De-blank',
 'type': 'morph',
 'description': 'Remove all blank columns and rows from a DataFrame.',
 'structure': []}

In [31]:
method.add_input_data_morph(_id, ["CATEGORISE", list(rows), "HDI category"])

In [32]:
df = method.input_dataframe(_id)
df.head()

Unnamed: 0,HDI rank,Country,Total (Mt CO2)_1990,Reference 1,Total (Mt CO2)_2004,Reference 2,Annual change (%)_1990-2004,Reference 3,Share of world total (%)_1990,Reference 4,...,Reference 5,Per capita (t CO2)_2004,Carbon intensity of energy (kt of CO2 per kt of oil equivalent)_1990,Carbon intensity of energy (kt of CO2 per kt of oil equivalent)_2004,Carbon intensity of growth (kt of CO2 per million 2000 PPP US$)_1990,Reference 6,Carbon intensity of growth (kt of CO2 per million 2000 PPP US$)_2004,Carbon dioxide emissions from forest biomass (Mt CO2 / year)_1990-2005,Carbon stocks in forest biomass (Mt Carbon)_2005,HDI category
8,1,Iceland,2.0,,2.2,,0.7,,(.),,...,,7.6,0.93,0.64,0.32,,0.24,-0.1,1.5,
9,2,Norway,33.2,,87.5,,11.7,,0.1,,...,,19.1,1.54,3.17,0.31,,0.53,-15.6,344.0,
10,3,Australia,278.5,,326.6,,1.2,,1.2,,...,,16.2,3.18,2.82,0.81,,0.58,..,8339.0,
11,4,Canada,415.8,,639.0,,3.8,,1.8,,...,,20.0,1.99,2.38,0.66,,0.69,..,..,
12,5,Ireland,30.6,,42.3,,2.7,,0.1,,...,,10.5,2.94,2.78,0.55,,0.31,-1.0,19.8,


In [33]:
# Regroupons les indicateurs suivant la methode "melt"
columns = [
    "HDI rank",
    "Total (Mt CO2)_1990",
    "Total (Mt CO2)_2004",
    "Annual change (%)_1990-2004",
    "Share of world total (%)_1990",
    "Share of world total (%)_2004",
    "Per capita (t CO2)_1990",
    "Per capita (t CO2)_2004",
    "Carbon intensity of energy (kt of CO2 per kt of oil equivalent)_1990",
    "Carbon intensity of energy (kt of CO2 per kt of oil equivalent)_2004",
    "Carbon intensity of growth (kt of CO2 per million 2000 PPP US$)_1990",
    "Carbon intensity of growth (kt of CO2 per million 2000 PPP US$)_2004",
    "Carbon dioxide emissions from forest biomass (Mt CO2 / year)_1990-2005",
    "Carbon stocks in forest biomass (Mt Carbon)_2005"
]
method.add_input_data_morph(_id, ["MELT", columns, ["Indicator Name", "Indicator Value"]])

In [34]:
# Regroupons les réferences suivant la methode "melt"
columns = [
    "Reference 1",
    "Reference 2",
    "Reference 3",
    "Reference 4",
    "Reference 5",
    "Reference 6",
]
method.add_input_data_morph(_id, ["MELT", columns, ["Reference Name", "Reference"]])

In [35]:
method.add_input_data_morph(_id, ["DEBLANK"])

In [36]:
df = method.input_dataframe(_id)
df.head()

Unnamed: 0,Indicator Name,Country,HDI category,Indicator Value,Reference Name,Reference
0,HDI rank,Iceland,,1,Reference 1,
1,HDI rank,Norway,,2,Reference 1,
2,HDI rank,Australia,,3,Reference 1,
3,HDI rank,Canada,,4,Reference 1,
4,HDI rank,Ireland,,5,Reference 1,


In [37]:
df["HDI category"].fillna(method='ffill', inplace=True)

# df["HDI category"].fillna(method='backfill', inplace=True)

df.head(100)

Unnamed: 0,Indicator Name,Country,HDI category,Indicator Value,Reference Name,Reference
0,HDI rank,Iceland,,1,Reference 1,
1,HDI rank,Norway,,2,Reference 1,
2,HDI rank,Australia,,3,Reference 1,
3,HDI rank,Canada,,4,Reference 1,
4,HDI rank,Ireland,,5,Reference 1,
...,...,...,...,...,...,...
95,HDI rank,Georgia,MEDIUM HUMAN DEVELOPMENT,96,Reference 1,i
96,HDI rank,Guyana,MEDIUM HUMAN DEVELOPMENT,97,Reference 1,
97,HDI rank,Azerbaijan,MEDIUM HUMAN DEVELOPMENT,98,Reference 1,i
98,HDI rank,Sri Lanka,MEDIUM HUMAN DEVELOPMENT,99,Reference 1,


In [38]:
print(method.help("structure"))


`structure` is the core of the wrangling process and is the process where you define the actions
which must be performed to restructure your working data.

Create a list of methods of the form:

	{
		"schema_field1": ["action", "column_name1", ["action", "column_name2"]],
		"schema_field2": ["action", "column_name1", "modifier", ["action", "column_name2"]],
	}

The format for defining a `structure` is as follows::

	[action, column_name, [action, column_name]]

e.g.::

	["CATEGORISE", "+", ["ORDER", "column_1", "column_2"]]

This permits the creation of quite expressive wrangling structures from simple building
blocks.

The schema for this method consists of the following terms:

['country_name', 'hdi_category', 'indicator_name', 'reference', 'values']

The actions:

['CALCULATE', 'CATEGORISE', 'JOIN', 'NEW', 'ORDER', 'ORDER_NEW', 'ORDER_OLD', 'RENAME']

The columns from your working data:

[]


Current method status: `Ready to Merge`


In [39]:
%time method.merge(overwrite_working=True)

Wall time: 5.59 s


In [40]:
print(method.help("structure"))


`structure` is the core of the wrangling process and is the process where you define the actions
which must be performed to restructure your working data.

Create a list of methods of the form:

	{
		"schema_field1": ["action", "column_name1", ["action", "column_name2"]],
		"schema_field2": ["action", "column_name1", "modifier", ["action", "column_name2"]],
	}

The format for defining a `structure` is as follows::

	[action, column_name, [action, column_name]]

e.g.::

	["CATEGORISE", "+", ["ORDER", "column_1", "column_2"]]

This permits the creation of quite expressive wrangling structures from simple building
blocks.

The schema for this method consists of the following terms:

['country_name', 'hdi_category', 'indicator_name', 'reference', 'values']

The actions:

['CALCULATE', 'CATEGORISE', 'JOIN', 'NEW', 'ORDER', 'ORDER_NEW', 'ORDER_OLD', 'RENAME']

The columns from your working data:

['Indicator Name', 'Country', 'HDI category', 'Indicator Value', 'Reference Name', 'Reference']

In [41]:
print(method.help())


**whyqd** provides data wrangling simplicity, complete audit transparency, and at speed.

To get help, type:

	>>> method.help(option)

Where `option` can be any of:

	status
	merge
	structure
	category
	filter
	transform

`status` will return the current method status, and your mostly likely next steps. The other options
will return methodology, and output of that option's result (if appropriate). The `error` will
present an error trace and attempt to guide you to fix the process problem.

Current method status: `Ready to Structure`


In [42]:
df.dtypes

Indicator Name     object
Country            object
HDI category       object
Indicator Value    object
Reference Name     object
Reference          object
dtype: object

In [43]:
structure = {
    "country_name": ["RENAME", "Country"],
    "hdi_category": ["RENAME", "HDI category"],
    "indicator_name": ["RENAME", "Indicator Name"],
    "values": ["RENAME", "Indicator Value"],
}

In [44]:
method.set_structure(**structure)

In [45]:
method.transform(overwrite_output=True)
FILENAME = "Table_24"
method.save(directory, filename=FILENAME, overwrite=True)

In [46]:
method.input_data_morphs(_id)

[{'f84f8cac-352e-4923-aa3d-2c162ff06ac3': ['DEBLANK']},
 {'fff4860b-b957-4ce7-af28-443e76a87fdd': ['DEDUPE']},
 {'420f2029-303e-4be3-b539-353c33dc88e3': ['REBASE', [7]]},
 {'2d06b89e-73bb-4f91-be5a-31e38592b771': ['DELETE',
   [188,
    189,
    190,
    191,
    192,
    193,
    194,
    195,
    196,
    197,
    198,
    199,
    200,
    201,
    202,
    203,
    204,
    205,
    206,
    207,
    208,
    209,
    210,
    211,
    212,
    213,
    214,
    215,
    216,
    217,
    218]]},
 {'c0a09b42-0c90-499a-a1c0-08b29ce1a9b4': ['RENAME',
   ['HDI rank',
    'Country',
    'Total (Mt CO2)_1990',
    'Reference 1',
    'Total (Mt CO2)_2004',
    'Reference 2',
    'Annual change (%)_1990-2004',
    'Reference 3',
    'Share of world total (%)_1990',
    'Reference 4',
    'Share of world total (%)_2004',
    'Per capita (t CO2)_1990',
    'Reference 5',
    'Per capita (t CO2)_2004',
    'Carbon intensity of energy (kt of CO2 per kt of oil equivalent)_1990',
    'Carbon in

In [47]:
%time method.validates

Wall time: 4.58 s


True

### 3) Validation et manipulation via l'utilisation de PandasSchema pour examiner nos données de sortie

In [49]:
import pandas as pd
import numpy as np

source = "C:/Users/HP 840 G2/Desktop/DCDJ/Cours_Boyera/Exercice_Leçon_2_2/output_29b8dbae-9b1d-4eb7-943c-0c354ba4fd85.csv"

df = pd.read_csv(source)
df.head()

Unnamed: 0,reference,country_name,hdi_category,indicator_name,values
0,,Iceland,,HDI rank,1
1,,Norway,,HDI rank,2
2,,Australia,,HDI rank,3
3,,Canada,,HDI rank,4
4,,Ireland,,HDI rank,5


In [50]:
from pandas_schema import Column, Schema
from pandas_schema.validation import LeadingWhitespaceValidation, TrailingWhitespaceValidation, IsDtypeValidation, InListValidation

# Nous ne testerons que ces colonnes
columns = ["country_name", "hdi_category", "values"]
# Et ces catégories
hdi_categories = ["HIGH HUMAN DEVELOPMENT", "MEDIUM HUMAN DEVELOPMENT", "LOW HUMAN DEVELOPMENT"]

In [51]:
schema = Schema([
    Column("country_name", [LeadingWhitespaceValidation(), TrailingWhitespaceValidation()]),
    Column("hdi_category", [InListValidation(hdi_categories)]),
    Column("values", [IsDtypeValidation(np.dtype(float)), IsDtypeValidation(np.dtype(int))])
])

In [52]:
errors = schema.validate(df[columns])

print(F"Nombre d'erreurs :  {len(errors)}")
# Juste les 10 premiers
for error in errors[:10]:
    print(error)

Nombre d'erreurs :  1094
The column values has a dtype of object which is not a subclass of the required type float64
The column values has a dtype of object which is not a subclass of the required type int32
{row: 0, column: "hdi_category"}: "nan" is not in the list of legal options (HIGH HUMAN DEVELOPMENT, MEDIUM HUMAN DEVELOPMENT, LOW HUMAN DEVELOPMENT)
{row: 1, column: "hdi_category"}: "nan" is not in the list of legal options (HIGH HUMAN DEVELOPMENT, MEDIUM HUMAN DEVELOPMENT, LOW HUMAN DEVELOPMENT)
{row: 2, column: "hdi_category"}: "nan" is not in the list of legal options (HIGH HUMAN DEVELOPMENT, MEDIUM HUMAN DEVELOPMENT, LOW HUMAN DEVELOPMENT)
{row: 3, column: "hdi_category"}: "nan" is not in the list of legal options (HIGH HUMAN DEVELOPMENT, MEDIUM HUMAN DEVELOPMENT, LOW HUMAN DEVELOPMENT)
{row: 4, column: "hdi_category"}: "nan" is not in the list of legal options (HIGH HUMAN DEVELOPMENT, MEDIUM HUMAN DEVELOPMENT, LOW HUMAN DEVELOPMENT)
{row: 5, column: "hdi_category"}: "nan" i

### 4) Présentons notre citation et dressons la liste des fichiers, métadonnées et informations que nous avons l'intention de publier dans le cadre de cet exercice.

In [53]:
for l in method.citation.split(","):
    print(l)

2020-05-25
 Carbon dioxide emissions and stocks in the world
 21cadea18a1affce2ebdac9893b43f4f1b02e386575b45f073c71eebbe996d073bbb7a549b13bed5aca22277979bb8e8a0108fc802735df7073f9fec70cfe2b9
 [input sources: HDR 2007-2008 Table 24.xlsx
 862508122dcac562b598d1d9f178384b8ccd627d24573491e0f597a2eacae4a41ff7f5e909a31a9a06ac3a58017a9c07b1f2d62fdf7b17b8029fdf06baf268bf]
