In [38]:
# Data handling
# -----------------------------------------------------------------------
import pandas as pd
import numpy as np

# Visualization
# ------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import seaborn as sns

# Assess the linearity of the relationships between variables
# and the distribution of the variables
# ------------------------------------------------------------------------------
import scipy.stats as stats
from scipy.stats import shapiro, kstest

# Configuration
# -----------------------------------------------------------------------
pd.set_option('display.max_columns', None) # show every column when displaying DataFrames


# Warning handling
# -----------------------------------------------------------------------
import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including pandas deprecation and dtype warnings — consider narrowing the filter.
warnings.filterwarnings("ignore")

## Global Renewable Energy Production (2000-2023)
[Source](https://www.kaggle.com/datasets/ahmedgaitani/global-renewable-energy?resource=download)

Dataset Units: GWh

Columns:
- Year: The year of data collection (e.g., 2000, 2001, etc.).
- Country: The name of the country.
- SolarEnergy: Annual solar energy production in gigawatt-hours (GWh).
- WindEnergy: Annual wind energy production in gigawatt-hours (GWh).
- HydroEnergy: Annual hydro energy production in gigawatt-hours (GWh).
- OtherRenewableEnergy: Annual energy production from other renewable sources (e.g., geothermal, biomass) in gigawatt-hours (GWh).
- TotalRenewableEnergy: Total annual renewable energy production in gigawatt-hours (GWh).

In [39]:
# Load the raw renewable-energy production CSV and preview its first rows.
df_general = pd.read_csv("data/raw/global_renewable_energy_production.csv")
df_general.head()

Unnamed: 0,Year,Country,SolarEnergy,WindEnergy,HydroEnergy,OtherRenewableEnergy,TotalRenewableEnergy
0,2000,USA,437.086107,1435.928598,1544.389701,319.396318,3736.800724
1,2001,USA,240.416776,402.792876,398.742141,439.779266,1481.731059
2,2002,USA,641.003511,1120.494351,334.99364,486.459433,2582.950935
3,2003,USA,849.198377,476.040844,609.102444,132.532029,2066.873694
4,2004,USA,373.818019,882.183361,1034.306532,181.053113,2471.361025


In [3]:
# Summary of columns, non-null counts and dtypes.
df_general.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240 entries, 0 to 239
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Year                  240 non-null    int64  
 1   Country               240 non-null    object 
 2   SolarEnergy           240 non-null    float64
 3   WindEnergy            240 non-null    float64
 4   HydroEnergy           240 non-null    float64
 5   OtherRenewableEnergy  240 non-null    float64
 6   TotalRenewableEnergy  240 non-null    float64
dtypes: float64(5), int64(1), object(1)
memory usage: 13.3+ KB


In [4]:
# Percentage of missing values per column (mean of the boolean NA mask, scaled to 0-100).
df_general.isna().mean() * 100

Year                    0.0
Country                 0.0
SolarEnergy             0.0
WindEnergy              0.0
HydroEnergy             0.0
OtherRenewableEnergy    0.0
TotalRenewableEnergy    0.0
dtype: float64

In [5]:
# Count rows that are exact duplicates of an earlier row.
df_general.duplicated().sum()

0

In [6]:
# Print each column name followed by its distinct values, separated by a dashed rule.
separator = '-' * 50
for column_name, column_values in df_general.items():
    print(column_name)
    print(column_values.unique())
    print(separator)

Year
[2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023]
--------------------------------------------------
Country
['USA' 'China' 'India' 'Germany' 'UK' 'France' 'Brazil' 'Canada'
 'Australia' 'Japan']
--------------------------------------------------
SolarEnergy
[437.08610696 240.4167764  641.00351057 849.19837672 373.81801866
 650.66760525 510.4629858  633.17311198 158.54643369 374.15239226
 209.83441136 332.90198344 592.03925141 945.54904741 179.64325185
 449.80956072 352.84105872 167.09557931 104.96990541 794.14331202
 876.79308329 379.88408954 898.49146832 784.70654376 570.45954644
 128.28626712 916.80982653 305.91834894 936.72788711 823.30486921
 826.69613965 305.14164629 106.25691748 207.8788306  566.91155957
 966.20256545 356.35644494 146.33087612 230.40538488 704.92199267
 431.00481945 181.26079305 136.6976274  560.88375247 721.84396429
 406.95971595 332.14746494 576.68552052 910.37625145 753.36011098
 67

In [None]:
# Save the inspected dataset to the "usable" folder.
# index=False prevents the RangeIndex from being written as an extra unnamed column.
df_general.to_csv("data/usable/global_renewable_energy_production.csv", index=False)


## Global primary energy consumption by source (1990-2023)
[Source](https://ourworldindata.org/grapher/global-energy-substitution?time=1990..2023)

- Dataset Units: TWh (substituted energy), según los nombres de las columnas
- Consumption

In [2]:
# Load the raw global energy substitution CSV.
df_countries = pd.read_csv("data/raw/global-energy-substitution.csv")

In [3]:
# Summary of columns, non-null counts and dtypes.
df_countries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76 entries, 0 to 75
Data columns (total 13 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   Entity                                         76 non-null     object 
 1   Code                                           76 non-null     object 
 2   Year                                           76 non-null     int64  
 3   Other renewables (TWh, substituted energy)     76 non-null     float64
 4   Biofuels (TWh, substituted energy)             76 non-null     float64
 5   Solar (TWh, substituted energy)                76 non-null     float64
 6   Wind (TWh, substituted energy)                 76 non-null     float64
 7   Hydropower (TWh, substituted energy)           76 non-null     float64
 8   Nuclear (TWh, substituted energy)              76 non-null     float64
 9   Gas (TWh, substituted energy)                  76 non-nu

In [8]:
# Preview the first rows.
df_countries.head()

Unnamed: 0,Entity,Code,Year,"Other renewables (TWh, substituted energy)","Biofuels (TWh, substituted energy)","Solar (TWh, substituted energy)","Wind (TWh, substituted energy)","Hydropower (TWh, substituted energy)","Nuclear (TWh, substituted energy)","Gas (TWh, substituted energy)","Oil (TWh, substituted energy)","Coal (TWh, substituted energy)","Traditional biomass (TWh, substituted energy)"
0,World,OWID_WRL,1800,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,97.0,5556
1,World,OWID_WRL,1810,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,128.0,5833
2,World,OWID_WRL,1820,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,153.0,6111
3,World,OWID_WRL,1830,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,264.0,6389
4,World,OWID_WRL,1840,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,356.0,6944


In [4]:
# Percentage of missing values per column (mean of the boolean NA mask, scaled to 0-100).
df_countries.isna().mean() * 100

Entity                                           0.0
Code                                             0.0
Year                                             0.0
Other renewables (TWh, substituted energy)       0.0
Biofuels (TWh, substituted energy)               0.0
Solar (TWh, substituted energy)                  0.0
Wind (TWh, substituted energy)                   0.0
Hydropower (TWh, substituted energy)             0.0
Nuclear (TWh, substituted energy)                0.0
Gas (TWh, substituted energy)                    0.0
Oil (TWh, substituted energy)                    0.0
Coal (TWh, substituted energy)                   0.0
Traditional biomass (TWh, substituted energy)    0.0
dtype: float64

In [6]:
# Count rows that are exact duplicates of an earlier row.
df_countries.duplicated().sum()

np.int64(0)

In [9]:
# Distinct values of the Entity column.
df_countries['Entity'].unique()

array(['World'], dtype=object)

In [10]:
# Keep only the rows whose Year is 1990 or later.
from_1990_onwards = df_countries['Year'].ge(1990)
df_world = df_countries.loc[from_1990_onwards]

In [11]:
# Check which years remain after the filter.
df_world['Year'].unique()

array([1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
       2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
       2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022,
       2023], dtype=int64)

In [12]:
# Save the filtered frame to the "usable" folder.
# index=False prevents the row labels left over from the year filter
# from being written as an extra unnamed column.
df_world.to_csv('data/usable/global_energy_consumption.csv', index=False)

## Dataset: Renewable Energy World Wide : 1965~2022 
[Source](https://www.kaggle.com/datasets/belayethossainds/renewable-energy-world-wide-19652022/data?select=01+renewable-share-energy.csv)

### `12-solar-energy-consumption.csv`:

Ideas:
- Hacer un ranking de países o continentes que más producen y que más consumen (una, otra o las dos).
- Electricity from solar (TWh): potencia consumida que se ha generado a partir de energía solar.
- `definición`: energía de origen solar consumida al año. Hay dos tecnologías de energía solar: (1) fotovoltaica, la mayoritaria, que funciona con paneles solares, y (2) termosolar, mucho menos utilizada.


In [7]:
# Load the raw solar energy consumption CSV and preview its first rows.
df_solar_consump = pd.read_csv("data/raw/12-solar-energy-consumption.csv")

df_solar_consump.head()

Unnamed: 0,Entity,Code,Year,Electricity from solar (TWh)
0,Afghanistan,AFG,2000,0.0
1,Afghanistan,AFG,2001,0.0
2,Afghanistan,AFG,2002,0.0
3,Afghanistan,AFG,2003,0.0
4,Afghanistan,AFG,2004,0.0


In [8]:
# Summary of columns, non-null counts and dtypes.
df_solar_consump.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8683 entries, 0 to 8682
Data columns (total 4 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Entity                        8683 non-null   object 
 1   Code                          7227 non-null   object 
 2   Year                          8683 non-null   int64  
 3   Electricity from solar (TWh)  8683 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 271.5+ KB


In [9]:
# Percentage of missing values per column (mean of the boolean NA mask, scaled to 0-100).
df_solar_consump.isna().mean() * 100

Entity                           0.000000
Code                            16.768398
Year                             0.000000
Electricity from solar (TWh)     0.000000
dtype: float64

In [10]:
# Distinct entities whose Code is missing.
code_is_missing = df_solar_consump['Code'].isna()
df_solar_consump.loc[code_is_missing, 'Entity'].unique()

array(['Africa', 'Africa (BP)', 'Africa (Ember)', 'Asia', 'Asia (Ember)',
       'Asia Pacific (BP)', 'CIS (BP)', 'Central America (BP)',
       'Eastern Africa (BP)', 'Europe', 'Europe (BP)', 'Europe (Ember)',
       'European Union (27)', 'European Union (27) (Ember)',
       'G20 (Ember)', 'G7 (Ember)', 'High-income countries',
       'Latin America and Caribbean (Ember)', 'Low-income countries',
       'Lower-middle-income countries', 'Middle Africa (BP)',
       'Middle East (BP)', 'Non-OECD (BP)', 'North America',
       'North America (BP)', 'North America (Ember)', 'OECD (BP)',
       'OECD (Ember)', 'Oceania', 'Oceania (Ember)', 'South America',
       'South and Central America (BP)', 'Upper-middle-income countries',
       'Western Africa (BP)'], dtype=object)

--> no nos cargamos datos, por si acaso, y ya luego filtraremos en Tableau

In [11]:
# Display only the rows that have a Code; the original frame is not modified.
df_solar_consump.dropna(subset=["Code"])

Unnamed: 0,Entity,Code,Year,Electricity from solar (TWh)
0,Afghanistan,AFG,2000,0.00
1,Afghanistan,AFG,2001,0.00
2,Afghanistan,AFG,2002,0.00
3,Afghanistan,AFG,2003,0.00
4,Afghanistan,AFG,2004,0.00
...,...,...,...,...
8678,Zimbabwe,ZWE,2017,0.01
8679,Zimbabwe,ZWE,2018,0.02
8680,Zimbabwe,ZWE,2019,0.03
8681,Zimbabwe,ZWE,2020,0.03


In [12]:
# Descriptive statistics of the solar electricity column (TWh).
df_solar_consump['Electricity from solar (TWh)'].describe()

count    8683.000000
mean        5.277942
std        40.098862
min         0.000000
25%         0.000000
50%         0.000000
75%         0.010000
max      1040.500000
Name: Electricity from solar (TWh), dtype: float64

In [19]:
# Add a GWh column so consumption can later be compared with the capacity data.
# 1 TWh = 1,000 GWh, so the TWh values must be multiplied (not divided) by 1000.
# NOTE(review): the capacity dataset is described elsewhere in this notebook as GW
# (power), not GWh (energy) — confirm the intended comparison.
df_solar_consump['Electricity from solar (GWh)'] = df_solar_consump['Electricity from solar (TWh)'] * 1000

In [14]:
# Count rows that are exact duplicates of an earlier row.
df_solar_consump.duplicated().sum()

np.int64(0)

In [16]:
# Percentage of missing values per column (mean of the boolean NA mask, scaled to 0-100).
df_solar_consump.isna().mean() * 100

Entity                           0.000000
Code                            16.768398
Year                             0.000000
Electricity from solar (TWh)     0.000000
dtype: float64

In [20]:
# Save the dataset to the "usable" folder.
# index=False prevents the RangeIndex from being written as an extra unnamed column.
df_solar_consump.to_csv('data/usable/territorial_solar_consumption.csv', index=False)

### `13-installed-solar-PV-capacity.csv`:

Ideas:
- Solar Capacity: no tenemos ni idea en qué unidades está esto. En principio decimos que son GWp (Giga Watt pico). Al estar por países/zonas, debería ser así, ya que los MW se quedarían cortos.


- `definición`: potencia solar fotovoltaica (no hay termosolar) instalada por territorio. No puede ser GWh: le sobra la hora, porque estamos ante una medida de potencia y no de energía (la energía sí lleva la hora).

In [18]:
# Load the raw installed solar PV capacity CSV.
df_solar_installed = pd.read_csv('data/raw/13-installed-solar-PV-capacity.csv')

In [19]:
# Summary of columns, non-null counts and dtypes.
df_solar_installed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1659 entries, 0 to 1658
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Entity          1659 non-null   object 
 1   Code            1243 non-null   object 
 2   Year            1659 non-null   int64  
 3   Solar Capacity  1659 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 52.0+ KB


In [22]:
# Comparing Spain's rows in this table with Spain's 2021 figures from another source,
# we conclude that this measure is in GW (the cross-checked source gives 14.8 GW for 2021).
is_spain = df_solar_installed['Entity'].eq('Spain')
df_solar_installed.loc[is_spain]

Unnamed: 0,Entity,Code,Year,Solar Capacity
1325,Spain,ESP,1996,0.001
1326,Spain,ESP,1997,0.001
1327,Spain,ESP,1998,0.001
1328,Spain,ESP,1999,0.002
1329,Spain,ESP,2000,0.01
1330,Spain,ESP,2001,0.013
1331,Spain,ESP,2002,0.017
1332,Spain,ESP,2003,0.022
1333,Spain,ESP,2004,0.033
1334,Spain,ESP,2005,0.052


In [23]:
# Preview the first rows.
df_solar_installed.head()

Unnamed: 0,Entity,Code,Year,Solar Capacity
0,Africa,,1996,0.001112
1,Africa,,1997,0.003137
2,Africa,,1998,0.005162
3,Africa,,1999,0.008199
4,Africa,,2000,0.010927


In [45]:
# Percentage of missing values per column (mean of the boolean NA mask, scaled to 0-100).
df_solar_installed.isna().mean() * 100

Entity                  0.000000
Code                   25.075347
Year                    0.000000
Solar Capacity (GW)     0.000000
dtype: float64

In [51]:
# Entity values of the rows whose Code is missing.
code_is_missing = df_solar_installed["Code"].isna()
df_solar_installed.loc[code_is_missing, 'Entity']

0                              Africa
1                              Africa
2                              Africa
3                              Africa
4                              Africa
                    ...              
1606    Upper-middle-income countries
1607    Upper-middle-income countries
1608    Upper-middle-income countries
1609    Upper-middle-income countries
1610    Upper-middle-income countries
Name: Entity, Length: 416, dtype: object

In [24]:
# Make the unit explicit in the column name.
# Reassigning instead of using inplace=True avoids mutating a frame that
# earlier cells have already displayed.
df_solar_installed = df_solar_installed.rename(columns={'Solar Capacity': 'Solar Capacity (GW)'})

In [25]:
# Save the dataset to the "usable" folder.
# index=False prevents the RangeIndex from being written as an extra unnamed column.
df_solar_installed.to_csv('data/usable/territorial_solar_capacity.csv', index=False)

----> hacer en tableau: `Merge consumption// capacity solar`:

- potencia real consumida entre el máximo de capacidad solar de cada territorio.

- Para que ambas unidades coincidan: la potencia hay que dividirla entre 1000 (de GW a TW, ya que el consumo está en TWh) para sacar la proporción.

### `15-share-electricity-solar.csv`:

- Solar (% electricity): porcentaje sobre total de electricidad producida. Me comenta mi padre que podría ser de electricidad consumida en lugar de producida, pero que normalmente cuando se habla en genérico se entiende que es `producida o generada` y ya luego la consumida puede variar un poco en función de lo exportado/importado, pero suele ir bastante parejo con lo producido.

In [23]:
# Load the raw solar share-of-electricity CSV.
df_elec_solar = pd.read_csv('data/raw/15-share-electricity-solar.csv')

In [27]:
# Look up Spain as well, to compare against real data from other sources; the values match.
is_spain = df_elec_solar['Entity'].eq('Spain')
df_elec_solar.loc[is_spain]

Unnamed: 0,Entity,Code,Year,Solar (% electricity)
5800,Spain,ESP,1985,0.0
5801,Spain,ESP,1986,0.0
5802,Spain,ESP,1987,0.0
5803,Spain,ESP,1988,0.0
5804,Spain,ESP,1989,0.004121
5805,Spain,ESP,1990,0.006613
5806,Spain,ESP,1991,0.006462
5807,Spain,ESP,1992,0.006382
5808,Spain,ESP,1993,0.006433
5809,Spain,ESP,1994,0.006215


In [28]:
# Preview the first rows.
df_elec_solar.head()

Unnamed: 0,Entity,Code,Year,Solar (% electricity)
0,Afghanistan,AFG,2000,0.0
1,Afghanistan,AFG,2001,0.0
2,Afghanistan,AFG,2002,0.0
3,Afghanistan,AFG,2003,0.0
4,Afghanistan,AFG,2004,0.0


In [24]:
# Summary of columns, non-null counts and dtypes.
df_elec_solar.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6871 entries, 0 to 6870
Data columns (total 4 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Entity                 6871 non-null   object 
 1   Code                   5781 non-null   object 
 2   Year                   6871 non-null   int64  
 3   Solar (% electricity)  6871 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 214.8+ KB


In [48]:
# Percentage of missing values per column (mean of the boolean NA mask, scaled to 0-100).
df_elec_solar.isna().mean() * 100

Entity                    0.000000
Code                     15.863775
Year                      0.000000
Solar (% electricity)     0.000000
dtype: float64

In [25]:
# Count rows that are exact duplicates of an earlier row.
df_elec_solar.duplicated().sum()

np.int64(0)

In [29]:
# Save the dataset to the "usable" folder.
# index=False prevents the RangeIndex from being written as an extra unnamed column.
df_elec_solar.to_csv('data/usable/territorial_solar_electricity.csv', index=False)

### `08-wind-generation.csv`:
 

- `definición`: energía eólica generada en un año y en un territorio específico

In [26]:
# Load the raw wind generation CSV.
df_generation_wind = pd.read_csv('data/raw/08-wind-generation.csv')

In [31]:
# Preview the first rows.
df_generation_wind.head()

Unnamed: 0,Entity,Code,Year,Electricity from wind (TWh)
0,Afghanistan,AFG,2000,0.0
1,Afghanistan,AFG,2001,0.0
2,Afghanistan,AFG,2002,0.0
3,Afghanistan,AFG,2003,0.0
4,Afghanistan,AFG,2004,0.0


In [28]:
# Summary of columns, non-null counts and dtypes.
df_generation_wind.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8676 entries, 0 to 8675
Data columns (total 4 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Entity                       8676 non-null   object 
 1   Code                         7217 non-null   object 
 2   Year                         8676 non-null   int64  
 3   Electricity from wind (TWh)  8676 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 271.3+ KB


In [27]:
# Percentage of missing values per column (mean of the boolean NA mask, scaled to 0-100).
df_generation_wind.isna().mean() * 100

Entity                          0.000000
Code                           16.816505
Year                            0.000000
Electricity from wind (TWh)     0.000000
dtype: float64

In [33]:
# Distinct years present in the wind generation data.
df_generation_wind['Year'].unique()

array([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
       2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021,
       1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975,
       1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986,
       1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
       1998, 1999, 2022], dtype=int64)

In [29]:
# Count rows that are exact duplicates of an earlier row.
df_generation_wind.duplicated().sum()

np.int64(0)

In [34]:
# Show only the rows for Spain.
is_spain = df_generation_wind['Entity'].eq("Spain")
df_generation_wind.loc[is_spain]

Unnamed: 0,Entity,Code,Year,Electricity from wind (TWh)
7278,Spain,ESP,1965,0.0
7279,Spain,ESP,1966,0.0
7280,Spain,ESP,1967,0.0
7281,Spain,ESP,1968,0.0
7282,Spain,ESP,1969,0.0
7283,Spain,ESP,1970,0.0
7284,Spain,ESP,1971,0.0
7285,Spain,ESP,1972,0.0
7286,Spain,ESP,1973,0.0
7287,Spain,ESP,1974,0.0


In [35]:
# Save the dataset to the "usable" folder.
# index=False prevents the RangeIndex from being written as an extra unnamed column.
df_generation_wind.to_csv('data/usable/territorial_wind_generation.csv', index=False)

Porcentaje de nulos en Code muy similar al del dataset de consumo de energía solar (≈16,8 %). Los datos de España y las unidades cuadran con otras fuentes.

### `11-share-electricity-wind.csv`:
- Wind (% electricity): porcentaje total de viento sobre total de electricidad producida


In [30]:
# Load the raw wind share-of-electricity CSV.
df_elec_wind = pd.read_csv('data/raw/11-share-electricity-wind.csv')

In [31]:
# Summary of columns, non-null counts and dtypes.
df_elec_wind.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6871 entries, 0 to 6870
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Entity                6871 non-null   object 
 1   Code                  5781 non-null   object 
 2   Year                  6871 non-null   int64  
 3   Wind (% electricity)  6871 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 214.8+ KB


In [37]:
# Preview the first rows.
df_elec_wind.head()

Unnamed: 0,Entity,Code,Year,Wind (% electricity)
0,Afghanistan,AFG,2000,0.0
1,Afghanistan,AFG,2001,0.0
2,Afghanistan,AFG,2002,0.0
3,Afghanistan,AFG,2003,0.0
4,Afghanistan,AFG,2004,0.0


In [32]:
# Count rows that are exact duplicates of an earlier row.
df_elec_wind.duplicated().sum()

np.int64(0)

In [33]:
# Percentage of missing values per column (mean of the boolean NA mask, scaled to 0-100).
df_elec_wind.isna().mean() * 100

Entity                   0.000000
Code                    15.863775
Year                     0.000000
Wind (% electricity)     0.000000
dtype: float64

In [38]:
# Save the dataset to the "usable" folder.
# index=False prevents the RangeIndex from being written as an extra unnamed column.
df_elec_wind.to_csv('data/usable/territorial_wind_electricity.csv', index=False)

In [39]:
# Show Spain's wind share of electricity for 2022.
is_spain = df_elec_wind['Entity'].eq("Spain")
is_2022 = df_elec_wind['Year'].eq(2022)
df_elec_wind.loc[is_spain & is_2022]

Unnamed: 0,Entity,Code,Year,Wind (% electricity)
5837,Spain,ESP,2022,21.716232


Los datos cuadran con otras fuentes.

Podría ser interesante ver la variación de generación en el tiempo (para esta y otras) sólo para los países que consideremos "clave" en la producción de energías renovables.

### `09-cumulative-installed-wind-energy-capacity-gigawatts`:

- `definición`: potencia eólica instalada por territorio en GW.

- Máximo que se puede generar a cierta fecha: es decir, total de potencia instalada a lo largo del tiempo.
- preguntas de viento : cuánto produce, capacidad de producir.. nos centramos más en la capacidad y producción que en el consumo, porque no tenemos ese dato.

In [34]:
# Load the raw cumulative installed wind capacity CSV.
df_installed_wind = pd.read_csv('data/raw/09-cumulative-installed-wind-energy-capacity-gigawatts.csv')

In [35]:
# Summary of columns, non-null counts and dtypes.
df_installed_wind.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1540 entries, 0 to 1539
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Entity         1540 non-null   object 
 1   Code           1143 non-null   object 
 2   Year           1540 non-null   int64  
 3   Wind Capacity  1540 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 48.3+ KB


In [36]:
# Percentage of missing values per column (mean of the boolean NA mask, scaled to 0-100).
df_installed_wind.isna().mean() * 100

Entity            0.000000
Code             25.779221
Year              0.000000
Wind Capacity     0.000000
dtype: float64

In [37]:
# Count rows that are exact duplicates of an earlier row.
df_installed_wind.duplicated().sum()

np.int64(0)

In [41]:
# Look up Spain as well, to compare against real data.
is_spain = df_installed_wind['Entity'].eq('Spain')
df_installed_wind.loc[is_spain]

Unnamed: 0,Entity,Code,Year,Wind Capacity
1241,Spain,ESP,1997,0.512
1242,Spain,ESP,1998,0.723
1243,Spain,ESP,1999,1.408
1244,Spain,ESP,2000,2.206
1245,Spain,ESP,2001,3.397
1246,Spain,ESP,2002,4.891
1247,Spain,ESP,2003,5.945
1248,Spain,ESP,2004,8.317
1249,Spain,ESP,2005,9.918
1250,Spain,ESP,2006,11.722


In [42]:
# Preview the first rows.
df_installed_wind.head()

Unnamed: 0,Entity,Code,Year,Wind Capacity
0,Africa,,1997,0.006
1,Africa,,1998,0.01
2,Africa,,1999,0.064
3,Africa,,2000,0.13905
4,Africa,,2001,0.13905


In [43]:
# Save the dataset to the "usable" folder.
# index=False prevents the RangeIndex from being written as an extra unnamed column.
df_installed_wind.to_csv('data/usable/territorial_wind_cumulative_capacity.csv', index=False)

Parece que los datos y las unidades para España cuadran con otras fuentes. Los nulos de code parece que tienen sentido, no existe el código para áreas que no sean países.