In [1]:
import pandas as pd
import numpy as np

data = {
    "Energy Source": ["Solar", "Wind", "Hydropower", "Geothermal", "Biomass", "Nuclear"],
    "Energy Consumption (MWh)": [1200, np.nan, 2900, np.nan, 2500, 3200],
    "Cost (Million $)": [200, 400, np.nan, 150, 250, np.nan]
}

energy_df = pd.DataFrame(data)

cleaned_df = energy_df.dropna()

cleaned_df.head()

print("Original DataFrame:")
energy_df.head()

Original DataFrame:


Unnamed: 0,Energy Source,Energy Consumption (MWh),Cost (Million $)
0,Solar,1200.0,200.0
1,Wind,,400.0
2,Hydropower,2900.0,
3,Geothermal,,150.0
4,Biomass,2500.0,250.0


In [2]:
cleaned_df=energy_df.dropna()

cleaned_df.head()

Unnamed: 0,Energy Source,Energy Consumption (MWh),Cost (Million $)
0,Solar,1200.0,200.0
4,Biomass,2500.0,250.0



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.



In [4]:
ec_mean=energy_df["Energy Consumption (MWh)"].mean()
ec_mean

2450.0

In [5]:
cost_mean=energy_df["Cost (Million $)"].mean()
cost_mean

250.0

In [6]:
energy_df["Energy Consumption (MWh)"].fillna(ec_mean,inplace=True)
energy_df["Cost (Million $)"].fillna(cost_mean,inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  energy_df["Energy Consumption (MWh)"].fillna(ec_mean,inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  energy_df["Cost (Million $)"].fillna(cost_mean,inplace=True)


In [7]:
energy_df.head()

Unnamed: 0,Energy Source,Energy Consumption (MWh),Cost (Million $)
0,Solar,1200.0,200.0
1,Wind,2450.0,400.0
2,Hydropower,2900.0,250.0
3,Geothermal,2450.0,150.0
4,Biomass,2500.0,250.0


In [10]:
forward_filled_df=energy_df.fillna(method="ffill")
print("Data before forward filling")
energy_df.head()

print("\nDataFrame after Forward Filling:")
forward_filled_df.head()

Data before forward filling

DataFrame after Forward Filling:


  forward_filled_df=energy_df.fillna(method="ffill")


Unnamed: 0,Energy Source,Energy Consumption (MWh),Cost (Million $)
0,Solar,1200.0,200.0
1,Wind,2450.0,400.0
2,Hydropower,2900.0,250.0
3,Geothermal,2450.0,150.0
4,Biomass,2500.0,250.0


In [11]:
energy_df["Missing Consumption"] = energy_df["Energy Consumption (MWh)"].isna().astype(int)
energy_df["Missing Cost"] = energy_df["Cost (Million $)"].isna().astype(int)

print("\nData with Missing Values Flagged:")
energy_df.head()


Data with Missing Values Flagged:


Unnamed: 0,Energy Source,Energy Consumption (MWh),Cost (Million $),Missing Consumption,Missing Cost
0,Solar,1200.0,200.0,0,0
1,Wind,2450.0,400.0,0,0
2,Hydropower,2900.0,250.0,0,0
3,Geothermal,2450.0,150.0,0,0
4,Biomass,2500.0,250.0,0,0


In [12]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

energy_df[["Energy Consumption (MWh)", "Cost (Million $)"]] = scaler.fit_transform(
    energy_df[["Energy Consumption (MWh)", "Cost (Million $)"]]
)
print("\nNormalized Data:")
energy_df.head()


Normalized Data:


Unnamed: 0,Energy Source,Energy Consumption (MWh),Cost (Million $),Missing Consumption,Missing Cost
0,Solar,0.0,0.2,0,0
1,Wind,0.625,1.0,0,0
2,Hydropower,0.85,0.4,0,0
3,Geothermal,0.625,0.0,0,0
4,Biomass,0.65,0.4,0,0


In [13]:
from sklearn.preprocessing import StandardScaler

scaler=StandardScaler()

energy_df[["Energy Consumption (MWh)","Cost (Million $)"]]=scaler.fit_transform(energy_df[["Energy Consumption (MWh)","Cost (Million $)"]])
print("\nStandardized Data:")
energy_df.head()


Standardized Data:


Unnamed: 0,Energy Source,Energy Consumption (MWh),Cost (Million $),Missing Consumption,Missing Cost
0,Solar,-2.005893,-0.6546537,0,0
1,Wind,3.563181e-16,1.963961,0,0
2,Hydropower,0.7221213,1.817029e-16,0,0
3,Geothermal,3.563181e-16,-1.309307,0,0
4,Biomass,0.0802357,1.817029e-16,0,0
