# Renewable Energy & AI - Green Skills Project
This notebook loads, cleans, and processes renewable energy data from Kaggle for the EduNet Green Skills project.

In [None]:
# Install Kaggle API 
# !pip install kaggle

# Import required libraries
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler


In [None]:
# Fetch the dataset from Kaggle first (needs a configured kaggle.json credential file).
# Example dataset slug: sudalairajkumar/renewable-energy
# !kaggle datasets download -d sudalairajkumar/renewable-energy -p ./data --unzip

# Location of the raw CSV; adjust the filename to match the downloaded file.
RAW_CSV_PATH = './data/renewable_energy.csv'
df = pd.read_csv(RAW_CSV_PATH)

# Preview the first rows as the cell output.
df.head()

In [None]:
# Explore dataset
# Column dtypes and non-null counts; info() prints its report itself.
df.info()

# A notebook cell renders only its final expression, so the summary statistics
# must be shown explicitly or they are computed and thrown away.
# `display` is provided by the IPython/Jupyter kernel without an import.
display(df.describe())

# Missing-value count per column (rendered as the cell's output).
df.isnull().sum()

In [None]:
# Data Cleaning
# Keep only fully populated rows: a row with at least one missing value is removed.
df = df.dropna(axis=0, how='any')

# Placeholder for unit harmonisation, to be filled in if the dataset mixes units.
# Example: df['energy_mwh'] = df['energy_gwh'] * 1000

# Standardize column names: trim surrounding whitespace, lowercase,
# then turn inner spaces into underscores.
df.columns = (
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_')
)

# Preview the cleaned frame.
df.head()

In [None]:
# Feature Engineering: Green Skills Context
# Each derived column is optional and is only added when every column it
# depends on is present in the cleaned frame.

# Renewable share of total production, in percent.
if {'renewable_energy_production', 'total_energy_production'} <= set(df.columns):
    # Dividing by a zero total would give +/-inf; mask zeros so the share is NaN instead.
    total_production = df['total_energy_production'].where(df['total_energy_production'] != 0)
    df['renewable_share'] = (df['renewable_energy_production'] / total_production) * 100

# Year-over-year growth of renewable production within each country, in percent.
# 'country' is needed by the groupby (its absence previously raised KeyError) and
# 'year' defines the chronological order that pct_change must follow.
if {'country', 'year', 'renewable_energy_production'} <= set(df.columns):
    # Compute on a chronologically sorted view; the assignment aligns the result
    # back to the original rows by index label, so df keeps its row order.
    # NOTE: this alignment relies on unique row labels, which holds for the default
    # index produced by read_csv followed by dropna.
    df['yearly_growth'] = (
        df.sort_values(['country', 'year'], kind='stable')
        .groupby('country')['renewable_energy_production']
        .pct_change()
        * 100
    )

df.head()

In [None]:
# Save cleaned dataset
# Path is the standard-library pathlib.Path; it must be imported in the
# notebook's import cell or this cell fails with NameError on a fresh kernel.
output_dir = Path('./data')
# parents=True also creates missing intermediate directories; exist_ok=True makes
# re-running the cell safe when the folder is already there.
output_dir.mkdir(parents=True, exist_ok=True)

# Write without the index so the CSV contains only the data columns.
clean_csv_path = output_dir / 'renewable_energy_clean.csv'
df.to_csv(clean_csv_path, index=False)
print('Clean dataset saved to data/renewable_energy_clean.csv')