In [19]:
# Step 1: Bring in pandas for tabular data handling
import pandas as pd

# Step 2: Read the raw CSV export into a DataFrame
file_path = "/content/renewable_energy_dataset (1).csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Step 3: Preview the leading rows to get a feel for the columns
df.head()


Unnamed: 0,Datetime,Site,Energy_Type,Energy_Generated_MWh,Capacity_Utilization_%,Temperature_C,Cloud_Cover_%,Wind_Speed_m_s,Day_Type,Season
0,2023-01-01 16:30:00,Site_A,Solar,28.86,28.86,21.57,74.61,,Weekend,Winter
1,2023-01-01 09:15:00,Site_A,Wind,43.2,43.2,4.97,,8.0,Weekend,Winter
2,2023-01-01 06:00:00,Site_B,Solar,66.99,66.99,25.73,20.48,,Weekend,Winter
3,2023-01-01 17:45:00,Site_B,Wind,67.63,67.63,19.1,,12.56,Weekend,Winter
4,2023-01-01 07:00:00,Site_C,Solar,67.94,67.94,27.57,37.11,,Weekend,Winter


# Deep cleaning steps:
# 1. Parse datetime correctly
# 2. Check for missing values
# 3. Standardize column names
# 4. Remove duplicates
# 5. Handle missing data appropriately
# 6. Correct data types
# 7. Sort the dataset

In [20]:
# Step 4: Standardize column names (trim whitespace, lower-case, spaces -> underscores)
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

# Step 5: Parse 'datetime' properly; unparseable values become NaT instead of raising
df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')

# Step 6: Drop rows where 'datetime' could not be parsed (if any)
df = df.dropna(subset=['datetime'])

# Step 7: Drop fully empty rows
df = df.dropna(how='all')

# Step 8: Drop fully empty columns
df = df.dropna(axis=1, how='all')

# Step 9: Remove duplicate rows
df = df.drop_duplicates()

# Step 10: Fill missing wind speed with 0 on Solar rows only.
# A vectorized boolean mask replaces the former row-wise apply: it avoids a Python-level
# loop over every row and does not raise when 'energy_type' itself is missing
# (a NaN energy_type simply compares as "not solar").
is_solar = df['energy_type'].str.lower().eq('solar')
wind_missing = df['wind_speed_m_s'].isna()
df['wind_speed_m_s'] = df['wind_speed_m_s'].mask(is_solar & wind_missing, 0)

# Step 11: Fill missing 'cloud_cover_%' with median based on 'site' and 'energy_type'.
# A group with no observed cloud cover at all has a NaN median and therefore stays NaN here.
df['cloud_cover_%'] = df.groupby(['site', 'energy_type'])['cloud_cover_%'].transform(
    lambda x: x.fillna(x.median())
)

# Step 12: Sort dataset by 'datetime'
df = df.sort_values('datetime')

# Step 13: Reset index
df = df.reset_index(drop=True)

# Step 14: Create new time-related columns
df['year'] = df['datetime'].dt.year
df['month'] = df['datetime'].dt.month
df['day'] = df['datetime'].dt.day
df['hour'] = df['datetime'].dt.hour
df['day_of_week'] = df['datetime'].dt.day_name()

# Step 15: Reorder columns logically (columns not listed here are dropped)
df = df[
    [
        'datetime', 'year', 'month', 'day', 'hour', 'day_of_week',
        'site', 'energy_type', 'energy_generated_mwh',
        'capacity_utilization_%', 'temperature_c', 'cloud_cover_%',
        'wind_speed_m_s', 'day_type', 'season'
    ]
]

# Step 16: Interpolate any remaining missing values in 'cloud_cover_%' linearly.
# NOTE(review): this runs over the time-sorted frame as a whole, so a row left empty by
# Step 11 is filled from neighbouring rows that may belong to another site or energy type
# (in the preview above, Wind rows carry no cloud cover) - confirm this is intended.
df['cloud_cover_%'] = df['cloud_cover_%'].interpolate(method='linear', limit_direction='both')

# Step 17: Final check for missing values
missing_summary_final = df.isnull().sum()

# Step 18: Display the cleaned dataset's missing value summary
missing_summary_final


Unnamed: 0,0
datetime,0
year,0
month,0
day,0
hour,0
day_of_week,0
site,0
energy_type,0
energy_generated_mwh,0
capacity_utilization_%,0


#Here’s a quick summary of everything done:

#1. Standardized and cleaned column names.

#2. Parsed datetimes properly and dropped unparseable records.

#3. Dropped empty rows and columns.

#4. Removed all duplicate rows.

#5. Filled missing wind_speed_m_s (0 for Solar).

#6. Filled missing cloud_cover_% (using the group median, then linear interpolation).

#7. Added useful time features: year, month, day, hour, day_of_week.

#8. Reordered columns logically.

#9. Confirmed there are zero missing values left!

#📥 Step 2: Download the CSV file to your computer

In [21]:
# Colab helper module used to hand files to the user
from google.colab import files

# Where the cleaned CSV is written (example path, change as needed)
final_cleaned_path = "/content/cleaned_renewable_energy_data.csv"

# Write the DataFrame 'df' to disk first; index=False keeps the row index out of the file
df.to_csv(path_or_buf=final_cleaned_path, index=False)

# Then download the file that was just written
files.download(final_cleaned_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#Let’s upgrade our saving process by adding today’s date automatically into the filename.
This way, every file you save will be uniquely named and well-organized!

In [22]:
import datetime


🗓️ Step 2: Generate today's date in YYYY-MM-DD format

In [23]:
# Today's date as a YYYY-MM-DD string (ISO format)
today = datetime.date.today().isoformat()


💾 Step 3: Create the filename with the date

In [24]:
# Build the output filename with today's date embedded
final_cleaned_path = '/content/renewable_energy_dataset_cleaned_{}.csv'.format(today)


📄 Step 4: Save the cleaned dataset

In [25]:
# Write the cleaned DataFrame to the dated path, leaving the row index out
df.to_csv(path_or_buf=final_cleaned_path, index=False)


📥 Step 5: Download the file

In [26]:
# Download the dated CSV that was saved to final_cleaned_path.
# `files` is already imported by the earlier download cell; importing it again is harmless
# and lets this cell run on its own.
from google.colab import files
files.download(final_cleaned_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [27]:
# Point at the dated CSV written by the save step.
# The name is built from `today` instead of a fixed date so that a re-run on any
# other day still refers to the file that was actually saved.
final_cleaned_path = f'/content/renewable_energy_dataset_cleaned_{today}.csv'



In [28]:
# Same dated CSV as the save step wrote; derived from `today` rather than a fixed date
final_cleaned_path = f'/content/renewable_energy_dataset_cleaned_{today}.csv'


In [29]:
import shutil
import os

# Define the source and destination paths.
# The source is whatever final_cleaned_path currently points at, and the destination keeps
# the same file name, so the dated filename is not repeated by hand in this cell.
src = final_cleaned_path
dst = os.path.join("/content/destination_folder", os.path.basename(src))  # Replace the folder with your desired destination

# Create the destination directory if it doesn't exist
os.makedirs(os.path.dirname(dst), exist_ok=True)

# Copy the file: the original stays at `src`, which later cells still read from
shutil.copy(src, dst)

print("✅ File moved successfully!")

✅ File moved successfully!


In [30]:
# Point at the dated CSV in /content (the copy step left the original in place).
# Built from `today` instead of a fixed date so the path matches the file that was saved.
final_cleaned_path = f'/content/renewable_energy_dataset_cleaned_{today}.csv'


In [32]:
# Step 1: Import necessary libraries
import os
import smtplib
import ssl
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.application import MIMEApplication
from getpass import getpass

# Step 2: Set your Gmail address and App Password
# SECURITY: the App Password must never be stored in the notebook. It is taken from the
# GMAIL_APP_PASSWORD environment variable, or asked for interactively when that is unset.
# The App Password that used to be hard-coded here has been exposed and should be revoked.
sender_email = "rfondufe@gmail.com"         # Your Gmail
sender_password = os.environ.get("GMAIL_APP_PASSWORD") or getpass("Gmail App Password: ")
sender_password = sender_password.replace(" ", "")  # the login expects the password without spaces

# Step 3: Define a list of (Name, Email) pairs
contacts = [
    ("Faith", "Faithyesohe@gmail.com"),
    ("Ayomi", "ayomi26315@gmail.com"),
    ("Rotimi", "akinrinderotimi@gmail.com"),
    ("Stelz", "stelzjulagho@gmail.com"),
    ("Damilola", "agunbiadedamilola99@gmail.com"),
    ("Naaham", "Naaham24@gmail.com"),
    ("Etanimowo", "etanimowo@gmail.com"),
    ("Kehinde", "Kehinde.shoremekun@gmail.com"),
    ("Toyin", "toyinosemwenkhae@gmail.com"),
    ("Heyyo", "heeyyoo456@gmail.com")
]

# Step 4: Load the cleaned CSV file once.
# `final_cleaned_path` is the path set by the cells above. Reading the file a single time
# (instead of once per recipient) also means a missing file is reported before logging in
# to the mail server rather than as one failure per contact.
attachment_name = os.path.basename(final_cleaned_path)
with open(final_cleaned_path, "rb") as attachment:
    attachment_bytes = attachment.read()

# Step 5: Email sending setup
context = ssl.create_default_context()

# Step 6: Prepare tracking lists
success_list = []
fail_list = []

# Step 7: Loop through each contact and send personalized email
with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as server:
    server.login(sender_email, sender_password)

    for name, email in contacts:
        try:
            # Create a new email message for each person
            message = MIMEMultipart()
            message["Subject"] = "🌱 Cleaned Renewable Energy Dataset Ready!"
            message["From"] = sender_email
            message["To"] = email

            # Personalized HTML body
            html = f"""
            <html>
            <body style="font-family: Arial, sans-serif;">

            <h2 style="color: #2E86C1;">Hello {name},</h2>

            <p>I hope you're doing well.</p>

            <p>Please find attached the <b>final cleaned renewable energy dataset</b> you requested.</p>

            <ul>
            <li>✔️ Fully deep cleaned</li>
            <li>✔️ No missing values</li>
            <li>✔️ Time features added</li>
            </ul>

            <p>Feel free to reach out if you have any questions or need further analysis.</p>

            <br>

            <p>Best regards,</p>
            <p><b>Fondufe</b></p>

            </body>
            </html>
            """

            # Attach HTML body
            message.attach(MIMEText(html, "html"))

            # Attach the CSV file (a fresh MIME part per message, built from the bytes read above)
            part = MIMEApplication(attachment_bytes, Name=attachment_name)
            part['Content-Disposition'] = f'attachment; filename="{attachment_name}"'
            message.attach(part)

            # Send the email
            server.sendmail(sender_email, email, message.as_string())

            # Track success
            success_list.append(email)
            print(f"✅ Email sent successfully to {name} ({email})")

        except Exception as e:
            # Best effort per recipient: record the failure and continue with the next contact
            fail_list.append((email, str(e)))
            print(f"❌ Failed to send email to {name} ({email}): {e}")

# Step 8: Final report
print("\n📋 Email Sending Summary:")
print(f"✅ Successfully sent: {len(success_list)}")
for email in success_list:
    print(f"   - {email}")

if fail_list:
    print(f"\n❌ Failed to send: {len(fail_list)}")
    for email, error in fail_list:
        print(f"   - {email}: {error}")
else:
    print("\n🎉 All emails sent successfully!")


✅ Email sent successfully to Faith (Faithyesohe@gmail.com)
✅ Email sent successfully to Ayomi (ayomi26315@gmail.com)
✅ Email sent successfully to Rotimi (akinrinderotimi@gmail.com)
✅ Email sent successfully to Stelz (stelzjulagho@gmail.com)
✅ Email sent successfully to Damilola (agunbiadedamilola99@gmail.com)
✅ Email sent successfully to Naaham (Naaham24@gmail.com)
✅ Email sent successfully to Etanimowo (etanimowo@gmail.com)
✅ Email sent successfully to Kehinde (Kehinde.shoremekun@gmail.com)
✅ Email sent successfully to Toyin (toyinosemwenkhae@gmail.com)
✅ Email sent successfully to Heyyo (heeyyoo456@gmail.com)

📋 Email Sending Summary:
✅ Successfully sent: 10
   - Faithyesohe@gmail.com
   - ayomi26315@gmail.com
   - akinrinderotimi@gmail.com
   - stelzjulagho@gmail.com
   - agunbiadedamilola99@gmail.com
   - Naaham24@gmail.com
   - etanimowo@gmail.com
   - Kehinde.shoremekun@gmail.com
   - toyinosemwenkhae@gmail.com
   - heeyyoo456@gmail.com

🎉 All emails sent successfully!
