In [None]:
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH

# Create a new, empty Word document that every later section appends to.
doc = Document()

# Title: centered level-1 heading.
title = doc.add_heading('Exploratory Data Analysis Report', level=1)
title.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Subtitle: centered body paragraph with extra space before the first section.
subtitle = doc.add_paragraph('Solar Energy Potential in Benin, Sierra Leone, and Togo')
subtitle.alignment = WD_ALIGN_PARAGRAPH.CENTER
# Apply the spacing to this paragraph only. Going through `subtitle.style`
# would edit the shared paragraph style and silently add 20pt after every
# paragraph in the report that uses (or inherits from) that style.
subtitle.paragraph_format.space_after = Pt(20)

# Executive Summary: section heading followed by one overview paragraph.
# The text is assembled first so the two document calls read as a pair.
executive_summary = (
    "MoonLight Energy Solutions aims to identify high-potential regions for solar installation across West Africa using environmental data from three countries: "
    "Benin Malanville, Sierra Leone Bumbuna, and Togo Dapaong QC. This report presents an exploratory data analysis (EDA) of each dataset, focusing on trends, correlations, "
    "outliers, and key relationships between solar irradiance, ambient temperature, humidity, and wind conditions."
)
doc.add_heading('1. Executive Summary', level=2)
doc.add_paragraph(executive_summary)

# Section: Data Overview
# Heading, a one-line description of dataset size, then a bulleted list of
# the key variables.
doc.add_heading('2. Data Overview', level=2)
# 525,600 rows per year corresponds to one reading per minute
# (365 * 24 * 60), so the text says "minute-level" rather than "hourly".
doc.add_paragraph("Each dataset contains one year of minute-level measurements, totaling 525,600 rows per country.")
doc.add_paragraph("Key variables include:")
points = [
    "GHI: Global Horizontal Irradiance",
    "DNI: Direct Normal Irradiance",
    "DHI: Diffuse Horizontal Irradiance",
    "Tamb: Ambient Temperature",
    "RH: Relative Humidity",
    "WS/WSgust: Wind Speed and Gusts",
    "Sensor readings (ModA, ModB)",
    "Barometric Pressure (BP)",
    "Cleaning flag and Precipitation",
]
# One bullet paragraph per variable; the returned paragraph is not needed.
for point in points:
    doc.add_paragraph(point, style='List Bullet')

# Missing Values: heading plus a single explanatory paragraph.
missing_values_note = (
    "All datasets have complete data except for the 'Comments' column, which is entirely missing and can be safely dropped."
)
doc.add_heading('3. Missing Values', level=2)
doc.add_paragraph(missing_values_note)

# Outlier Detection
# Heading, intro sentence, a 4-column summary table (one row per country),
# and a closing call-out paragraph.
doc.add_heading('4. Outlier Detection & Data Cleaning', level=2)
doc.add_paragraph(
    "Using Z-scores (>3 standard deviations), outliers were identified and removed:"
)

# Header row.
table = doc.add_table(rows=1, cols=4)
hdr_cells = table.rows[0].cells
for hdr_cell, label in zip(hdr_cells, ('Country', 'Total Rows', 'Outlier Rows', '% Removed')):
    hdr_cell.text = label

# Raw counts per country: (name, total rows, outlier rows).
outlier_counts = (
    ('Benin', 525_600, 7_740),
    ('Sierra Leone', 525_600, 16_292),
    ('Togo', 525_600, 9_251),
)

# Derive the percentage from the counts instead of hard-coding it, so the
# table cannot disagree with itself. The previous literal for Sierra Leone
# was '3.09%', but 16,292 / 525,600 rounds to 3.10%.
data = tuple(
    (country, f"{total:,}", f"{outliers:,}", f"{outliers / total:.2%}")
    for country, total, outliers in outlier_counts
)

# One table row per country; cell text must be str.
for row in data:
    row_cells = table.add_row().cells
    for cell, value in zip(row_cells, row):
        cell.text = value

doc.add_paragraph(
    "Outlier removal rates were relatively low, making dropping them acceptable for now. Future work could explore Winsorization or imputation if needed.",
    style='Intense Quote'
)

# Time Series Analysis: heading, a multi-line observation paragraph, and a
# highlighted takeaway.
doc.add_heading('5. Time Series Analysis', level=2)

# Each entry becomes one line of the same paragraph (joined with "\n").
time_series_lines = [
    "All regions show clear daily and seasonal patterns in solar irradiance:",
    "- GHI peaks during midday and drops to zero at night.",
    "- Dry season months exhibit higher irradiance levels due to clearer skies.",
    "- Rainy season shows reduced irradiance due to cloud cover and precipitation.",
]
doc.add_paragraph("\n".join(time_series_lines))

time_series_takeaway = (
    "Benin and Togo show higher average GHI values compared to Sierra Leone, suggesting better solar potential."
)
doc.add_paragraph(time_series_takeaway, style='Intense Quote')

# Correlation Insights: heading, a multi-line findings paragraph, and a
# highlighted conclusion.
doc.add_heading('6. Correlation Insights', level=2)

# Each entry becomes one line of the same paragraph (joined with "\n").
correlation_lines = [
    "Strong correlations observed between key variables:",
    "- GHI has strong positive correlation with DNI and DHI (>0.8)",
    "- TModA and TModB are nearly perfectly correlated (>0.98)",
    "- GHI and ambient temperature also show moderate positive correlation (~0.6–0.7)",
]
doc.add_paragraph("\n".join(correlation_lines))

correlation_takeaway = (
    "This confirms that GHI can serve as the main proxy for solar radiation in modeling."
)
doc.add_paragraph(correlation_takeaway, style='Intense Quote')

# Wind Rose Analysis: heading, a multi-line per-country summary paragraph,
# and a highlighted recommendation.
doc.add_heading('7. Wind Rose Analysis', level=2)

# Each entry becomes one line of the same paragraph (joined with "\n").
wind_rose_lines = [
    "Dominant wind directions vary slightly but do not pose significant risk to solar panel installations:",
    "- Benin: Northerly and Northeast winds",
    "- Sierra Leone: Predominantly Northerly",
    "- Togo: Northeast and East winds",
    "Wind speeds are consistently below 5 m/s across all regions, indicating generally calm conditions.",
]
doc.add_paragraph("\n".join(wind_rose_lines))

wind_rose_recommendation = (
    "Recommendation: Orient panels perpendicular to dominant wind direction to optimize cooling and reduce mechanical stress."
)
doc.add_paragraph(wind_rose_recommendation, style='Intense Quote')

# Bubble Chart Analysis: heading, a multi-line interpretation paragraph,
# and a highlighted takeaway.
doc.add_heading('8. Bubble Chart: GHI vs Tamb with RH', level=2)

# Each entry becomes one line of the same paragraph (joined with "\n").
bubble_chart_lines = [
    "GHI increases with temperature up to ~35°C, then plateaus or declines.",
    "Relative Humidity (color gradient) shows inverse relationship with GHI:",
    "- High RH correlates with lower GHI (likely due to cloud cover)",
    "- Low RH correlates with high GHI (clear skies)",
]
doc.add_paragraph("\n".join(bubble_chart_lines))

bubble_chart_takeaway = (
    "Optimal solar generation occurs at temperatures between 25°C and 35°C with RH < 50%."
)
doc.add_paragraph(bubble_chart_takeaway, style='Intense Quote')

# Strategic Regional Comparison: heading plus a 5-column ranking table.
doc.add_heading('9. Strategic Regional Comparison', level=2)

# Start with a single header row and fill it label by label.
table = doc.add_table(rows=1, cols=5)
hdr_cells = table.rows[0].cells
column_labels = ('Rank', 'Region', 'Avg GHI', 'Humidity', 'Notes')
for position, label in enumerate(column_labels):
    hdr_cells[position].text = label

# One record per region, in ranking order; fields line up with the header.
data = (
    ('1', 'Benin Malanville', '240.56 W/m²', 'Moderate (54%)', 'Most stable conditions'),
    ('2', 'Togo Dapaong QC', '230.56 W/m²', 'Moderate (55%)', 'Slightly more variability'),
    ('3', 'Sierra Leone Bumbuna', '201.96 W/m²', 'High (79%)', 'Frequent anomalies')
)

# Append a row per record and copy each field into the matching column.
for record in data:
    cells = table.add_row().cells
    for position, value in enumerate(record):
        cells[position].text = value

# Business Strategy & Recommendations: heading plus one bullet per item.
doc.add_heading('10. Business Strategy & Recommendations', level=2)

strategies = [
    "Focus large-scale installations in Benin and Togo due to consistent solar irradiance and favorable weather conditions.",
    "Use historical data to forecast energy production and manage grid load during rainy seasons.",
    "Implement real-time monitoring systems for Tamb, RH, and WS to predict dips in solar output.",
    "Use either ModA or ModB consistently since both sensors show near-perfect agreement.",
    "Include humidity in predictive models — it's a major factor reducing solar radiation.",
    "Consider hybrid solar-wind systems in Togo where moderate winds occur from multiple directions.",
    "Regular maintenance and cleaning recommended based on the 'Cleaning' flag in the dataset.",
]

# The paragraph object returned by add_paragraph is not needed afterwards.
for recommendation in strategies:
    doc.add_paragraph(recommendation, style='List Bullet')

# Write the report to disk and confirm on stdout. The filename is held in
# one place so the saved path and the printed message cannot diverge.
output_path = 'Solar_EDA_Strategic_Report.docx'
doc.save(output_path)
print(f"✅ Report saved as '{output_path}'")

✅ Report saved as 'Solar_EDA_Strategic_Report.docx'


: 