In [7]:
# 📌 PHOTONIX: Solar Power Prediction Project
# Domain: AIML | Topic: Sustainable Energy & Efficiency

# -----------------------------
# 1. Problem Definition
# -----------------------------
# Goal: Predict solar panel power output (Watts)
# based on environmental & panel conditions.
# Features:
# - Sunlight_Hours 🌞
# - Temperature 🌡️
# - Cloud_Cover ☁️
# - Panel_Angle 📐
# - Humidity 💧
# - Wind_Speed 🍃
# Target:
# - Power_Output_Watts ⚡

# -----------------------------
# 2. Data Collection & Understanding
# -----------------------------
import pandas as pd

# Photonix dataset: ten observations, one equal-length list per column.
# The first six columns are the model features; Power_Output_Watts is the target.
data = dict(
    Sunlight_Hours=[6, 8, 7, 9, 10, 5, 11, 4, 8, 9],
    Temperature=[24, 27, 25, 29, 31, 22, 33, 20, 28, 30],
    Cloud_Cover=[30, 20, 25, 15, 10, 40, 5, 50, 18, 12],
    Panel_Angle=[25, 30, 20, 35, 30, 15, 40, 10, 28, 32],
    Humidity=[60, 55, 65, 50, 45, 70, 40, 75, 58, 48],
    Wind_Speed=[10, 12, 8, 14, 11, 9, 15, 7, 13, 12],
    Power_Output_Watts=[220, 310, 280, 360, 400, 180, 420, 150, 330, 390],
)

# Each dict key becomes a column, in insertion order
df = pd.DataFrame.from_dict(data)

# Persist the dataset as CSV (for portal submission); index=False keeps the
# row index out of the file so no extra unnamed column is written.
df.to_csv("photonix_dataset.csv", index=False)
print("✅ Photonix dataset saved as photonix_dataset.csv")

# Preview the first 5 rows
print("\n📊 Sample Dataset:")
print(df.head())

# Column dtypes and non-null counts.
# DataFrame.info() writes its report directly to stdout and returns None, so it
# must not be wrapped in print() -- that would append a stray "None" line.
print("\n📈 Dataset Info:")
df.info()

# Summary statistics (count, mean, std, min, quartiles, max) per column
print("\n🔎 Summary Statistics:")
print(df.describe())

# -----------------------------
# 3. Data Preprocessing
# -----------------------------
# Report how many null entries each column contains
print("\n❓ Missing Values Check:")
print(df.isna().sum())

# -----------------------------
# 4. Data Splitting
# -----------------------------
from sklearn.model_selection import train_test_split

# Separate the prediction target (y) from the predictor columns (X)
y = df["Power_Output_Watts"]
X = df.drop(columns="Power_Output_Watts")

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the (rows, columns) shape of each feature partition
print("\n✅ Data Split Done:")
print("Training set size:", X_train.shape)
print("Testing set size:", X_test.shape)

✅ Photonix dataset saved as photonix_dataset.csv

📊 Sample Dataset:
   Sunlight_Hours  Temperature  Cloud_Cover  Panel_Angle  Humidity  \
0               6           24           30           25        60   
1               8           27           20           30        55   
2               7           25           25           20        65   
3               9           29           15           35        50   
4              10           31           10           30        45   

   Wind_Speed  Power_Output_Watts  
0          10                 220  
1          12                 310  
2           8                 280  
3          14                 360  
4          11                 400  

📈 Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype
---  ------              --------------  -----
 0   Sunlight_Hours      10 non-null     int64
 1   Temperature         10 non-null