In [1]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the solar generation CSV (path is relative to the working directory).
csv_path = "Solar_Power_Generation.csv"
df = pd.read_csv(csv_path)

# Print the column / dtype / non-null summary, then display the first five rows.
df.info()
df.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100457 entries, 0 to 100456
Data columns (total 10 columns):
 #   Column                       Non-Null Count   Dtype  
---  ------                       --------------   -----  
 0   distance-to-solar-noon       100457 non-null  float64
 1   temperature                  100457 non-null  int64  
 2   wind-direction               100457 non-null  int64  
 3   wind-speed                   100457 non-null  float64
 4   sky-cover                    100457 non-null  int64  
 5   visibility                   100457 non-null  float64
 6   humidity                     100457 non-null  int64  
 7   average-wind-speed-(period)  100422 non-null  float64
 8   average-pressure-(period)    100457 non-null  float64
 9   power-generated              100457 non-null  int64  
dtypes: float64(5), int64(5)
memory usage: 7.7 MB


Unnamed: 0,distance-to-solar-noon,temperature,wind-direction,wind-speed,sky-cover,visibility,humidity,average-wind-speed-(period),average-pressure-(period),power-generated
0,0.859897,69,28,7.5,0,10.0,75,8.0,29.82,0
1,0.628535,69,28,7.5,0,10.0,77,5.0,29.85,0
2,0.397172,69,28,7.5,0,10.0,70,0.0,29.89,5418
3,0.16581,69,28,7.5,0,10.0,33,0.0,29.91,25477
4,0.065553,69,28,7.5,0,10.0,21,3.0,29.89,30069


In [5]:
def categorize_power(value, low_max=10000, mid_max=30000):
    """Bucket a power-generation reading into one of three ordinal classes.

    Args:
        value: The numeric reading to classify.
        low_max: Inclusive upper bound of class 0. Defaults to 10000.
        mid_max: Inclusive upper bound of class 1. Defaults to 30000.

    Returns:
        0 if ``value <= low_max``, 1 if ``low_max < value <= mid_max``,
        otherwise 2.

    Raises:
        ValueError: If ``value`` is NaN.
    """
    # NaN compares False against every number, so without this guard it would
    # fall through both threshold checks and be silently labelled as class 2.
    if value != value:
        raise ValueError("cannot categorize a missing (NaN) power value")

    if value <= low_max:
        return 0
    if value <= mid_max:
        return 1
    return 2

# Label every row with its power bucket by running categorize_power element-wise.
df['power_category'] = df['power-generated'].map(categorize_power)

In [7]:
# The target is the derived category; the features are every other column
# except the raw generation value the category was computed from.
label_columns = ['power-generated', 'power_category']
y = df['power_category']

# Fill missing feature values with the per-column mean.
# NOTE(review): the means are computed over all rows, i.e. before the
# train/test split done later in the notebook, so held-out rows contribute
# to the imputed values. Consider imputing after the split instead.
X = df.drop(columns=label_columns)
X = X.fillna(X.mean())

In [10]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# 100-tree random forest with a fixed seed, fitted on the training split.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)

In [13]:
# Predict the held-out rows and report the fraction classified correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# NOTE(review): the recorded run printed 1.0000. A perfect held-out score is
# worth double-checking (e.g. for identical rows on both sides of the split);
# this has not been verified.
print(f"Model Accuracy: {accuracy:.4f}")

Model Accuracy: 1.0000


In [16]:
# StandardScaler and joblib are imported in the notebook's top import cell.

# Fit a standardiser on the training features so it can be saved with the model.
# NOTE(review): `model` was fitted on the *unscaled* X_train in an earlier cell.
# If app.py transforms its inputs with scaler.pkl before calling
# model.predict, the model will receive values on a different scale from the
# one it was trained on. Confirm how app.py uses the scaler; if it does scale
# inputs, either retrain the model on scaled features or drop the scaler.
scaler = StandardScaler()
scaler.fit(X_train)

# Persist both artifacts to the working directory.
joblib.dump(model, "solar_model.pkl")
joblib.dump(scaler, "scaler.pkl")

# Print the training column order so it can be compared with what app.py sends.
print("Model expects features in this order:", X_train.columns.tolist())

Model expects features in this order: ['distance-to-solar-noon', 'temperature', 'wind-direction', 'wind-speed', 'sky-cover', 'visibility', 'humidity', 'average-wind-speed-(period)', 'average-pressure-(period)']
