In [11]:

class WeatherForecaster:
    def __init__(self, data_path, pipeline_path="pipeline_ensemble.pkl"):
        self.data_path = data_path
        self.pipeline_path = pipeline_path
        self.df, _ = self._wrangle(data_path, 'humidity_specific')
        self.columns = self.df.columns
        self.targets = self.columns.drop(["date", "day_of_year", "lat", "lon", "did_rain"])
        self.models = None
        self.pipeline = None

    def _wrangle(self, path, target=None):
        df = pd.read_csv(path)
        df['date'] = pd.to_datetime(df['date'])
        df['day_of_year'] = df['date'].dt.dayofyear

        df_prophet = df[['date', target]].rename(columns={'date': 'ds', target: 'y'})

        did_rain = [0 if i == 0 else 1 for i in df['precipitation_total']]
        df['did_rain'] = did_rain

        df.replace(-999.0000, np.nan, inplace=True)
        df_prophet.replace(-999.0000, np.nan, inplace=True)

        df.dropna(inplace=True)
        df_prophet.dropna(inplace=True)

        return df, df_prophet

    def train_and_save_prophet(self):
        for target in self.targets:
            _, df_prophet = self._wrangle(self.data_path, target)
            model = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
            model.fit(df_prophet)
            filename = f'prophet_model_{target}.pkl'
            with open(filename, 'wb') as file:
                pk.dump(model, file)
            print(f"model for {target} is saved")

    def gathering_models(self):
        all_models = []
        for target in self.targets:
            filename = f'prophet_model_{target}.pkl'
            with open(filename, 'rb') as file:
                all_models.append(pk.load(file))
        self.models = all_models
        return all_models

    def predict_func(self):
        date = input("enter the date (YYYY-MM-DD): ")
        date = pd.to_datetime(date)
        print(f"predictions for {date}")
        print("=" * 50)

        predictions = []
        for model in self.models:
            future = pd.DataFrame({'ds': [date]})
            pred = model.predict(future)['yhat'].values[0]
            predictions.append(pred)

        predictions_df = pd.DataFrame(predictions, index=self.targets, columns=["Predictions"])
        return predictions_df, date

    class ProphetWrapper(BaseEstimator, TransformerMixin):
        def __init__(self, model):
            self.model = model

        def fit(self, X, y=None):
            return self

        def transform(self, X):
            future = pd.DataFrame({'ds': X.flatten()})
            forecast = self.model.predict(future)
            return forecast[['yhat']].values

    def save_pipeline(self):
        p_models = self.models
        wrapped_prophets = [
            (f'prophet_{i}', self.ProphetWrapper(model))
            for i, model in enumerate(p_models)]
        # Features and target
        X = self.df['date'].values.reshape(-1, 1)
        y = self.df['did_rain'].values

        
        neg, pos = np.bincount(y)   # count 0s and 1s in did_rain
        imbalance_ratio = neg / pos
        prophet_stage = ('prophet_forecasters', FeatureUnion(wrapped_prophets))
        xgb_stage = ('xgb_meta_model', XGBClassifier(n_estimators=100, random_state=42,scale_pos_weight=imbalance_ratio))

        ensemble_pipeline = Pipeline([prophet_stage, xgb_stage])

        # Train/test split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, shuffle=False
        )

        # Fit on train data
        ensemble_pipeline.fit(X_train, y_train)
        joblib.dump(ensemble_pipeline, self.pipeline_path)

        # ---- Evaluation ----
        y_pred = ensemble_pipeline.predict(X_test)
        y_proba = ensemble_pipeline.predict_proba(X_test)[:, 1]

        print("\nðŸ“Š Model Evaluation on Test Set")
        print("Accuracy:", accuracy_score(y_test, y_pred))
        print("Precision:", precision_score(y_test, y_pred))
        print("Recall:", recall_score(y_test, y_pred))
        print("F1 Score:", f1_score(y_test, y_pred))
        print("ROC AUC:", roc_auc_score(y_test, y_proba))
        print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

    def model(self):
        df,_ = self._wrangle(self.data_path, "temp_max")
        
        self.pipeline = joblib.load(self.pipeline_path)
        predict_df, date = self.predict_func()

        temp_max_q = df['temp_max'].quantile(0.8)
        temp_min_q = df['temp_min'].quantile(0.2)
        humidity_specific_q = df['humidity_specific'].quantile(0.9)
        solar_radiation_q = df['solar_radiation'].quantile(0.9)
        wind_speed_q = df['wind_speed'].quantile(0.9)

        # Recommendations
        recs = pd.Series({
            "temp_max": "too hot" if predict_df.loc['temp_max'].values[0] > temp_max_q else "normal",
            "temp_min": "too cold" if predict_df.loc['temp_min'].values[0] < temp_min_q else "normal",
            "humidity_specific": "humid" if predict_df.loc['humidity_specific'].values[0] > humidity_specific_q else "dry",
            "pressure": "",
            "precipitation_total": "",
            "solar_radiation": "use sunscreen" if predict_df.loc['solar_radiation'].values[0] > solar_radiation_q else "safe",
            "wind_speed": "windy" if predict_df.loc['wind_speed'].values[0] > wind_speed_q else "calm"
        })

        predict_df['recomendations'] = recs

        interval = int(input("enter the wanted interval (for 1 day =1): "))
        future_dates = pd.date_range(date, periods=interval, freq="D")
        future_input = np.array(future_dates).reshape(-1, 1)
        predictions = self.pipeline.predict_proba(future_input).round(2) * 100
        
        indexes = ['Maximum Temperature ','Minimum Temperature','Specific humidity','Pressure','Total precipitation','Solar radiation','Wind speed']
        predict_df.index = indexes
        
        proba_df = pd.DataFrame(predictions.round(2), 
                                columns=["probability of no rain", "probability of rain"])
        proba_df["rec"] = proba_df['probability of rain'].apply(
            lambda x: "take care" if x >= 50 else "have a beautiful day"
        )
        return proba_df, predict_df
wf = WeatherForecaster("nasa_daily_weather_data.csv")

# Train Prophet models and save them
wf.train_and_save_prophet()

# Load Prophet models into memory
wf.gathering_models()

# Save ensemble pipeline
wf.save_pipeline()

# Make predictions
proba_df, weather_preds = wf.model()

print("\nWeather predictions:")
print(weather_preds)
print("\nRain probabilities:")
print(proba_df)

11:07:28 - cmdstanpy - INFO - Chain [1] start processing
11:07:30 - cmdstanpy - INFO - Chain [1] done processing


model for temp_max is saved


11:07:31 - cmdstanpy - INFO - Chain [1] start processing
11:07:34 - cmdstanpy - INFO - Chain [1] done processing


model for temp_min is saved


11:07:35 - cmdstanpy - INFO - Chain [1] start processing
11:07:37 - cmdstanpy - INFO - Chain [1] done processing


model for humidity_specific is saved


11:07:38 - cmdstanpy - INFO - Chain [1] start processing
11:07:41 - cmdstanpy - INFO - Chain [1] done processing


model for pressure is saved


11:07:42 - cmdstanpy - INFO - Chain [1] start processing
11:07:44 - cmdstanpy - INFO - Chain [1] done processing


model for precipitation_total is saved


11:07:45 - cmdstanpy - INFO - Chain [1] start processing
11:07:46 - cmdstanpy - INFO - Chain [1] done processing


model for solar_radiation is saved


11:07:48 - cmdstanpy - INFO - Chain [1] start processing
11:07:50 - cmdstanpy - INFO - Chain [1] done processing


model for wind_speed is saved

ðŸ“Š Model Evaluation on Test Set
Accuracy: 0.699308142629058
Precision: 0.45297805642633227
Recall: 0.5722772277227722
F1 Score: 0.5056867891513561
ROC AUC: 0.7250745816939772
Confusion Matrix:
 [[1025  349]
 [ 216  289]]


enter the date (YYYY-MM-DD):  2025-12-01


predictions for 2025-12-01 00:00:00


enter the wanted interval (for 1 day =1):  14



Weather predictions:
                      Predictions recomendations
Maximum Temperature     23.718100         normal
Minimum Temperature     12.121284         normal
Specific humidity        7.184982            dry
Pressure               100.193625               
Total precipitation      0.667219               
Solar radiation          3.630589           safe
Wind speed               2.353609           calm

Rain probabilities:
    probability of no rain  probability of rain                   rec
0                     95.0                  5.0  have a beautiful day
1                     97.0                  3.0  have a beautiful day
2                     97.0                  3.0  have a beautiful day
3                     92.0                  8.0  have a beautiful day
4                     78.0                 22.0  have a beautiful day
5                     64.0                 36.0  have a beautiful day
6                     65.0                 35.0  have a beautiful day
7    

In [12]:
weather_preds

Unnamed: 0,Predictions,recomendations
Maximum Temperature,23.7181,normal
Minimum Temperature,12.121284,normal
Specific humidity,7.184982,dry
Pressure,100.193625,
Total precipitation,0.667219,
Solar radiation,3.630589,safe
Wind speed,2.353609,calm


In [13]:
proba_df

Unnamed: 0,probability of no rain,probability of rain,rec
0,95.0,5.0,have a beautiful day
1,97.0,3.0,have a beautiful day
2,97.0,3.0,have a beautiful day
3,92.0,8.0,have a beautiful day
4,78.0,22.0,have a beautiful day
5,64.0,36.0,have a beautiful day
6,65.0,35.0,have a beautiful day
7,40.0,60.0,take care
8,50.0,50.0,take care
9,97.0,3.0,have a beautiful day
