In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
import joblib


In [31]:
df = pd.read_csv('shuffled_weather_data.csv')
df.head()


Unnamed: 0,datetime,temp,humidity,windgust,windspeed,sealevelpressure,cloudcover,precip,conditions,city,will_rain,city_encoded,conditions_encoded
0,2024-04-30,30.5,37.6,42.1,16.6,1007.5,14.9,0.0,Clear,Mahabaleshwar,0,1,0
1,2024-05-10,30.1,72.3,35.3,21.1,1008.4,33.7,0.0,Partially cloudy,Mumbai,0,2,1
2,2024-04-30,32.7,51.7,36.0,20.5,1006.5,8.5,0.0,Clear,Bhayander,0,0,0
3,2024-05-18,31.4,80.1,31.7,20.4,1004.8,38.1,0.0,Partially cloudy,Bhayander,0,0,1
4,2024-06-04,27.2,77.5,37.8,15.1,1008.5,87.5,68.0,"Rain, Partially cloudy",Pune,1,3,4


In [32]:
features = ['temp', 'humidity', 'windgust', 'windspeed',
            'sealevelpressure', 'cloudcover', 'city_encoded', 'conditions_encoded']

X = df[features]
y = df['will_rain']


In [33]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)


In [34]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=1000)  # Instantiate the model
model.fit(X_train, y_train)                # Train the model


In [35]:
joblib.dump(model, 'logistic_model.pkl')


['logistic_model.pkl']

In [36]:
y_pred = model.predict(X_test)


In [37]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.989010989010989

Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99        48
           1       1.00      0.98      0.99        43

    accuracy                           0.99        91
   macro avg       0.99      0.99      0.99        91
weighted avg       0.99      0.99      0.99        91



In [38]:
model.predict([[25.0, 90, 15.0, 10.0, 1010, 80, 1, 1]])




array([0])

In [39]:
# Your input features (example for tomorrow at 6 AM in Bhayander)
input_data = [[25.0, 90, 15.0, 10.0, 1010, 80, 1, 1]]

# Get probability for 'will_rain = 1' (i.e., rain)
rain_probability = model.predict_proba(input_data)[0][1]

# Print nicely formatted
print(f"Probability of rain : {round(rain_probability * 100, 2)}%")


Probability of rain : 8.73%




In [40]:
input_data_1 = [27.62, 81, 10.97, 8.2, 1005, 100, 1, 1]
rain_probability_1 = model.predict_proba([input_data_1])[0][1]
print(f"Probability of rain : {round(rain_probability_1 * 100, 2)}%")

Probability of rain : 6.06%




In [None]:
import requests
import pandas as pd
import numpy as np
import joblib
from datetime import datetime
import pytz  # Required for IST timezone handling

# --- Load your trained logistic regression model ---
model = joblib.load("logistic_model.pkl")  # Update this path if needed

# --- Define city name and coordinates ---
city_name = "Bhayander"
lat = 19.3016
lon = 72.8512

# --- Encoders from training ---
city_encoder = {'Mumbai': 2, 'Thane': 4, 'Pune': 3, 'Bhayander': 1, 'Mahabaleshwar': 0}
condition_encoder = {'Clear': 0, 'Clouds': 1, 'Rain': 2, 'Mist': 3, 'Drizzle': 4}

# --- Features used in model ---
features = ['temp', 'humidity', 'windgust', 'windspeed', 'sealevelpressure', 'cloudcover', 'city_encoded', 'conditions_encoded']

# --- OpenWeather API Config ---
API_KEY = "Your API"
url = f"http://api.openweathermap.org/data/2.5/weather?lat={lat}&lon={lon}&appid={API_KEY}&units=metric"

# --- Fetch current weather data ---
response = requests.get(url)

if response.status_code == 200:
    data = response.json()

    # Extract features
    temp = data['main']['temp']
    humidity = data['main']['humidity']
    windspeed = data['wind']['speed']
    windgust = data['wind'].get('gust', windspeed)
    pressure = data['main']['pressure']
    cloudcover = data['clouds']['all']
    condition = data['weather'][0]['main']

    # Encode inputs
    city_encoded = city_encoder.get(city_name, -1)
    condition_encoded = condition_encoder.get(condition, -1)

    if city_encoded == -1 or condition_encoded == -1:
        print("⚠️ Unknown city or condition. Cannot predict.")
    else:
        # Prepare input for model
        input_data = pd.DataFrame([[temp, humidity, windgust, windspeed, pressure, cloudcover, city_encoded, condition_encoded]], columns=features)

        # Get IST time
        ist = pytz.timezone('Asia/Kolkata')
        current_time_ist = datetime.now(ist).strftime('%Y-%m-%d %H:%M')

        # Predict rain probability
        rain_probability = model.predict_proba(input_data)[0][1]
        rain_prediction = model.predict(input_data)[0]

        # Output
        print(f"\n📍 Location: {city_name} ({lat}, {lon}) @ {current_time_ist} IST")
        print(f"📦 Input to Model: {input_data.values.tolist()[0]}")
        print(f"📊 Probability of rain (Logistic Regression): {round(rain_probability * 100, 2)}%")
else:
    print(f"❌ API Request Failed: {response.status_code} - {response.text}")




📍 Location: Bhayander (19.3016, 72.8512) @ 2025-07-04 10:25 IST
📦 Input to Model: [28.33, 77.0, 10.71, 8.58, 1006.0, 86.0, 1.0, 1.0]
📊 Probability of rain (Logistic Regression): 3.75%
