<a href="https://colab.research.google.com/github/vishakha711/WeatherPredictionApp/blob/main/WeatherPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Weather Prediction ML model**

## Required Libraries

In [1]:
import datetime
import os
import zipfile

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

### Load Dataset

In [2]:
# Colab-only step: `google.colab` is not importable outside a Colab runtime.
# `uploaded` is keyed by uploaded file name (the next cell reads its keys).
from google.colab import files
uploaded = files.upload()


Saving archive (1).zip to archive (1).zip


In [3]:
# Locate the uploaded archive: the first (normally only) uploaded file name.
if not uploaded:
    raise FileNotFoundError("No file was uploaded; upload the dataset zip first.")
zip_file_name = next(iter(uploaded))

# Extract the archive into the current working directory.
with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall()

csv_file_path = 'seattle-weather.csv'

# Fail fast when the expected CSV is missing: every later cell needs `df`,
# so merely printing a message would only defer the failure to a NameError.
if not os.path.exists(csv_file_path):
    raise FileNotFoundError(f"Error: '{csv_file_path}' not found after extraction.")

df = pd.read_csv(csv_file_path)
display(df.head())

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [4]:
df.isna().sum()    # number of missing values in each column

Unnamed: 0,0
date,0
precipitation,0
temp_max,0
temp_min,0
wind,0
weather,0


### Feature Engineering

In [5]:
# Humidity proxy: 100 minus twice the daily temperature range, limited to [0, 100].
df['humidity'] = (100 - (df['temp_max'] - df['temp_min']) * 2).clip(lower=0, upper=100)

In [6]:
# Parse the date column into datetimes, then order the rows chronologically
df = df.assign(date=pd.to_datetime(df['date'])).sort_values(by='date')

In [7]:
# Base columns and prediction targets.
# 'day_of_year' was listed here but is never created in the frame, so it has
# been dropped; the model's real inputs are the lag columns selected in the
# train/test split cell, which reassigns `features`.
features = ['temp_max', 'temp_min', 'precipitation', 'wind', 'humidity']
target_numeric = ['temp_max', 'temp_min', 'humidity', 'wind']
target_weather = 'weather'

In [8]:
# ==========================
# Create Lag Features (past 14 days)
# ==========================
N_LAGS = 14  # how many previous days are exposed to the model as inputs

# For each lag k, add yesterday-minus-(k-1) values of every numeric target.
# Column creation order (lag-major, then temp_max/temp_min/humidity/wind)
# defines the feature order the scaler and model are fitted on.
for lag in range(1, N_LAGS + 1):
    df[f'temp_max_lag_{lag}'] = df['temp_max'].shift(lag)
    df[f'temp_min_lag_{lag}'] = df['temp_min'].shift(lag)
    df[f'humidity_lag_{lag}'] = df['humidity'].shift(lag)
    df[f'wind_lag_{lag}'] = df['wind'].shift(lag)

# The first N_LAGS rows have incomplete history (NaN lags); drop them.
df = df.dropna().reset_index(drop=True)

In [9]:
# ==========================
# Chronological train/test split
# ==========================
# The first 80% of rows (oldest) train the model; the remaining 20% test it.
train_size = int(len(df) * 0.8)
train, test = df.iloc[:train_size], df.iloc[train_size:]

# Inputs are every lag column; outputs are the four numeric weather variables.
features = list(filter(lambda name: 'lag' in name, df.columns))
targets = ['temp_max', 'temp_min', 'humidity', 'wind']

X_train, y_train = train[features], train[targets]
X_test, y_test = test[features], test[targets]

In [10]:
# ==========================
# Standardise inputs and targets
# ==========================
scaler_x, scaler_y = StandardScaler(), StandardScaler()

# Both scalers learn their statistics from the training split only.
X_train_scaled = scaler_x.fit(X_train).transform(X_train)
X_test_scaled = scaler_x.transform(X_test)
y_train_scaled = scaler_y.fit(y_train).transform(y_train)

### LINEAR REGRESSION (Numeric)

In [11]:
# ==========================
# Fit the multi-output linear regression
# ==========================
model = LinearRegression().fit(X_train_scaled, y_train_scaled)
model

In [12]:
# ==========================
#  Predict on the test split
# ==========================
# Predictions come out in standardised units ...
y_pred_scaled = model.predict(X_test_scaled)
# ... so map them back to the original units before scoring.
y_pred = scaler_y.inverse_transform(y_pred_scaled)

In [13]:
# Scores are aggregated over all four target columns.
print("\nModel Evaluation:")
print("R2 Score:", r2_score(y_true=y_test, y_pred=y_pred))
print("MAE:", mean_absolute_error(y_true=y_test, y_pred=y_pred))
print("MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred))


Model Evaluation:
R2 Score: 0.6075007246150387
MAE: 2.2043061981100007
MSE: 10.87965140089398


In [19]:
# Persist the fitted regressor and both scalers for the inference cell
joblib.dump(value=model, filename="seattle_model.pkl")
joblib.dump(value=scaler_x, filename="seattle_scaler_x.pkl")
joblib.dump(value=scaler_y, filename="seattle_scaler_y.pkl")

print("Model trained and saved successfully!")

Model trained and saved successfully!


In [23]:
# =============== CONFIG ===============
# Never commit credentials: the OpenWeather key is read from the
# OPENWEATHER_API_KEY environment variable (empty string when unset).
# The key that used to be hard-coded here is public and should be revoked.
API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")
CITY = "Seattle"
# =====================================

def fetch_real_time_weather(city, api_key, days=14):
    """Fetch one weather record per day for the last ``days`` days.

    Calls the OpenWeather One Call 2.5 "timemachine" endpoint once per day,
    oldest day first, for fixed Seattle coordinates.

    Args:
        city: City name. Kept for interface compatibility; the request uses
            the hard-coded Seattle coordinates, not this value.
        api_key: OpenWeather API key.
        days: Number of past days to fetch (default 14).

    Returns:
        pandas.DataFrame with one row per day. Columns ``date``, ``temp``,
        ``humidity`` and ``precip`` are unchanged; ``temp_max``, ``temp_min``
        and ``wind`` are added because the trained model needs them as inputs.

    Raises:
        ValueError: If ``api_key`` is empty.
        requests.HTTPError: If the API responds with an error status.
    """
    if not api_key:
        raise ValueError("An OpenWeather API key is required to fetch weather data.")

    url = "https://api.openweathermap.org/data/2.5/onecall/timemachine"
    lat, lon = 47.6062, -122.3321  # Seattle coordinates
    now = datetime.datetime.now()
    rows = []

    for i in range(days, 0, -1):
        date = now - datetime.timedelta(days=i)
        params = {
            "lat": lat,
            "lon": lon,
            "dt": int(date.timestamp()),
            "appid": api_key,
            "units": "metric",
        }
        # A timeout keeps a stalled connection from hanging the notebook;
        # raise_for_status surfaces API errors instead of a later KeyError.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        payload = response.json()

        current = payload['current']
        temp = current['temp']
        # Daily extremes come from the hourly readings when the response has
        # them; the current reading is included so the list is never empty.
        day_temps = [h['temp'] for h in payload.get('hourly', []) if 'temp' in h]
        day_temps.append(temp)

        rows.append({
            'date': date,
            'temp': temp,
            'humidity': current['humidity'],
            'precip': current.get('rain', {}).get('1h', 0),
            'temp_max': max(day_temps),
            'temp_min': min(day_temps),
            'wind': current['wind_speed'],
        })

    return pd.DataFrame(
        rows,
        columns=['date', 'temp', 'humidity', 'precip', 'temp_max', 'temp_min', 'wind'],
    )


def _build_lag_frame(history, base_cols, n_lags):
    """Build the one-row ``<col>_lag_<k>`` feature frame from daily history (newest last)."""
    row = {
        f"{col}_lag_{lag}": history[-lag][j]
        for lag in range(1, n_lags + 1)
        for j, col in enumerate(base_cols)
    }
    return pd.DataFrame([row])


def predict_weather(target_date=None):
    """Load the saved model, fetch recent weather and print a 7-day forecast.

    The model was trained on 14 lags of ``temp_max``, ``temp_min``,
    ``humidity`` and ``wind`` and predicts those same four values, so the
    inputs are built with the training column names and each prediction is
    fed back as the newest lag. The printed temperature is the mean of the
    predicted daily maximum and minimum.

    NOTE(review): training ``humidity`` is a proxy derived from the
    temperature range, whereas the API reports measured humidity — confirm
    this train/serve difference is acceptable.

    Args:
        target_date: ``None`` for the full 7-day listing, ``"tomorrow"``, or a
            ``YYYY-MM-DD`` date within the next 7 days.

    Returns:
        List of 7 forecast temperatures in °C (tomorrow first).

    Raises:
        ValueError: If fewer than 14 days of history could be fetched.
    """
    # Load model and scalers. joblib files are pickles: only load artifacts
    # produced by this notebook, never files from an untrusted source.
    model = joblib.load("seattle_model.pkl")
    scaler_x = joblib.load("seattle_scaler_x.pkl")
    scaler_y = joblib.load("seattle_scaler_y.pkl")

    n_lags = 14
    horizon = 7
    base_cols = ['temp_max', 'temp_min', 'humidity', 'wind']

    # Real-time data for the last 14 days (oldest first)
    df = fetch_real_time_weather(CITY, API_KEY, days=n_lags)
    if len(df) < n_lags:
        raise ValueError(f"Need {n_lags} days of history, got {len(df)}.")

    history = df[base_cols].tail(n_lags).to_numpy(dtype=float).tolist()

    # When the scalers were fitted on DataFrames they remember column names;
    # use them so input/target order always matches training.
    feature_order = getattr(scaler_x, "feature_names_in_", None)
    target_order = list(getattr(scaler_y, "feature_names_in_", base_cols))

    # The newest observation is about one day old, so the first step
    # estimates *today*; run one extra step and discard it so that
    # predictions[0] really is tomorrow.
    daily = []
    for _ in range(horizon + 1):
        lag_frame = _build_lag_frame(history, base_cols, n_lags)
        if feature_order is not None:
            lag_frame = lag_frame[list(feature_order)]
        scaled_pred = np.asarray(model.predict(scaler_x.transform(lag_frame)))
        values = scaler_y.inverse_transform(scaled_pred.reshape(1, -1))[0]
        step = dict(zip(target_order, (float(v) for v in values)))
        daily.append(step)
        # The prediction becomes the newest day, i.e. lag 1 of the next step.
        history.append([step[col] for col in base_cols])

    predictions = [(d['temp_max'] + d['temp_min']) / 2 for d in daily[1:]]

    # Weekday names for the next 7 days
    today = datetime.datetime.now()
    next_7_days = [today + datetime.timedelta(days=i) for i in range(1, horizon + 1)]
    day_names = [d.strftime("%A") for d in next_7_days]

    # =======================
    # Daily Alerts
    # =======================
    print("\n7-Day Forecast for Seattle:\n")
    alerts = []
    # Humidity and rain alerts use the most recent observation.
    recent_humidity = df['humidity'].iloc[-1]
    recent_precip = df['precip'].iloc[-1]
    for day_name, t in zip(day_names, predictions):
        alert = ""

        if t > 30:
            alert += "Heat Alert"
            alerts.append("heat")
        elif t < 5:
            alert += "Cold Alert"
            alerts.append("cold")

        if recent_humidity > 85:
            alert += " High Humidity"
            alerts.append("humidity")
        if recent_precip > 5:
            alert += " Heavy Rain"
            alerts.append("rain")

        print(f"{day_name}: {t:.2f}°C {alert if alert else ''}")

    # =======================
    # Smart Summary Alerts
    # =======================
    print("\nSummary Alert:")
    if "rain" in alerts:
        print("Heavy rainfall expected this week.")
    if "heat" in alerts:
        print("Possible heatwave conditions ahead.")
    if "cold" in alerts:
        print("Cold wave expected this week.")
    if "humidity" in alerts and "rain" not in alerts:
        print("High humidity detected, mild rain possible.")
    if not alerts:
        print("Weather looks normal this week — no major alerts!")

    # Optional single-date lookup
    if target_date:
        if target_date.lower() == "tomorrow":
            print(f"Tomorrow's predicted temp in Seattle: {predictions[0]:.2f}°C")
        else:
            try:
                target = datetime.datetime.strptime(target_date, "%Y-%m-%d").date()
            except ValueError:
                print("Invalid date format! Use YYYY-MM-DD.")
            else:
                # Compare calendar dates: subtracting "now" from midnight of
                # the target day would truncate tomorrow to a delta of 0.
                delta = (target - today.date()).days
                if 0 < delta <= horizon:
                    print(f" {target_date} prediction: {predictions[delta-1]:.2f}°C")
                else:
                    print("Enter a date within the next 7 days!")
    else:
        print("Next 7-day temperature forecast (Seattle):")
        for i, t in enumerate(predictions):
            print(f"Day {i+1}: {t:.2f}°C")

    return predictions