In [6]:
pip install requests scikit-learn pandas numpy joblib

Note: you may need to restart the kernel to use updated packages.


In [7]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib

# Your OpenWeatherMap API Key
API_KEY = "1573876b7b9629f4ec85517706ad8177"

# Function to Fetch Past Weather Data
def get_historical_weather(city, start_date, end_date):
    url = f"http://api.openweathermap.org/data/2.5/onecall/timemachine?lat={city['lat']}&lon={city['lon']}&dt={int(start_date.timestamp())}&appid={API_KEY}&units=metric"
    
    response = requests.get(url)
    data = response.json()

    if "hourly" not in data:
        print(f"No data found for {city['name']} on {start_date}")
        return None

    weather_data = []
    for hourly_data in data["hourly"]:
        weather_data.append({
            "date": datetime.utcfromtimestamp(hourly_data["dt"]).date(),
            "temp": hourly_data["temp"],
            "humidity": hourly_data["humidity"],
            "wind_speed": hourly_data["wind_speed"]
        })

    return pd.DataFrame(weather_data)

# List of Cities with Latitude & Longitude
cities = {
    "Pune": {"lat": 18.5204, "lon": 73.8567},
    "Mumbai": {"lat": 19.0760, "lon": 72.8777},
    "Delhi": {"lat": 28.7041, "lon": 77.1025}
}

# Collect Weather Data for Multiple Cities (Past 7 Days)
weather_frames = []
for city, coords in cities.items():
    for days in range(7):
        date = datetime.utcnow() - timedelta(days=days+1)
        df = get_historical_weather(coords, date, date)
        if df is not None:
            df["city"] = city  # Add City Column
            weather_frames.append(df)

# Combine Data
if weather_frames:
    weather_df = pd.concat(weather_frames, ignore_index=True)
    print(weather_df.head())
else:
    print("No data fetched. Check API key and connectivity.")
    exit()

# Split Data into Features and Target
X = weather_df[['humidity', 'wind_speed']]
y = weather_df['temp']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate Model
predictions = model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
print(f"Model MAE: {mae:.2f}°C")  # Lower MAE = Better Model

# Save Model
joblib.dump(model, 'weather_model.pkl')
print("Model trained and saved as 'weather_model.pkl'")



KeyError: 'name'

In [None]:
H9G4MQB6VYXWHCWRSUWUZUCQN



In [1]:
import pandas as pd
import requests
from datetime import datetime, timedelta

# Function to fetch weather data
def fetch_weather(city):
    base_url = "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline"
    api_key = "H9G4MQB6VYXWHCWRSUWUZUCQN"  # Replace with your actual API key

    # Define the date range (yesterday to +2 days)
    start_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
    end_date = (datetime.now() + timedelta(days=2)).strftime("%Y-%m-%d")

    url = f"{base_url}/{city}/{start_date}/{end_date}?unitGroup=metric&include=days&key={api_key}&contentType=json"
    
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise an error for bad responses (4xx, 5xx)
        data = response.json()
        
        weather_data = []
        for entry in data.get("days", []):
            weather_data.append({
                "city": city,
                "date": entry["datetime"],
                "tempmax": entry.get("tempmax", "N/A"),
                "tempmin": entry.get("tempmin", "N/A"),
                "humidity": entry.get("humidity", "N/A"),
                "windspeed": entry.get("windspeed", "N/A")
            })
        return weather_data
    
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for {city}: {e}")
        return []

# Get user input for multiple cities
city_input = input("Enter city names separated by commas: ")
cities = [city.strip() for city in city_input.split(",")]

# Fetch data for all cities
all_weather_data = []
for city in cities:
    weather_info = fetch_weather(city)
    if weather_info:  # Only add data if it's successfully fetched
        all_weather_data.extend(weather_info)

# Convert to DataFrame and save as CSV
if all_weather_data:
    df = pd.DataFrame(all_weather_data)
    df.to_csv("weather_data.csv", index=False)
    print("Weather data saved to weather_data.csv")
    print(df)
else:
    print("No weather data was fetched. Check API key and city names.")


Enter city names separated by commas:  kolkata


Weather data saved to weather_data.csv
      city        date  tempmax  tempmin  humidity  windspeed
0  kolkata  2025-03-28     40.0     28.0      43.7       14.8
1  kolkata  2025-03-29     41.0     25.2      42.4       17.3
2  kolkata  2025-03-30     39.8     25.0      54.0       26.6
3  kolkata  2025-03-31     37.5     24.5      59.7       29.2


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import pickle

# Load dataset
df = pd.read_csv("weather_data.csv")

# Convert date to numerical features
df["date"] = pd.to_datetime(df["date"])
df["day"] = df["date"].dt.day
df["month"] = df["date"].dt.month
df["year"] = df["date"].dt.year

# Encode city names
df["city"] = df["city"].astype("category").cat.codes  # Convert cities to numbers

# Define features and target
X = df[["city", "day", "month", "year"]]
y = df[["tempmax", "tempmin", "humidity", "windspeed"]]  # Multi-output regression

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model
predictions = model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
print(f"Mean Absolute Error: {mae}")

# Save model
with open("weather_model.pkl", "wb") as file:
    pickle.dump(model, file)
print("Model saved successfully!")


Mean Absolute Error: 3.3302499999999924
Model saved successfully!


In [7]:
import os
print(os.path.exists("weather_model.pkl"))  # Should return True


True


In [8]:
import shutil

# Define source and destination paths
source_path = "weather_model.pkl"  # Current location
destination_path = "data_visualization_project/weather_model.pkl"  # New folder

# Move the file
shutil.move(source_path, destination_path)

print("File moved successfully!")


File moved successfully!


In [12]:
import os

# Define the destination folder
destination_folder = "data_visualization_project"

# Create the folder if it doesn't exist
if not os.path.exists(destination_folder):
    os.makedirs(destination_folder)
    print(f"✅ Created folder: {destination_folder}")
else:
    print(f"✅ Folder already exists: {destination_folder}")

print(destination_folder)


✅ Folder already exists: data_visualization_project
data_visualization_project


In [10]:
import shutil

# Define source and destination paths
source_path = "weather_model.pkl"  # Update if file is in another location
destination_path = os.path.join(destination_folder, "weather_model.pkl")

# Move the file
shutil.move(source_path, destination_path)

print("✅ File moved successfully!")


FileNotFoundError: [WinError 2] The system cannot find the file specified

In [11]:
print(os.listdir("data_visualization_project"))  # Should list 'weather_model.pkl'


['venv', 'weather_model.pkl']
